Select Git revision
SmartGridModel.hpp
folien-code.py 6.03 KiB
# %% [markdown]
# # Code zu Folien
#
# Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "Pandas & Seaborn" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
# %% import Pandas
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
# %% Iris Flower Dataset
# Download the iris table from the seaborn-data repository and convert the
# 'species' column to a pandas categorical in the same step.
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
df = pd.read_csv(url).astype({'species': 'category'})
# offline alternative:
# from sklearn.datasets import load_iris
# df = pd.concat(load_iris(return_X_y=True, as_frame=True), axis='columns')
# df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
# df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
# Last expression of the cell: rendered as the cell output in a notebook.
df
# %% Information
# Print the basic structural attributes of the frame one after another,
# followed by the combined summary that DataFrame.info() writes out.
for attribute in ('shape', 'columns', 'dtypes', 'index'):
    print(getattr(df, attribute))
df.info()
# %% Statistical overview
# First the default summary, then the summary of every column that is not
# numeric (here selected via exclude='number').
default_summary = df.describe()
non_numeric_summary = df.describe(exclude='number')
display(default_summary)
display(non_numeric_summary)
# %% Pie chart
# Count how often each species occurs and draw the shares as a pie chart.
counts = df['species'].value_counts()
display(counts)
# autopct receives a callable: each wedge's percentage is formatted with
# two decimals and a trailing percent sign.
pie_ax = counts.plot.pie(startangle=60, autopct='{:.2f}%'.format)
pie_ax.set_ylabel('species')
# %% Box plot
# One box per species for the petal length.
box_options = {'column': 'petal_length', 'by': 'species'}
df.boxplot(**box_options)
# %% Box plots of all features
# Draw one box plot per numeric column, grouped by species, into a 2x2 grid.
fig, axs = plt.subplots(2, 2, sharey=False)  # keep the y-axes independent
pd.plotting.boxplot(df, by='species', ax=axs)  # draw into the prepared axes
# Plain loop instead of a list comprehension: the calls are made only for
# their side effect, so no throw-away list of return values is built.
for ax in axs.ravel():
    ax.set_xlabel('')  # remove the x-labels
fig.tight_layout()
# %% Violin plot
import seaborn as sns
# Petal length on the y-axis, one violin colour per species.
violin_options = {'y': 'petal_length', 'hue': 'species'}
sns.violinplot(data=df, **violin_options)
# %% Scatter plots
# Petal length against petal width; the species column selects the colour
# from the viridis colormap, and points are drawn slightly transparent.
marker_style = {'c': 'species', 'colormap': 'viridis', 'alpha': 0.7}
df.plot.scatter(x='petal_length', y='petal_width', **marker_style)
# %% Pair plot
# Pairwise plots of the columns, coloured by species; alpha=0.5 is
# forwarded to the individual plots through plot_kws.
sns.pairplot(data=df, hue='species', plot_kws=dict(alpha=0.5))
# %% Parallel coordinates plot, unscaled
# Every row becomes one line across the feature axes; the line colour is
# chosen by the 'species' class column.
pd.plotting.parallel_coordinates(
    frame=df,
    class_column='species',
    colormap='viridis',
    alpha=0.5,
)
# %% Parallel coordinates plot, normalised
from sklearn.preprocessing import minmax_scale
# Work on a copy so that the original frame keeps its unscaled values.
df_scaled = df.copy()
# All columns except the class label are rescaled with minmax_scale.
num_cols = df.columns.drop('species')
scaled_values = minmax_scale(df[num_cols])
df_scaled[num_cols] = scaled_values
pd.plotting.parallel_coordinates(
    frame=df_scaled,
    class_column='species',
    colormap='viridis',
    alpha=0.5,
)
# %% Parallel coordinates plot, custom code from https://stackoverflow.com/a/60401570/2414411
# Draws every sample as a smooth Bezier curve across one vertical axis per
# numeric feature. Each feature keeps its own tick scale: the data of axes
# 1..n-1 is linearly mapped onto the value range of the first ("host") axis,
# and twin axes with the original limits are placed on top.
import numpy as np
from matplotlib.path import Path
import matplotlib.patches as patches

ys = df.drop(columns='species')
ynames = ys.columns
ys = ys.to_numpy()
n_axes = ys.shape[1]  # one vertical axis per numeric column
ymins = ys.min(axis=0)
ymaxs = ys.max(axis=0)
dys = ymaxs - ymins
ymins -= dys * 0.05  # add 5% padding below and above
ymaxs += dys * 0.05
# reverse axis 1 to have less crossings
# ymaxs[1], ymins[1] = ymins[1], ymaxs[1]
# dys = ymaxs - ymins

# transform all data to be compatible with the main axis
zs = np.zeros_like(ys)
zs[:, 0] = ys[:, 0]
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

fig, host = plt.subplots(figsize=(10, 4))
axes = [host] + [host.twinx() for _ in range(n_axes - 1)]
for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    if ax is not host:
        # Twin axes only contribute their right spine, moved to the
        # horizontal position of "their" feature.
        ax.spines['left'].set_visible(False)
        ax.yaxis.set_ticks_position('right')
        ax.spines["right"].set_position(("axes", i / (n_axes - 1)))

host.set_xlim(0, n_axes - 1)
host.set_xticks(range(n_axes))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis='x', which='major', pad=7)
host.spines['right'].set_visible(False)
host.xaxis.tick_top()
# host.set_title('Parallel Coordinates Plot — Iris', fontsize=18, pad=12)

# Class names are taken from the categorical's categories so that position k
# in target_names belongs to category code k. unique() returns values in
# order of appearance, which is not guaranteed to match the codes.
species_col = df['species']
target_names = list(species_col.cat.categories)
target = species_col.cat.codes.to_numpy()
# One evenly spaced viridis colour per class (for three classes these are
# the lookup-table entries 0, 128 and 255).
colors = plt.cm.viridis(np.linspace(0, 1, len(target_names)))
legend_handles = [None] * len(target_names)

# x-coordinates of the Bezier control points: three per axis interval,
# i.e. a step of 1/3 between 0 and n_axes - 1. They are identical for every
# sample, so they are computed once outside the loop.
curve_xs = np.linspace(0, n_axes - 1, n_axes * 3 - 2, endpoint=True)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(curve_xs) - 1)
for j in range(ys.shape[0]):
    # create bezier curves: repeating every y-value three times and dropping
    # the outermost copies makes the curve pass horizontally through each axis
    verts = list(zip(curve_xs, np.repeat(zs[j, :], 3)[1:-1]))
    path = Path(verts, codes)
    patch = patches.PathPatch(path, facecolor='none',
                              lw=2, alpha=0.5, edgecolor=colors[target[j]])
    legend_handles[target[j]] = patch
    host.add_patch(patch)

# Only classes that actually produced a curve get a legend entry.
legend_entries = [(handle, name)
                  for handle, name in zip(legend_handles, target_names)
                  if handle is not None]
host.legend([handle for handle, _ in legend_entries],
            [name for _, name in legend_entries],
            loc='lower center', bbox_to_anchor=(0.5, -0.18),
            ncol=len(target_names), fancybox=True, shadow=True)
# %% Parallel coordinates plot with Plotly Express
import plotly.express as px
# fig = px.parallel_coordinates(df, color="species", labels={'species': tuple('ABC')})
# The integer category codes of the species column drive the line colour.
species_codes = df["species"].cat.codes
fig = px.parallel_coordinates(df, color=species_codes)
# Relabel the last dimension of the trace as 'species'
# (presumably this is the axis Plotly adds for the colour values - verify).
last_dimension = fig.data[0]['dimensions'][-1]
last_dimension['label'] = 'species'
fig.show()
# %% Slicing
# Demonstrates label-based assignment with .loc on a copy, so that df itself
# stays unchanged. The assignments overlap, so their order matters.
cp = df.copy()
# single cell: row label 1, column 'sepal_width'
cp.loc[1, 'sepal_width'] = 1
# row labels 0 through 2 (label slices include the end label), one column
cp.loc[0:2, 'petal_length'] = 2
# one row, all columns from 'sepal_width' through 'petal_width'
cp.loc[0, 'sepal_width':'petal_width'] = 3
# open-ended slice: row label 1 and everything after it
cp.loc[1:, 'sepal_length'] = 4
# rows up to label 2, columns up to and including 'sepal_width'
cp.loc[:2, :'sepal_width'] = 5
# write the rows up to label 49 (all columns) to a CSV file
# (presumably these are the setosa samples - verify against the data)
cp.loc[:49, :].to_csv('iris-setosa.csv')
# last expression of the cell: shown as the cell output in a notebook
cp
# %% Complex indexing
# A list of row labels selects exactly those rows, in the order given.
row_labels = [0, 149, 2]
display(df.loc[row_labels, 'petal_width'])
# Lists on both axes select a sub-frame; it is reused by the cells below.
part = df.loc[row_labels, ['petal_width', 'sepal_width']]
part
# %% Integer location
# Position-based indexers, written as (row, column) tuples:
#   (1, -1)             -> part.iloc[1, -1]
#   (slice(None, 2), -1) -> part.iloc[:2, -1]
#   ([0, 1], [0, 1])     -> part.iloc[[0, 1], [0, 1]]
positional_indexers = [
    (1, -1),
    (slice(None, 2), -1),
    ([0, 1], [0, 1]),
]
for indexer in positional_indexers:
    display(part.iloc[indexer])
# %% Boolean indexing
# Two boolean masks built from column comparisons ...
pw = part['petal_width'] <= 1
sw = part['sepal_width'] < 3.5
# ... shown as they are, plus the negation of the second one ...
for mask in (pw, sw, ~sw):
    display(mask)
# ... and then combined with and / or / xor to select rows.
for selector in (pw & sw, pw | ~sw, pw ^ sw):
    display(part.loc[selector])
# %% Dropping data
# drop() returns a new frame each time; 'part' itself is not modified.
without_row_and_column = part.drop(index=149, columns='petal_width')
display(without_row_and_column)
without_two_rows = part.drop(index=[149, 0])
display(without_two_rows)
# %% Adding individual data
# Assigning to a row label that does not exist yet appends a new row.
new_row = [2, 6]
part.loc[3] = new_row
display(part)
# Assigning to a column label that does not exist yet appends a new column;
# one value per row is supplied.
weights = [1, 2, 3, 4]
part.loc[:, 'weight'] = weights
display(part)
# %% Combining DataFrames
# Two frames whose row and column labels overlap only partially.
a = part.drop(index=3)
b = df.loc[:2, ['petal_length', 'petal_width']]
for frame in (a, b):
    display(frame)
# Concatenate once side by side and once stacked on top of each other.
for axis in ('columns', 'index'):
    display(pd.concat((a, b), axis=axis))
# %% Categorical data
# A bare expression is only rendered when it is the last statement of a
# cell, so the column is passed to display() explicitly; otherwise it would
# never be shown because info() follows it.
display(df['species'])
df['species'].info()
# %% Statistical functions
# Split the frame into the feature columns (X) and the class label (y).
y = df['species']
X = df.drop(columns='species')
feature_means = X.mean()
display(feature_means)
class_counts = y.value_counts()
display(class_counts)
# %% Grouping
# Mean of every feature per species. y is categorical, so 'observed' is
# passed explicitly: this keeps the current behaviour (all categories are
# listed) and avoids the FutureWarning about the changing default.
species_means = X.groupby(y, observed=False).mean()
display(species_means)
# Subtract one reference value per feature column from the class means ...
diff = species_means - [6, 3, 2, 0.5]
# ... and sum the squared differences per species (row-wise).
(diff**2).sum(axis='columns')