Skip to content
Snippets Groups Projects
Commit aef86389 authored by Christof Kaufmann's avatar Christof Kaufmann
Browse files

Upload Code für Folien

parent eafb749e
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Code zu Folien
Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "Pandas & Seaborn" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
%% Cell type:code id: tags:
```
# %% import Pandas
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
```
%% Cell type:code id: tags:
```
# %% Iris flower dataset
# Load the Iris table as CSV from the seaborn-data repository (needs network access).
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
df = pd.read_csv(url)
# Offline alternative: build the same frame from scikit-learn's bundled data set.
# from sklearn.datasets import load_iris
# df = pd.concat(load_iris(return_X_y=True, as_frame=True), axis='columns')
# df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
# df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
# Store the class label as a categorical column; later cells rely on its `.cat` accessor.
df['species'] = df['species'].astype('category')
# Last expression of the cell: a notebook renders it as the cell output.
df
```
%% Cell type:code id: tags:
```
# %% Basic information
# Print shape, column labels, per-column dtypes and the row index, one after another.
for attribute in (df.shape, df.columns, df.dtypes, df.index):
    print(attribute)
# info() prints its own summary.
df.info()
```
%% Cell type:code id: tags:
```
# %% Statistical overview
# First summary uses describe()'s defaults, the second one excludes all numeric columns.
numeric_summary = df.describe()
non_numeric_summary = df.describe(exclude='number')
display(numeric_summary)
display(non_numeric_summary)
```
%% Cell type:code id: tags:
```
# %% Pie chart
# Number of rows per species, shown as a table and as a pie chart.
counts = df['species'].value_counts()
display(counts)
# autopct receives each wedge's percentage and formats it with two decimals.
pie_ax = counts.plot.pie(startangle=60, autopct='{:.2f}%'.format)
pie_ax.set_ylabel('species')
```
%% Cell type:code id: tags:
```
# %% Box plot
# Box plot of the petal_length column, grouped by the species column.
df.boxplot(column='petal_length', by='species')
```
%% Cell type:code id: tags:
```
# %% Box plots of all features
# 2x2 grid of axes; sharey=False keeps the y-axes independent of each other.
fig, axs = plt.subplots(2, 2, sharey=False)
# Hand the whole axes grid to pandas so the box plots are drawn into it.
pd.plotting.boxplot(df, by='species', ax=axs)
# Remove the x-labels. A plain loop is used because only the side effect
# matters; a list comprehension would build a list that is thrown away.
for ax in axs.ravel():
    ax.set_xlabel('')
fig.tight_layout()
```
%% Cell type:code id: tags:
```
# %% Violin plot
import seaborn as sns
# Violin plot of petal_length; species is passed as `hue` only (no x variable is given).
sns.violinplot(hue='species', y='petal_length', data=df)
```
%% Cell type:code id: tags:
```
# %% Scatter plot
# petal_length on x, petal_width on y; the species column is passed as the
# colour variable `c`, with the viridis colormap and alpha=0.7.
df.plot.scatter(x='petal_length', y='petal_width', c='species', colormap='viridis', alpha=0.7)
```
%% Cell type:code id: tags:
```
# %% Pair plot
# Pair plot of df coloured by species; alpha=0.5 is passed on through plot_kws.
# Uses `sns`, which is imported in the violin-plot cell.
sns.pairplot(df, hue='species', plot_kws={'alpha': 0.5})
```
%% Cell type:code id: tags:
```
# %% Parallel coordinates plot, unscaled
# 'species' is the class column that determines the line colour; the feature
# values are passed as they are, without any rescaling.
pd.plotting.parallel_coordinates(df, 'species', colormap='viridis', alpha=.5)
```
%% Cell type:code id: tags:
```
# %% Parallel coordinates plot, normalised
from sklearn.preprocessing import minmax_scale
# All columns except the class label get rescaled.
num_cols = df.columns.drop('species')
scaled_values = minmax_scale(df[num_cols])
# Write the rescaled values into a copy so that df keeps the raw measurements.
df_scaled = df.copy()
df_scaled[num_cols] = scaled_values
pd.plotting.parallel_coordinates(df_scaled, 'species', colormap='viridis', alpha=.5)
```
%% Cell type:code id: tags:
```
# %% Parallel coordinates plot, custom code from https://stackoverflow.com/a/60401570/2414411
# Draws one smooth curve per row of df across one vertical axis per feature.
# Every feature gets its own y-scale: the first feature uses the host axes,
# the others use twin axes whose right spine is moved to the feature's x position.
import numpy as np
from matplotlib.path import Path
import matplotlib.patches as patches

# Feature matrix: one row per sample, one column per plotted axis.
ys = df.drop(columns='species')
ynames = ys.columns
ys = ys.to_numpy()
n_axes = ys.shape[1]

ymins = ys.min(axis=0)
ymaxs = ys.max(axis=0)
dys = ymaxs - ymins
ymins -= dys * 0.05  # add 5% padding below and above
ymaxs += dys * 0.05
# reverse axis 1 to have less crossings
# ymaxs[1], ymins[1] = ymins[1], ymaxs[1]
# dys = ymaxs - ymins

# transform all data to be compatible with the main axis
zs = np.zeros_like(ys)
zs[:, 0] = ys[:, 0]
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

fig, host = plt.subplots(figsize=(10, 4))
axes = [host] + [host.twinx() for _ in range(n_axes - 1)]
for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    if ax is not host:
        ax.spines['left'].set_visible(False)
        ax.yaxis.set_ticks_position('right')
        # place this twin's scale at x position i of the host axes
        ax.spines["right"].set_position(("axes", i / (n_axes - 1)))

host.set_xlim(0, n_axes - 1)
host.set_xticks(range(n_axes))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis='x', which='major', pad=7)
host.spines['right'].set_visible(False)
host.xaxis.tick_top()
# host.set_title('Parallel Coordinates Plot — Iris', fontsize=18, pad=12)

# Legend labels must be in category order, because colours and legend handles
# are indexed by the category codes; unique() would give order of appearance.
target_names = df['species'].cat.categories
# Plain array, so that indexing is positional whatever the index of df is.
target = df['species'].cat.codes.to_numpy()
# One evenly spaced viridis colour per class (0, 128, 255 for three classes).
colors = plt.cm.viridis(np.linspace(0, 1, len(target_names)))
legend_handles = [None for _ in target_names]

# x-coordinates of the Bezier control points: one on every axis and two in
# between neighbouring axes. They depend on the number of axes only, so they
# are computed once instead of once per sample.
curve_xs = np.linspace(0, n_axes - 1, n_axes * 3 - 2, endpoint=True)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(curve_xs) - 1)
for z_row, code in zip(zs, target):
    # create bezier curves: every y value is repeated three times, except the
    # first and the last one, which are used twice
    verts = list(zip(curve_xs, np.repeat(z_row, 3)[1:-1]))
    path = Path(verts, codes)
    patch = patches.PathPatch(path, facecolor='none',
                              lw=2, alpha=0.5, edgecolor=colors[code])
    legend_handles[code] = patch  # the last curve of each class serves as legend handle
    host.add_patch(patch)
host.legend(legend_handles, target_names,
            loc='lower center', bbox_to_anchor=(0.5, -0.18),
            ncol=len(target_names), fancybox=True, shadow=True)
```
%% Cell type:code id: tags:
```
# %% Parallel coordinates plot with Plotly Express
import plotly.express as px
# fig = px.parallel_coordinates(df, color="species", labels={'species': tuple('ABC')})
# Colour the lines by the integer category codes of the species column.
fig = px.parallel_coordinates(df, color=df["species"].cat.codes)
# Relabel the last dimension of the trace as 'species'.
# NOTE(review): presumably the last dimension is the colour-code column that
# Plotly Express adds for the `color` argument — confirm.
fig.data[0]['dimensions'][-1]['label'] = 'species'
fig.show()
```
%% Cell type:code id: tags:
```
# %% Slicing
# Work on a copy so that df itself stays unchanged. The assignments overlap,
# so their order determines the final values.
cp = df.copy()
cp.loc[1, 'sepal_width'] = 1  # single cell: row label 1
cp.loc[0:2, 'petal_length'] = 2  # row labels 0 to 2; .loc label slices include the end label
cp.loc[0, 'sepal_width':'petal_width'] = 3  # row 0, columns from sepal_width to petal_width
cp.loc[1:, 'sepal_length'] = 4  # from row label 1 to the last row
cp.loc[:2, :'sepal_width'] = 5  # rows up to label 2, columns up to sepal_width
# Write the rows up to label 49 to a CSV file in the working directory.
cp.loc[:49, :].to_csv('iris-setosa.csv')
cp
```
%% Cell type:code id: tags:
```
# %% Complex indexing
# Row labels are given as a list, in exactly the order they should be returned.
rows = [0, 149, 2]
display(df.loc[rows, 'petal_width'])
# A list of column labels yields a DataFrame; `part` is reused by the following cells.
part = df.loc[rows, ['petal_width', 'sepal_width']]
part
```
%% Cell type:code id: tags:
```
# %% Integer location
# iloc selects by position instead of by label.
single_value = part.iloc[1, -1]               # second row, last column
last_column_head = part.iloc[:2, -1]          # first two rows, last column
top_left_block = part.iloc[[0, 1], [0, 1]]    # first two rows and first two columns
for selection in (single_value, last_column_head, top_left_block):
    display(selection)
```
%% Cell type:code id: tags:
```
# %% Boolean indexing
# Two boolean masks over the rows of `part`.
pw = part['petal_width'].le(1)
sw = part['sepal_width'].lt(3.5)
# Show the masks themselves, including the negation of the second one ...
for mask in (pw, sw, ~sw):
    display(mask)
# ... and then the rows selected by their and / or / exclusive-or combinations.
for mask in (pw & sw, pw | ~sw, pw ^ sw):
    display(part.loc[mask])
```
%% Cell type:code id: tags:
```
# %% Dropping data
# drop() returns new frames; `part` itself is not modified.
without_row_and_column = part.drop(index=149, columns='petal_width')
without_two_rows = part.drop(index=[149, 0])
display(without_row_and_column)
display(without_two_rows)
```
%% Cell type:code id: tags:
```
# %% Adding individual data
# Assigning to the row label 3, which `part` does not contain yet, appends a new row.
part.loc[3] = [2, 6]
display(part)
# Assigning to the not yet existing column 'weight' adds a column; the list
# supplies one value per row (three selected rows plus the one appended above).
part.loc[:, 'weight'] = [1, 2, 3, 4]
display(part)
```
%% Cell type:code id: tags:
```
# %% Combining DataFrames
# Two small frames whose row and column labels overlap only partly.
a = part.drop(index=3)
b = df.loc[:2, ['petal_length', 'petal_width']]
for frame in (a, b):
    display(frame)
# Concatenate them side by side first, then on top of each other.
for axis in ('columns', 'index'):
    display(pd.concat((a, b), axis=axis))
```
%% Cell type:code id: tags:
```
# %% Categorical data
# Only the last expression of a cell is rendered automatically, so a bare
# `df['species']` in the middle of the cell would show nothing. Display it
# explicitly instead.
display(df['species'])
# info() prints its summary of the categorical column.
df['species'].info()
```
%% Cell type:code id: tags:
```
# %% Statistical functions
# Split the table into the class label (y) and the remaining feature columns (X);
# both names are reused by the grouping cell.
y = df['species']
X = df.drop(columns='species')
feature_means = X.mean()
class_counts = y.value_counts()
display(feature_means)
display(class_counts)
```
%% Cell type:code id: tags:
```
# %% Grouping
# Mean of every feature per species. `observed` is passed explicitly because
# grouping by a categorical without it relies on a deprecated default; every
# category occurs in the data here, so the result is unchanged.
species_means = X.groupby(y, observed=True).mean()
display(species_means)
# Subtract a reference point column by column and sum the squared differences
# per row, i.e. the squared Euclidean distance of each species mean to that point.
diff = species_means - [6, 3, 2, 0.5]
(diff**2).sum(axis='columns')
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment