From 2baf5d16e780c2704f3aa553fe3e678ee604ea20 Mon Sep 17 00:00:00 2001
From: Christof Kaufmann <christof.kaufmann@hs-bochum.de>
Date: Wed, 8 May 2024 06:45:48 +0000
Subject: [PATCH] Upload Code (Python Script)

---
 04-pandas-und-seaborn/folien-code.py | 221 +++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 04-pandas-und-seaborn/folien-code.py

diff --git a/04-pandas-und-seaborn/folien-code.py b/04-pandas-und-seaborn/folien-code.py
new file mode 100644
index 0000000..b119bfe
--- /dev/null
+++ b/04-pandas-und-seaborn/folien-code.py
@@ -0,0 +1,221 @@
+# %% [markdown]
+# # Code zu Folien
+#
+# Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "Pandas & Seaborn" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
+
+# %% import Pandas
+import pandas as pd
+import matplotlib.pyplot as plt
+from IPython.display import display
+
+
+# %% Iris Flower Dataset
+url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
+df = pd.read_csv(url)  # read over HTTP on every run; see the offline alternative below
+
+# offline alternative (scikit-learn's bundled copy, renamed to the column names used in this script):
+# from sklearn.datasets import load_iris
+# df = pd.concat(load_iris(return_X_y=True, as_frame=True), axis='columns')
+# df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
+# df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
+
+df['species'] = df['species'].astype('category')  # later cells rely on the .cat accessor of this column
+df
+
+
+# %% Informationen
+print(df.shape)    # (number of rows, number of columns)
+print(df.columns)  # column labels
+print(df.dtypes)   # dtype of each column
+print(df.index)    # row labels
+df.info()          # prints its summary itself, so no print()/display() is needed
+
+
+# %% Statistischer Überblick
+display(df.describe())                  # summary statistics of the numeric columns
+display(df.describe(exclude='number'))  # same call restricted to the non-numeric column(s)
+
+
+# %% Kuchendiagramm
+counts = df['species'].value_counts()  # number of rows per species
+display(counts)
+
+counts.plot.pie(startangle=60, autopct='{:.2f}%'.format)  # the bound str.format renders each share as 'xx.xx%'
+plt.ylabel('species')
+
+
+# %% Boxplot
+df.boxplot(column='petal_length', by='species')  # petal_length grouped by species
+
+
+# %% Boxplots aller Features
+fig, axs = plt.subplots(2, 2, sharey=False)    # independent y-axes for the 2x2 grid
+pd.plotting.boxplot(df, by='species', ax=axs)  # pass axs so pandas draws into this grid
+plt.setp(axs.ravel(), xlabel='')               # remove the x-labels of all subplots
+fig.tight_layout()
+
+# %% Violinenplot
+import seaborn as sns
+sns.violinplot(hue='species', y='petal_length', data=df)  # species is passed via hue only; no x variable is given
+
+
+# %% Scatterplots
+df.plot.scatter(x='petal_length', y='petal_width', c='species', colormap='viridis', alpha=0.7)  # c= colors by species
+
+
+# %% Pair Plot
+sns.pairplot(df, hue='species', plot_kws={'alpha': 0.5})  # alpha is forwarded to the plots via plot_kws
+
+
+# %% Parallele Koordinaten Plot, unskaliert
+pd.plotting.parallel_coordinates(df, 'species', colormap='viridis', alpha=.5)  # 'species' is the class column
+
+
+# %% Parallele Koordinaten Plot, normiert
+from sklearn.preprocessing import minmax_scale
+num_cols = df.columns.drop('species')             # every column except the class column
+df_scaled = df.copy()                             # df itself stays unscaled for the later cells
+df_scaled[num_cols] = minmax_scale(df[num_cols])  # min-max scale the feature columns
+pd.plotting.parallel_coordinates(df_scaled, 'species', colormap='viridis', alpha=.5)
+
+# %% Parallele Koordinaten Plot, custom Code from https://stackoverflow.com/a/60401570/2414411
+import numpy as np
+from matplotlib.path import Path
+import matplotlib.patches as patches
+
+ys = df.drop(columns='species')  # feature columns only
+ynames = ys.columns              # used as tick labels for the parallel axes
+ys = ys.to_numpy()               # from here on: one row per sample, one column per axis
+ymins = ys.min(axis=0)
+ymaxs = ys.max(axis=0)
+dys = ymaxs - ymins              # value range per axis
+ymins -= dys * 0.05  # add 5% padding below and above
+ymaxs += dys * 0.05
+
+# reverse axis 1 to have less crossings
+# ymaxs[1], ymins[1] = ymins[1], ymaxs[1]
+# dys = ymaxs - ymins
+
+# transform all data to be compatible with the main axis
+zs = np.zeros_like(ys)
+zs[:, 0] = ys[:, 0]  # column 0 is drawn on the host axis unchanged
+zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]  # map the other columns linearly onto the host axis
+
+fig, host = plt.subplots(figsize=(10, 4))
+
+axes = [host] + [host.twinx() for i in range(ys.shape[1] - 1)]  # one twin y-axis per additional column
+for i, ax in enumerate(axes):
+    ax.set_ylim(ymins[i], ymaxs[i])  # each axis is labelled in its own (padded) units
+    ax.spines['top'].set_visible(False)
+    ax.spines['bottom'].set_visible(False)
+    if ax != host:
+        ax.spines['left'].set_visible(False)
+        ax.yaxis.set_ticks_position('right')
+        ax.spines["right"].set_position(("axes", i / (ys.shape[1] - 1)))  # place spine i at its share of the width
+
+host.set_xlim(0, ys.shape[1] - 1)
+host.set_xticks(range(ys.shape[1]))  # one x tick per axis, labelled with the column name
+host.set_xticklabels(ynames, fontsize=14)
+host.tick_params(axis='x', which='major', pad=7)
+host.spines['right'].set_visible(False)
+host.xaxis.tick_top()
+# host.set_title('Parallel Coordinates Plot — Iris', fontsize=18, pad=12)
+
+colors = plt.cm.viridis([0, 128, 255])             # three RGBA rows, indexed by species code
+target_names = list(df['species'].cat.categories)  # category order, i.e. the order the codes refer to
+target = df['species'].cat.codes.to_numpy()        # positional lookup, independent of df's row labels
+legend_handles = [None for _ in target_names]      # keeps one (the last drawn) patch per species
+xs = np.linspace(0, ys.shape[1] - 1, ys.shape[1] * 3 - 2)  # 3 control points per axis gap, same for every row
+for j in range(ys.shape[0]):
+    # create bezier curves; repeating each value 3x keeps the curve flat where it crosses an axis
+    verts = list(zip(xs, np.repeat(zs[j, :], 3)[1:-1]))
+    codes = [Path.MOVETO] + [Path.CURVE4 for _ in range(len(verts) - 1)]
+    path = Path(verts, codes)
+    patch = patches.PathPatch(path, facecolor='none',
+                              lw=2, alpha=0.5, edgecolor=colors[target[j]])
+    legend_handles[target[j]] = patch
+    host.add_patch(patch)
+host.legend(legend_handles, target_names,
+            loc='lower center', bbox_to_anchor=(0.5, -0.18),
+            ncol=len(target_names), fancybox=True, shadow=True)
+
+# %% Parallele Koordinaten Plot mit Plotly Express
+import plotly.express as px
+# fig = px.parallel_coordinates(df, color="species", labels={'species': tuple('ABC')})
+fig = px.parallel_coordinates(df, color=df["species"].cat.codes)  # color by the integer category codes
+fig.data[0]['dimensions'][-1]['label'] = 'species'  # rename the last dimension (presumably the codes column)
+fig.show()
+
+# %% Slicing
+cp = df.copy()                              # work on a copy so df stays unchanged
+cp.loc[1, 'sepal_width'] = 1                # single cell
+cp.loc[0:2, 'petal_length'] = 2             # row label slice; .loc slices include the end label
+cp.loc[0, 'sepal_width':'petal_width'] = 3  # column label slice within one row
+cp.loc[1:, 'sepal_length'] = 4              # open-ended slice: row 1 to the end
+cp.loc[:2, :'sepal_width'] = 5              # slices on both axes at once
+cp.loc[:49, :].to_csv('iris-setosa.csv')    # writes rows 0..49 of the modified copy into the working directory
+cp
+
+# %% komplexe Indizierung
+display(df.loc[[0, 149, 2], 'petal_width'])  # list of row labels, single column
+
+part = df.loc[[0, 149, 2], ['petal_width', 'sepal_width']]  # small frame reused by the following cells
+part
+
+
+# %% integer location
+display(part.iloc[1, -1])            # by position: second row, last column
+display(part.iloc[:2, -1])           # first two rows of the last column
+display(part.iloc[[0, 1], [0, 1]])   # lists of positions on both axes
+
+
+# %% boolesche Indizierung
+pw = part.loc[:, 'petal_width'] <= 1   # boolean mask, one entry per row
+sw = part.loc[:, 'sepal_width'] < 3.5
+display(pw)
+display(sw)
+display(~sw)                 # ~ negates the mask
+display(part.loc[pw & sw])   # rows where both masks are True
+display(part.loc[pw | ~sw])  # rows where at least one of the two is True
+display(part.loc[pw ^ sw])   # rows where exactly one mask is True
+
+# %% Daten fallen lassen
+display(part.drop(index=149, columns='petal_width'))  # only the returned frame is displayed; part is not reassigned
+display(part.drop(index=[149, 0]))
+
+
+# %% einzelne Daten hinzufügen
+part.loc[3] = [2, 6]                  # assigning to the new row label 3 adds a row
+display(part)
+part.loc[:, 'weight'] = [1, 2, 3, 4]  # new column, one value per row
+display(part)
+
+
+# %% DataFrames zusammenführen
+a = part.drop(index=3)                            # part without the row added above
+b = df.loc[:2, ['petal_length', 'petal_width']]   # rows 0..2 of df
+display(a)
+display(b)
+display(pd.concat((a, b), axis='columns'))  # side by side, aligned on the row labels
+display(pd.concat((a, b), axis='index'))    # stacked, aligned on the column labels
+
+
+# %% Kategoriale Daten
+display(df['species'])  # display() is required: a bare expression that is not last in a cell shows nothing
+df['species'].info()
+
+
+# %% Statistische Funktionen
+X = df.drop(columns='species')  # feature columns
+y = df['species']               # class column
+
+display(X.mean())          # mean of each feature column
+display(y.value_counts())  # number of rows per species
+
+
+# %% Gruppierung
+species_means = X.groupby(y, observed=False).mean()  # explicit observed= pins today's result and avoids the FutureWarning
+display(species_means)
+
+diff = species_means - [6, 3, 2, 0.5]  # one value per feature column; presumably an example flower to compare against
+(diff**2).sum(axis='columns')          # squared Euclidean distance from that point to each species mean
-- 
GitLab