From 97438e63946dd5ce407f0b0a3daa53fd8f37c3b7 Mon Sep 17 00:00:00 2001 From: Christof Kaufmann <christof.kaufmann@hs-bochum.de> Date: Mon, 28 Apr 2025 09:13:43 +0000 Subject: [PATCH] Notebooks from applied-cs/data-science@caa29152 --- .../folien-code/folien-code.ipynb | 60 ++++++++++++++++++- .../folien-code/folien-code.py | 36 ++++++++++- .../solutions/folien-code/folien-code.ipynb | 60 ++++++++++++++++++- .../solutions/folien-code/folien-code.py | 36 ++++++++++- 4 files changed, 188 insertions(+), 4 deletions(-) diff --git a/04-pandas-und-seaborn/folien-code/folien-code.ipynb b/04-pandas-und-seaborn/folien-code/folien-code.ipynb index 27ac994..10a8ca8 100644 --- a/04-pandas-und-seaborn/folien-code/folien-code.ipynb +++ b/04-pandas-und-seaborn/folien-code/folien-code.ipynb @@ -19,6 +19,7 @@ "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", "from IPython.display import display\n", "\n" ] @@ -82,6 +83,64 @@ "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col = ['petal_length']\n", + "df.plot.hist(column=col)\n", + "df.plot.hist(column=col, range=(1, 7), bins=12)\n", + "df.plot.hist(column=col, range=(1, 7), bins=12, by='species', sharex=True)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.hist(layout=(1, 4))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = df.drop(columns='species')\n", + "y = df['species']\n", + "\n", + "fig, axs = plt.subplots(1, 4)\n", + "for c in y.unique():\n", + " axs = X[y == c].hist(ax=axs, alpha=0.5)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "sns.kdeplot(data=df, x='petal_length', fill=True)\n", + "\n", + "plt.figure()\n", + "sns.kdeplot(data=df, x='petal_length', hue='species', fill=True, alpha=0.5)\n", + "\n", + "fig, axs = plt.subplots(1, 4, figsize=(10, 3), constrained_layout=True)\n", + "for ax, col in zip(axs, df.columns[:-1]):\n", + " sns.kdeplot(data=df, x=col, hue='species', fill=True, alpha=0.5, ax=ax)\n", + " if col != 'petal_width':\n", + " ax.get_legend().set_visible(False)\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -110,7 +169,6 @@ "metadata": {}, "outputs": [], "source": [ - "import seaborn as sns\n", "sns.violinplot(hue='species', y='petal_length', data=df)\n", "\n" ] diff --git a/04-pandas-und-seaborn/folien-code/folien-code.py b/04-pandas-und-seaborn/folien-code/folien-code.py index d3229c9..d96a9c0 100644 --- a/04-pandas-und-seaborn/folien-code/folien-code.py +++ b/04-pandas-und-seaborn/folien-code/folien-code.py @@ -6,6 +6,7 @@ # %% import Pandas import pandas as pd import matplotlib.pyplot as plt +import seaborn as sns from IPython.display import display @@ -44,6 +45,40 @@ counts.plot.pie(startangle=60, autopct='{:.2f}%'.format) plt.ylabel('species') +# %% Histogramm mit einem Feature +col = ['petal_length'] +df.plot.hist(column=col) +df.plot.hist(column=col, range=(1, 7), bins=12) +df.plot.hist(column=col, range=(1, 7), bins=12, by='species', sharex=True) + + +# %% Histogramm mit allen Features +df.hist(layout=(1, 4)) + + +# %% Histogramm mit allen Features, gruppiert nach Spezies +X = df.drop(columns='species') +y = df['species'] + +fig, axs = plt.subplots(1, 4) +for c in y.unique(): + axs = X[y == c].hist(ax=axs, alpha=0.5) + + +# %% Kernel Density Estimation (KDE) +plt.figure() +sns.kdeplot(data=df, x='petal_length', fill=True) + +plt.figure() +sns.kdeplot(data=df, x='petal_length', hue='species', fill=True, alpha=0.5) + +fig, axs = plt.subplots(1, 4, figsize=(10, 3), constrained_layout=True) +for ax, col in zip(axs, df.columns[:-1]): + sns.kdeplot(data=df, x=col, hue='species', fill=True, alpha=0.5, ax=ax) + if col != 'petal_width': + ax.get_legend().set_visible(False) + + # %% Boxplot df.boxplot(column='petal_length', by='species') @@ -55,7 +90,6 @@ pd.plotting.boxplot(df, by='species', ax=axs) # übergebe axs fig.tight_layout() # %% Violinenplot -import seaborn as sns sns.violinplot(hue='species', y='petal_length', data=df) diff --git a/04-pandas-und-seaborn/solutions/folien-code/folien-code.ipynb b/04-pandas-und-seaborn/solutions/folien-code/folien-code.ipynb index 27ac994..10a8ca8 100644 --- a/04-pandas-und-seaborn/solutions/folien-code/folien-code.ipynb +++ b/04-pandas-und-seaborn/solutions/folien-code/folien-code.ipynb @@ -19,6 +19,7 @@ "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", "from IPython.display import display\n", "\n" ] @@ -82,6 +83,64 @@ "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col = ['petal_length']\n", + "df.plot.hist(column=col)\n", + "df.plot.hist(column=col, range=(1, 7), bins=12)\n", + "df.plot.hist(column=col, range=(1, 7), bins=12, by='species', sharex=True)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.hist(layout=(1, 4))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = df.drop(columns='species')\n", + "y = df['species']\n", + "\n", + "fig, axs = plt.subplots(1, 4)\n", + "for c in y.unique():\n", + " axs = X[y == c].hist(ax=axs, alpha=0.5)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "sns.kdeplot(data=df, x='petal_length', fill=True)\n", + "\n", + "plt.figure()\n", + "sns.kdeplot(data=df, x='petal_length', hue='species', fill=True, alpha=0.5)\n", + "\n", + "fig, axs = plt.subplots(1, 4, figsize=(10, 3), constrained_layout=True)\n", + "for ax, col in zip(axs, df.columns[:-1]):\n", + " sns.kdeplot(data=df, x=col, hue='species', fill=True, alpha=0.5, ax=ax)\n", + " if col != 'petal_width':\n", + " ax.get_legend().set_visible(False)\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -110,7 +169,6 @@ "metadata": {}, "outputs": [], "source": [ - "import seaborn as sns\n", "sns.violinplot(hue='species', y='petal_length', data=df)\n", "\n" ] diff --git a/04-pandas-und-seaborn/solutions/folien-code/folien-code.py b/04-pandas-und-seaborn/solutions/folien-code/folien-code.py index d3229c9..d96a9c0 100644 --- a/04-pandas-und-seaborn/solutions/folien-code/folien-code.py +++ b/04-pandas-und-seaborn/solutions/folien-code/folien-code.py @@ -6,6 +6,7 @@ # %% import Pandas import pandas as pd import matplotlib.pyplot as plt +import seaborn as sns from IPython.display import display @@ -44,6 +45,40 @@ counts.plot.pie(startangle=60, autopct='{:.2f}%'.format) plt.ylabel('species') +# %% Histogramm mit einem Feature +col = ['petal_length'] +df.plot.hist(column=col) +df.plot.hist(column=col, range=(1, 7), bins=12) +df.plot.hist(column=col, range=(1, 7), bins=12, by='species', sharex=True) + + +# %% Histogramm mit allen Features +df.hist(layout=(1, 4)) + + +# %% Histogramm mit allen Features, gruppiert nach Spezies +X = df.drop(columns='species') +y = df['species'] + +fig, axs = plt.subplots(1, 4) +for c in y.unique(): + axs = X[y == c].hist(ax=axs, alpha=0.5) + + +# %% Kernel Density Estimation (KDE) +plt.figure() +sns.kdeplot(data=df, x='petal_length', fill=True) + +plt.figure() +sns.kdeplot(data=df, x='petal_length', hue='species', fill=True, alpha=0.5) + +fig, axs = plt.subplots(1, 4, figsize=(10, 3), constrained_layout=True) +for ax, col in zip(axs, df.columns[:-1]): + sns.kdeplot(data=df, x=col, hue='species', fill=True, alpha=0.5, ax=ax) + if col != 'petal_width': + ax.get_legend().set_visible(False) + + # %% Boxplot df.boxplot(column='petal_length', by='species') @@ -55,7 +90,6 @@ pd.plotting.boxplot(df, by='species', ax=axs) # übergebe axs fig.tight_layout() # %% Violinenplot -import seaborn as sns sns.violinplot(hue='species', y='petal_length', data=df) -- GitLab