Notebooks from applied-cs/data-science@8820effc

96d4d671 · Christof Kaufmann · 97438e63 · 96d4d671 · 96d4d671 · 96d4d671
Commit 96d4d671 authored 3 months ago by Christof Kaufmann
--- a/03-numpy-und-matplotlib/folien-code/folien-code.ipynb
+++ b/03-numpy-und-matplotlib/folien-code/folien-code.ipynb
@@ -339,8 +339,8 @@
    "from numpy.polynomial import Polynomial\n",
    "\n",
    "x = np.arange(10)\n",
-    "y = 2 * x\n",
+    "y = 2 * x + 1\n",
-    "# y = x ** 2\n",
+    "# y = x ** 2 + 1\n",
    "\n",
    "p = Polynomial.fit(x, y, deg=1).convert()\n",
    "\n",
@@ -361,7 +361,7 @@
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "x = np.arange(10).reshape(-1, 1)\n",
-    "y = x ** 2\n",
+    "y = x ** 2 + 1\n",
    "\n",
    "linreg = LinearRegression()\n",
    "linreg.fit(x, y)\n",
@@ -379,7 +379,7 @@
   "source": [
    "\n",
    "x = np.arange(10).reshape(-1, 1)\n",
-    "y = x ** 2\n",
+    "y = x ** 2 + 1\n",
    "\n",
    "x_poly = np.hstack((x, x**2)) # shape: 10 x 2\n",
    "\n",

 %% Cell type:markdown id: tags:
 # Code zu Folien
 Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "NumPy & Matplotlib" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
 %% Cell type:code id: tags:
 ``` 
 import numpy as np
 v = np.array([1, 3, 5])
 print(v)
 print(v.shape, v.dtype)
 a = np.array([[1, 2, 3], [4, 5, 6]])
 print(a)
 print(a.shape, a.dtype)
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[[1]], [[3]], [[5]]])
 print(a.shape)
 a = np.array([[[1], [3], [5]]])
 print(a.shape)
 a = np.array([[[1, 3, 5]]])
 print(a.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.arange(6)                      # shape: (6,) Vektor
 a = v.reshape(-1, 3)                  # shape: (2, 3) Matrix
 b = v.reshape(1, -1)                  # shape: (1, 6) Matrix
 # x = v.reshape(4, -1)                  # shape: (4, ?) Matrix (passt nicht)
 c = a[np.newaxis, :, :]              # shape: (1, 2, 3) 3D
 d = a[np.newaxis, ...]               # shape: (1, 2, 3) 3D
 e = a[np.newaxis, :, np.newaxis, :]  # shape: (1, 2, 1, 3) 4D
 f = e.squeeze()                      # shape: (2, 3) Matrix. Was ergäbe e.ravel()?
 print(v.shape)
 print(a.shape)
 print(b.shape)
 print(c.shape)
 print(d.shape)
 print(e.shape)
 print(f.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()  # random number generator
 np.zeros((1, 5), dtype=bool)          # 1 x 5-Matrix mit False
 np.zeros_like(a)                      # Array mit 0en mit Shape und Typ von a
 np.ones((1, 3))                       # 1 x 3-Matrix mit 1en
 rng.uniform(size=(2, 4))              # 2 x 4-Matrix mit uniformen Zufallszahlen aus [0, 1)
 rng.integers(0, 10, (2, 4))           # 2 x 4-Matrix mit Zufallszahlen von 0 bis 9
 rng.choice(10, size=4, replace=False) # Vektor mit 4 eindeutigen Elem. aus {0, ..., 9}
 rng.permutation(10)                   # {0, ..., 9}, aber in zufälliger Reihenfolge
 np.arange(-1, 0.05, 0.1)              # Vektor von -1 bis 0 mit Schrittweite 0.1
 np.linspace(-1, 1, 9)                 # Vektor mit 9 Werten lin. zwischen -1 und 1
 np.logspace(-1, 1, 9)                 # Vektor mit 9 Werten log. zwischen 10^-1 und 10^1
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 v = np.array([10, 20, 30])
 b = np.ones((3, 3))
 print('np.sin(a):')
 print(np.sin(a))
 print('np.mean(a), np.std(a), np.max(a), np.min(a):')
 print(np.mean(a), np.std(a), np.max(a), np.min(a))
 print('np.sum(a), np.sum(a, axis=0), np.sum(a, axis=1):')
 print(np.sum(a), np.sum(a, axis=0), np.sum(a, axis=1))
 print('a**2:')
 print(a**2)
 print('a.T:')
 print(a.T)
 print('a @ v:')
 print(a @ v)
 print('a @ b:')
 print(a @ b)
 print('a.reshape(6):')
 print(a.reshape(6))
 print('np.delete(a, 1, axis=0):')
 print(np.delete(a, 1, axis=0))
 print('np.sort(a):')
 print(np.sort(a))
 print('np.argsort(a):')
 print(np.argsort(a))
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.array([0, 1, 2, 3, 4, 5, 6])
 print(v[1:5])   # Elemente 1 (inkl.) bis 5 (exkl.)
 print(v[1:5:2]) # Elemente 1 bis 5 mit Schrittweite 2
 print(v[::2])   # Alle Elemente mit Schrittweite 2
 print(v[-3:])   # Die 3 letzten Elemente
 print(v[::-1])  # Reihenfolge umkehren
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1,2,3], [4,5,6]])
 print(a[1, 2])    # Element a_{1,2}
 print(a[1])       # Zeile 1
 print(a[1, 1:])   # Zeile 1, ab Spalte 1
 print(a[:, -1])   # letzte Spalte
 print(a[0, ::-1]) # Zeile 0, Spalten umkehren
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 b = a[0, ::-1]    # a und b zeigen auf die gleichen Daten.
 b[0] = 0          # Eine Änderung in b ...
 print(a)          # ... wirkt sich auch auf a aus.
 c = a.copy()
 print(np.shares_memory(a, b))
 print(np.shares_memory(a, c))
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.array([4, 8, 15, 16, 23, 42])
 print(v[[1, 4, 2]])  # Die Elemente bei 1, 4 und 2 als neues np-Array
 print(v[np.array([[1, 4],
                  [1, 2]])])
 ```
 %% Cell type:code id: tags:
 ``` 
 A = np.array([[1,  2,  3,  4],
              [5,  6,  7,  8],
              [9, 10, 11, 12]])
 # Die Indizes werden paarweise genommen: [1, 1] und [2, 3]
 print(A[[1, 2],
        [1, 3]])
 # Zeilen 1 und 2, Spalten 1 und 3
 print(A[np.ix_([1, 2], [1, 3])])
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 i = a > 4
 print(i)
 print(a[i])
 a[~i] += 10   # ~ invertiert die Boolesche Matrix
 print(a)
 ```
 %% Cell type:code id: tags:
 ``` 
 idx2d = np.nonzero(i)      # Trues bei Zeile 1, Spalte 1 und bei Zeile 1, Spalte 2
 print(idx2d)
 print(a[idx2d])
 idx1d = np.flatnonzero(i)  # Trues bei Einzelindex 4 und bei Einzelindex 5
 print(idx1d)
 print(a.flat[idx1d])
 ```
 %% Cell type:code id: tags:
 ``` 
 data = np.random.default_rng().random(100)  # uniform verteilt in [0, 1)
 print(np.sum(data > 0.5))
 print(np.count_nonzero(data > 0.5))
 print(len(data[data > 0.5]))
 print(np.nonzero(data > 0.5)[0].size)  # [0] weil mit Tupel umhüllt
 print(np.flatnonzero(data > 0.5).size) # size ist Produkt über #*shape*
 ```
 %% Cell type:code id: tags:
 ``` 
 A = np.array([[0, 1],
              [2, 3],
              [4, 5]])
 B = np.array([[5],
              [7],
              [9]])
 C = np.array([[8, 7],
              [9, 6]])
 V = np.vstack((A, C))
 print(V)
 H = np.hstack((A, B, B))
 print(H)
 ```
 %% Cell type:code id: tags:
 ``` 
 data = np.loadtxt('autos.csv', delimiter=',', skiprows=1, usecols=range(2,9))
 print(data)
 print(data.shape)
 # usecols als Zahlen oder Namen (z. B. 'Hubraum') möglich!
 sa = np.genfromtxt('autos.csv', delimiter=',', names=True, dtype=float)
 data = sa.view((float, len(sa.dtype.names)))
 print(data)
 print(data.shape)
 structdata = np.genfromtxt('autos.csv', delimiter=',', names=True, dtype=None, encoding='utf-8')
 print(structdata)
 print(structdata.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 x = np.arange(0, 10, 0.2)
 y1 = np.sin(x); y2 = np.cos(x)
 import matplotlib.pyplot as plt
 fig, ax = plt.subplots()    # neues Fenster mit Achsen
 ax.plot(x, y1, label='Sin') # sin Plot hinzufügen
 ax.plot(x, y2, label='Cos') # cos Plot hinzufügen
 ax.set_title('Simple Plot')
 ax.set_xlabel('x-Achse')
 ax.set_ylabel('y-Achse')
 ax.grid()
 plt.legend()
 fig.savefig('plot.pdf', bbox_inches='tight') # ggf. Plot speichern (pdf, png, ...)
 plt.show()                                   # anzeigen (bei non-interactive)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()
 x = rng.random(100)
 y = 5 * x**2 + rng.random(100)
 plt.figure()   # neues Fenster (optional)
 plt.scatter(x, y, s=100*y, c=x, alpha=0.4)
 fig = plt.gcf()
 ax = plt.gca()
 ```
 %% Cell type:code id: tags:
 ``` 
 from numpy.polynomial import Polynomial
 x = np.arange(10)
-y = 2 * x
+y = 2 * x + 1
-# y = x ** 2
+# y = x ** 2 + 1
 p = Polynomial.fit(x, y, deg=1).convert()
 print(p(1))
 print(p(np.array([1., 3., 5.])))
 print(p.coef)      # Koeffizienten
 plt.plot(x, p(x))   # Plot oben
 plt.scatter(x, y, color='r')
 ```
 %% Cell type:code id: tags:
 ``` 
 from sklearn.linear_model import LinearRegression
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 linreg = LinearRegression()
 linreg.fit(x, y)
 print(linreg.predict([[1]]))
 print(linreg.predict([[1], [3], [5]]))
 print(linreg.intercept_, linreg.coef_)
 ```
 %% Cell type:code id: tags:
 ``` 
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 x_poly = np.hstack((x, x**2)) # shape: 10 x 2
 linreg = LinearRegression()
 linreg.fit(x_poly, y)
 print(linreg.predict([[3, 9]]))
 print(linreg.intercept_, linreg.coef_)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()
 x = rng.uniform(size=(100, 1)) * 3 - 0.5
 y = np.round(x % 1) - 0.5
 def fourier_basis(x, n=4):
     """Build sine features of the first ``n`` odd harmonics of ``x``.

     For every odd k in 1, 3, ..., 2*n - 1 the term sin(2*pi*k*x) is
     evaluated and the results are stacked horizontally with ``np.hstack``
     in ascending order of k.
     """
     odd_harmonics = range(1, 2 * n, 2)
     sine_terms = [np.sin(2 * np.pi * order * x) for order in odd_harmonics]
     return np.hstack(sine_terms)
 x_f = fourier_basis(x, 5)
 linreg = LinearRegression()
 linreg.fit(x_f, y)
 x_plt = np.linspace(0, 4, 501).reshape(-1, 1)
 x_plt_f = fourier_basis(x_plt, 5)
 y_plt = linreg.predict(x_plt_f)
 plt.plot(x_plt, y_plt)
 plt.scatter(x, y, color='r')
 ```

--- a/03-numpy-und-matplotlib/folien-code/folien-code.py
+++ b/03-numpy-und-matplotlib/folien-code/folien-code.py
@@ -224,8 +224,8 @@ ax = plt.gca()
 from numpy.polynomial import Polynomial
 x = np.arange(10)
-y = 2 * x
+y = 2 * x + 1
-# y = x ** 2
+# y = x ** 2 + 1
 p = Polynomial.fit(x, y, deg=1).convert()
@@ -240,7 +240,7 @@ plt.scatter(x, y, color='r')
 from sklearn.linear_model import LinearRegression
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 linreg = LinearRegression()
 linreg.fit(x, y)
@@ -252,7 +252,7 @@ print(linreg.intercept_, linreg.coef_)
 # %% Lineare Regression mit Scikit-Learn, Grad 2
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 x_poly = np.hstack((x, x**2)) # shape: 10 x 2

--- a/03-numpy-und-matplotlib/solutions/folien-code/folien-code.ipynb
+++ b/03-numpy-und-matplotlib/solutions/folien-code/folien-code.ipynb
@@ -339,8 +339,8 @@
    "from numpy.polynomial import Polynomial\n",
    "\n",
    "x = np.arange(10)\n",
-    "y = 2 * x\n",
+    "y = 2 * x + 1\n",
-    "# y = x ** 2\n",
+    "# y = x ** 2 + 1\n",
    "\n",
    "p = Polynomial.fit(x, y, deg=1).convert()\n",
    "\n",
@@ -361,7 +361,7 @@
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "x = np.arange(10).reshape(-1, 1)\n",
-    "y = x ** 2\n",
+    "y = x ** 2 + 1\n",
    "\n",
    "linreg = LinearRegression()\n",
    "linreg.fit(x, y)\n",
@@ -379,7 +379,7 @@
   "source": [
    "\n",
    "x = np.arange(10).reshape(-1, 1)\n",
-    "y = x ** 2\n",
+    "y = x ** 2 + 1\n",
    "\n",
    "x_poly = np.hstack((x, x**2)) # shape: 10 x 2\n",
    "\n",

 %% Cell type:markdown id: tags:
 # Code zu Folien
 Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "NumPy & Matplotlib" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
 %% Cell type:code id: tags:
 ``` 
 import numpy as np
 v = np.array([1, 3, 5])
 print(v)
 print(v.shape, v.dtype)
 a = np.array([[1, 2, 3], [4, 5, 6]])
 print(a)
 print(a.shape, a.dtype)
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[[1]], [[3]], [[5]]])
 print(a.shape)
 a = np.array([[[1], [3], [5]]])
 print(a.shape)
 a = np.array([[[1, 3, 5]]])
 print(a.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.arange(6)                      # shape: (6,) Vektor
 a = v.reshape(-1, 3)                  # shape: (2, 3) Matrix
 b = v.reshape(1, -1)                  # shape: (1, 6) Matrix
 # x = v.reshape(4, -1)                  # shape: (4, ?) Matrix (passt nicht)
 c = a[np.newaxis, :, :]              # shape: (1, 2, 3) 3D
 d = a[np.newaxis, ...]               # shape: (1, 2, 3) 3D
 e = a[np.newaxis, :, np.newaxis, :]  # shape: (1, 2, 1, 3) 4D
 f = e.squeeze()                      # shape: (2, 3) Matrix. Was ergäbe e.ravel()?
 print(v.shape)
 print(a.shape)
 print(b.shape)
 print(c.shape)
 print(d.shape)
 print(e.shape)
 print(f.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()  # random number generator
 np.zeros((1, 5), dtype=bool)          # 1 x 5-Matrix mit False
 np.zeros_like(a)                      # Array mit 0en mit Shape und Typ von a
 np.ones((1, 3))                       # 1 x 3-Matrix mit 1en
 rng.uniform(size=(2, 4))              # 2 x 4-Matrix mit uniformen Zufallszahlen aus [0, 1)
 rng.integers(0, 10, (2, 4))           # 2 x 4-Matrix mit Zufallszahlen von 0 bis 9
 rng.choice(10, size=4, replace=False) # Vektor mit 4 eindeutigen Elem. aus {0, ..., 9}
 rng.permutation(10)                   # {0, ..., 9}, aber in zufälliger Reihenfolge
 np.arange(-1, 0.05, 0.1)              # Vektor von -1 bis 0 mit Schrittweite 0.1
 np.linspace(-1, 1, 9)                 # Vektor mit 9 Werten lin. zwischen -1 und 1
 np.logspace(-1, 1, 9)                 # Vektor mit 9 Werten log. zwischen 10^-1 und 10^1
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 v = np.array([10, 20, 30])
 b = np.ones((3, 3))
 print('np.sin(a):')
 print(np.sin(a))
 print('np.mean(a), np.std(a), np.max(a), np.min(a):')
 print(np.mean(a), np.std(a), np.max(a), np.min(a))
 print('np.sum(a), np.sum(a, axis=0), np.sum(a, axis=1):')
 print(np.sum(a), np.sum(a, axis=0), np.sum(a, axis=1))
 print('a**2:')
 print(a**2)
 print('a.T:')
 print(a.T)
 print('a @ v:')
 print(a @ v)
 print('a @ b:')
 print(a @ b)
 print('a.reshape(6):')
 print(a.reshape(6))
 print('np.delete(a, 1, axis=0):')
 print(np.delete(a, 1, axis=0))
 print('np.sort(a):')
 print(np.sort(a))
 print('np.argsort(a):')
 print(np.argsort(a))
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.array([0, 1, 2, 3, 4, 5, 6])
 print(v[1:5])   # Elemente 1 (inkl.) bis 5 (exkl.)
 print(v[1:5:2]) # Elemente 1 bis 5 mit Schrittweite 2
 print(v[::2])   # Alle Elemente mit Schrittweite 2
 print(v[-3:])   # Die 3 letzten Elemente
 print(v[::-1])  # Reihenfolge umkehren
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1,2,3], [4,5,6]])
 print(a[1, 2])    # Element a_{1,2}
 print(a[1])       # Zeile 1
 print(a[1, 1:])   # Zeile 1, ab Spalte 1
 print(a[:, -1])   # letzte Spalte
 print(a[0, ::-1]) # Zeile 0, Spalten umkehren
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 b = a[0, ::-1]    # a und b zeigen auf die gleichen Daten.
 b[0] = 0          # Eine Änderung in b ...
 print(a)          # ... wirkt sich auch auf a aus.
 c = a.copy()
 print(np.shares_memory(a, b))
 print(np.shares_memory(a, c))
 ```
 %% Cell type:code id: tags:
 ``` 
 v = np.array([4, 8, 15, 16, 23, 42])
 print(v[[1, 4, 2]])  # Die Elemente bei 1, 4 und 2 als neues np-Array
 print(v[np.array([[1, 4],
                  [1, 2]])])
 ```
 %% Cell type:code id: tags:
 ``` 
 A = np.array([[1,  2,  3,  4],
              [5,  6,  7,  8],
              [9, 10, 11, 12]])
 # Die Indizes werden paarweise genommen: [1, 1] und [2, 3]
 print(A[[1, 2],
        [1, 3]])
 # Zeilen 1 und 2, Spalten 1 und 3
 print(A[np.ix_([1, 2], [1, 3])])
 ```
 %% Cell type:code id: tags:
 ``` 
 a = np.array([[1, 2, 3], [4, 5, 6]])
 i = a > 4
 print(i)
 print(a[i])
 a[~i] += 10   # ~ invertiert die Boolesche Matrix
 print(a)
 ```
 %% Cell type:code id: tags:
 ``` 
 idx2d = np.nonzero(i)      # Trues bei Zeile 1, Spalte 1 und bei Zeile 1, Spalte 2
 print(idx2d)
 print(a[idx2d])
 idx1d = np.flatnonzero(i)  # Trues bei Einzelindex 4 und bei Einzelindex 5
 print(idx1d)
 print(a.flat[idx1d])
 ```
 %% Cell type:code id: tags:
 ``` 
 data = np.random.default_rng().random(100)  # uniform verteilt in [0, 1)
 print(np.sum(data > 0.5))
 print(np.count_nonzero(data > 0.5))
 print(len(data[data > 0.5]))
 print(np.nonzero(data > 0.5)[0].size)  # [0] weil mit Tupel umhüllt
 print(np.flatnonzero(data > 0.5).size) # size ist Produkt über #*shape*
 ```
 %% Cell type:code id: tags:
 ``` 
 A = np.array([[0, 1],
              [2, 3],
              [4, 5]])
 B = np.array([[5],
              [7],
              [9]])
 C = np.array([[8, 7],
              [9, 6]])
 V = np.vstack((A, C))
 print(V)
 H = np.hstack((A, B, B))
 print(H)
 ```
 %% Cell type:code id: tags:
 ``` 
 data = np.loadtxt('autos.csv', delimiter=',', skiprows=1, usecols=range(2,9))
 print(data)
 print(data.shape)
 # usecols als Zahlen oder Namen (z. B. 'Hubraum') möglich!
 sa = np.genfromtxt('autos.csv', delimiter=',', names=True, dtype=float)
 data = sa.view((float, len(sa.dtype.names)))
 print(data)
 print(data.shape)
 structdata = np.genfromtxt('autos.csv', delimiter=',', names=True, dtype=None, encoding='utf-8')
 print(structdata)
 print(structdata.shape)
 ```
 %% Cell type:code id: tags:
 ``` 
 x = np.arange(0, 10, 0.2)
 y1 = np.sin(x); y2 = np.cos(x)
 import matplotlib.pyplot as plt
 fig, ax = plt.subplots()    # neues Fenster mit Achsen
 ax.plot(x, y1, label='Sin') # sin Plot hinzufügen
 ax.plot(x, y2, label='Cos') # cos Plot hinzufügen
 ax.set_title('Simple Plot')
 ax.set_xlabel('x-Achse')
 ax.set_ylabel('y-Achse')
 ax.grid()
 plt.legend()
 fig.savefig('plot.pdf', bbox_inches='tight') # ggf. Plot speichern (pdf, png, ...)
 plt.show()                                   # anzeigen (bei non-interactive)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()
 x = rng.random(100)
 y = 5 * x**2 + rng.random(100)
 plt.figure()   # neues Fenster (optional)
 plt.scatter(x, y, s=100*y, c=x, alpha=0.4)
 fig = plt.gcf()
 ax = plt.gca()
 ```
 %% Cell type:code id: tags:
 ``` 
 from numpy.polynomial import Polynomial
 x = np.arange(10)
-y = 2 * x
+y = 2 * x + 1
-# y = x ** 2
+# y = x ** 2 + 1
 p = Polynomial.fit(x, y, deg=1).convert()
 print(p(1))
 print(p(np.array([1., 3., 5.])))
 print(p.coef)      # Koeffizienten
 plt.plot(x, p(x))   # Plot oben
 plt.scatter(x, y, color='r')
 ```
 %% Cell type:code id: tags:
 ``` 
 from sklearn.linear_model import LinearRegression
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 linreg = LinearRegression()
 linreg.fit(x, y)
 print(linreg.predict([[1]]))
 print(linreg.predict([[1], [3], [5]]))
 print(linreg.intercept_, linreg.coef_)
 ```
 %% Cell type:code id: tags:
 ``` 
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 x_poly = np.hstack((x, x**2)) # shape: 10 x 2
 linreg = LinearRegression()
 linreg.fit(x_poly, y)
 print(linreg.predict([[3, 9]]))
 print(linreg.intercept_, linreg.coef_)
 ```
 %% Cell type:code id: tags:
 ``` 
 rng = np.random.default_rng()
 x = rng.uniform(size=(100, 1)) * 3 - 0.5
 y = np.round(x % 1) - 0.5
 def fourier_basis(x, n=4):
     """Build sine features of the first ``n`` odd harmonics of ``x``.

     For every odd k in 1, 3, ..., 2*n - 1 the term sin(2*pi*k*x) is
     evaluated and the results are stacked horizontally with ``np.hstack``
     in ascending order of k.
     """
     odd_harmonics = range(1, 2 * n, 2)
     sine_terms = [np.sin(2 * np.pi * order * x) for order in odd_harmonics]
     return np.hstack(sine_terms)
 x_f = fourier_basis(x, 5)
 linreg = LinearRegression()
 linreg.fit(x_f, y)
 x_plt = np.linspace(0, 4, 501).reshape(-1, 1)
 x_plt_f = fourier_basis(x_plt, 5)
 y_plt = linreg.predict(x_plt_f)
 plt.plot(x_plt, y_plt)
 plt.scatter(x, y, color='r')
 ```

--- a/03-numpy-und-matplotlib/solutions/folien-code/folien-code.py
+++ b/03-numpy-und-matplotlib/solutions/folien-code/folien-code.py
@@ -224,8 +224,8 @@ ax = plt.gca()
 from numpy.polynomial import Polynomial
 x = np.arange(10)
-y = 2 * x
+y = 2 * x + 1
-# y = x ** 2
+# y = x ** 2 + 1
 p = Polynomial.fit(x, y, deg=1).convert()
@@ -240,7 +240,7 @@ plt.scatter(x, y, color='r')
 from sklearn.linear_model import LinearRegression
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 linreg = LinearRegression()
 linreg.fit(x, y)
@@ -252,7 +252,7 @@ print(linreg.intercept_, linreg.coef_)
 # %% Lineare Regression mit Scikit-Learn, Grad 2
 x = np.arange(10).reshape(-1, 1)
-y = x ** 2
+y = x ** 2 + 1
 x_poly = np.hstack((x, x**2)) # shape: 10 x 2

--- a/04-pandas-und-seaborn/03-odoo-eval.ipynb
+++ b/04-pandas-und-seaborn/03-odoo-eval.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Odoo Datenanalyse\n",
+    "\n",
+    "Odoo ist ein ERP-System, ähnlich wie SAP, aber Open-Source. Man kann\n",
+    "damit u. a. Verkaufsaufträge und Rechnungen aufzeichnen. Anbei befindet\n",
+    "sich die Datei `odoo-data.xlsx`, die Nils Fürkotter in der Lernfabrik\n",
+    "aufgezeichnet hat. Ihre Aufgabe besteht nun darin, die Daten zu\n",
+    "analysieren.\n",
+    "\n",
+    "-   Lesen Sie die Daten mit Pandas ein und geben Sie die Anzahl der\n",
+    "    Aufträge aus.\n",
+    "-   Erstellen Sie dann ein Pie-Chart, das das Verhältnis der Aufträge\n",
+    "    von Einzelpersonen und Unternehmen zeigt. Die gezeigte Information\n",
+    "    wäre z. B. Aufträge stammen zu 80 % von Einzelpersonen und 20 % von\n",
+    "    Unternehmen.\n",
+    "-   Erstellen Sie ein Barplot, das den Umsatz nach Kundentypen zeigt.\n",
+    "    Die gezeigte Information wäre z. B. Einzelpersonen haben 180 000 €\n",
+    "    Umsatz und Unternehmen 220 000 € Umsatz erwirtschaftet.\n",
+    "-   Erstellen Sie ein Histogramm, das die Verteilung der Umsätze pro\n",
+    "    Auftrag nach Kundentypen zeigt. Die gezeigte Information wäre z. B.\n",
+    "    Aufträge von Einzelpersonen haben Umsätze zwischen 50 € und 150 €\n",
+    "    und Aufträge von Unternehmen haben hauptsächlich Umsätze zwischen\n",
+    "    400 € und 600 €.\n",
+    "\n",
+    "Hier Ihr Start-Code:"
+   ],
+   "id": "0003-ffd65dfc8cad6b993d607936d89fb4de4d542de8348b893e161b721771f"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n"
+   ],
+   "id": "0004-d7e9dfa1635b2f39c536f959558e13284c1b5a3e2118f74285f499be56b"
+  }
+ ],
+ "nbformat": 4,
+ "nbformat_minor": 5,
+ "metadata": {}
+}
+%% Cell type:markdown id:0003-ffd65dfc8cad6b993d607936d89fb4de4d542de8348b893e161b721771f tags:
+# Odoo Datenanalyse
+Odoo ist ein ERP-System, ähnlich wie SAP, aber Open-Source. Man kann
+damit u. a. Verkaufsaufträge und Rechnungen aufzeichnen. Anbei befindet
+sich die Datei `odoo-data.xlsx`, die Nils Fürkotter in der Lernfabrik
+aufgezeichnet hat. Ihre Aufgabe besteht nun darin, die Daten zu
+analysieren.
+-   Lesen Sie die Daten mit Pandas ein und geben Sie die Anzahl der
+    Aufträge aus.
+-   Erstellen Sie dann ein Pie-Chart, das das Verhältnis der Aufträge
+    von Einzelpersonen und Unternehmen zeigt. Die gezeigte Information
+    wäre z. B. Aufträge stammen zu 80 % von Einzelpersonen und 20 % von
+    Unternehmen.
+-   Erstellen Sie ein Barplot, das den Umsatz nach Kundentypen zeigt.
+    Die gezeigte Information wäre z. B. Einzelpersonen haben 180 000 €
+    Umsatz und Unternehmen 220 000 € Umsatz erwirtschaftet.
+-   Erstellen Sie ein Histogramm, das die Verteilung der Umsätze pro
+    Auftrag nach Kundentypen zeigt. Die gezeigte Information wäre z. B.
+    Aufträge von Einzelpersonen haben Umsätze zwischen 50 € und 150 €
+    und Aufträge von Unternehmen haben hauptsächlich Umsätze zwischen
+    400 € und 600 €.
+Hier Ihr Start-Code:
+%% Cell type:code id:0004-d7e9dfa1635b2f39c536f959558e13284c1b5a3e2118f74285f499be56b tags:
+``` 
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+```
--- a/04-pandas-und-seaborn/odoo-data.xlsx
+++ b/04-pandas-und-seaborn/odoo-data.xlsx
--- a/04-pandas-und-seaborn/solutions/03-odoo-eval-sol.ipynb
+++ b/04-pandas-und-seaborn/solutions/03-odoo-eval-sol.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Odoo Datenanalyse\n",
+    "\n",
+    "Odoo ist ein ERP-System, ähnlich wie SAP, aber Open-Source. Man kann\n",
+    "damit u. a. Verkaufsaufträge und Rechnungen aufzeichnen. Anbei befindet\n",
+    "sich die Datei `odoo-data.xlsx`, die Nils Fürkotter in der Lernfabrik\n",
+    "aufgezeichnet hat. Ihre Aufgabe besteht nun darin, die Daten zu\n",
+    "analysieren.\n",
+    "\n",
+    "-   Lesen Sie die Daten mit Pandas ein und geben Sie die Anzahl der\n",
+    "    Aufträge aus.\n",
+    "-   Erstellen Sie dann ein Pie-Chart, das das Verhältnis der Aufträge\n",
+    "    von Einzelpersonen und Unternehmen zeigt. Die gezeigte Information\n",
+    "    wäre z. B. Aufträge stammen zu 80 % von Einzelpersonen und 20 % von\n",
+    "    Unternehmen.\n",
+    "-   Erstellen Sie ein Barplot, das den Umsatz nach Kundentypen zeigt.\n",
+    "    Die gezeigte Information wäre z. B. Einzelpersonen haben 180 000 €\n",
+    "    Umsatz und Unternehmen 220 000 € Umsatz erwirtschaftet.\n",
+    "-   Erstellen Sie ein Histogramm, das die Verteilung der Umsätze pro\n",
+    "    Auftrag nach Kundentypen zeigt. Die gezeigte Information wäre z. B.\n",
+    "    Aufträge von Einzelpersonen haben Umsätze zwischen 50 € und 150 €\n",
+    "    und Aufträge von Unternehmen haben hauptsächlich Umsätze zwischen\n",
+    "    400 € und 600 €.\n",
+    "\n",
+    "## Lösung\n",
+    "\n",
+    "Zunächst lesen wir den Datensatz mit der `pd.read_excel`-Funktion ein.\n",
+    "Dafür brauchen wir die Bibliothek `openpyxl`, die hoffentlich schon\n",
+    "installiert ist. Wenn nicht, können Sie sie mit\n",
+    "`mamba install -n ml openpyxl`, wobei ml der Umgebungsname ist,\n",
+    "installieren."
+   ],
+   "id": "0004-a0964a79c2d2ab0a6c2663854450e826b2d478a719d1ccce6497bd64d94"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "data = pd.read_excel('odoo-data.xlsx')"
+   ],
+   "id": "0005-237116bae51d87da8bfac1efb32ed96a4ac71d6d1471d771fd9cac013be"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Wir geben als erstes aus, wie viele Aufträge es insgesamt gibt:"
+   ],
+   "id": "0006-0278cc28842eeecc0cc1259b2a4462d0297fd9356560e647f9aa67086db"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Anzahl Aufträge: 2125"
+     ]
+    }
+   ],
+   "source": [
+    "print('Anzahl Aufträge:', len(data))"
+   ],
+   "id": "0007-053cc9903c4f934e49bcf5d0b964e9337c1e648638fe1add2750f64b60a"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Um konsistente Farben zu verwenden, definieren wir ein Dictionary mit\n",
+    "den Farben für die einzelnen Typen:"
+   ],
+   "id": "0008-ca4c12ba31cd853b088a12bc7f68a4677950a43a37a6d5211367b4b9865"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "colors = {'Einzelperson': 'tab:blue', 'Unternehmen': 'tab:orange'}"
+   ],
+   "id": "0009-8866ed2619e427a57a38cc1f9b3d2b949a9ca6cf4664473b93a4535a2b9"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Pie-Chart der Kundentypen\n",
+    "\n",
+    "Um den ersten Plot zu erstellen, zählen wir zunächst wie viele Aufträge\n",
+    "es von Einzelpersonen und wie viele von Unternehmen gibt. Das geht mit\n",
+    "`value_counts()`."
+   ],
+   "id": "0011-ca0de42f0ea048138e772acfa78e19b9efa23c3562cc54337fbed35d948"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "customer_type = data['Kunde/Unternehmenstyp']\n",
+    "counts_type = customer_type.value_counts()\n",
+    "counts_type.name = ''                         # remove ylabel\n",
+    "counts_type = counts_type.loc[colors.keys()]  # reorder according to colors"
+   ],
+   "id": "0012-2e2b4c17c6c1914754697469e5df2e5ed4923552c5d075c1a209f37e957"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Jetzt plotten wir das Verhältnis der Aufträge als Pie-Chart. Dabei\n",
+    "verwenden wir die `plot.pie`-Methode von Pandas. Wir geben die Farben\n",
+    "aus dem Dictionary an und formatieren den Text mit\n",
+    "`autopct='{:.1f}%'.format`. Das Ergebnis sieht so aus:"
+   ],
+   "id": "0013-d98f7f54a6505b26cd1458026fd8ea83e5cea6361390fad9b271543f38d"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "counts_type.plot.pie(colors=colors.values(), autopct='{:.1f}%'.format, startangle=-50)\n",
+    "plt.title('Verhältnis der Aufträge')"
+   ],
+   "id": "0014-a3ad3be8323d84a1e594eec4878583ba4e4e2c7c8df2e93cfda1d9ddfa6"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternativ können wir auch die `plt.pie`-Funktion von Matplotlib\n",
+    "verwenden, was sehr ähnlich funktioniert:"
+   ],
+   "id": "0015-7bec95538841de435f5b289e1c8564c1c607dab31493448208f7c5bbad7"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "plt.pie(counts_type, labels=counts_type.index, colors=colors.values(), autopct='%1.1f%%', startangle=-50)\n",
+    "plt.title('Verhältnis der Aufträge')"
+   ],
+   "id": "0016-f30c65e85874b1995aa6f489fe1a2fc3052368a2f1a850391115ca50c4f"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Seaborn bietet keine Funktion für Pie-Charts an.\n",
+    "\n",
+    "### Barplot des Umsatzes nach Kundentypen\n",
+    "\n",
+    "Um den Gesamtumsatz nach Kundentypen zu ermitteln, verwenden wir die\n",
+    "`groupby`-Methode von Pandas. Wir gruppieren die Daten nach dem Typ und\n",
+    "summieren den Umsatz. Das Ergebnis ist ein DataFrame mit den Typen als\n",
+    "Index und dem Umsatz als Spalte. `squeeze()` entfernt die Dimension des\n",
+    "Umsatzes, sodass wir nur eine Serie erhalten. Wir sortieren die Serie\n",
+    "nach den Farben, die wir vorher definiert haben:"
+   ],
+   "id": "0019-83fa942651ce7359d891283f57a14731131ef572ab2370651ae46b1e68c"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots()\n",
+    "revenue = data.groupby('Kunde/Unternehmenstyp')['Gesamt'].sum().squeeze()\n",
+    "revenue = revenue.loc[colors.keys()]                            # reorder according to colors\n",
+    "ax = revenue.plot.bar(color=colors.values(), ax=ax)             # pandas\n",
+    "# ax.bar(revenue.index, revenue.values, color=colors.values())  # matplotlib\n",
+    "ax.set_ylabel('Gesamt')"
+   ],
+   "id": "0020-26b591c8cf51de10bd0fa8dec08f9c436b339118450dc55b06c94a74d5b"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Seaborn gruppiert die Daten automatisch, wenn wir den `hue`-Parameter\n",
+    "verwenden. Wir müssen den `estimator`-Parameter verwenden, um den Umsatz\n",
+    "zu summieren, anstatt ihn zu mitteln. Der `errorbar`-Parameter gibt an,\n",
+    "dass wir keine Fehlerbalken wollen:"
+   ],
+   "id": "0021-fdb7ce41317da01fbbfde885ca952f363ed45258b194ed2eeff4c91fb6c"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "sns.barplot(data, y='Gesamt', ax=ax, hue='Kunde/Unternehmenstyp', estimator=sum, errorbar=('ci', 0))"
+   ],
+   "id": "0022-eeb27de0b7169395daa59a03cbf2ca3ebe15c9b171d2d714275eb90797b"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Histogramm des Umsatzes nach Kundentypen\n",
+    "\n",
+    "Um die Verteilung der Umsätze nach Kundentypen zu sehen, verwenden wir\n",
+    "die `hist`- oder die `plot.hist`-Methode von Pandas. Wir geben den\n",
+    "Spaltennamen und den Typ an. Es lassen sich keine Farben für die\n",
+    "Subplots angeben, daher müssen wir die Farben nachträglich setzen:"
+   ],
+   "id": "0024-22e27ab1072962a64c19aaadd088e368b3417dea6a5abe091bfdda7819f"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "axs = data.hist(column='Gesamt', by='Kunde/Unternehmenstyp')   # (1 x 2)\n",
+    "# axs = data.plot.hist(column='Gesamt', by='Kunde/Unternehmenstyp', sharex=True, legend=False)  # (2 x 1)\n",
+    "# set colors afterwards\n",
+    "for ax in axs.flatten():\n",
+    "    c = colors[ax.get_title()]\n",
+    "    for p in ax.containers[0].patches:\n",
+    "        p.set_facecolor(c)"
+   ],
+   "id": "0025-d877bcd761d1ca32d4208be9b71b27099b82be96f1d8c21909e09c53c75"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternativ können wir auch die `plt.hist`-Funktion von Matplotlib\n",
+    "verwenden. Wir erstellen ein Subplot mit 1 Zeile und 2 Spalten. Dann\n",
+    "iterieren wir über die Gruppen und erstellen für jede Gruppe ein\n",
+    "Histogramm. Wir setzen den Titel, die x-Achse und die y-Achse:"
+   ],
+   "id": "0026-5bc587277f2b73b50a2b848aade7245e9c90296fb933136d61e247ced3a"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "fig, axs = plt.subplots(1, 2, constrained_layout=True, figsize=(10, 5))\n",
+    "for i, (ctype, v) in enumerate(data.groupby('Kunde/Unternehmenstyp')):\n",
+    "    axs[i].hist(v['Gesamt'], color=colors[ctype])\n",
+    "    axs[i].set_title(ctype)\n",
+    "    axs[i].set_xlabel('Gesamt')\n",
+    "    if i == 0:\n",
+    "        axs[i].set_ylabel('Anzahl Aufträge')"
+   ],
+   "id": "0027-7d2078d752036f9cbd6a6073f3eaa0d1c2459f8372609f608ee4f6f963a"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In Seaborn gibt es die `countplot`-Funktion, die die Anzahl der Aufträge\n",
+    "nach Typen zählt. Wir geben den `hue`-Parameter an, um die Farben zu\n",
+    "setzen. Der `alpha`-Parameter gibt die Transparenz an, weil die\n",
+    "Histogramme in einem Plot übereinander liegen:"
+   ],
+   "id": "0028-bf127cd3ed81d394e946710a54e2ff63a4b7e004c30069e97081efe6411"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "style": "python"
+   },
+   "outputs": [],
+   "source": [
+    "sns.countplot(data=data, x='Gesamt', hue='Kunde/Unternehmenstyp', palette=colors, alpha=0.7, formatter='{:.0f}'.format)"
+   ],
+   "id": "0029-9d85047be203d22387fe72feaa244c59630adc4cb143c0fcff1e62a1df0"
+  }
+ ],
+ "nbformat": 4,
+ "nbformat_minor": 5,
+ "metadata": {}
+}
+%% Cell type:markdown id:0004-a0964a79c2d2ab0a6c2663854450e826b2d478a719d1ccce6497bd64d94 tags:
+# Odoo Datenanalyse
+Odoo ist ein ERP-System, ähnlich wie SAP, aber Open-Source. Man kann
+damit u. a. Verkaufsaufträge und Rechnungen aufzeichnen. Anbei befindet
+sich die Datei `odoo-data.xlsx`, die Nils Fürkotter in der Lernfabrik
+aufgezeichnet hat. Ihre Aufgabe besteht nun darin, die Daten zu
+analysieren.
+-   Lesen Sie die Daten mit Pandas ein und geben Sie die Anzahl der
+    Aufträge aus.
+-   Erstellen Sie dann ein Pie-Chart, das das Verhältnis der Aufträge
+    von Einzelpersonen und Unternehmen zeigt. Die gezeigte Information
+    wäre z. B. Aufträge stammen zu 80 % von Einzelpersonen und 20 % von
+    Unternehmen.
+-   Erstellen Sie ein Barplot, das den Umsatz nach Kundentypen zeigt.
+    Die gezeigte Information wäre z. B. Einzelpersonen haben 180 000 €
+    Umsatz und Unternehmen 220 000 € Umsatz erwirtschaftet.
+-   Erstellen Sie ein Histogramm, das die Verteilung der Umsätze pro
+    Auftrag nach Kundentypen zeigt. Die gezeigte Information wäre z. B.
+    Aufträge von Einzelpersonen haben Umsätze zwischen 50 € und 150 €
+    und Aufträge von Unternehmen haben hauptsächlich Umsätze zwischen
+    400 € und 600 €.
+## Lösung
+Zunächst lesen wir den Datensatz mit der `pd.read_excel`-Funktion ein.
+Dafür brauchen wir die Bibliothek `openpyxl`, die hoffentlich schon
+installiert ist. Wenn nicht, können Sie sie mit
+`mamba install -n ml openpyxl`, wobei ml der Umgebungsname ist,
+installieren.
+%% Cell type:code id:0005-237116bae51d87da8bfac1efb32ed96a4ac71d6d1471d771fd9cac013be tags:
+``` 
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+data = pd.read_excel('odoo-data.xlsx')  # reading .xlsx needs openpyxl installed
+```
+%% Cell type:markdown id:0006-0278cc28842eeecc0cc1259b2a4462d0297fd9356560e647f9aa67086db tags:
+Wir geben als erstes aus, wie viele Aufträge es insgesamt gibt:
+%% Cell type:code id:0007-053cc9903c4f934e49bcf5d0b964e9337c1e648638fe1add2750f64b60a tags:
+``` 
+print('Anzahl Aufträge:', len(data))  # len() of the DataFrame = number of rows, taken as number of orders
+```
+%% Output
+    Anzahl Aufträge: 2125
+%% Cell type:markdown id:0008-ca4c12ba31cd853b088a12bc7f68a4677950a43a37a6d5211367b4b9865 tags:
+Um konsistente Farben zu verwenden, definieren wir ein Dictionary mit
+den Farben für die einzelnen Typen:
+%% Cell type:code id:0009-8866ed2619e427a57a38cc1f9b3d2b949a9ca6cf4664473b93a4535a2b9 tags:
+``` 
+colors = {'Einzelperson': 'tab:blue', 'Unternehmen': 'tab:orange'}  # customer type -> color; key order is reused to order the plots
+```
+%% Cell type:markdown id:0011-ca0de42f0ea048138e772acfa78e19b9efa23c3562cc54337fbed35d948 tags:
+### Pie-Chart der Kundentypen
+Um den ersten Plot zu erstellen, zählen wir zunächst wie viele Aufträge
+es von Einzelpersonen und wie viele von Unternehmen gibt. Das geht mit
+`value_counts()`.
+%% Cell type:code id:0012-2e2b4c17c6c1914754697469e5df2e5ed4923552c5d075c1a209f37e957 tags:
+``` 
+customer_type = data['Kunde/Unternehmenstyp']  # column holding the customer type of each order
+counts_type = customer_type.value_counts()    # number of rows per customer type
+counts_type.name = ''                         # empty name, so the pie chart gets no ylabel
+counts_type = counts_type.loc[colors.keys()]  # reorder rows to follow the key order of colors
+```
+%% Cell type:markdown id:0013-d98f7f54a6505b26cd1458026fd8ea83e5cea6361390fad9b271543f38d tags:
+Jetzt plotten wir das Verhältnis der Aufträge als Pie-Chart. Dabei
+verwenden wir die `plot.pie`-Methode von Pandas. Wir geben die Farben
+aus dem Dictionary an und formatieren den Text mit
+`autopct='{:.1f}%'.format`. Das Ergebnis sieht so aus:
+%% Cell type:code id:0014-a3ad3be8323d84a1e594eec4878583ba4e4e2c7c8df2e93cfda1d9ddfa6 tags:
+``` 
+counts_type.plot.pie(colors=colors.values(), autopct='{:.1f}%'.format, startangle=-50)  # autopct: callable formatting each share with one decimal
+plt.title('Verhältnis der Aufträge')
+```
+%% Cell type:markdown id:0015-7bec95538841de435f5b289e1c8564c1c607dab31493448208f7c5bbad7 tags:
+Alternativ können wir auch die `plt.pie`-Funktion von Matplotlib
+verwenden, was sehr ähnlich funktioniert:
+%% Cell type:code id:0016-f30c65e85874b1995aa6f489fe1a2fc3052368a2f1a850391115ca50c4f tags:
+``` 
+plt.pie(counts_type, labels=counts_type.index, colors=colors.values(), autopct='%1.1f%%', startangle=-50)  # labels taken from the index; autopct given as %-format string
+plt.title('Verhältnis der Aufträge')
+```
+%% Cell type:markdown id:0019-83fa942651ce7359d891283f57a14731131ef572ab2370651ae46b1e68c tags:
+Seaborn bietet keine Funktion für Pie-Charts an.
+### Barplot des Umsatzes nach Kundentypen
+Um den Gesamtumsatz nach Kundentypen zu ermitteln, verwenden wir die
+`groupby`-Methode von Pandas. Wir gruppieren die Daten nach dem Typ und
+summieren den Umsatz. Da wir vor dem Summieren die Spalte `Gesamt`
+auswählen, ist das Ergebnis bereits eine Serie mit den Typen als Index
+und dem Umsatz als Werten. `squeeze()` würde ein einspaltiges DataFrame
+in eine Serie umwandeln und ändert hier nichts mehr. Wir sortieren die
+Serie nach den Farben, die wir vorher definiert haben:
+%% Cell type:code id:0020-26b591c8cf51de10bd0fa8dec08f9c436b339118450dc55b06c94a74d5b tags:
+``` 
+fig, ax = plt.subplots()
+revenue = data.groupby('Kunde/Unternehmenstyp')['Gesamt'].sum().squeeze()  # one column selected -> result is a Series; squeeze() should change nothing here
+revenue = revenue.loc[colors.keys()]                            # reorder according to colors
+ax = revenue.plot.bar(color=colors.values(), ax=ax)             # pandas variant, drawn into the axes created above
+# ax.bar(revenue.index, revenue.values, color=colors.values())  # matplotlib alternative
+ax.set_ylabel('Gesamt')  # y-axis labelled with the name of the summed column
+```
+%% Cell type:markdown id:0021-fdb7ce41317da01fbbfde885ca952f363ed45258b194ed2eeff4c91fb6c tags:
+Seaborn gruppiert die Daten automatisch, wenn wir den `hue`-Parameter
+verwenden. Wir müssen den `estimator`-Parameter verwenden, um den Umsatz
+zu summieren, anstatt ihn zu mitteln. Der `errorbar`-Parameter gibt an,
+dass wir keine Fehlerbalken wollen:
+%% Cell type:code id:0022-eeb27de0b7169395daa59a03cbf2ca3ebe15c9b171d2d714275eb90797b tags:
+``` 
+sns.barplot(data, y='Gesamt', ax=ax, hue='Kunde/Unternehmenstyp', estimator=sum, errorbar=('ci', 0))
+```
+%% Cell type:markdown id:0024-22e27ab1072962a64c19aaadd088e368b3417dea6a5abe091bfdda7819f tags:
+### Histogramm des Umsatzes nach Kundentypen
+Um die Verteilung der Umsätze nach Kundentypen zu sehen, verwenden wir
+die `hist`- oder die `plot.hist`-Methode von Pandas. Wir geben den
+Spaltennamen und den Typ an. Es lassen sich keine Farben für die
+Subplots angeben, daher müssen wir die Farben nachträglich setzen:
+%% Cell type:code id:0025-d877bcd761d1ca32d4208be9b71b27099b82be96f1d8c21909e09c53c75 tags:
+``` 
+axs = data.hist(column='Gesamt', by='Kunde/Unternehmenstyp')   # (1 x 2)
+# axs = data.plot.hist(column='Gesamt', by='Kunde/Unternehmenstyp', sharex=True, legend=False)  # (2 x 1)
+# set colors afterwards: recolor the bars of every subplot
+for ax in axs.flatten():
+    c = colors[ax.get_title()]  # subplot title is used as key into colors (presumably the customer type)
+    for p in ax.containers[0].patches:  # patches of the first artist container on this axes
+        p.set_facecolor(c)
+```
+%% Cell type:markdown id:0026-5bc587277f2b73b50a2b848aade7245e9c90296fb933136d61e247ced3a tags:
+Alternativ können wir auch die `plt.hist`-Funktion von Matplotlib
+verwenden. Wir erstellen ein Subplot mit 1 Zeile und 2 Spalten. Dann
+iterieren wir über die Gruppen und erstellen für jede Gruppe ein
+Histogramm. Wir setzen den Titel, die x-Achse und die y-Achse:
+%% Cell type:code id:0027-7d2078d752036f9cbd6a6073f3eaa0d1c2459f8372609f608ee4f6f963a tags:
+``` 
+fig, axs = plt.subplots(1, 2, constrained_layout=True, figsize=(10, 5))  # 1 row, 2 columns
+for i, (ctype, v) in enumerate(data.groupby('Kunde/Unternehmenstyp')):  # one subplot per group; a third group would index past axs
+    axs[i].hist(v['Gesamt'], color=colors[ctype])
+    axs[i].set_title(ctype)
+    axs[i].set_xlabel('Gesamt')
+    if i == 0:  # y-axis label only on the first subplot
+        axs[i].set_ylabel('Anzahl Aufträge')
+```
+%% Cell type:markdown id:0028-bf127cd3ed81d394e946710a54e2ff63a4b7e004c30069e97081efe6411 tags:
+In Seaborn gibt es die `countplot`-Funktion, die die Anzahl der Aufträge
+nach Typen zählt. Wir geben den `hue`-Parameter an, um die Farben zu
+setzen. Der `alpha`-Parameter gibt die Transparenz an, weil die
+Histogramme in einem Plot übereinander liegen:
+%% Cell type:code id:0029-9d85047be203d22387fe72feaa244c59630adc4cb143c0fcff1e62a1df0 tags:
+``` 
+sns.countplot(data=data, x='Gesamt', hue='Kunde/Unternehmenstyp', palette=colors, alpha=0.7, formatter='{:.0f}'.format)  # NOTE(review): countplot treats x as categorical, one bar per distinct value; histplot would bin instead — confirm intent
+```
--- a/04-pandas-und-seaborn/solutions/odoo-data.xlsx
+++ b/04-pandas-und-seaborn/solutions/odoo-data.xlsx