Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Data Science Notebooks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Christof Kaufmann
Data Science Notebooks
Commits
2baf5d16
Commit
2baf5d16
authored
1 year ago
by
Christof Kaufmann
Browse files
Options
Downloads
Patches
Plain Diff
Upload Code (Python Script)
parent
aef86389
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
04-pandas-und-seaborn/folien-code.py
+221
-0
221 additions, 0 deletions
04-pandas-und-seaborn/folien-code.py
with
221 additions
and
0 deletions
04-pandas-und-seaborn/folien-code.py
0 → 100644
+
221
−
0
View file @
2baf5d16
# %% [markdown]
# # Code zu Folien
#
# Dieses Skript bzw. Jupyter-Notebook enthält den Code, der auch auf den Folien "Pandas & Seaborn" enthalten ist. Zum Vorbereiten, Mitmachen oder Nacharbeiten.
# %% import Pandas
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
from
IPython.display
import
display
# %% Iris flower dataset
# Load the iris table from the seaborn-data repository (needs network access).
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
df = pd.read_csv(url)

# Offline alternative:
# from sklearn.datasets import load_iris
# df = pd.concat(load_iris(return_X_y=True, as_frame=True), axis='columns')
# df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
# df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

# Store the species column as a categorical; later cells use its `.cat` accessor.
df = df.astype({'species': 'category'})
df
# %% Information
# Print the basic structural attributes of the frame one after another.
for attribute in ('shape', 'columns', 'dtypes', 'index'):
    print(getattr(df, attribute))
# info() prints its summary itself, so no print() is needed here.
df.info()
# %% Statistical overview
# describe() with default arguments, and once more with the numeric columns excluded.
numeric_summary = df.describe()
non_numeric_summary = df.describe(exclude='number')
display(numeric_summary)
display(non_numeric_summary)
# %% Pie chart
# Number of rows per species, shown as a table and as a pie chart.
counts = df['species'].value_counts()
display(counts)

# The bound str.format is called with each wedge's percentage to build its label.
percent_label = '{:.2f}%'.format
pie_ax = counts.plot.pie(startangle=60, autopct=percent_label)
pie_ax.set_ylabel('species')
# %% Box plot
# Box plot of the petal_length column, grouped by the values of the species column.
df.boxplot(column='petal_length', by='species')
# %% Box plots of all features
# A 2x2 grid of axes with independent y axes.
fig, axs = plt.subplots(2, 2, sharey=False)
# Hand the prepared axes to pandas so that it draws into this grid.
pd.plotting.boxplot(df, by='species', ax=axs)
# Remove the x labels; a plain loop is used because the calls are made only
# for their side effect.
for ax in axs.ravel():
    ax.set_xlabel('')
fig.tight_layout()
# %% Violin plot
import seaborn as sns

# Violin plot of petal_length; the species column is mapped to the hue.
sns.violinplot(hue='species', y='petal_length', data=df)
# %% Scatter plots
# petal_length against petal_width; the species column selects the point colour
# from the viridis colormap, and alpha=0.7 makes the points partly transparent.
df.plot.scatter(x='petal_length', y='petal_width', c='species', colormap='viridis', alpha=0.7)
# %% Pair plot
# Uses `sns`, which is imported in the violin plot cell.
# Pairwise plots of the columns of df, coloured by species; plot_kws passes
# alpha=0.5 on to the underlying plotting calls.
sns.pairplot(df, hue='species', plot_kws={'alpha': 0.5})
# %% Parallel coordinates plot, unscaled
# 'species' is the class column; the remaining columns are plotted with their
# original values on one shared y axis.
pd.plotting.parallel_coordinates(df, 'species', colormap='viridis', alpha=.5)
# %% Parallel coordinates plot, normalised
from sklearn.preprocessing import minmax_scale

# All columns except the class column are rescaled; df itself is left untouched.
num_cols = df.columns.drop('species')
scaled_values = minmax_scale(df[num_cols])
df_scaled = df.copy()
df_scaled[num_cols] = scaled_values

pd.plotting.parallel_coordinates(df_scaled, 'species', colormap='viridis', alpha=.5)
# %% Parallel coordinates plot, custom code from https://stackoverflow.com/a/60401570/2414411
import numpy as np
from matplotlib.path import Path
import matplotlib.patches as patches

# Split the feature table into its column names and a plain array of values.
features = df.drop(columns='species')
ynames = features.columns
ys = features.to_numpy()

# Per-column value range, widened by 5% of the range below and above.
ymins, ymaxs = ys.min(axis=0), ys.max(axis=0)
dys = ymaxs - ymins
padding = dys * 0.05
ymins -= padding
ymaxs += padding

# reverse axis 1 to have less crossings
# ymaxs[1], ymins[1] = ymins[1], ymaxs[1]
# dys = ymaxs - ymins

# Transform all data to be compatible with the main axis: column 0 is kept as
# is, every other column is mapped linearly onto the range of column 0.
# dys still holds the unpadded ranges; since every axis is padded by the same
# fraction, the ratio dys[0] / dys[i] equals the ratio of the padded ranges.
zs = ys.copy()
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]
fig, host = plt.subplots(figsize=(10, 4))

# One y axis per data column: the host axis plus one twin for each further column.
n_dims = ys.shape[1]
last = n_dims - 1
axes = [host]
axes.extend(host.twinx() for _ in range(last))

for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    for side in ('top', 'bottom'):
        ax.spines[side].set_visible(False)
    if ax is host:
        continue
    # Twin axes: hide the left spine and put the right spine with its ticks at
    # the fraction i / last of the host axes width.
    ax.spines['left'].set_visible(False)
    ax.yaxis.set_ticks_position('right')
    ax.spines["right"].set_position(("axes", i / last))

# The x axis has one tick per column, labelled with the column names along the top.
host.set_xlim(0, last)
host.set_xticks(range(n_dims))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis='x', which='major', pad=7)
host.spines['right'].set_visible(False)
host.xaxis.tick_top()
# host.set_title('Parallel Coordinates Plot — Iris', fontsize=18, pad=12)
# One viridis colour per class (lookup-table indices 0, 128 and 255).
colors = plt.cm.viridis([0, 128, 255])

# Labels and codes both come from the categorical itself, so that code k always
# refers to target_names[k]. The codes are converted to an array because the
# rows of zs are addressed by position, not by index label.
species = df['species']
target_names = species.cat.categories
target = species.cat.codes.to_numpy()
legend_handles = [None] * len(target_names)

# Bezier control vertices: one at every axis position plus two in between, i.e.
# 3 * n_axes - 2 x values from 0 to n_axes - 1. This depends only on the number
# of axes (columns of ys), so it is computed once, outside the loop.
n_axes = ys.shape[1]
xs = np.linspace(0, n_axes - 1, n_axes * 3 - 2)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(xs) - 1)

for zs_row, code in zip(zs, target):
    # Repeat every y value three times and trim one copy at each end, so the
    # first and the last axis contribute two control vertices each.
    verts = np.column_stack((xs, np.repeat(zs_row, 3)[1:-1]))
    patch = patches.PathPatch(Path(verts, codes), facecolor='none', lw=2, alpha=0.5,
                              edgecolor=colors[code])
    # Keep one patch per class as its legend handle (the last one drawn wins).
    legend_handles[code] = patch
    host.add_patch(patch)

host.legend(legend_handles, target_names, loc='lower center', bbox_to_anchor=(0.5, -0.18),
            ncol=len(target_names), fancybox=True, shadow=True)
# %% Parallel coordinates plot with Plotly Express
import plotly.express as px

# fig = px.parallel_coordinates(df, color="species", labels={'species': tuple('ABC')})
# The integer category codes of the species column are passed as colour values.
fig = px.parallel_coordinates(df, color=df["species"].cat.codes)
# Set the label of the last dimension to 'species'.
# NOTE(review): presumably the colour series ends up as the last dimension — confirm.
fig.data[0]['dimensions'][-1]['label'] = 'species'
fig.show()
# %% Slicing
# All assignments are made on a copy, so df itself is not modified.
cp = df.copy()
cp.loc[1, 'sepal_width'] = 1  # one cell: row label 1, one column
cp.loc[0:2, 'petal_length'] = 2  # row labels 0 to 2 (.loc slices include the end label)
cp.loc[0, 'sepal_width':'petal_width'] = 3  # one row, slice of column labels
cp.loc[1:, 'sepal_length'] = 4  # open-ended slice: row label 1 onwards
cp.loc[:2, :'sepal_width'] = 5  # rows up to label 2, columns up to sepal_width
# Write the rows up to label 49 to a CSV file; the values overwritten above are
# written as they are now.
# NOTE(review): the file name suggests that these rows are the setosa samples — confirm.
cp.loc[:49, :].to_csv('iris-setosa.csv')
cp
# %% Complex indexing
# The same list of row labels, in this order, selects first from one column
# and then from two columns.
rows = [0, 149, 2]
display(df.loc[rows, 'petal_width'])
part = df.loc[rows, ['petal_width', 'sepal_width']]
part
# %% integer location
display
(
part
.
iloc
[
1
,
-
1
])
display
(
part
.
iloc
[:
2
,
-
1
])
display
(
part
.
iloc
[[
0
,
1
],
[
0
,
1
]])
# %% Boolean indexing
# Two boolean masks, one entry per row of part.
pw = part.loc[:, 'petal_width'] <= 1
sw = part.loc[:, 'sepal_width'] < 3.5
display(pw)
display(sw)
display(~sw)  # element-wise NOT
display(part.loc[pw & sw])  # rows where both masks are True
display(part.loc[pw | ~sw])  # rows where pw is True or sw is False
display(part.loc[pw ^ sw])  # rows where exactly one of the masks is True
# %% Dropping data
# The results of drop are only displayed, never assigned back to part.
display(part.drop(index=149, columns='petal_width'))  # one row label and one column
display(part.drop(index=[149, 0]))  # several row labels
# %% Adding individual data
part.loc[3] = [2, 6]  # assign to row label 3, one value per column
display(part)
part.loc[:, 'weight'] = [1, 2, 3, 4]  # assign to column 'weight', one value per row
display(part)
# %% Combining DataFrames
# Two small frames: part without row label 3, and rows up to label 2 of two df columns.
a = part.drop(index=3)
b = df.loc[:2, ['petal_length', 'petal_width']]
for frame in (a, b):
    display(frame)
# Concatenate the same pair first side by side, then one below the other.
for axis in ('columns', 'index'):
    display(pd.concat((a, b), axis=axis))
# %% Categorical data
# A bare expression is only shown when it is the last statement of a cell, so
# the column is passed to display() explicitly.
display(df['species'])
df['species'].info()
# %% Statistical functions
# y is the species column, X is everything else.
y = df['species']
X = df.drop('species', axis='columns')
display(X.mean())
display(y.value_counts())
# %% Grouping
# Mean of every column of X per value of y. observed=False is spelled out so
# that grouping by the categorical behaves the same across pandas versions and
# does not emit a FutureWarning about the changing default.
species_means = X.groupby(y, observed=False).mean()
display(species_means)
# Difference of each group mean to a fixed reference point, one value per column of X.
# NOTE(review): presumably in the order sepal_length, sepal_width, petal_length,
# petal_width — confirm against X.columns.
diff = species_means - [6, 3, 2, 0.5]
# Sum of squared differences per group.
(diff ** 2).sum(axis='columns')
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment