Generate the data and import packages
First, we need to create the data. I'll start by defining it as a dictionary and then convert it into a pandas DataFrame, since pandas is commonly used in many projects for data manipulation.
# tutorial https://www.jphwang.com/posts/nba-shot-data-analytics-visualization-with-python-pandas-and-matplotlib-part-2-grouping-data-by-area/
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
# Bar color for each country.
color_dict = {"Norway": "#2B314D", "Denmark": "#A54836", "Sweden": "#5375D4"}
# Shared colors for tick labels, text labels, grid/guide lines and in-bar data labels.
xy_ticklabel_color, label_color, grid_color, datalabels_color = (
    "#757C85",
    "#101628",
    "#C8C9C9",
    "#FFFFFF",
)

# Raw data in long format: one row per (year, country) with the number of sites.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Sweden", "Sweden", "Denmark", "Denmark", "Norway", "Norway"],
    "sites": [13, 15, 4, 10, 5, 8],
}
df = pd.DataFrame(data)

# Two-digit year label with a leading apostrophe, e.g. 2004 -> "'04".
df["year_lbl"] = "'" + df["year"].astype(str).str[-2:]

# Change in sites relative to the same country's previous row. The first row
# of each country has no predecessor (NaN), so it falls back to its own count.
# The result is assigned back instead of calling fillna(inplace=True) on
# df["diff"]: the chained in-place form raises a FutureWarning in pandas 2.x
# and no longer modifies df under pandas 3.0 Copy-on-Write.
df["diff"] = df.groupby(["countries"])["sites"].diff().fillna(df["sites"])

# Custom sort: the same lookup table ranks both sort columns
# (years: 2004 before 2022; countries: Norway, Sweden, Denmark).
sort_order_dict = {"Denmark": 3, "Sweden": 2, "Norway": 1, 2004: 4, 2022: 5}
df = df.sort_values(
    by=[
        "year",
        "countries",
    ],
    key=lambda x: x.map(sort_order_dict),
)

# Map each country to its bar color.
df["color"] = df.countries.map(color_dict)
# Total number of sites per year, repeated on every row of that year.
df["sub_total"] = df.groupby("year")["sites"].transform("sum")
df
C:\Users\Ruth Pozuelo\AppData\Local\Temp\ipykernel_27536\238149915.py:24: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
df["diff"].fillna(df.sites, inplace=True)
| | year | countries | sites | year_lbl | diff | color | sub_total |
|---|---|---|---|---|---|---|---|
| 4 | 2004 | Norway | 5 | '04 | 5.0 | #2B314D | 22 |
| 0 | 2004 | Sweden | 13 | '04 | 13.0 | #5375D4 | 22 |
| 2 | 2004 | Denmark | 4 | '04 | 4.0 | #A54836 | 22 |
| 5 | 2022 | Norway | 8 | '22 | 3.0 | #2B314D | 33 |
| 1 | 2022 | Sweden | 15 | '22 | 2.0 | #5375D4 | 33 |
| 3 | 2022 | Denmark | 10 | '22 | 6.0 | #A54836 | 33 |
# Draw every (year, country) row of df as one segment of a single horizontal
# stacked bar centered at y = 0.
fig, ax = plt.subplots(figsize=(14, 2), facecolor="w")
ax.set_ylim([-2, 3])

# Left edge of the next segment. This has to be a scalar: barh broadcasts its
# arguments against each other, so passing an array of len(df) as `left`
# creates len(df) identical, overlapping patches per call, and the label loop
# below would then draw every value that many times.
bottom = 0
for row in df.itertuples():
    ax.barh(
        0,
        row.sites,
        left=bottom,
        height=2,
        color=row.color,
    )
    bottom += row.sites

# Write each segment's value (its width) at the center of the segment.
for bar in ax.patches:
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height() / 2 + bar.get_y(),
        round(bar.get_width()),
        ha="center",
        va="center",
        weight="light",
        color=datalabels_color,
        size=12,
    )

# Country names in row order; used as the legend labels further down.
labels = df.countries.unique().tolist()
fig
Add the labels and lines
# Per-year totals in row order: the first entry belongs to 2004, the second to 2022.
subtotals = df.sub_total.unique().tolist()
sub04 = subtotals[0]
sub22 = subtotals[1]

# Vertical guide line where the 2004 segments end. clip_on=False lets it be
# drawn outside the axes area, since ymin/ymax go beyond the 0..1 range.
ax.axvline(x=sub04, ymin=-0.1, ymax=1.3, color=grid_color, linestyle="-", clip_on=False)

# Despite the name, line_x is the y position shared by both arrows.
line_x = 2.5
common_line_params = {
    "va": "center",
    "arrowprops": {
        "arrowstyle": "<-",
        "color": grid_color,
        "mutation_scale": 20,
    },
}
# One arrow over the 2004 segments, one over the segments added afterwards.
for span_start, span_end in ((0, sub04), (sub04, df.sites.sum())):
    ax.annotate(
        "",
        xy=(span_start, line_x),
        xytext=(span_end, line_x),
        **common_line_params,
    )

# Despite the name, text_x is the y position shared by both captions.
text_x = 2.2
common_text_params = {
    "size": 18,
    "color": label_color,
    "ha": "center",
    # White-filled, white-edged box drawn behind each caption.
    "bbox": {
        "boxstyle": "square,pad=0.3",
        "fc": "w",
        "ec": "w",
        "lw": 2,
    },
}
# Each caption is centered horizontally over the span it describes.
for caption, center in (
    ("Up until 2004", sub04 / 2),
    ("After 2004", sub04 + sub22 / 2),
):
    ax.annotate(caption, xy=(center, text_x), **common_text_params)
fig
# Hide the axes frame, ticks and tick labels; only the drawn artists remain.
ax.set_axis_off()

# Legend handles: one square marker (no connecting line) per bar color,
# in the same row order as `labels`.
_legend_marker_kwargs = {"marker": "s", "linestyle": "", "markersize": 10}
lines = [
    Line2D([0], [0], color=c, **_legend_marker_kwargs)
    for c in df.color.unique()
]

# Frameless legend anchored below the bar.
ax.legend(
    handles=lines,
    labels=labels,
    loc="lower center",
    bbox_to_anchor=(0.1, -2),
    frameon=False,
    fontsize=22,
    labelcolor=xy_ticklabel_color,
)
fig