Generate the data and import packages
First, we need to create the data. I'll start by defining it as a dictionary and then convert it into a pandas DataFrame, since pandas is commonly used in many projects for data manipulation.
# tutorial
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Line colour for each country.
color_dict = {"Norway": "#2B314D", "Denmark": "#A54836", "Sweden": "#5375D4"}
# Styling colours referenced by the plotting code further down.
xy_ticklabel_color, xlabel_color, bbox_color, grid_color = (
    "#757C85",
    "#FFFFFF",
    "#101628",
    "#C8C9C9",
)

# Number of sites per country for the first and last year of the period.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Denmark", "Denmark", "Norway", "Norway", "Sweden", "Sweden"],
    "sites": [4, 10, 5, 8, 13, 15],
}
df = pd.DataFrame(data)

# custom sort a dataframe
# sort_values returns a new frame, so the result has to be assigned back.
# A stable sort keeps each country's rows in their original (chronological)
# order, which the percentage-change computation below relies on.
custom_dict = {"Sweden": 3, "Denmark": 2, "Norway": 1}
df = df.sort_values(
    by="countries",
    key=lambda names: names.map(custom_dict),
    kind="stable",
)

# Total number of sites per country, repeated on each of its rows.
df["sub_total"] = df.groupby("countries")["sites"].transform("sum")

# Percentage change between consecutive rows of the same country. The
# groupby result is index-aligned, so it lands on the right rows whatever
# the row order is; the first row of every country is NaN.
df["pct_change"] = (
    df.groupby("countries", sort=False)["sites"].pct_change().round(3) * 100
)

# map the colors of a dict to a dataframe
df["color"] = df["countries"].map(color_dict)
df
| | year | countries | sites | sub_total | pct_change | color |
|---|---|---|---|---|---|---|
| 0 | 2004 | Denmark | 4 | 14 | NaN | #A54836 |
| 1 | 2022 | Denmark | 10 | 14 | 150.0 | #A54836 |
| 2 | 2004 | Norway | 5 | 13 | NaN | #2B314D |
| 3 | 2022 | Norway | 8 | 13 | 60.0 | #2B314D |
| 4 | 2004 | Sweden | 13 | 28 | NaN | #5375D4 |
| 5 | 2022 | Sweden | 15 | 28 | 15.4 | #5375D4 |
# Show whole-number percentages without a trailing ".0" (150.0 -> 150);
# NaN and fractional values are kept as they are.
pct_change = [int(num) if float(num).is_integer() else num for num in df["pct_change"]]

fig, ax = plt.subplots(figsize=(5, 5), facecolor="#FFFFFF")

# Every country has two rows (first year, last year), so each second entry
# of pct_change, starting at index 1, is that country's overall change.
country_groups = df.groupby("countries", sort=False)
for (country, group), pct in zip(country_groups, pct_change[1::2]):
    x = group.year
    y = group.sites

    # Slope line with a white-edged marker at both ends; clip_on=False lets
    # the markers be drawn past the axes boundary.
    ax.plot(
        x,
        y,
        "-o",
        ms=8,
        mec="w",
        lw=3,
        color=group.color.values[0],
        clip_on=False,
        zorder=1,
    )

    # add legends
    # Country name just to the right of the line's last point.
    ax.annotate(
        country,
        xy=(x.iloc[-1] + 1, y.iloc[-1]),
        ha="left",
        va="center",
        color=bbox_color,
        annotation_clip=False,
    )

    # Percentage change further right. The "+" format flag writes the sign
    # that matches the value, so a decrease reads "-5%" rather than "+-5%".
    ax.annotate(
        f"{pct:+}%",
        xy=(x.iloc[-1] + 4.5, y.iloc[-1]),
        size=10,
        ha="left",
        va="center",
        weight="bold",
        color=bbox_color,
        annotation_clip=False,
    )
Add the legend and vertical lines
# add year labels
ax.set(xlim=(2003, 2023), ylim=(-2, 18))

# One tick per distinct year, with the default tick labels blanked out;
# the years are drawn as boxed annotations above the plot instead.
years = df.year.unique()
ax.xaxis.set_ticks(years, labels="")
xticks_location = ax.get_xticks()

for year, xticks in zip(years, xticks_location):
    # Vertical guide line behind the data. axvline takes ymin/ymax as
    # fractions of the axes height (0 = bottom, 1 = top), not data values,
    # so 0..1 spans the full height.
    ax.axvline(
        x=year,
        ymin=0,
        ymax=1,
        lw=3,
        zorder=0,
        color=grid_color,
    )

    # Downward-pointing triangle just above the top of the axes.
    ax.text(
        year,
        18.8,
        "\u25BC",
        size=12,
        color="#191F30",
        ha="center",
    )

    # Year label in a filled, rounded box above the triangle.
    ax.annotate(
        str(year),
        (xticks - 1, 20),
        color=xlabel_color,
        annotation_clip=False,
        bbox=dict(
            ec=bbox_color,
            boxstyle="round,pad=0.8",
            fc=bbox_color,
        ),
    )
fig
Add the rest of the styling:
# Single definition of the y positions (0, 5, 10, 15) shared by the
# gridlines and the tick marks, so the two cannot drift apart.
major_ticks = np.arange(0, 16, 5)

# create the gridlines manually
# xmin/xmax are fractions of the axes width, so each line stops at 95%.
for tick in major_ticks:
    ax.axhline(
        y=tick,
        xmin=0,
        xmax=0.95,
        lw=1,
        zorder=0,
        color="#DEE2E4",
    )

# Hide the tick marks themselves and style the tick labels.
ax.tick_params(
    axis="both",
    which="major",
    length=0,
    labelsize=10,
    colors=bbox_color,
    pad=1.5,
)

# Set the tick positions and their labels in one call.
ax.yaxis.set_ticks(major_ticks, labels=[0, 5, 10, 15])

ax.set_frame_on(False)
fig