Generate the data and import packages
First, we need to create the data. I'll start by defining it as a dictionary and then convert it into a pandas DataFrame, since pandas is commonly used in many projects for data manipulation.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Bar / connector / label color for each country.
color_dict = {
    "Norway": "#2B314D",
    "Denmark": "#A54836",
    "Sweden": "#5375D4",
}
# Shared palette for the remaining chart elements (the names indicate the
# intended use of each color).
xy_ticklabel_color, grand_totals_color, grid_color, datalabels_color = (
    "#757C85",
    "#101628",
    "#C8C9C9",
    "#FFFFFF",
)

# One row per (country, year) holding the number of sites.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Sweden", "Sweden", "Denmark", "Denmark", "Norway", "Norway"],
    "sites": [13, 15, 4, 10, 5, 8],
}
df = pd.DataFrame(data)

# Total number of sites per year, repeated on every row of that year.
df["sub_total"] = df.groupby("year")["sites"].transform("sum")

# Percentage change of each country's sites relative to its previous row
# (NaN for a country's first row). GroupBy.pct_change returns a Series aligned
# on df's index, so the values land on the right rows even if a country's rows
# are not contiguous. Scaling to percent *before* rounding keeps one clean
# decimal instead of multiplying an already-rounded float.
df["pct_change"] = (
    df.groupby("countries", sort=False)["sites"].pct_change().mul(100).round(1)
)

# Map the colors of a dict to a dataframe.
df["color"] = df["countries"].map(color_dict)

# Descending sort puts, for each country, the later year first; the plotting
# cells rely on this row order.
df = df.sort_values(by=["countries", "year"], ascending=False)

# Running total of sites within each year, accumulated in the sorted row order.
df["sites_cum"] = df.groupby("year")["sites"].cumsum()
df
| | year | countries | sites | sub_total | pct_change | color | sites_cum |
|---|---|---|---|---|---|---|---|
| 1 | 2022 | Sweden | 15 | 33 | 15.4 | #5375D4 | 15 |
| 0 | 2004 | Sweden | 13 | 22 | NaN | #5375D4 | 13 |
| 5 | 2022 | Norway | 8 | 33 | 60.0 | #2B314D | 23 |
| 4 | 2004 | Norway | 5 | 22 | NaN | #2B314D | 18 |
| 3 | 2022 | Denmark | 10 | 33 | 150.0 | #A54836 | 33 |
| 2 | 2004 | Denmark | 4 | 22 | NaN | #A54836 | 22 |
Create a function to make the curved lines:
# function to draw a colored connector line
def multiColorLine(xstart, xend, ystart, yend, npoints, line_thickness, color,
                   axes=None):
    """Plot a single-color connector made of a hold, a ramp and a hold.

    NOTE: the parameter names are swapped relative to the plotted axes and
    are kept as-is for backward compatibility with existing callers:

    * ``xstart`` / ``xend`` are the *y* coordinates of the first and last
      point (evenly spaced over ``npoints``).
    * ``ystart`` / ``yend`` are the *x* coordinates. x stays at ``ystart``
      for the first quarter of the points, moves linearly to ``yend`` over
      the middle points, then stays at ``yend`` for the last quarter.

    Args:
        xstart: y coordinate of the first point.
        xend: y coordinate of the last point.
        ystart: x coordinate held over the first quarter of the points.
        yend: x coordinate held over the last quarter of the points.
        npoints: total number of points in the line (non-negative int).
        line_thickness: passed to ``plot`` as ``linewidth``.
        color: passed to ``plot`` as ``color``.
        axes: object whose ``plot`` method draws the line. Defaults to the
            module-level ``ax`` when omitted.
    """
    target = ax if axes is None else axes

    # Size the middle ramp as "whatever is left" so the x and y sequences
    # always have exactly npoints entries, even when npoints is not a
    # multiple of 4 (otherwise plot() fails on mismatched lengths).
    hold = npoints // 4
    ramp = npoints - 2 * hold

    y = np.linspace(xstart, xend, npoints)
    x = (
        [ystart] * hold
        + np.linspace(ystart, yend, ramp).tolist()
        + [yend] * hold
    )
    target.plot(x, y, color=color, linewidth=line_thickness)
Plot the chart
# Build the figure: one horizontal stacked bar per year, one segment per
# country, plus a connector line joining each country's segment ends.
fig, ax = plt.subplots(figsize=(10, 4), facecolor="w")

years = df.year.nunique()
# Left edge of the next segment on each year's bar; grows as segments stack.
bottom = np.zeros(years)

for country, group in df.groupby("countries", sort=False):
    sites = group["sites"].tolist()
    cum = group["sites_cum"].tolist()
    country_color = group["color"].iloc[0]

    # Draw this country's segment on every year's bar.
    ax.barh(
        range(years),
        sites,
        left=bottom,
        height=0.3,
        zorder=1,
        color=country_color,
        lw=20,
        ec="w",
    )
    bottom += np.asarray(sites)

    #add the multiline
    # Runs from y=1.1 at x=cum[1] down to y=-0.1 at x=cum[0], i.e. between
    # the cumulative ends of this country's segment on the two bars.
    multiColorLine(
        xstart=1.1,
        xend=-0.1,
        ystart=cum[1],
        yend=cum[0],
        npoints=900,
        line_thickness=1,
        color=country_color,
    )
Add the data labels:
# Write the site count inside each bar segment. ax.patches and df.sites are
# paired positionally, so this relies on the bars having been drawn in the
# DataFrame's row order.
for bar, site in zip(ax.patches, df["sites"]):
    #add the bar labels
    label_x = bar.get_x() + 1.3  # fixed offset from the segment's left edge
    label_y = bar.get_y() + bar.get_height() / 2  # vertical middle of the bar
    ax.text(
        label_x,
        label_y,
        site,
        size=14,
        color="w",
        va="center",
        ha="center",
    )
fig
Add the country labels:
# Every second patch is paired with one unique country name, in order of
# first appearance in df; the name is written 0.25 below the segment's middle.
first_bars = ax.patches[0::2]
country_names = df["countries"].unique()
for bar, country in zip(first_bars, country_names):
    #add country names
    name_x = bar.get_x() + 1
    name_y = bar.get_y() + bar.get_height() / 2 - 0.25
    ax.text(name_x, name_y, country, size=12, color=xy_ticklabel_color)
fig
Add the sub-totals for each year:
# NOTE: sites_cum_lists and site_cum are assigned but never read in this cell;
# they are kept in case a later cell uses them.
sites_cum_lists = []

# Show sum on each stacked bar
# Groups are visited in order of first appearance (sort=False), and the
# enumerate index is used as the bar's y position.
year_groups = df.groupby("year", sort=False)
for i, (year, group) in enumerate(year_groups):
    total = group["sub_total"].to_numpy().max()
    site_cum = group["sites_cum"].tolist()
    total_style = dict(
        va="center",
        weight="bold",
        size=14,
        color=xy_ticklabel_color,
    )
    # Place the total one unit to the right of the bar's end.
    ax.text(total + 1, i, total, **total_style)
fig
Add the percentage labels:
# Overall change between the earliest and latest year, derived from the data
# instead of a hard-coded figure (22 -> 33 sites is +50%).
total_by_year = df.groupby("year")["sites"].sum().sort_index()
total_pct = float(
    round((total_by_year.iloc[-1] / total_by_year.iloc[0] - 1) * 100, 1)
)

# Per-row percentage changes followed by the overall change; whole numbers are
# shown without a trailing ".0".
pcts = [
    int(num) if float(num).is_integer() else num
    for num in [*df["pct_change"], total_pct]
]
# Matching label colors: each row's country color, then the grand-total color.
color_pct = df.color.to_list() + [grand_totals_color]

# Hand-tuned x positions of the label boxes (three countries + overall).
bbox_coord_x = [5, 15.7, 22, 30]

# The [0::2] stride keeps each country's first row (the one carrying its
# percentage change) and skips the NaN row after it; with an odd-length list
# the appended overall entry is kept too.
for bbox, c_pct, pct in zip(bbox_coord_x, color_pct[0::2], pcts[0::2]):
    ax.annotate(
        # Sign-aware formatting: "+60%" for growth, "-5%" for a decline
        # (rather than "+-5%").
        f"{pct:+}%",
        xy=(0, 0),
        xytext=(bbox, 0.5),
        size=12,
        color=c_pct,
        bbox=dict(
            ec=c_pct,
            boxstyle="round,pad=0.4",
            fc="w",
        ),
    )
fig
Add the styling:
# Final styling: axis limits, year tick labels, frame, baseline and ticks.

# Leave a little room to the left of x=0.
ax.set_xlim(left=-1)

#add the year labels
year_labels = df["year"].unique()
ax.yaxis.set_ticks(range(2), labels=year_labels)

# Hide the axes frame.
ax.set_frame_on(False)

#add the first vertical line
# Colored with the color stored on the row labeled 0 of df.
baseline_color = df["color"][0]
ax.vlines(x=0, ymin=-0.1, ymax=1.1, color=baseline_color, lw=1)

# No tick marks, no x tick labels; remaining tick labels use the shared
# tick-label color.
tick_style = {
    "axis": "both",
    "which": "both",
    "length": 0,
    "labelsize": 14,
    "colors": xy_ticklabel_color,
    "labelbottom": False,
}
ax.tick_params(**tick_style)
fig