Generate the data and import packages
First, we need to create the data. I'll start by defining it as a dictionary and then convert it into a pandas DataFrame, since pandas is commonly used in many projects for data manipulation.
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
# Per-country lookups: marker edge / label colour and two-letter country code.
color_dict = {"Norway": "#2B314D", "Denmark": "#A54836", "Sweden": "#5375D4"}
code_dict = {"Norway": "NO", "Denmark": "DK", "Sweden": "SE"}

# Shared styling colours.
xy_ticklabel_color, xlabel_color, grid_color, datalabels_color = (
    "#757C85",
    "#101628",
    "#E9ECED",
    "#FFFFFF",
)

# Raw data: number of sites per country for the two years being compared.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Denmark", "Denmark", "Norway", "Norway", "Sweden", "Sweden"],
    "sites": [4, 10, 5, 8, 13, 15],
}

df = pd.DataFrame(data)

# Each country's share of the total number of sites within the same year.
df["pct_total"] = df["sites"] / df.groupby("year")["sites"].transform("sum")

# Use a stable sort: the default quicksort does not guarantee that rows with
# the same year keep their input order, and the positional fill_colors list
# below relies on exactly that order.
df = df.sort_values(["year"], ascending=True, kind="stable").reset_index(drop=True)

# Map the dict values onto the dataframe by country name.
df["ctry_code"] = df["countries"].map(code_dict)
df["edge_colors"] = df["countries"].map(color_dict)

# Assigned by row position after the sort: the three 2004 rows get a white
# fill, the three 2022 rows get a solid fill.
df["fill_colors"] = ["w", "w", "w", "#CC5A43", "#2C324F", "#5375D4"]
df
| | year | countries | sites | pct_total | ctry_code | edge_colors | fill_colors |
|---|---|---|---|---|---|---|---|
| 0 | 2004 | Denmark | 4 | 0.181818 | DK | #A54836 | w |
| 1 | 2004 | Norway | 5 | 0.227273 | NO | #2B314D | w |
| 2 | 2004 | Sweden | 13 | 0.590909 | SE | #5375D4 | w |
| 3 | 2022 | Denmark | 10 | 0.303030 | DK | #A54836 | #CC5A43 |
| 4 | 2022 | Norway | 8 | 0.242424 | NO | #2B314D | #2C324F |
| 5 | 2022 | Sweden | 15 | 0.454545 | SE | #5375D4 | #5375D4 |
Plot the chart
# Square white canvas with generous padding around the single axes.
fig, ax = plt.subplots(figsize=(5, 5), facecolor="#FFFFFF")
fig.tight_layout(pad=6.0)

# Text styling shared by every country label.
label_style = dict(size=8, ha="left", va="bottom")

# One scatter call per year; x is the share of sites, y is the site count.
for idx, (year, year_rows) in enumerate(df.groupby("year", sort=False)):
    ax.scatter(
        year_rows["pct_total"].tolist(),
        year_rows["sites"].tolist(),
        s=50,
        color=year_rows["fill_colors"],
        ec=year_rows["edge_colors"],
        zorder=3,
        clip_on=False,
    )

    # Only the second year group (2022 in this data) gets country labels.
    if idx != 1:
        continue

    # Nudge each label up and to the right so it does not sit on its marker.
    for x_val, y_val, code, edge in zip(
        year_rows["pct_total"],
        year_rows["sites"],
        year_rows["ctry_code"],
        year_rows["edge_colors"],
    ):
        ax.text(x_val + 0.01, y_val + .5, code, color=edge, **label_style)
Add the connecting arcs:
# Curvature of the connectors and the bend direction for each country,
# indexed in the (alphabetical) order that groupby("countries") yields.
rad = .5
direction_arc = [1, 1, -1]

for j, (country, country_rows) in enumerate(df.groupby("countries")):
    ys = country_rows["sites"].tolist()
    xs = country_rows["pct_total"].tolist()

    # Thin dashed, semi-transparent curve in the country's edge colour;
    # arrowstyle "-" draws a plain line with no arrow head.
    connector_style = dict(
        arrowstyle="-",
        connectionstyle=f"arc3,rad={rad * direction_arc[j]:.2f}",
        color=country_rows["edge_colors"].iloc[0],
        alpha=0.5,
        linewidth=.5,
        linestyle="--",
    )

    # An empty-text annotation is used purely to draw the curve from the
    # country's second row (xytext) to its first row (xy).
    ax.annotate(
        "",
        xy=(xs[0], ys[0]),
        xytext=(xs[1], ys[1]),
        zorder=1,
        annotation_clip=False,
        arrowprops=connector_style,
    )
fig
Add the legend:
# Set chart limits: leave headroom to the right of the largest share.
ax.set(xlim=(0, df["pct_total"].max() + 0.2), ylim=(0, 20))

# Proxy markers for the year legend: a white-filled and a grey-filled circle,
# both with a grey edge, paired in order with the unique years in df.
color_legend = ["w", "#838B93"]
marker_edge_color = ["#838B93", "#838B93"]
lines = [
    Line2D(
        [0],
        [0],
        color=c,
        marker="o",
        linestyle="",
        markeredgecolor=ec,
        markersize=10,
    )
    for c, ec in zip(color_legend, marker_edge_color)
]

# Attach the legend to `fig` explicitly. plt.figlegend() targets pyplot's
# *current* figure, which is not necessarily `fig` (e.g. in a notebook the
# figure from an earlier cell has been closed), so the legend could end up
# on a new, empty figure instead of on the chart.
fig.legend(
    lines,
    [str(year) for year in df["year"].unique()],
    bbox_to_anchor=(0.5, -0.02),
    loc="lower center",
    ncols=2,
    frameon=False,
    fontsize=10,
)
fig
<Figure size 640x480 with 0 Axes>
Add the final styling:
# Fixed tick positions with hand-written labels (counts on y, percentages on x).
y_tick_positions = np.arange(0, 25, 5)
x_tick_positions = np.arange(0, 0.8, 0.2)
ax.yaxis.set_ticks(y_tick_positions, labels=[0, 5, 10, 15, 20])
ax.xaxis.set_ticks(x_tick_positions, labels=['0', '20%', '40%', '60%'])

# Blend the frame into the grid: same colour, drawn beneath the data.
for side in ('top', 'bottom', 'left', 'right'):
    spine = ax.spines[side]
    spine.set_color(grid_color)
    spine.set_zorder(0)

# Hide tick marks and keep only small grey tick labels.
tick_style = dict(axis='both', which='major', length=0, labelsize=8, colors="#9BA0A6")
ax.tick_params(**tick_style)

ax.grid(which='major', color=grid_color, zorder=0)

# Axis titles share the muted tick-label colour.
axis_title_style = dict(size=8, color=xy_ticklabel_color)
ax.set_xlabel("Share of Scandinavian Sites", **axis_title_style)
ax.set_ylabel("Number of sites", **axis_title_style)
fig