Generate the data and import packages
First, we need to create the data. I'll start by defining it as a dictionary and then convert it into a pandas DataFrame, since pandas is commonly used in many projects for data manipulation.
#tutorial
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import matplotlib.patches as patches
import numpy as np
import pandas as pd
# Fill colour for each country's bubbles and legend marker.
color_dict = {
    "Norway": "#2B314D",
    "Denmark": "#A54836",
    "Sweden": "#5375D4",
}

# Shared colours for text, totals, the dotted guide circles and data labels.
xy_ticklabel_color = '#101628'
grand_totals_color = "#101628"
grid_color = "#E8EBEC"
datalabels_color = "#FFFFFF"

# Raw observations: number of sites per country for each of the two years.
data = {
    "year": [2004, 2022, 2004, 2022, 2004, 2022],
    "countries": ["Denmark", "Denmark", "Norway", "Norway", "Sweden", "Sweden"],
    "sites": [4, 10, 5, 8, 13, 15],
}

# Build the frame ordered by country, then year, with a clean 0..n-1 index.
df = (
    pd.DataFrame(data)
    .sort_values(['countries', 'year'], ascending=True)
    .reset_index(drop=True)
)

# Total number of sites per year, repeated on every row of that year.
df['sub_total'] = df.groupby('year')['sites'].transform('sum')

# Map the colours of the dict onto the dataframe rows.
df['color'] = df['countries'].map(color_dict)

# Notebook-style display of the resulting frame.
df
| | year | countries | sites | sub_total | color |
|---|---|---|---|---|---|
| 0 | 2004 | Denmark | 4 | 22 | #A54836 |
| 1 | 2022 | Denmark | 10 | 33 | #A54836 |
| 2 | 2004 | Norway | 5 | 22 | #2B314D |
| 3 | 2022 | Norway | 8 | 33 | #2B314D |
| 4 | 2004 | Sweden | 13 | 22 | #5375D4 |
| 5 | 2022 | Sweden | 15 | 33 | #5375D4 |
Function to pack bubbles:
class BubbleChart:
    """
    Pack circles of given areas around their common center of mass.

    Each row of ``self.bubbles`` stores ``[x, y, radius, area]`` for one
    bubble. Bubbles start on a square grid and are moved towards the
    area-weighted center of mass by `collapse`.
    """

    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Raises
        ------
        ValueError
            If *area* is empty.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        if area.size == 0:
            # Fail with a clear message instead of numpy's opaque
            # "zero-size array to reduction operation" error below.
            raise ValueError("area must contain at least one bubble")
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        # Grid pitch: wide enough that even two of the largest bubbles,
        # plus the requested spacing, do not overlap initially.
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    @staticmethod
    def _unit_vector(vec):
        """Return *vec* scaled to length 1, or None if its length is 0."""
        norm = np.sqrt(vec.dot(vec))
        if norm == 0:
            return None
        return vec / norm

    def center_of_mass(self):
        """Return the area-weighted mean (x, y) position of all bubbles."""
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        """Return the distances from *bubble*'s center to each center in *bubbles*."""
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """
        Return the gaps between *bubble*'s outline and each outline in *bubbles*.

        The configured ``bubble_spacing`` is subtracted, so a negative value
        means the pair is closer than allowed.
        """
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        """Return how many of *bubbles* are closer to *bubble* than allowed."""
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        """Return a 1-element array with the index of the nearest bubble in *bubbles*."""
        distance = self.outline_distance(bubble, bubbles)
        return np.argmin(distance, keepdims=True)

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _ in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # unit direction vector from bubble to the center of mass
                dir_vec = self._unit_vector(self.com - self.bubbles[i, :2])
                if dir_vec is None:
                    # The bubble sits exactly on the center of mass (always
                    # the case for a single bubble). There is no direction
                    # to move in; normalising would give 0/0 = NaN.
                    continue

                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # unit direction vector towards the colliding bubble
                        dir_vec = self._unit_vector(
                            rest_bub[colliding, :2] - self.bubbles[i, :2])
                        if dir_vec is None:
                            # Coincident centers: no tangent direction.
                            continue
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            # When fewer than 10% of the bubbles made a direct move, halve
            # the step so later iterations can close smaller gaps.
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """
        Draw the bubble plot.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
        labels : list
            Labels of the bubbles.
        colors : list
            Colors of the bubbles.
        """
        for i, bubble in enumerate(self.bubbles):
            circ = plt.Circle(bubble[:2], bubble[2], color=colors[i])
            ax.add_patch(circ)
            ax.text(*bubble[:2], labels[i], color="w",
                    horizontalalignment='center', verticalalignment='center')
Define the center of the dotted circles and titles manually:
# Hand-tuned layout values, one entry per panel (index 0 = first year,
# index 1 = second year).
x_coord_circle = [0.8, 1.2]  # x of the dotted circle's center and of the sub-total label
radie_circle = [4, 5]  # radius of the dotted circle
y_sub_text = [-1.2, -2.2]  # y of the sub-total label
Plot the chart
# One panel per year, side by side, sharing the x axis.
fig, axes = plt.subplots(ncols=2, sharex=True)

for i, (ax, (year, group)) in enumerate(zip(axes.ravel(), df.groupby("year"))):
    sites = group['sites'].tolist()
    bubble_colors = group['color'].tolist()
    sub_total = group['sub_total'].iloc[0]

    # Purely cosmetic axes settings: no frame, no ticks, no tick labels.
    ax.set_frame_on(False)
    ax.tick_params(length=0, labelleft=False, labelbottom=False)

    # Add the dotted circle first so the packed bubbles are drawn on top of it.
    circle_center = (x_coord_circle[i], 3)
    circle = patches.Circle(
        circle_center,
        radius=radie_circle[i],
        color=grid_color,
        alpha=0.6,
        fc="w",
        linewidth=2,
        clip_on=False,
        ls="dotted",
    )
    ax.add_patch(circle)

    # Fixed view window; equal aspect keeps the circles round.
    ax.set_xlim(-5, 8)
    ax.set_ylim(-3, 9)
    ax.set_aspect("equal")

    # Add the packed bubbles, labelled with their own site counts.
    bubble_chart = BubbleChart(area=sites, bubble_spacing=0.1)
    bubble_chart.collapse()
    bubble_chart.plot(ax, sites, bubble_colors)

    ax.relim()
    ax.autoscale_view()

    # Add the sub-total label for this year.
    ax.text(
        x_coord_circle[i],
        y_sub_text[i],
        sub_total,
        color=xy_ticklabel_color,
    )

    # Add the year as the panel title.
    ax.set_title(
        year,
        y=1.2,
        weight="bold",
        color=xy_ticklabel_color,
    )
Add legends:
# add legend
# One round marker per distinct colour, in order of first appearance in df.
lines = []
for legend_color in df["color"].unique():
    handle = Line2D(
        [0], [0],
        color=legend_color,
        marker='o',
        linestyle='',
        markersize=10,
    )
    lines.append(handle)

# Country names in order of first appearance in df.
labels = df["countries"].unique().tolist()

# Single-row, frameless legend centred along the bottom edge of the figure.
legend_options = dict(
    labelcolor=xy_ticklabel_color,
    bbox_to_anchor=(0.5, 0),
    loc="lower center",
    ncols=3,
    frameon=False,
    fontsize=10,
)
fig.legend(lines, labels, **legend_options)

# Notebook-style display of the finished figure.
fig