Analysis of blob flow through validators, builders, and relays on Ethereum mainnet.

target_date = None  # Set via papermill, or auto-detect from manifest

# Injected Parameters
target_date = "2025-12-07"

import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc
from loaders import load_parquet
MIN_BLOCKS = 10  # Minimum blocks for entity filtering

# Load blob flow data
df_proposer_blobs = load_parquet("proposer_blobs", target_date)
# Fill missing values
df_proposer_blobs["proposer_entity"] = df_proposer_blobs["proposer_entity"].fillna("Unknown")
df_proposer_blobs["winning_relay"] = df_proposer_blobs["winning_relay"].fillna("Local/Unknown")
print(f"Total blocks: {len(df_proposer_blobs)}")
print(f"Unique proposer entities: {df_proposer_blobs['proposer_entity'].nunique()}")
print(f"Unique relays: {df_proposer_blobs['winning_relay'].nunique()}")Total blocks: 7135
Unique proposer entities: 264
Unique relays: 8
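As a quick sanity check before building the Sankey views, the raw blob-count distribution can be printed from the same dataframe (a minimal sketch, assuming blob_count is numeric as it is used throughout this notebook; output omitted):

# Blocks per blob count, lowest to highest
print(df_proposer_blobs["blob_count"].value_counts().sort_index())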
A Sankey diagram showing how different staking entities (pools, solo stakers) distribute their blocks across blob counts. Wider flows indicate more blocks; entities with fewer than MIN_BLOCKS (10) blocks are filtered out.
# Calculate block counts per entity
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
# Get entities that meet the threshold
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
# Filter the dataframe
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]
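# Note: this MIN_BLOCKS filter is repeated before the entity -> relay and
# three-stage diagrams below; a small helper could capture it
# (illustrative sketch only, not used by the rest of this notebook):
def filter_min_blocks(df, min_blocks=MIN_BLOCKS):
    counts = df.groupby("proposer_entity").size()
    keep = counts[counts >= min_blocks].index
    return df[df["proposer_entity"].isin(keep)]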
entity_blob_flow = (
df_filtered.groupby(["proposer_entity", "blob_count"])
.size()
.reset_index(name="block_count")
)
# Sort entities by total block count (descending)
entity_totals = entity_blob_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(entity_blob_flow["blob_count"].unique(), reverse=True) # Descending
# Create node labels: entities + blob counts (blob counts sorted descending)
entity_nodes = [f"E:{e}" for e in entities]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = entity_nodes + blob_nodes
# Create mapping from name to index
node_map = {name: idx for idx, name in enumerate(all_nodes)}
# Define x and y positions for nodes
n_entities = len(entity_nodes)
n_blobs = len(blob_nodes)
# Create color gradient for blob nodes (higher blob count = darker)
max_blob = max(blob_counts)
min_blob = min(blob_counts)
blob_colors = [
pc.sample_colorscale("Amp", (bc - min_blob) / (max_blob - min_blob) if max_blob > min_blob else 0.5)[0]
for bc in blob_counts
]
entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]
x_pos = []
y_pos = []
# Entity nodes on the left (x=0.01)
for i in range(n_entities):
x_pos.append(0.01)
y_pos.append((i + 0.5) / n_entities)
# Blob count nodes on the right (x=0.99), evenly spaced vertically (descending order)
for i in range(n_blobs):
x_pos.append(0.99)
y_pos.append((i + 0.5) / n_blobs)
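# Build one link per (entity, blob_count) pair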
sources = []
targets = []
values = []
for _, row in entity_blob_flow.iterrows():
e_node = f"E:{row['proposer_entity']}"
bc_node = f"{int(row['blob_count'])} blobs"
if e_node in node_map and bc_node in node_map:
sources.append(node_map[e_node])
targets.append(node_map[bc_node])
values.append(row["block_count"])
fig = go.Figure(
data=[
go.Sankey(
arrangement="snap",
node=dict(
pad=15,
thickness=20,
line=dict(color="black", width=0.5),
label=all_nodes,
x=x_pos,
y=y_pos,
color=entity_colors + blob_colors,
),
link=dict(source=sources, target=targets, value=values),
)
]
)
fig.update_layout(
title="Blob flow: Proposer Entity -> Blob Count",
font_size=12,
width=800,
height=3500,
)
fig.show()

Shows which MEV-boost relays are associated with different blob counts, revealing whether certain relays tend to produce blocks with more or fewer blobs.
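# Aggregate flows: winning_relay -> blob_count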
relay_blob_flow = (
df_proposer_blobs.groupby(["winning_relay", "blob_count"])
.size()
.reset_index(name="block_count")
)
# Sort relays by total block count (descending)
relay_totals = relay_blob_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(relay_blob_flow["blob_count"].unique(), reverse=True) # Descending
# Create node labels: relays + blob counts (blob counts sorted descending)
relay_nodes = [f"R:{r}" for r in relays]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = relay_nodes + blob_nodes
# Create mapping from name to index
node_map = {name: idx for idx, name in enumerate(all_nodes)}
# Define x and y positions for nodes
n_relays = len(relay_nodes)
n_blobs = len(blob_nodes)
# Create color gradient for blob nodes (higher blob count = darker)
max_blob = max(blob_counts)
min_blob = min(blob_counts)
blob_colors = [
pc.sample_colorscale("Amp", (bc - min_blob) / (max_blob - min_blob) if max_blob > min_blob else 0.5)[0]
for bc in blob_counts
]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]
x_pos = []
y_pos = []
# Relay nodes on the left (x=0.01)
for i in range(n_relays):
x_pos.append(0.01)
y_pos.append((i + 0.5) / n_relays)
# Blob count nodes on the right (x=0.99), evenly spaced vertically (descending order)
for i in range(n_blobs):
x_pos.append(0.99)
y_pos.append((i + 0.5) / n_blobs)
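# Build one link per (relay, blob_count) pair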
sources = []
targets = []
values = []
for _, row in relay_blob_flow.iterrows():
r_node = f"R:{row['winning_relay']}"
bc_node = f"{int(row['blob_count'])} blobs"
if r_node in node_map and bc_node in node_map:
sources.append(node_map[r_node])
targets.append(node_map[bc_node])
values.append(row["block_count"])
fig = go.Figure(
data=[
go.Sankey(
arrangement="snap",
node=dict(
pad=15,
thickness=20,
line=dict(color="black", width=0.5),
label=all_nodes,
x=x_pos,
y=y_pos,
color=relay_colors + blob_colors,
),
link=dict(source=sources, target=targets, value=values),
)
]
)
fig.update_layout(
title="Blob flow: Relay -> Blob Count",
font_size=12,
height=900,
)
fig.show()

Maps which staking entities use which relays, showing the relationship between validators and the MEV-boost relay infrastructure they rely on.
# Calculate block counts per entity
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
# Filter the dataframe
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]
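# Aggregate flows: proposer_entity -> winning_relay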
proposer_relay_flow = (
df_filtered.groupby(["proposer_entity", "winning_relay"])
.size()
.reset_index(name="block_count")
)
# Sort entities by total block count (descending)
entity_totals = proposer_relay_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
# Sort relays by total block count (descending)
relay_totals = proposer_relay_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()
# Create node labels: entities + relays
entity_nodes = [f"E:{e}" for e in entities]
relay_nodes = [f"R:{r}" for r in relays]
all_nodes = entity_nodes + relay_nodes
# Create mapping from name to index
node_map = {name: idx for idx, name in enumerate(all_nodes)}
# Define x and y positions for nodes
n_entities = len(entity_nodes)
n_relays = len(relay_nodes)
entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]
x_pos = []
y_pos = []
# Entity nodes on the left (x=0.01)
for i in range(n_entities):
x_pos.append(0.01)
y_pos.append((i + 0.5) / n_entities)
# Relay nodes on the right (x=0.99)
for i in range(n_relays):
x_pos.append(0.99)
y_pos.append((i + 0.5) / n_relays)
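# Build one link per (entity, relay) pair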
sources = []
targets = []
values = []
for _, row in proposer_relay_flow.iterrows():
e_node = f"E:{row['proposer_entity']}"
r_node = f"R:{row['winning_relay']}"
if e_node in node_map and r_node in node_map:
sources.append(node_map[e_node])
targets.append(node_map[r_node])
values.append(row["block_count"])
fig = go.Figure(
data=[
go.Sankey(
arrangement="snap",
node=dict(
pad=15,
thickness=20,
line=dict(color="black", width=0.5),
label=all_nodes,
x=x_pos,
y=y_pos,
color=entity_colors + relay_colors,
),
link=dict(source=sources, target=targets, value=values),
)
]
)
fig.update_layout(
title="Blob flow: Proposer Entity -> Relay",
font_size=12,
width=800,
height=3500,
)
fig.show()

The complete three-stage flow: from staking entities through relays to final blob counts. This view shows the full pipeline of how blobs move through the Ethereum block production ecosystem.
# Calculate block counts per entity
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
# Filter the dataframe
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]
# Aggregate flows: entity -> relay
entity_relay_flow = (
df_filtered.groupby(["proposer_entity", "winning_relay"])
.size()
.reset_index(name="block_count")
)
# Aggregate flows: relay -> blob_count
relay_blob_flow = (
df_filtered.groupby(["winning_relay", "blob_count"])
.size()
.reset_index(name="block_count")
)
# Sort entities by total block count (descending)
entity_totals = entity_relay_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
# Sort relays by total block count (descending)
relay_totals = relay_blob_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(df_filtered["blob_count"].unique(), reverse=True) # Descending
# Create node labels: entities + relays + blob counts
entity_nodes = [f"E:{e}" for e in entities]
relay_nodes = [f"R:{r}" for r in relays]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = entity_nodes + relay_nodes + blob_nodes
# Create mapping from name to index
node_map = {name: idx for idx, name in enumerate(all_nodes)}
# Define x and y positions for nodes (3 columns)
n_entities = len(entity_nodes)
n_relays = len(relay_nodes)
n_blobs = len(blob_nodes)
entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]
max_blob = max(blob_counts)
min_blob = min(blob_counts)
blob_colors = [
pc.sample_colorscale("Amp", (bc - min_blob) / (max_blob - min_blob) if max_blob > min_blob else 0.5)[0]
for bc in blob_counts
]
x_pos = []
y_pos = []
# Entity nodes on the left (x=0.01)
for i in range(n_entities):
x_pos.append(0.01)
y_pos.append((i + 0.5) / n_entities)
# Relay nodes in the middle (x=0.5)
for i in range(n_relays):
x_pos.append(0.5)
y_pos.append((i + 0.5) / n_relays)
# Blob count nodes on the right (x=0.99)
for i in range(n_blobs):
x_pos.append(0.99)
y_pos.append((i + 0.5) / n_blobs)
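# Links for both stages share one source/target/value list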
sources = []
targets = []
values = []
# Entity -> Relay links
for _, row in entity_relay_flow.iterrows():
e_node = f"E:{row['proposer_entity']}"
r_node = f"R:{row['winning_relay']}"
if e_node in node_map and r_node in node_map:
sources.append(node_map[e_node])
targets.append(node_map[r_node])
values.append(row["block_count"])
# Relay -> Blob count links
for _, row in relay_blob_flow.iterrows():
r_node = f"R:{row['winning_relay']}"
bc_node = f"{int(row['blob_count'])} blobs"
if r_node in node_map and bc_node in node_map:
sources.append(node_map[r_node])
targets.append(node_map[bc_node])
values.append(row["block_count"])
fig = go.Figure(
data=[
go.Sankey(
arrangement="snap",
node=dict(
pad=15,
thickness=30,
line=dict(color="black", width=0.5),
label=all_nodes,
x=x_pos,
y=y_pos,
color=entity_colors + relay_colors + blob_colors,
),
link=dict(source=sources, target=targets, value=values),
)
]
)
fig.update_layout(
title=f"Blob flow: Proposer Entity -> Relay -> Blob Count (min {MIN_BLOCKS} blocks)",
font_size=12,
width=800,
height=3500,
)
fig.show()
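If these figures need to live outside the notebook, each one can also be exported as a standalone interactive HTML file via Plotly's write_html (a sketch; the filename is illustrative):

fig.write_html("blob_flow_entity_relay_blob.html")  # hypothetical output path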