Blob Flow

Analysis of blob flow through validators, builders, and relays on Ethereum mainnet.

Show code

# Default report date. Papermill injects a concrete value (see the injected
# parameters cell); when left as None the date is auto-detected from the manifest.
target_date = None

Show code

# Injected Parameters
# Date (YYYY-MM-DD) supplied for this run; replaces the None default set earlier.
target_date = "2025-12-07"

Show code

import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc

from loaders import load_parquet

# Entity filter threshold: entities with fewer than this many blocks are
# dropped from the entity-level Sankey diagrams.
MIN_BLOCKS = 10

Show code

# Load the proposer/relay/blob table for the selected date
df_proposer_blobs = load_parquet("proposer_blobs", target_date)

# Give missing labels an explicit placeholder so those rows keep a group of
# their own (a groupby would otherwise drop rows whose key is missing)
for column, placeholder in (
    ("proposer_entity", "Unknown"),
    ("winning_relay", "Local/Unknown"),
):
    df_proposer_blobs[column] = df_proposer_blobs[column].fillna(placeholder)

# Quick sanity summary of what was loaded
print(f"Total blocks: {len(df_proposer_blobs)}")
print(f"Unique proposer entities: {df_proposer_blobs['proposer_entity'].nunique()}")
print(f"Unique relays: {df_proposer_blobs['winning_relay'].nunique()}")

Total blocks: 7135
Unique proposer entities: 264
Unique relays: 8

Proposer Entity -> Blob Count

Sankey diagram showing how different staking entities (pools, solo stakers) distribute their blocks across blob counts. Wider flows indicate more blocks. Entities with fewer than 10 blocks are filtered out.

Show code

# Sankey diagram: proposer entity (left column) -> blob count (right column).
# Each link's width is the number of blocks in that (entity, blob count) pair.

# Keep only entities with at least MIN_BLOCKS blocks
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]

# One row per (entity, blob_count) pair with the number of blocks in it
entity_blob_flow = (
    df_filtered.groupby(["proposer_entity", "blob_count"])
    .size()
    .reset_index(name="block_count")
)

# Node order: entities by total block count (descending), blob counts descending
entity_totals = entity_blob_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(entity_blob_flow["blob_count"].unique(), reverse=True)

# "E:" marks entity nodes; blob nodes are labelled "<n> blobs"
entity_nodes = [f"E:{e}" for e in entities]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = entity_nodes + blob_nodes

# Node label -> position in all_nodes (links reference nodes by this index)
node_map = {name: idx for idx, name in enumerate(all_nodes)}

n_entities = len(entity_nodes)
n_blobs = len(blob_nodes)

# Colour gradient for blob nodes (higher blob count = darker).
# default=0 keeps max()/min() from raising when no entity passes the
# MIN_BLOCKS filter; the result is then an empty diagram instead of a crash.
max_blob = max(blob_counts, default=0)
min_blob = min(blob_counts, default=0)
blob_span = max_blob - min_blob
blob_colors = [
    # A single distinct blob count has no range to scale over: use the midpoint
    pc.sample_colorscale("Amp", (bc - min_blob) / blob_span if blob_span > 0 else 0.5)[0]
    for bc in blob_counts
]
entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]

# Entities on the left (x=0.01), blob counts on the right (x=0.99).
# (i + 0.5) / n centres node i in the i-th of n equal vertical slots.
x_pos = [0.01] * n_entities + [0.99] * n_blobs
y_pos = [(i + 0.5) / n_entities for i in range(n_entities)] + [
    (i + 0.5) / n_blobs for i in range(n_blobs)
]

# Build the links column-wise instead of with iterrows(). Every entity and
# blob count in entity_blob_flow was used to build node_map above, so the
# lookups cannot miss.
sources = [node_map[f"E:{e}"] for e in entity_blob_flow["proposer_entity"]]
targets = [node_map[f"{int(bc)} blobs"] for bc in entity_blob_flow["blob_count"]]
values = entity_blob_flow["block_count"].tolist()

fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="snap",
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=all_nodes,
                x=x_pos,
                y=y_pos,
                color=entity_colors + blob_colors,
            ),
            link=dict(source=sources, target=targets, value=values),
        )
    ]
)
fig.update_layout(
    title="Blob flow: Proposer Entity -> Blob Count",
    font_size=12,
    width=800,
    height=3500,
)
fig.show()

Relay -> Blob Count

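This diagram shows how the blocks attributed to each MEV-boost relay are distributed across blob counts, revealing whether certain relays tend to be associated with blocks carrying more or fewer blobs. Blocks with no recorded relay are grouped under "Local/Unknown".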

Show code

# Sankey diagram: winning relay (left column) -> blob count (right column).
# Uses all blocks (no MIN_BLOCKS filter); each link's width is the number of
# blocks in that (relay, blob count) pair.
relay_blob_flow = (
    df_proposer_blobs.groupby(["winning_relay", "blob_count"])
    .size()
    .reset_index(name="block_count")
)

# Node order: relays by total block count (descending), blob counts descending
relay_totals = relay_blob_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(relay_blob_flow["blob_count"].unique(), reverse=True)

# "R:" marks relay nodes; blob nodes are labelled "<n> blobs"
relay_nodes = [f"R:{r}" for r in relays]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = relay_nodes + blob_nodes

# Node label -> position in all_nodes (links reference nodes by this index)
node_map = {name: idx for idx, name in enumerate(all_nodes)}

n_relays = len(relay_nodes)
n_blobs = len(blob_nodes)

# Colour gradient for blob nodes (higher blob count = darker).
# default=0 keeps max()/min() from raising on an empty table; the result is
# then an empty diagram instead of a crash.
max_blob = max(blob_counts, default=0)
min_blob = min(blob_counts, default=0)
blob_span = max_blob - min_blob
blob_colors = [
    # A single distinct blob count has no range to scale over: use the midpoint
    pc.sample_colorscale("Amp", (bc - min_blob) / blob_span if blob_span > 0 else 0.5)[0]
    for bc in blob_counts
]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]

# Relays on the left (x=0.01), blob counts on the right (x=0.99).
# (i + 0.5) / n centres node i in the i-th of n equal vertical slots.
x_pos = [0.01] * n_relays + [0.99] * n_blobs
y_pos = [(i + 0.5) / n_relays for i in range(n_relays)] + [
    (i + 0.5) / n_blobs for i in range(n_blobs)
]

# Build the links column-wise instead of with iterrows(). Every relay and
# blob count in relay_blob_flow was used to build node_map above, so the
# lookups cannot miss.
sources = [node_map[f"R:{r}"] for r in relay_blob_flow["winning_relay"]]
targets = [node_map[f"{int(bc)} blobs"] for bc in relay_blob_flow["blob_count"]]
values = relay_blob_flow["block_count"].tolist()

fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="snap",
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=all_nodes,
                x=x_pos,
                y=y_pos,
                color=relay_colors + blob_colors,
            ),
            link=dict(source=sources, target=targets, value=values),
        )
    ]
)
fig.update_layout(
    title="Blob flow: Relay -> Blob Count",
    font_size=12,
    height=900,
)
fig.show()

Proposer Entity -> Relay

This diagram maps which staking entities use which relays, showing the relationship between validators and the MEV-boost relay infrastructure they rely on. Entities with fewer than 10 blocks are filtered out.

Show code

# Sankey diagram: proposer entity (left column) -> winning relay (right column).

# Restrict to entities with at least MIN_BLOCKS blocks
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]

# Number of blocks for every (entity, relay) combination
proposer_relay_flow = (
    df_filtered.groupby(["proposer_entity", "winning_relay"])
    .size()
    .reset_index(name="block_count")
)

# Both columns are ordered by total block count, largest first
entity_totals = proposer_relay_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
relay_totals = proposer_relay_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()

# Node labels: "E:" for entities, "R:" for relays
entity_nodes = [f"E:{e}" for e in entities]
relay_nodes = [f"R:{r}" for r in relays]
all_nodes = entity_nodes + relay_nodes

# Label -> node index lookup used when wiring up the links
node_map = {name: idx for idx, name in enumerate(all_nodes)}

n_entities = len(entity_nodes)
n_relays = len(relay_nodes)

# Cycle through the qualitative palettes when there are more nodes than colours
entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]

# Entities at x=0.01, relays at x=0.99; each column is spread evenly over
# the vertical axis, with node i centred in slot i
x_pos = [0.01] * n_entities + [0.99] * n_relays
y_pos = [(i + 0.5) / n_entities for i in range(n_entities)] + [
    (i + 0.5) / n_relays for i in range(n_relays)
]

# One link per row of proposer_relay_flow. All of its entities and relays
# are nodes (node_map was built from the same frame), so no lookup can miss.
sources = [node_map[f"E:{e}"] for e in proposer_relay_flow["proposer_entity"]]
targets = [node_map[f"R:{r}"] for r in proposer_relay_flow["winning_relay"]]
values = proposer_relay_flow["block_count"].tolist()

fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="snap",
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=all_nodes,
                x=x_pos,
                y=y_pos,
                color=entity_colors + relay_colors,
            ),
            link=dict(source=sources, target=targets, value=values),
        )
    ]
)
fig.update_layout(
    title="Blob flow: Proposer Entity -> Relay",
    font_size=12,
    width=800,
    height=3500,
)
fig.show()

Proposer Entity -> Relay -> Blob Count

Complete three-stage flow: from staking entities through relays to final blob counts. This comprehensive view shows the full pipeline of how blobs flow through the Ethereum block production ecosystem.

Show code

# Three-column Sankey diagram: proposer entity -> winning relay -> blob count.
# Both link stages are aggregated from the same filtered frame.

# Keep only entities with at least MIN_BLOCKS blocks
entity_block_counts = df_proposer_blobs.groupby("proposer_entity").size()
valid_entities = entity_block_counts[entity_block_counts >= MIN_BLOCKS].index
df_filtered = df_proposer_blobs[df_proposer_blobs["proposer_entity"].isin(valid_entities)]

# Stage 1 flows: blocks per (entity, relay) pair
entity_relay_flow = (
    df_filtered.groupby(["proposer_entity", "winning_relay"])
    .size()
    .reset_index(name="block_count")
)

# Stage 2 flows: blocks per (relay, blob_count) pair
relay_blob_flow = (
    df_filtered.groupby(["winning_relay", "blob_count"])
    .size()
    .reset_index(name="block_count")
)

# Node order: entities and relays by total block count (descending),
# blob counts descending
entity_totals = entity_relay_flow.groupby("proposer_entity")["block_count"].sum()
entities = entity_totals.sort_values(ascending=False).index.tolist()
relay_totals = relay_blob_flow.groupby("winning_relay")["block_count"].sum()
relays = relay_totals.sort_values(ascending=False).index.tolist()
blob_counts = sorted(df_filtered["blob_count"].unique(), reverse=True)

# "E:" marks entity nodes, "R:" relay nodes; blob nodes are labelled "<n> blobs"
entity_nodes = [f"E:{e}" for e in entities]
relay_nodes = [f"R:{r}" for r in relays]
blob_nodes = [f"{int(bc)} blobs" for bc in blob_counts]
all_nodes = entity_nodes + relay_nodes + blob_nodes

# Node label -> position in all_nodes (links reference nodes by this index)
node_map = {name: idx for idx, name in enumerate(all_nodes)}

n_entities = len(entity_nodes)
n_relays = len(relay_nodes)
n_blobs = len(blob_nodes)

entity_colors = [pc.qualitative.Plotly[i % len(pc.qualitative.Plotly)] for i in range(n_entities)]
relay_colors = [pc.qualitative.Pastel[i % len(pc.qualitative.Pastel)] for i in range(n_relays)]

# Colour gradient for blob nodes (higher blob count = darker).
# default=0 keeps max()/min() from raising when no entity passes the
# MIN_BLOCKS filter; the result is then an empty diagram instead of a crash.
max_blob = max(blob_counts, default=0)
min_blob = min(blob_counts, default=0)
blob_span = max_blob - min_blob
blob_colors = [
    # A single distinct blob count has no range to scale over: use the midpoint
    pc.sample_colorscale("Amp", (bc - min_blob) / blob_span if blob_span > 0 else 0.5)[0]
    for bc in blob_counts
]

# Entities at x=0.01, relays at x=0.5, blob counts at x=0.99.
# (i + 0.5) / n centres node i in the i-th of n equal vertical slots.
x_pos = [0.01] * n_entities + [0.5] * n_relays + [0.99] * n_blobs
y_pos = (
    [(i + 0.5) / n_entities for i in range(n_entities)]
    + [(i + 0.5) / n_relays for i in range(n_relays)]
    + [(i + 0.5) / n_blobs for i in range(n_blobs)]
)

# Build the links column-wise instead of with iterrows(): entity -> relay
# links first, then relay -> blob count links. Both flow tables and all node
# lists come from df_filtered, so the node_map lookups cannot miss.
sources = [node_map[f"E:{e}"] for e in entity_relay_flow["proposer_entity"]] + [
    node_map[f"R:{r}"] for r in relay_blob_flow["winning_relay"]
]
targets = [node_map[f"R:{r}"] for r in entity_relay_flow["winning_relay"]] + [
    node_map[f"{int(bc)} blobs"] for bc in relay_blob_flow["blob_count"]
]
values = entity_relay_flow["block_count"].tolist() + relay_blob_flow["block_count"].tolist()

fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="snap",
            node=dict(
                pad=15,
                thickness=30,
                line=dict(color="black", width=0.5),
                label=all_nodes,
                x=x_pos,
                y=y_pos,
                color=entity_colors + relay_colors + blob_colors,
            ),
            link=dict(source=sources, target=targets, value=values),
        )
    ]
)
fig.update_layout(
    title=f"Blob flow: Proposer Entity -> Relay -> Blob Count (min {MIN_BLOCKS} blocks)",
    font_size=12,
    width=800,
    height=3500,
)
fig.show()