Advent of Code: Leaderboard Stats

from bs4 import BeautifulSoup
import requests
import polars as pl


def get_leaderboard(day, year=2023):
    """Scrape one day's Advent of Code leaderboard into a Polars DataFrame.

    Args:
        day: Puzzle day number, interpolated into the leaderboard URL.
        year: Event year. Used in the URL and prepended to the scraped
            timestamps before they are parsed. Defaults to 2023.

    Returns:
        A DataFrame with one row per ``div.leaderboard-entry`` element on
        the page. ``user_id``, ``position``, ``username`` and ``photo_url``
        are strings (``username`` / ``photo_url`` may be null); ``time`` is
        parsed into a datetime.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    url = f"https://adventofcode.com/{year}/leaderboard/day/{day}"

    # A timeout keeps a stalled connection from hanging the whole run, and
    # raise_for_status() stops an error page from being parsed as if it
    # were a leaderboard.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Pass raw bytes so BeautifulSoup detects the document encoding itself.
    # response.text falls back to ISO-8859-1 when the Content-Type header
    # carries no charset, which garbles non-ASCII usernames.
    soup = BeautifulSoup(response.content, 'html.parser')

    def _stripped_text(elem):
        """Return the element's stripped text, or None if it is missing."""
        return elem.text.strip() if elem is not None else None

    data = []
    for entry in soup.find_all('div', class_='leaderboard-entry'):
        photo_elem = entry.find('span', class_='leaderboard-userphoto')
        img_elem = photo_elem.find('img') if photo_elem is not None else None

        data.append({
            'user_id': entry['data-user-id'],
            'position': _stripped_text(
                entry.find('span', class_='leaderboard-position')
            ),
            'time': _stripped_text(
                entry.find('span', class_='leaderboard-time')
            ),
            # NOTE(review): this takes the first link in the entry, so
            # entries without a link get a null username -- confirm that
            # this is the intended notion of "username".
            'username': _stripped_text(entry.find('a', href=True)),
            'photo_url': img_elem['src'] if img_elem is not None else None,
        })

    # An explicit schema keeps the frame well-formed when the page has no
    # entries or when a column is entirely null, so per-day frames always
    # share the same dtypes.
    schema = {
        name: pl.Utf8
        for name in ('user_id', 'position', 'time', 'username', 'photo_url')
    }

    # The scraped timestamps carry no year, so prepend it before parsing.
    df = pl.DataFrame(data, schema=schema).with_columns(
        (pl.lit(f"{year} ") + pl.col("time"))
        .str.to_datetime(format="%Y %b %d  %H:%M:%S")
        .alias("time")
    )
    return df
# Fetch the leaderboards for days 1 through 8 (one request per day) and
# stack them vertically into a single frame.
overall_leaderboard = pl.concat(
    list(map(get_leaderboard, range(1, 9)))
)
overall_leaderboard
shape: (1_600, 5)
user_idpositiontimeusernamephoto_url
strstrdatetime[μs]strstr
"962724""1)"2023-12-01 00:02:24null"https://avatar…
"484659""2)"2023-12-01 00:02:36"Craig Gidney""https://avatar…
"67806""3)"2023-12-01 00:02:45"jonathanpaulso…"https://avatar…
"26414""4)"2023-12-01 00:03:03null"https://avatar…
"644347""5)"2023-12-01 00:03:12"Miriam Vellaco…"https://avatar…
"1041498""6)"2023-12-01 00:03:13null"https://lh3.go…
"939552""7)"2023-12-01 00:03:17null"https://lh3.go…
"3051132""8)"2023-12-01 00:03:46null"https://avatar…
"984536""9)"2023-12-01 00:03:50"Enterprize1""https://avatar…
"967770""10)"2023-12-01 00:03:56"(AoC++)""https://avatar…
"2375057""11)"2023-12-01 00:04:02nullnull
"107707""12)"2023-12-01 00:04:05"MÃ¥ns Magnusso…"https://avatar…
"111831""89)"2023-12-08 00:03:24"jebouin""https://avatar…
"389935""90)"2023-12-08 00:03:24"Gabriel Kanega…"https://avatar…
"482358""91)"2023-12-08 00:03:25"Robert Usher""https://avatar…
"1563443""92)"2023-12-08 00:03:26nullnull
"2337747""93)"2023-12-08 00:03:26"grhkm21""https://avatar…
"43100""94)"2023-12-08 00:03:27"glguy""https://avatar…
"665432""95)"2023-12-08 00:03:28null"https://avatar…
"1017671""96)"2023-12-08 00:03:28"TimHuisman1703…"https://avatar…
"1561553""97)"2023-12-08 00:03:28"PoustouFlan""https://avatar…
"967770""98)"2023-12-08 00:03:29"(AoC++)""https://avatar…
"133627""99)"2023-12-08 00:03:29nullnull
"1196605""100)"2023-12-08 00:03:30"matthewyu01""https://avatar…
# Derive per-entry plotting columns from the solve timestamp.
# total_seconds is the time elapsed since midnight of the entry's day.
# Each component is cast to Int32 before multiplying because the dt.*
# accessors can return narrow integer types in which `minute * 60` would
# overflow, and the hour is included so times past 01:00:00 do not wrap.
plot_df = overall_leaderboard.with_columns(
    pl.col("time").dt.minute().alias("minute"),
    pl.col("time").dt.second().alias("seconds"),
    pl.col("time").dt.day().alias("day"),
    (
        pl.col("time").dt.hour().cast(pl.Int32) * 3600
        + pl.col("time").dt.minute().cast(pl.Int32) * 60
        + pl.col("time").dt.second().cast(pl.Int32)
    ).alias("total_seconds"),
)
import altair as alt
# Faceted histogram: one row per puzzle day, bars counting leaderboard
# entries per binned elapsed time.
alt.Chart(plot_df.to_pandas()).mark_bar(opacity=0.5).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=100),
    y=alt.Y("count(user_id)").stack(False).title(None),
    color=alt.Color("day:N").legend(None),
    # The facet title's font size is a header property. Passing fontSize
    # to .title() alongside the title text does not apply it.
    row=alt.Row("day:N").title("Advent of Code Day").header(titleFontSize=24),
).properties(
    width=500,
    height=60,
    title=alt.Title(
        "Advent of Code: Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)
import altair as alt

# Render embedded charts with the dark theme.
alt.renderers.set_embed_options(theme='dark')

# Heatmap: one row per puzzle day, one cell per elapsed-time bin, coloured
# by the number of leaderboard entries falling in that bin.
fig = alt.Chart(plot_df.to_pandas()).mark_rect(
    stroke="#222222", strokeWidth=1, cornerRadius=3
).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=80),
    # NOTE(review): the count is encoded as nominal (:N), giving a discrete
    # legend -- confirm this is preferred over a quantitative (:Q) scale.
    color=alt.Color("count(total_seconds):N")
    .legend(orient="top", labelFontSize=10, titleAlign='left')
    .scale(scheme="yellowgreenblue"),
    # The axis title's font size is an axis property. Passing fontSize to
    # .title() alongside the title text does not apply it.
    y=alt.Y("day:O").title("Advent of Code Day").axis(titleFontSize=24),
).properties(
    width=1000,
    title=alt.Title(
        "Advent of Code: Top 100 Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)

# Write a standalone HTML copy, then display the chart inline.
fig.save("aoc_leaderboard_stats.html")
fig