# Advent of Code: Leaderboard Stats
from bs4 import BeautifulSoup
import requests
import polars as pl
def get_leaderboard(day, year=2023):
    """Scrape one day's Advent of Code leaderboard page into a DataFrame.

    Args:
        day: Puzzle day number, interpolated into the leaderboard URL.
        year: Event year, used both in the URL and to complete the scraped
            timestamps (the page text is parsed as month, day and clock
            time only). Defaults to 2023.

    Returns:
        A Polars DataFrame with one row per ``div.leaderboard-entry``
        element and the columns ``user_id``, ``position``, ``time``,
        ``username`` and ``photo_url``. ``time`` is parsed to a datetime;
        the remaining columns are strings (null where the element is
        missing from the entry).

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not answer within the timeout.
        KeyError: If an entry lacks the ``data-user-id`` attribute.
    """
    url = f"https://adventofcode.com/{year}/leaderboard/day/{day}"

    # Fail fast on a hung connection or an error page instead of silently
    # parsing whatever came back.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes rather than ``response.text``: when the
    # server sends no charset, requests may decode the body with a Latin-1
    # fallback, which garbles non-ASCII usernames (e.g. "Må…" -> "MÃ¥…").
    soup = BeautifulSoup(response.content, "html.parser")

    rows = []
    for entry in soup.find_all("div", class_="leaderboard-entry"):
        position_elem = entry.find("span", class_="leaderboard-position")
        time_elem = entry.find("span", class_="leaderboard-time")
        username_elem = entry.find("a", href=True)
        photo_elem = entry.find("span", class_="leaderboard-userphoto")
        img_elem = photo_elem.find("img") if photo_elem is not None else None

        rows.append({
            "user_id": entry["data-user-id"],
            "position": position_elem.text.strip() if position_elem is not None else None,
            "time": time_elem.text.strip() if time_elem is not None else None,
            "username": username_elem.text.strip() if username_elem is not None else None,
            "photo_url": img_elem["src"] if img_elem is not None else None,
        })

    # An explicit all-string schema keeps the frame well-formed even when the
    # page has no entries or a column is entirely null, so the ``time``
    # conversion below and a later ``pl.concat`` across days keep working.
    schema = {
        name: pl.Utf8
        for name in ("user_id", "position", "time", "username", "photo_url")
    }
    return pl.DataFrame(rows, schema=schema).with_columns(
        (pl.lit(f"{year} ") + pl.col("time"))
        .str.to_datetime(format="%Y %b %d %H:%M:%S")
        .alias("time")
    )
# Fetch the leaderboards for days 1 through 8 and stack them into one frame.
overall_leaderboard = pl.concat(list(map(get_leaderboard, range(1, 9))))
overall_leaderboard
shape: (1_600, 5)
user_id | position | time | username | photo_url |
---|---|---|---|---|
str | str | datetime[μs] | str | str |
"962724" | "1)" | 2023-12-01 00:02:24 | null | "https://avatar… |
"484659" | "2)" | 2023-12-01 00:02:36 | "Craig Gidney" | "https://avatar… |
"67806" | "3)" | 2023-12-01 00:02:45 | "jonathanpaulso… | "https://avatar… |
"26414" | "4)" | 2023-12-01 00:03:03 | null | "https://avatar… |
"644347" | "5)" | 2023-12-01 00:03:12 | "Miriam Vellaco… | "https://avatar… |
"1041498" | "6)" | 2023-12-01 00:03:13 | null | "https://lh3.go… |
"939552" | "7)" | 2023-12-01 00:03:17 | null | "https://lh3.go… |
"3051132" | "8)" | 2023-12-01 00:03:46 | null | "https://avatar… |
"984536" | "9)" | 2023-12-01 00:03:50 | "Enterprize1" | "https://avatar… |
"967770" | "10)" | 2023-12-01 00:03:56 | "(AoC++)" | "https://avatar… |
"2375057" | "11)" | 2023-12-01 00:04:02 | null | null |
"107707" | "12)" | 2023-12-01 00:04:05 | "MÃ¥ns Magnusso… | "https://avatar… |
… | … | … | … | … |
"111831" | "89)" | 2023-12-08 00:03:24 | "jebouin" | "https://avatar… |
"389935" | "90)" | 2023-12-08 00:03:24 | "Gabriel Kanega… | "https://avatar… |
"482358" | "91)" | 2023-12-08 00:03:25 | "Robert Usher" | "https://avatar… |
"1563443" | "92)" | 2023-12-08 00:03:26 | null | null |
"2337747" | "93)" | 2023-12-08 00:03:26 | "grhkm21" | "https://avatar… |
"43100" | "94)" | 2023-12-08 00:03:27 | "glguy" | "https://avatar… |
"665432" | "95)" | 2023-12-08 00:03:28 | null | "https://avatar… |
"1017671" | "96)" | 2023-12-08 00:03:28 | "TimHuisman1703… | "https://avatar… |
"1561553" | "97)" | 2023-12-08 00:03:28 | "PoustouFlan" | "https://avatar… |
"967770" | "98)" | 2023-12-08 00:03:29 | "(AoC++)" | "https://avatar… |
"133627" | "99)" | 2023-12-08 00:03:29 | null | null |
"1196605" | "100)" | 2023-12-08 00:03:30 | "matthewyu01" | "https://avatar… |
# Derive the clock components of each finish time plus the elapsed seconds
# since midnight, which the charts below bin on.
plot_df = overall_leaderboard.with_columns(
    pl.col("time").dt.minute().alias("minute"),
    pl.col("time").dt.second().alias("seconds"),
    pl.col("time").dt.day().alias("day"),
    (
        # Include the hour so finishes at or after 01:00:00 do not wrap back
        # to zero, and widen to Int32 first: the temporal accessors can
        # return narrow integer types that overflow when multiplied.
        pl.col("time").dt.hour().cast(pl.Int32) * 3600
        + pl.col("time").dt.minute().cast(pl.Int32) * 60
        + pl.col("time").dt.second().cast(pl.Int32)
    ).alias("total_seconds"),
)
import altair as alt
# Small-multiple histograms: one row per day, counting leaderboard entries
# per elapsed-time bin.
alt.Chart(plot_df.to_pandas()).mark_bar(opacity=0.5).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=100),
    y=alt.Y("count(user_id)").stack(False).title(None),
    color=alt.Color("day:N").legend(None),
    # The facet title's font size is a header property; passing it as a
    # keyword to ``.title("…", fontSize=24)`` was silently discarded.
    row=alt.Row("day:N").header(title="Advent of Code Day", titleFontSize=24),
).properties(
    width=500,
    height=60,
    title=alt.Title(
        "Advent of Code: Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)
import altair as alt
# Render embedded charts with the dark theme.
alt.renderers.set_embed_options(theme='dark')

# Heat map: one row per day, one rectangle per elapsed-time bin, coloured by
# the number of leaderboard entries that fall into the bin.
fig = alt.Chart(plot_df.to_pandas()).mark_rect(
    stroke="#222222", strokeWidth=1, cornerRadius=3
).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=80),
    color=alt.Color("count(total_seconds):N")
    .legend(orient="top", labelFontSize=10, titleAlign='left')
    .scale(scheme="yellowgreenblue"),
    # The axis title's font size is an axis property; passing it as a keyword
    # to ``.title("…", fontSize=24)`` was silently discarded.
    y=alt.Y("day:O").axis(title="Advent of Code Day", titleFontSize=24),
).properties(
    width=1000,
    title=alt.Title(
        "Advent of Code: Top 100 Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)

# Write a standalone HTML copy, then display the chart in the notebook.
fig.save("aoc_leaderboard_stats.html")
fig