# %% [markdown]
# # Advent of Code: Leaderboard Stats

# %%
from bs4 import BeautifulSoup
import requests
import polars as pl


# Column layout of the scraped rows. Pinning every column to a string dtype
# keeps the frame shape stable when a page yields no rows or a column is
# entirely null, so per-day frames can always be concatenated.
_LEADERBOARD_SCHEMA = {
    "user_id": pl.Utf8,
    "position": pl.Utf8,
    "time": pl.Utf8,
    "username": pl.Utf8,
    "photo_url": pl.Utf8,
}

# Anchors that decorate an entry but are not the user's profile link.
# NOTE(review): class names taken from the site's markup as I remember it —
# confirm against a live page. If they do not match, the lookup simply falls
# back to the first anchor, i.e. the previous behaviour.
_BADGE_CLASSES = frozenset({"supporter-badge", "sponsor-badge"})


def _stripped_text(element):
    """Return the whitespace-stripped text of a tag, or None if the tag is missing."""
    return element.text.strip() if element else None


def _parse_entry(entry):
    """Convert one ``div.leaderboard-entry`` tag into a row dictionary."""
    # Skip badge links such as "(AoC++)" so they are not mistaken for a name.
    profile_link = None
    for link in entry.find_all('a', href=True):
        if not _BADGE_CLASSES.intersection(link.get('class') or []):
            profile_link = link
            break

    photo_elem = entry.find('span', class_='leaderboard-userphoto')
    img = photo_elem.find('img') if photo_elem else None

    return {
        'user_id': entry.get('data-user-id'),
        'position': _stripped_text(entry.find('span', class_='leaderboard-position')),
        'time': _stripped_text(entry.find('span', class_='leaderboard-time')),
        'username': _stripped_text(profile_link),
        'photo_url': img.get('src') if img else None,
    }


def get_leaderboard(day, year=2023):
    """Scrape one day's Advent of Code leaderboard into a Polars DataFrame.

    Parameters
    ----------
    day : int
        Puzzle day whose leaderboard page is fetched.
    year : int, optional
        Event year used both in the URL and as the year of the parsed
        timestamps. Defaults to 2023.

    Returns
    -------
    polars.DataFrame
        One row per ``div.leaderboard-entry`` on the page with the columns
        ``user_id``, ``position``, ``username``, ``photo_url`` (strings, null
        when the element is absent) and ``time`` (datetime). ``username`` is
        null for entries that have no profile link. A page without entries
        produces an empty frame with the same columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    url = f"https://adventofcode.com/{year}/leaderboard/day/{day}"

    # A timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status keeps an error page from being parsed as a leaderboard.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse the raw bytes as UTF-8: `response.text` relied on a guessed
    # charset and produced mojibake for non-ASCII names ("MÃ¥ns").
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    data = [
        _parse_entry(entry)
        for entry in soup.find_all('div', class_='leaderboard-entry')
    ]

    # The page shows month, day and clock time only, so the year is prepended
    # before parsing.
    return pl.DataFrame(data, schema=_LEADERBOARD_SCHEMA).with_columns(
        (pl.lit(f"{year} ") + pl.col("time"))
        .str.to_datetime(format="%Y %b %d %H:%M:%S")
        .alias("time")
    )


# %%
overall_leaderboard = pl.concat([get_leaderboard(day) for day in range(1, 9)])
"cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1_600, 5)
user_idpositiontimeusernamephoto_url
strstrdatetime[μs]strstr
"962724""1)"2023-12-01 00:02:24null"https://avatar…
"484659""2)"2023-12-01 00:02:36"Craig Gidney""https://avatar…
"67806""3)"2023-12-01 00:02:45"jonathanpaulso…"https://avatar…
"26414""4)"2023-12-01 00:03:03null"https://avatar…
"644347""5)"2023-12-01 00:03:12"Miriam Vellaco…"https://avatar…
"1041498""6)"2023-12-01 00:03:13null"https://lh3.go…
"939552""7)"2023-12-01 00:03:17null"https://lh3.go…
"3051132""8)"2023-12-01 00:03:46null"https://avatar…
"984536""9)"2023-12-01 00:03:50"Enterprize1""https://avatar…
"967770""10)"2023-12-01 00:03:56"(AoC++)""https://avatar…
"2375057""11)"2023-12-01 00:04:02nullnull
"107707""12)"2023-12-01 00:04:05"MÃ¥ns Magnusso…"https://avatar…
"111831""89)"2023-12-08 00:03:24"jebouin""https://avatar…
"389935""90)"2023-12-08 00:03:24"Gabriel Kanega…"https://avatar…
"482358""91)"2023-12-08 00:03:25"Robert Usher""https://avatar…
"1563443""92)"2023-12-08 00:03:26nullnull
"2337747""93)"2023-12-08 00:03:26"grhkm21""https://avatar…
"43100""94)"2023-12-08 00:03:27"glguy""https://avatar…
"665432""95)"2023-12-08 00:03:28null"https://avatar…
"1017671""96)"2023-12-08 00:03:28"TimHuisman1703…"https://avatar…
"1561553""97)"2023-12-08 00:03:28"PoustouFlan""https://avatar…
"967770""98)"2023-12-08 00:03:29"(AoC++)""https://avatar…
"133627""99)"2023-12-08 00:03:29nullnull
"1196605""100)"2023-12-08 00:03:30"matthewyu01""https://avatar…
# %%
overall_leaderboard

# %%
# Derive the plotting columns from the timestamp. `total_seconds` is the
# time of day expressed in seconds.
plot_df = overall_leaderboard.with_columns(
    pl.col("time").dt.minute().alias("minute"),
    pl.col("time").dt.second().alias("seconds"),
    pl.col("time").dt.day().alias("day"),
).with_columns(
    (
        # Include the hour so times of one hour or more are not truncated to
        # their minute/second remainder.
        # Cast before multiplying: the component dtypes can be narrow integers
        # (NOTE(review): believed to be Int8 in recent Polars — confirm), and
        # minute * 60 would not fit in such a type.
        pl.col("time").dt.hour().cast(pl.Int32) * 3600
        + pl.col("minute").cast(pl.Int32) * 60
        + pl.col("seconds").cast(pl.Int32)
    ).alias("total_seconds")
)
# %%
import altair as alt

# One small histogram of elapsed seconds per day, stacked vertically as facet
# rows and coloured by day.
alt.Chart(plot_df.to_pandas()).mark_bar(opacity=0.5).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=100),
    y=alt.Y("count(user_id)").stack(False).title(None),
    color=alt.Color("day:N").legend(None),
    # The facet title size belongs to the header. Passing `fontSize` to
    # `.title(...)` next to the positional text was silently discarded.
    row=alt.Row("day:N").title("Advent of Code Day").header(titleFontSize=24),
).properties(
    width=500,
    height=60,
    title=alt.Title(
        "Advent of Code: Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)
# %%
import altair as alt

# Applies to every chart rendered after this point in the session.
alt.renderers.set_embed_options(theme='dark')

# Heatmap: one row per day, one cell per elapsed-time bin, coloured by the
# number of leaderboard entries falling into that bin.
fig = alt.Chart(plot_df.to_pandas()).mark_rect(
    stroke="#222222", strokeWidth=1, cornerRadius=3
).encode(
    x=alt.X("total_seconds").title("Elapsed Time (seconds)").bin(maxbins=80),
    color=alt.Color("count(total_seconds):N")
    .legend(orient="top", labelFontSize=10, titleAlign='left')
    .scale(scheme="yellowgreenblue"),
    # The axis title size is an axis property. Passing `fontSize` to
    # `.title(...)` next to the positional text was silently discarded.
    y=alt.Y("day:O").title("Advent of Code Day").axis(titleFontSize=24),
).properties(
    width=1000,
    title=alt.Title(
        "Advent of Code: Top 100 Leaderboard Speed Distribution",
        subtitle="Count of Users by Solution time from Midnight, by Day",
        fontSize=24,
        subtitleFontSize=18,
        anchor="start",
    ),
)

# Write a standalone HTML copy next to the notebook, then display the chart.
fig.save("aoc_leaderboard_stats.html")
fig