{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Advent of Code: Leaderboard Stats" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [], "source": [ "from bs4 import BeautifulSoup\n", "import requests\n", "import polars as pl\n", "\n", "\n", "def get_leaderboard(day):\n", " # URL of the website\n", " url = f\"https://adventofcode.com/2023/leaderboard/day/{day}\" # Replace with the actual URL\n", "\n", " # Fetch HTML content from the website\n", " response = requests.get(url)\n", " html_content = response.text\n", "\n", " # Parse HTML with BeautifulSoup\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", "\n", " # Extract data from HTML and create a list of dictionaries\n", " data = []\n", " for entry in soup.find_all('div', class_='leaderboard-entry'):\n", " user_id = entry['data-user-id']\n", " position_elem = entry.find('span', class_='leaderboard-position')\n", " time_elem = entry.find('span', class_='leaderboard-time')\n", " username_elem = entry.find('a', href=True)\n", " photo_elem = entry.find('span', class_='leaderboard-userphoto')\n", " \n", " position = position_elem.text.strip() if position_elem else None\n", " time = time_elem.text.strip() if time_elem else None\n", " username = username_elem.text.strip() if username_elem else None\n", " photo_url = photo_elem.find('img')['src'] if photo_elem and photo_elem.find('img') else None\n", " \n", " data.append({\n", " 'user_id': user_id,\n", " 'position': position,\n", " 'time': time,\n", " 'username': username,\n", " 'photo_url': photo_url\n", " })\n", "\n", " # Create a Polars DataFrame\n", " df = pl.DataFrame(data).with_columns(\n", " (pl.lit(\"2023 \") + pl.col(\"time\")).str.to_datetime(format=\"%Y %b %d %H:%M:%S\").alias(\"time\")\n", " )\n", " return df" ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [], "source": [ "overall_leaderboard = pl.concat([get_leaderboard(day) for day in [1,2,3,4,5,6,7,8]])" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
user_id | position | time | username | photo_url |
---|---|---|---|---|
str | str | datetime[μs] | str | str |
"962724" | "1)" | 2023-12-01 00:02:24 | null | "https://avatar… |
"484659" | "2)" | 2023-12-01 00:02:36 | "Craig Gidney" | "https://avatar… |
"67806" | "3)" | 2023-12-01 00:02:45 | "jonathanpaulso… | "https://avatar… |
"26414" | "4)" | 2023-12-01 00:03:03 | null | "https://avatar… |
"644347" | "5)" | 2023-12-01 00:03:12 | "Miriam Vellaco… | "https://avatar… |
"1041498" | "6)" | 2023-12-01 00:03:13 | null | "https://lh3.go… |
"939552" | "7)" | 2023-12-01 00:03:17 | null | "https://lh3.go… |
"3051132" | "8)" | 2023-12-01 00:03:46 | null | "https://avatar… |
"984536" | "9)" | 2023-12-01 00:03:50 | "Enterprize1" | "https://avatar… |
"967770" | "10)" | 2023-12-01 00:03:56 | "(AoC++)" | "https://avatar… |
"2375057" | "11)" | 2023-12-01 00:04:02 | null | null |
"107707" | "12)" | 2023-12-01 00:04:05 | "MÃ¥ns Magnusso… | "https://avatar… |
… | … | … | … | … |
"111831" | "89)" | 2023-12-08 00:03:24 | "jebouin" | "https://avatar… |
"389935" | "90)" | 2023-12-08 00:03:24 | "Gabriel Kanega… | "https://avatar… |
"482358" | "91)" | 2023-12-08 00:03:25 | "Robert Usher" | "https://avatar… |
"1563443" | "92)" | 2023-12-08 00:03:26 | null | null |
"2337747" | "93)" | 2023-12-08 00:03:26 | "grhkm21" | "https://avatar… |
"43100" | "94)" | 2023-12-08 00:03:27 | "glguy" | "https://avatar… |
"665432" | "95)" | 2023-12-08 00:03:28 | null | "https://avatar… |
"1017671" | "96)" | 2023-12-08 00:03:28 | "TimHuisman1703… | "https://avatar… |
"1561553" | "97)" | 2023-12-08 00:03:28 | "PoustouFlan" | "https://avatar… |
"967770" | "98)" | 2023-12-08 00:03:29 | "(AoC++)" | "https://avatar… |
"133627" | "99)" | 2023-12-08 00:03:29 | null | null |
"1196605" | "100)" | 2023-12-08 00:03:30 | "matthewyu01" | "https://avatar… |