initial commit

This commit is contained in:
2025-08-27 11:23:48 -05:00
commit 5cecc6e280
5 changed files with 1159 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
/*.csv
/*.numbers

376
2025-csyba.json Normal file
View File

@@ -0,0 +1,376 @@
[{
"teamName": "Carol Stream Cheaties",
"team_id": "8944347",
"team_slug": "carol-stream-cheaties",
"subseason_id": "942425",
"instance_id": "10119604",
"w": "15",
"l": "2",
"t": "1",
"rf": "139",
"ra": "41",
"division_record": "10-2-1",
"division": "North",
"link": "https://www.csyba.com/page/show/8944347-carol-stream-cheaties?subseason=942425"
},
{
"teamName": "Deerfield Dynasty",
"team_id": "8944348",
"team_slug": "deerfield-dynasty",
"subseason_id": "942425",
"instance_id": "10119605",
"w": "15",
"l": "3",
"t": "0",
"rf": "152",
"ra": "52",
"division_record": "12-2-0",
"division": "North",
"link": "https://www.csyba.com/page/show/8944348-deerfield-dynasty?subseason=942425"
},
{
"teamName": "Buffalo Grove Marlins",
"team_id": "8944344",
"team_slug": "buffalo-grove-marlins",
"subseason_id": "942425",
"instance_id": "10119601",
"w": "15",
"l": "3",
"t": "0",
"rf": "127",
"ra": "47",
"division_record": "11-2-0",
"division": "North",
"link": "https://www.csyba.com/page/show/8944344-buffalo-grove-marlins?subseason=942425"
},
{
"teamName": "Buffalo Grove White Sox",
"team_id": "8944346",
"team_slug": "buffalo-grove-white-sox",
"subseason_id": "942425",
"instance_id": "10119603",
"w": "12",
"l": "5",
"t": "0",
"rf": "140",
"ra": "58",
"division_record": "10-5-0",
"division": "North",
"link": "https://www.csyba.com/page/show/8944346-buffalo-grove-white-sox?subseason=942425"
},
{
"teamName": "Arlington Hts Shamrocks",
"team_id": "8944342",
"team_slug": "arlington-hts-shamrocks",
"subseason_id": "942425",
"instance_id": "10119599",
"w": "9",
"l": "9",
"t": "3",
"rf": "120",
"ra": "119",
"division_record": "5-8-3",
"division": "North",
"link": "https://www.csyba.com/page/show/8944342-arlington-hts-shamrocks?subseason=942425"
},
{
"teamName": "Waukegan Alacranes",
"team_id": "9024497",
"team_slug": "waukegan-alacranes",
"subseason_id": "942425",
"instance_id": "10185021",
"w": "7",
"l": "7",
"t": "2",
"rf": "96",
"ra": "88",
"division_record": "6-5-2",
"division": "North",
"link": "https://www.csyba.com/page/show/9024497-waukegan-alacranes?subseason=942425"
},
{
"teamName": "Palatine Pelicans",
"team_id": "8944350",
"team_slug": "palatine-pelicans",
"subseason_id": "942425",
"instance_id": "10119607",
"w": "6",
"l": "10",
"t": "2",
"rf": "91",
"ra": "128",
"division_record": "3-9-2",
"division": "North",
"link": "https://www.csyba.com/page/show/8944350-palatine-pelicans?subseason=942425"
},
{
"teamName": "Buffalo Grove Blue Wahoos",
"team_id": "9071622",
"team_slug": "buffalo-grove-blue-wahoos",
"subseason_id": "942425",
"instance_id": "10219990",
"w": "5",
"l": "10",
"t": "1",
"rf": "57",
"ra": "115",
"division_record": "3-8-0",
"division": "North",
"link": "https://www.csyba.com/page/show/9071622-buffalo-grove-blue-wahoos?subseason=942425"
},
{
"teamName": "Arlington Hts Freeze",
"team_id": "8944343",
"team_slug": "arlington-hts-freeze",
"subseason_id": "942425",
"instance_id": "10119600",
"w": "6",
"l": "13",
"t": "0",
"rf": "87",
"ra": "116",
"division_record": "3-11-0",
"division": "North",
"link": "https://www.csyba.com/page/show/8944343-arlington-hts-freeze?subseason=942425"
},
{
"teamName": "Buffalo Grove Orioles",
"team_id": "8944345",
"team_slug": "buffalo-grove-orioles",
"subseason_id": "942425",
"instance_id": "10119602",
"w": "2",
"l": "16",
"t": "0",
"rf": "76",
"ra": "175",
"division_record": "1-12-0",
"division": "North",
"link": "https://www.csyba.com/page/show/8944345-buffalo-grove-orioles?subseason=942425"
},
{
"teamName": "Dunham Dash",
"team_id": "8944355",
"team_slug": "dunham-dash",
"subseason_id": "942425",
"instance_id": "10119611",
"w": "12",
"l": "3",
"t": "0",
"rf": "117",
"ra": "57",
"division_record": "9-0-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8944355-dunham-dash?subseason=942425"
},
{
"teamName": "Skokie Vikings",
"team_id": "8944360",
"team_slug": "skokie-vikings",
"subseason_id": "942425",
"instance_id": "10119616",
"w": "9",
"l": "6",
"t": "1",
"rf": "93",
"ra": "72",
"division_record": "6-3-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8944360-skokie-vikings?subseason=942425"
},
{
"teamName": "Elmhurst White Sox",
"team_id": "8944356",
"team_slug": "elmhurst-white-sox",
"subseason_id": "942425",
"instance_id": "10119612",
"w": "4",
"l": "3",
"t": "0",
"rf": "35",
"ra": "31",
"division_record": "3-2-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8944356-elmhurst-white-sox?subseason=942425"
},
{
"teamName": "Lombard Expos",
"team_id": "8974790",
"team_slug": "lombard-expos",
"subseason_id": "942425",
"instance_id": "10148204",
"w": "8",
"l": "7",
"t": "1",
"rf": "97",
"ra": "68",
"division_record": "5-4-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8974790-lombard-expos?subseason=942425"
},
{
"teamName": "Chicago Rebels",
"team_id": "8974058",
"team_slug": "chicago-rebels",
"subseason_id": "942425",
"instance_id": "10147713",
"w": "9",
"l": "9",
"t": "0",
"rf": "104",
"ra": "81",
"division_record": "6-4-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8974058-chicago-rebels?subseason=942425"
},
{
"teamName": "Westchester Knights",
"team_id": "8944361",
"team_slug": "westchester-knights",
"subseason_id": "942425",
"instance_id": "10119617",
"w": "5",
"l": "10",
"t": "0",
"rf": "82",
"ra": "155",
"division_record": "4-4-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8944361-westchester-knights?subseason=942425"
},
{
"teamName": "Melrose Park Thorns",
"team_id": "9014143",
"team_slug": "melrose-park-thorns",
"subseason_id": "942425",
"instance_id": "10178191",
"w": "5",
"l": "12",
"t": "1",
"rf": "106",
"ra": "139",
"division_record": "3-7-0",
"division": "South",
"link": "https://www.csyba.com/page/show/9014143-melrose-park-thorns?subseason=942425"
},
{
"teamName": "Bedford Park Bombers",
"team_id": "8944352",
"team_slug": "bedford-park-bombers",
"subseason_id": "942425",
"instance_id": "10119608",
"w": "3",
"l": "12",
"t": "0",
"rf": "48",
"ra": "133",
"division_record": "2-6-0",
"division": "South",
"link": "https://www.csyba.com/page/show/8944352-bedford-park-bombers?subseason=942425"
},
{
"teamName": "Skokie Classics",
"team_id": "8944359",
"team_slug": "skokie-classics",
"subseason_id": "942425",
"instance_id": "10119615",
"w": "5",
"l": "15",
"t": "1",
"rf": "105",
"ra": "177",
"division_record": "4-6-1",
"division": "South",
"link": "https://www.csyba.com/page/show/8944359-skokie-classics?subseason=942425"
},
{
"teamName": "Park Ridge White Sox",
"team_id": "8944358",
"team_slug": "park-ridge-white-sox",
"subseason_id": "942425",
"instance_id": "10119614",
"w": "1",
"l": "11",
"t": "3",
"rf": "42",
"ra": "142",
"division_record": "0-6-1",
"division": "South",
"link": "https://www.csyba.com/page/show/8944358-park-ridge-white-sox?subseason=942425"
},
{
"teamName": "Chicago White Sox",
"team_id": "9002208",
"team_slug": "chicago-white-sox",
"subseason_id": "942425",
"instance_id": "10168648",
"w": "19",
"l": "6",
"t": "0",
"rf": "162",
"ra": "73",
"division_record": "10-4-0",
"division": "CMBA",
"link": "https://www.csyba.com/page/show/9002208-chicago-white-sox?subseason=942425"
},
{
"teamName": "Chicago Blazers",
"team_id": "9002204",
"team_slug": "chicago-blazers",
"subseason_id": "942425",
"instance_id": "10168644",
"w": "17",
"l": "7",
"t": "0",
"rf": "239",
"ra": "94",
"division_record": "9-4-0",
"division": "CMBA",
"link": "https://www.csyba.com/page/show/9002204-chicago-blazers?subseason=942425"
},
{
"teamName": "Chicago Electrons",
"team_id": "9002205",
"team_slug": "chicago-electrons",
"subseason_id": "942425",
"instance_id": "10168645",
"w": "16",
"l": "6",
"t": "2",
"rf": "170",
"ra": "112",
"division_record": "9-4-0",
"division": "CMBA",
"link": "https://www.csyba.com/page/show/9002205-chicago-electrons?subseason=942425"
},
{
"teamName": "Chicago Hounds",
"team_id": "9002206",
"team_slug": "chicago-hounds",
"subseason_id": "942425",
"instance_id": "10168646",
"w": "15",
"l": "11",
"t": "0",
"rf": "182",
"ra": "126",
"division_record": "7-8-0",
"division": "CMBA",
"link": "https://www.csyba.com/page/show/9002206-chicago-hounds?subseason=942425"
},
{
"teamName": "Chicago Hawks",
"team_id": "9002209",
"team_slug": "chicago-hawks",
"subseason_id": "942425",
"instance_id": "10168649",
"w": "1",
"l": "25",
"t": "2",
"rf": "87",
"ra": "355",
"division_record": "0-15-0",
"division": "CMBA",
"link": "https://www.csyba.com/page/show/9002209-chicago-hawks?subseason=942425"
}
]

464
build_season_schedule.py Normal file
View File

@@ -0,0 +1,464 @@
#!/usr/bin/env python3
# build_season_schedule.py
#
# Build a deduped season schedule from SportsEngine team-instance printable pages.
# - Assumes team-instance schedule pages are TEAM-FIRST for scores.
# - Determines home/away using the '@' marker on the opponent cell.
# - Deduplicates primarily by game_id (from /game/show/<id> links), otherwise by a fallback key.
# - Optionally fetches each game's time from the /game/show/<id> page ("tab_boxscores_content").
#
# Usage:
# pip install requests beautifulsoup4 python-dateutil
# python build_season_schedule.py --subseason 942425 --teams teams.json --out season_schedule.csv
#
# Example teams.json (array):
# [
# {"teamName":"Carol Stream Cheaties","team_id":"8944347","team_slug":"carol-stream-cheaties","subseason_id":"942425","instance_id":"10119604"},
# ...
# ]
import argparse
import csv
import json
import logging
import re
import time
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlencode
import requests
from bs4 import BeautifulSoup
from dateutil import parser as dtp
# ----------------- logging -----------------
logging.basicConfig(
    level=logging.INFO,  # change to DEBUG for verbose tracing
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)
# ----------------- constants -----------------
# Browser-like User-Agent; sent on every request below.
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SE-Schedule/1.3 Safari/537.36"
HEADERS = {"User-Agent": UA}
# URL templates: per-team printable schedule page and individual game page.
PRINT_BASE = "https://www.csyba.com/schedule/print/team_instance/{iid}"
GAME_BASE = "https://www.csyba.com/game/show/{gid}"
# "7-3"-style score anywhere in a cell's text.
SCORE_RE = re.compile(r"\b(\d+)\s*[-]\s*(\d+)\b")
# Numeric game id from a /game/show/<id> link.
GAME_LINK_RE = re.compile(r"/game/show/(\d+)")
# Clock time like "6:00 pm" or "18:00"; the am/pm group is optional.
TIME_RE = re.compile(r"\b(\d{1,2}:\d{2})\s*([ap]\.?m\.?|AM|PM)?\b", re.I)
# ----------------- helpers -----------------
def clean(x: str) -> str:
    """Normalize whitespace: collapse internal runs to one space, strip the edges."""
    collapsed = re.sub(r"\s+", " ", x or "")
    return collapsed.strip()
def slugify(s: str) -> str:
    """Lowercase *s* and turn every non-alphanumeric run into a single dash."""
    lowered = s.lower()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
    return dashed.strip("-")
def norm_name(s: str) -> str:
    """Aggressively normalize a team name for fuzzy matching.

    Lowercases, replaces punctuation with spaces, removes generic filler
    words (the/club/team/baseball/...), and collapses whitespace.
    """
    depunctuated = re.sub(r"[^a-z0-9 ]+", " ", s.lower())
    filtered = re.sub(
        r"\b(the|club|team|ll|little league|baseball|softball|youth|athletic|athletics|rec|rec\.)\b",
        " ",
        depunctuated,
    )
    return re.sub(r"\s+", " ", filtered).strip()
@dataclass(frozen=True)
class TeamRec:
    """Immutable record for one team in one subseason (loaded from teams.json)."""
    # Display name, e.g. "Carol Stream Cheaties".
    name: str
    # URL slug, e.g. "carol-stream-cheaties".
    slug: str
    # SportsEngine team id (the number in the team page URL).
    team_id: str
    # Team-instance id used by the printable-schedule endpoint.
    instance_id: str
    # Subseason this instance belongs to.
    subseason_id: str
def load_teams(teams_path: str):
    """Read teams.json and index it three ways.

    Returns (by_instance, by_slug, by_norm): dicts mapping instance id,
    slug, and normalized name to a TeamRec, respectively.
    """
    with open(teams_path, "r", encoding="utf-8") as fh:
        entries = json.load(fh)
    records = [
        TeamRec(
            name=str(entry["teamName"]),
            slug=str(entry["team_slug"]),
            team_id=str(entry["team_id"]),
            instance_id=str(entry["instance_id"]),
            subseason_id=str(entry["subseason_id"]),
        )
        for entry in entries
    ]
    by_instance = {rec.instance_id: rec for rec in records}
    by_slug = {rec.slug: rec for rec in records}
    by_norm = {norm_name(rec.name): rec for rec in records}
    return by_instance, by_slug, by_norm
def best_match_team(opponent_text: str, by_slug, by_norm) -> Optional[TeamRec]:
    """Resolve an opponent string to a TeamRec.

    Tries an exact slug match, then an exact normalized-name match, then a
    loose containment check in either direction; returns None on no match.
    """
    slug_hit = by_slug.get(slugify(opponent_text))
    if slug_hit is not None:
        return slug_hit
    normed = norm_name(opponent_text)
    norm_hit = by_norm.get(normed)
    if norm_hit is not None:
        return norm_hit
    # Last resort: substring containment either way.
    for candidate, rec in by_norm.items():
        if candidate in normed or normed in candidate:
            return rec
    return None
def runs_from_team_pov(result_flag: str, s_a: str, s_b: str):
    """Convert TEAM-FIRST score strings to ints without reordering.

    s_a is THIS team's runs and s_b the opponent's; a disagreement with the
    W/L flag is only logged at DEBUG level, never corrected.
    Returns (None, None) when either score is non-numeric.
    """
    if not s_a.isdigit() or not s_b.isdigit():
        return None, None
    team_runs = int(s_a)
    opp_runs = int(s_b)
    if result_flag == "W" and team_runs <= opp_runs:
        logging.debug(f"Result=W but team_runs<=opp_runs ({team_runs}-{opp_runs}); keeping as-is (team-first).")
    if result_flag == "L" and team_runs >= opp_runs:
        logging.debug(f"Result=L but team_runs>=opp_runs ({team_runs}-{opp_runs}); keeping as-is (team-first).")
    return team_runs, opp_runs
# ----------------- HTTP utils -----------------
def get_soup(url: str, session: Optional[requests.Session] = None, timeout: int = 30) -> Optional[BeautifulSoup]:
    """GET *url* and parse the body with html.parser.

    Uses the given session (or a throwaway one) with the module HEADERS.
    Any failure — network, HTTP status, or parse — is logged and yields None.
    """
    try:
        sess = session if session is not None else requests.Session()
        resp = sess.get(url, headers=HEADERS, timeout=timeout)
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")
    except Exception as exc:
        logging.error(f"GET failed {url}: {exc}")
        return None
# ----------------- scraping -----------------
def parse_printable(instance_id: str, subseason_id: str, session: requests.Session) -> List[dict]:
    """Parse one team-instance printable schedule page into perspective rows.

    Each returned dict is ONE team's view of one game (TEAM-FIRST scores);
    the caller later merges the two perspectives of the same game.
    Returns [] if the page cannot be fetched or contains no table.
    """
    url = PRINT_BASE.format(iid=instance_id) + "?" + urlencode({
        "schedule_type": "index",
        "subseason": subseason_id,
    })
    soup = get_soup(url, session=session)
    if not soup:
        return []
    table = soup.select_one("table")
    if not table:
        logging.warning(f"No table found for team_instance={instance_id}")
        return []
    games = []
    # Skip the header row; rows with fewer than 5 cells are not games.
    for row_idx, tr in enumerate(table.select("tr")[1:], start=1):
        tds = tr.select("td")
        if len(tds) < 5:
            continue
        # Cells: Date | Result | Opponent | Location | Status
        date_txt = clean(tds[0].get_text(" "))
        result_txt = clean(tds[1].get_text(" "))
        opp_txt = clean(tds[2].get_text(" "))
        loc_txt = clean(tds[3].get_text(" "))
        status_txt = clean(tds[4].get_text(" "))
        # Date → ISO; keep the raw text if it cannot be parsed.
        try:
            date_iso = dtp.parse(date_txt, fuzzy=True).date().isoformat()
        except Exception:
            date_iso = date_txt
        # Pull a game_id if present (from any link in the row)
        game_id = ""
        for a in tr.select("a[href]"):
            m = GAME_LINK_RE.search(a.get("href", ""))
            if m:
                game_id = m.group(1)
                break
        # Extract W/L/T (Result cell)
        m_res = re.search(r"\b(W|L|T)\b", result_txt, re.I)
        result_flag = m_res.group(1).upper() if m_res else ""
        # Extract score from Result cell; if missing, also try Opponent cell
        m_score = SCORE_RE.search(result_txt) or SCORE_RE.search(opp_txt)
        s_a, s_b = (m_score.group(1), m_score.group(2)) if m_score else ("", "")
        # Opponent + home/away flag ('@' prefix marks an away game)
        is_away = opp_txt.startswith("@")
        opponent_name = opp_txt.lstrip("@").strip()
        # Compute team/opp runs (TEAM-FIRST orientation)
        team_runs, opp_runs = runs_from_team_pov(result_flag, s_a, s_b)
        logging.debug(
            f"PARSER: inst={instance_id} row={row_idx} date={date_iso} "
            f"res={result_flag} scores=({s_a}-{s_b}) away={is_away} "
            f"→ team_runs={team_runs}, opp_runs={opp_runs}"
        )
        games.append({
            "team_instance": instance_id,
            "game_id": game_id,  # may be empty
            "date": date_iso,
            "result": result_flag,  # W/L/T from THIS TEAM's perspective
            "team_runs": team_runs,
            "opp_runs": opp_runs,
            "opponent_name": opponent_name,
            "is_away": is_away,
            "location": loc_txt,
            "status": status_txt,
            "source_url": url,
        })
    logging.info(f"Team {instance_id}: parsed {len(games)} rows")
    return games
def fetch_game_time(game_id: str, session: requests.Session) -> Optional[str]:
    """Fetch the game's local start time from the /game/show/<id> page.

    Looks inside the 'tab_boxscores_content' tab first, then falls back to
    scanning a bounded prefix of the page text for a clock-time pattern.

    Returns a zero-padded 24h 'HH:MM' string, or None when the page cannot
    be fetched, no time pattern is found, or the match cannot be normalized.
    """
    from datetime import datetime  # local: datetime is only needed here

    if not game_id:
        return None
    url = GAME_BASE.format(gid=game_id)
    soup = get_soup(url, session=session, timeout=30)
    if not soup:
        return None
    # Prefer the boxscores tab content (either id spelling is seen in the wild).
    box = soup.select_one("#tab_boxscores_content") or soup.select_one("#tab_boxscore_content")
    if box:
        text = " ".join(box.stripped_strings)
    else:
        # Fall back to page-wide text, truncated to avoid scanning huge pages.
        main = soup.select_one("div.page") or soup
        text = " ".join((main.get_text(" ", strip=True) or "")[:4000].split())
    m = TIME_RE.search(text)
    if not m:
        logging.debug(f"TIME: no time found in game {game_id}")
        return None
    hhmm = m.group(1)
    ampm = (m.group(2) or "").lower().replace(".", "")
    # Normalize to 24h HH:MM. With an am/pm marker, parse as 12h (with a
    # no-space variant as a fallback); otherwise assume it is already 24h.
    candidates = (
        [(f"{hhmm} {ampm.upper()}", "%I:%M %p"), (f"{hhmm}{ampm}", "%I:%M%p")]
        if ampm
        else [(hhmm, "%H:%M")]
    )
    for value, fmt in candidates:
        try:
            return datetime.strptime(value, fmt).strftime("%H:%M")
        except ValueError:
            continue
    # Previously the no-am/pm failure path returned None silently; log it.
    logging.debug(f"TIME: could not normalize '{hhmm} {ampm}' for game {game_id}")
    return None
# ----------------- build & merge -----------------
def main():
    """CLI entry point.

    Scrapes every team's printable schedule, merges the two per-team
    perspectives of each game into one home/away row (deduped by game_id
    when present), optionally fetches start times, and writes a CSV.
    """
    ap = argparse.ArgumentParser(description="Build a deduped season schedule with IDs, winners/losers, runs, and times.")
    ap.add_argument("--subseason", required=True, help="Subseason ID, e.g. 942425")
    ap.add_argument("--teams", required=True, help="Path to teams.json (array with team_id, team_slug, instance_id, teamName)")
    ap.add_argument("--out", default="season_schedule.csv", help="Output CSV path")
    ap.add_argument("--fetch-time", action="store_true", help="Fetch game time from /game/show/<id>")
    ap.add_argument("--sleep", type=float, default=0.35, help="Delay between requests (seconds)")
    args = ap.parse_args()
    by_instance, by_slug, by_norm = load_teams(args.teams)
    instance_ids = sorted(by_instance.keys())
    session = requests.Session()
    session.headers.update(HEADERS)
    # Scrape all teams
    raw: List[dict] = []
    for i, iid in enumerate(instance_ids, 1):
        logging.info(f"[{i}/{len(instance_ids)}] Fetching schedule for instance {iid}")
        raw.extend(parse_printable(iid, args.subseason, session=session))
        time.sleep(args.sleep)  # be polite
    def rec_from_instance(iid: str) -> Optional[TeamRec]:
        # Thin lookup wrapper; returns None for unknown instances.
        return by_instance.get(iid)
    def match_opponent(text: str) -> Optional[TeamRec]:
        # Fuzzy opponent-name resolution against the loaded team tables.
        return best_match_team(text, by_slug, by_norm)
    # Group by game_id if available; otherwise fallback on (date + unordered pair + raw score text if present)
    buckets: Dict[str, dict] = {}
    fallback_rows = 0
    for row in raw:
        team_rec = rec_from_instance(row["team_instance"])
        if not team_rec:
            logging.warning(f"Unknown instance {row['team_instance']}; skipping")
            continue
        opp_rec = match_opponent(row["opponent_name"])
        opp_slug = opp_rec.slug if opp_rec else slugify(row["opponent_name"])
        # Unordered pair of slugs identifies the matchup regardless of perspective.
        pair = tuple(sorted([team_rec.slug, opp_slug]))
        if row["game_id"]:
            key = f"id:{row['game_id']}"
        else:
            runs_sig = ""
            if isinstance(row["team_runs"], int) and isinstance(row["opp_runs"], int):
                runs_sig = f"{row['team_runs']}-{row['opp_runs']}"
            key = f"fb:{row['date']}|{pair[0]}@{pair[1]}|{runs_sig}"
            fallback_rows += 1
        perspective = {
            "team": team_rec,
            "opp": opp_rec,  # may be None
            "is_away": row["is_away"],
            "team_runs": row["team_runs"],
            "opp_runs": row["opp_runs"],
            "location": row["location"],
            "status": row["status"],
            "source_url": row["source_url"],
            "pair": pair,
            "date": row["date"],
            "game_id": row["game_id"],
        }
        if key not in buckets:
            buckets[key] = {"persp": [perspective], "game_id": row["game_id"]}
        else:
            buckets[key]["persp"].append(perspective)
    if fallback_rows:
        logging.info(f"Used fallback dedupe for {fallback_rows} rows without game_id.")
    # Merge perspectives into a single home/away row
    out_rows = []
    time_cache: Dict[str, Optional[str]] = {}
    for key, bucket in buckets.items():
        p = bucket["persp"]
        date = p[0]["date"]
        game_id = bucket.get("game_id", "")
        # Identify home/away perspectives
        p_home = next((x for x in p if x["is_away"] is False), None)
        p_away = next((x for x in p if x["is_away"] is True), None)
        # Team identities
        home_team = (p_home["team"] if p_home else (p_away["opp"] if p_away else None))
        away_team = (p_away["team"] if p_away else (p_home["opp"] if p_home else None))
        def pack_team(rec: Optional[TeamRec], fallback_slug: str):
            # Flatten a TeamRec (or a slug-only guess) into CSV fields.
            if rec:
                return rec.slug, rec.instance_id, rec.team_id, rec.name
            return fallback_slug, "", "", fallback_slug.replace("-", " ").title()
        # Prefer runs from the explicit perspective (home if available; otherwise away)
        home_runs = away_runs = None
        if p_home and isinstance(p_home["team_runs"], int) and isinstance(p_home["opp_runs"], int):
            home_runs = p_home["team_runs"]
            away_runs = p_home["opp_runs"]
        elif p_away and isinstance(p_away["team_runs"], int) and isinstance(p_away["opp_runs"], int):
            away_runs = p_away["team_runs"]
            home_runs = p_away["opp_runs"]
        # Fallback: single perspective present but numbers known → place by is_away
        if (home_runs is None or away_runs is None) and p:
            one = p[0]
            if isinstance(one["team_runs"], int) and isinstance(one["opp_runs"], int):
                if one["is_away"]:
                    away_runs = one["team_runs"]; home_runs = one["opp_runs"]
                    away_team = one["team"]; home_team = one["opp"] if one["opp"] else home_team
                else:
                    home_runs = one["team_runs"]; away_runs = one["opp_runs"]
                    home_team = one["team"]; away_team = one["opp"] if one["opp"] else away_team
        # Pack final team identifiers (fallback slug = guess from perspectives)
        guess_home_fallback = (p_home["team"].slug if p_home and p_home["team"] else
                               p_away["opp"].slug if p_away and p_away["opp"] else
                               p[0]["pair"][0])
        guess_away_fallback = (p_away["team"].slug if p_away and p_away["team"] else
                               p_home["opp"].slug if p_home and p_home["opp"] else
                               p[0]["pair"][1])
        home_slug, home_inst, home_id, home_name = pack_team(home_team, guess_home_fallback)
        away_slug, away_inst, away_id, away_name = pack_team(away_team, guess_away_fallback)
        # Winner/loser (left empty for ties or unknown scores)
        winner_slug = winner_inst = winner_id = loser_slug = loser_inst = loser_id = ""
        if isinstance(home_runs, int) and isinstance(away_runs, int):
            if home_runs > away_runs:
                winner_slug, winner_inst, winner_id = home_slug, home_inst, home_id
                loser_slug, loser_inst, loser_id = away_slug, away_inst, away_id
            elif away_runs > home_runs:
                winner_slug, winner_inst, winner_id = away_slug, away_inst, away_id
                loser_slug, loser_inst, loser_id = home_slug, home_inst, home_id
        # Meta from perspectives (first non-empty wins, home preferred)
        loc = (p_home["location"] if p_home else "") or (p_away["location"] if p_away else "")
        status = (p_home["status"] if p_home else "") or (p_away["status"] if p_away else "")
        source_urls = sorted({x["source_url"] for x in p})
        # -------- NEW: fetch game start time from game page --------
        time_local = ""
        if args.fetch_time and game_id:
            if game_id in time_cache:
                tval = time_cache[game_id]
            else:
                logging.debug(f"TIME: fetching game {game_id}")
                tval = fetch_game_time(game_id, session=session)
                time_cache[game_id] = tval
                if tval is None:
                    # small backoff to be nice if many misses
                    time.sleep(min(args.sleep * 2, 1.0))
            if tval:
                time_local = tval
        logging.debug(
            f"MERGE: {date} {home_slug}({home_runs}) vs {away_slug}({away_runs}) "
            f"winner={winner_slug or 'TIE'} id={game_id} time={time_local or 'NA'}"
        )
        out_rows.append({
            "date_local": date,
            "time_local": time_local,
            "home_slug": home_slug, "home_instance": home_inst, "home_id": home_id, "home_name": home_name,
            "away_slug": away_slug, "away_instance": away_inst, "away_id": away_id, "away_name": away_name,
            "home_runs": "" if home_runs is None else home_runs,
            "away_runs": "" if away_runs is None else away_runs,
            "winner_slug": winner_slug, "winner_instance": winner_inst, "winner_id": winner_id,
            "loser_slug": loser_slug, "loser_instance": loser_inst, "loser_id": loser_id,
            "location": loc, "status": status,
            "game_id": game_id,
            "source_urls": " ".join(source_urls),
        })
    if not out_rows:
        logging.warning("No games produced.")
        return
    fieldnames = [
        "date_local","time_local",
        "home_slug","home_instance","home_id","home_name",
        "away_slug","away_instance","away_id","away_name",
        "home_runs","away_runs",
        "winner_slug","winner_instance","winner_id",
        "loser_slug","loser_instance","loser_id",
        "location","status","game_id","source_urls",
    ]
    with open(args.out, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for r in out_rows:
            w.writerow(r)
    logging.info(f"Wrote {len(out_rows)} games → {args.out}")


if __name__ == "__main__":
    main()

224
compute_ratings.py Normal file
View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python3
"""
Rank baseball teams from a season_schedule.csv that has columns:
date_local,time_local,home_slug,home_instance,home_id,home_name,
away_slug,away_instance,away_id,away_name,home_runs,away_runs,
winner_slug,winner_instance,winner_id,loser_slug,loser_instance,loser_id,
location,status,game_id,source_urls
Output CSV columns (one row per team):
Team, GP, W, L, T, WinPct, RS, RA, RunDiff, PythagoreanWinPct,
MasseyRating, EloRating, StrengthOfSchedule, CompositeRating
Defaults:
- Team identity uses *_name; switch to slugs with --team-id slugs
- Pythagorean exponent = 1.83
- Massey caps margins at 8 runs and subtracts estimated home-field runs
- Elo: start 1500, K=24, home bonus H=30, margin factor ln(|m|+1) capped at 2.0
- Elo averaged over 20 random shuffles (reduces order dependence)
"""
from __future__ import annotations
import argparse
import math
import numpy as np
import pandas as pd
def parse_args():
    """Parse the command line; see the module docstring for defaults and semantics."""
    p = argparse.ArgumentParser(description="Power ratings from season_schedule.csv")
    p.add_argument("--in", dest="inp", required=True, help="Input CSV (season_schedule.csv)")
    p.add_argument("--out", dest="out", required=True, help="Output ratings CSV")
    p.add_argument("--team-id", choices=["names","slugs"], default="names",
                   help="Use team names or slugs as identifiers (default: names)")
    p.add_argument("--final-status", default=None,
                   help="Only include games where status == this value (e.g., 'final'). If omitted, any row with scores is included.")
    # Tunables
    p.add_argument("--pyexp", type=float, default=1.83, help="Pythagorean exponent")
    p.add_argument("--massey-cap", type=float, default=8.0, help="Cap for run margins in Massey")
    p.add_argument("--no-massey-home-adj", action="store_true",
                   help="Disable subtracting estimated home-field runs in Massey")
    p.add_argument("--elo-k", type=float, default=24.0, help="Elo K-factor")
    p.add_argument("--elo-home", type=float, default=30.0, help="Elo home bonus (points)")
    p.add_argument("--elo-mcap", type=float, default=2.0, help="Cap for margin factor ln(|m|+1)")
    p.add_argument("--elo-shuffles", type=int, default=20, help="Random shuffles to average Elo")
    p.add_argument("--elo-seed", type=int, default=42, help="RNG seed for shuffles")
    return p.parse_args()
def load_games(a) -> pd.DataFrame:
    """Load and filter the schedule CSV into a canonical games frame.

    Returns columns Date, HomeTeam, AwayTeam, HomeRuns, AwayRuns, plus the
    derived Margin (home - away) and Result ('H'/'A'/'T'). Rows missing a
    team identifier or a numeric score are dropped.

    Raises ValueError when a required column is absent.
    """
    df = pd.read_csv(a.inp)
    # Choose identifiers
    home_id_col = "home_name" if a.team_id == "names" else "home_slug"
    away_id_col = "away_name" if a.team_id == "names" else "away_slug"
    for c in [home_id_col, away_id_col, "home_runs", "away_runs"]:
        if c not in df.columns:
            raise ValueError(f"Missing required column: {c}")
    # Optional status filter (helps exclude postponed/canceled)
    if a.final_status is not None and "status" in df.columns:
        df = df[df["status"].astype(str).str.lower() == str(a.final_status).lower()]
    # Keep only games with numeric scores
    df = df.copy()
    df["home_runs"] = pd.to_numeric(df["home_runs"], errors="coerce")
    df["away_runs"] = pd.to_numeric(df["away_runs"], errors="coerce")
    df = df.dropna(subset=[home_id_col, away_id_col, "home_runs", "away_runs"])
    # Parse the date column; missing column → all-NaT series.
    if "date_local" in df.columns:
        date = pd.to_datetime(df["date_local"], errors="coerce")
    else:
        date = pd.Series(pd.NaT, index=df.index)
    dt = date
    if "time_local" in df.columns:
        # BUGFIX: parse the time column only when it exists. Previously
        # pd.to_datetime(df.get("time_local", pd.NaT)).dt.time ran
        # unconditionally and crashed with AttributeError (scalar NaT has
        # no .dt accessor) whenever the column was absent.
        time_of_day = pd.to_datetime(df["time_local"], errors="coerce").dt.time
        # Build a full datetime only where both pieces are present.
        dt = pd.to_datetime(
            date.dt.strftime("%Y-%m-%d").fillna("") + " " +
            pd.Series(time_of_day, index=df.index).astype(str).replace("NaT", ""),
            errors="coerce",
        )
    df_out = pd.DataFrame({
        "Date": dt,
        "HomeTeam": df[home_id_col].astype(str),
        "AwayTeam": df[away_id_col].astype(str),
        "HomeRuns": df["home_runs"].astype(int),
        "AwayRuns": df["away_runs"].astype(int),
    })
    df_out["Margin"] = df_out["HomeRuns"] - df_out["AwayRuns"]
    df_out["Result"] = np.where(df_out["HomeRuns"] > df_out["AwayRuns"], "H",
                                np.where(df_out["HomeRuns"] < df_out["AwayRuns"], "A", "T"))
    return df_out.reset_index(drop=True)
def aggregate_team_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Tally per-team W/L/T, runs scored/allowed, GP, WinPct, and RunDiff.

    Ties count as half a win in WinPct; a team with zero games gets NaN.
    """
    all_teams = sorted(set(df["HomeTeam"]).union(df["AwayTeam"]))
    table = pd.DataFrame(0, index=pd.Index(all_teams, name="Team"),
                         columns=["W", "L", "T", "RS", "RA"])
    for game in df.itertuples(index=False):
        hr = int(game.HomeRuns)
        ar = int(game.AwayRuns)
        table.at[game.HomeTeam, "RS"] += hr
        table.at[game.HomeTeam, "RA"] += ar
        table.at[game.AwayTeam, "RS"] += ar
        table.at[game.AwayTeam, "RA"] += hr
        if hr > ar:
            table.at[game.HomeTeam, "W"] += 1
            table.at[game.AwayTeam, "L"] += 1
        elif hr < ar:
            table.at[game.AwayTeam, "W"] += 1
            table.at[game.HomeTeam, "L"] += 1
        else:
            table.at[game.HomeTeam, "T"] += 1
            table.at[game.AwayTeam, "T"] += 1
    table = table.astype(int)
    table["GP"] = table["W"] + table["L"] + table["T"]
    table["WinPct"] = (table["W"] + 0.5 * table["T"]) / table["GP"].replace(0, np.nan)
    table["RunDiff"] = table["RS"] - table["RA"]
    return table.reset_index()
def pythagorean(rs: pd.Series, ra: pd.Series, exp: float) -> pd.Series:
    """Pythagorean expectation RS^exp / (RS^exp + RA^exp); 0.5 when both totals are zero."""
    scored = rs.clip(lower=0)
    allowed = ra.clip(lower=0)
    num = np.power(scored, exp)
    den = num + np.power(allowed, exp)
    with np.errstate(divide="ignore", invalid="ignore"):
        frac = np.where(den > 0, num / den, 0.5)
    return pd.Series(frac, index=scored.index)
def estimate_home_field_runs(df: pd.DataFrame) -> float:
    """Average home-team margin across all games; 0.0 for an empty frame."""
    if df.empty:
        return 0.0
    return float(df["Margin"].mean())
def massey(df: pd.DataFrame, cap: float, subtract_home: bool) -> tuple[pd.Series, float]:
    """Least-squares Massey ratings from capped (optionally home-adjusted) margins.

    Solves A r = y where each game contributes a +1/-1 row (home/away) and a
    final all-ones row anchors the ratings to sum to zero.
    Returns (ratings indexed by team name, home-field runs subtracted or 0.0).
    """
    teams = sorted(set(df["HomeTeam"]).union(df["AwayTeam"]))
    col = {team: j for j, team in enumerate(teams)}
    margins = df["Margin"].astype(float).to_numpy()
    if cap and cap > 0:
        margins = np.clip(margins, -cap, cap)
    home_runs = estimate_home_field_runs(df)
    if subtract_home:
        margins = margins - home_runs
    n_games = len(df)
    n_teams = len(teams)
    A = np.zeros((n_games + 1, n_teams), dtype=float)
    for g_i, game in enumerate(df.itertuples(index=False)):
        A[g_i, col[game.HomeTeam]] = 1.0
        A[g_i, col[game.AwayTeam]] = -1.0
    # Anchor row: ratings sum to zero so the system is uniquely solvable.
    A[n_games, :] = 1.0
    targets = np.concatenate([margins, [0.0]])
    solution, *_ = np.linalg.lstsq(A, targets, rcond=None)
    return pd.Series(solution, index=teams), (home_runs if subtract_home else 0.0)
def elo_expected(ra: float, rb: float) -> float:
    """Logistic expected score for a player rated *ra* against one rated *rb*."""
    diff = rb - ra
    return 1.0 / (1.0 + 10.0 ** (diff / 400.0))
def elo_once(df: pd.DataFrame, K: float, H: float, mcap: float, init: dict[str, float]) -> dict[str, float]:
    """Run one Elo pass over the games in row order.

    K is the update factor, H the home-advantage bonus (added to the home
    rating only when computing the expectation), and mcap caps the
    ln(|margin|+1) multiplier. *init* is copied, never mutated.
    """
    ratings = dict(init)
    for _, r in df.iterrows():
        h, a = r["HomeTeam"], r["AwayTeam"]
        hr, ar = int(r["HomeRuns"]), int(r["AwayRuns"])
        margin = hr - ar
        Eh = elo_expected(ratings[h] + H, ratings[a])
        # Home score: 1 for a win, 0 for a loss, 0.5 for a tie.
        # (The unused away score Sa from the original is gone; the away
        # update below is the exact zero-sum negation of the home update.)
        Sh = 1.0 if hr > ar else (0.0 if hr < ar else 0.5)
        M = np.log(abs(margin) + 1.0)
        if mcap is not None:
            M = min(M, mcap)
        delta = K * M * (Sh - Eh)
        ratings[h] += delta
        ratings[a] -= delta  # equals K*M*((1-Sh) - (1-Eh)) exactly
    return ratings
def elo(df: pd.DataFrame, K=24.0, H=30.0, mcap=2.0, shuffles=20, seed=42) -> pd.Series:
    """Average Elo over one chronological pass plus (shuffles-1) random orderings.

    Every pass starts from 1500 for all teams; averaging reduces the
    order dependence inherent in sequential Elo updates.
    """
    teams = sorted(set(df["HomeTeam"]).union(df["AwayTeam"]))
    start = {t: 1500.0 for t in teams}
    # Baseline pass in date order (stable sort; NaT dates keep input order).
    ordered = df.sort_values(["Date"]).reset_index(drop=True)
    first = elo_once(ordered, K, H, mcap, start)
    samples = {t: [first[t]] for t in teams}
    rng = np.random.default_rng(seed)
    for _ in range(max(0, shuffles - 1)):
        perm = np.arange(len(ordered))
        rng.shuffle(perm)
        result = elo_once(ordered.iloc[perm].reset_index(drop=True), K, H, mcap, start)
        for t in teams:
            samples[t].append(result[t])
    return pd.Series({t: float(np.mean(samples[t])) for t in teams}).sort_index()
def zscore(s: pd.Series) -> pd.Series:
    """Standardize to mean 0, population std 1; all-zero series when std is 0 or NaN."""
    sd = s.std(ddof=0)
    if sd == 0 or np.isnan(sd):
        return pd.Series(0.0, index=s.index)
    return (s - s.mean()) / sd
def main():
    """CLI entry point: load games, compute ratings, write the ranked CSV.

    Combines three team-strength signals — Massey least-squares ratings,
    shuffle-averaged Elo, and Pythagorean win% — into a z-scored composite,
    then writes one row per team sorted by that composite.
    """
    a = parse_args()
    games = load_games(a)

    # --- Per-team aggregates -------------------------------------------
    team = aggregate_team_stats(games)
    team["PythagoreanWinPct"] = pythagorean(team["RS"], team["RA"], a.pyexp)

    # --- Ratings -------------------------------------------------------
    massey_r, h_runs = massey(games, cap=a.massey_cap,
                              subtract_home=(not a.no_massey_home_adj))
    # (Removed a dead placeholder here that built an unused `sos` frame via
    # a tautological np.where(True, ...) assign.)

    # Strength of schedule: mean Massey rating of every opponent faced.
    # Opponents are appended per game, so repeat matchups count each time.
    opps = {t: [] for t in massey_r.index}
    for _, r in games.iterrows():
        opps[r["HomeTeam"]].append(r["AwayTeam"])
        opps[r["AwayTeam"]].append(r["HomeTeam"])
    sos_series = pd.Series({t: (float(massey_r[opps[t]].mean()) if opps[t] else 0.0)
                            for t in opps})

    elo_r = elo(games, K=a.elo_k, H=a.elo_home, mcap=a.elo_mcap,
                shuffles=a.elo_shuffles, seed=a.elo_seed)

    # --- Merge and composite -------------------------------------------
    out = team.set_index("Team")
    out["MasseyRating"] = massey_r
    out["EloRating"] = elo_r
    out["StrengthOfSchedule"] = sos_series
    # z-score each signal so the three different scales are comparable,
    # then blend with fixed weights.
    Z_r, Z_e, Z_p = zscore(out["MasseyRating"]), zscore(out["EloRating"]), zscore(out["PythagoreanWinPct"])
    out["CompositeRating"] = 0.45*Z_r + 0.35*Z_e + 0.20*Z_p
    out = out.reset_index()
    out = out[[
        "Team","GP","W","L","T","WinPct","RS","RA","RunDiff",
        "PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"
    ]].sort_values("CompositeRating", ascending=False)
    # Round for readability
    for c in ["WinPct","PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"]:
        out[c] = out[c].astype(float).round(5)
    out.to_csv(a.out, index=False)
    print(f"Done. Estimated home-field (runs) used in Massey: {h_runs:.3f}")
    print(f"Teams ranked: {len(out)} | Games processed: {len(games)}")
    print(f"Output -> {a.out}")
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

93
csyba.py Normal file
View File

@@ -0,0 +1,93 @@
import requests, re, time, csv, logging
from bs4 import BeautifulSoup
from dateutil import parser as dtp
# --- Logging setup ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S"
)
# Minimal browser-like User-Agent sent with every request.
HEADERS = {"User-Agent": "Mozilla/5.0"}
# csyba.com subseason (season) id targeted by every schedule URL below.
SUBSEASON_ID = "942425"
# team_instance ids whose printable schedule pages are scraped, one per team.
TEAM_INSTANCES = [
    "10119604","10119605","10119601","10119603","10119599","10185021","10119607",
    "10219990","10119600","10119602","10119611","10119616","10119612","10148204",
    "10147713","10119617","10178191","10119608","10119615","10119614","10168648",
    "10168644","10168645","10168646","10168649"
]
def clean(x):
    """Collapse runs of whitespace to single spaces and trim; falsy -> ''."""
    return " ".join((x or "").split())
def fetch_team_schedule(iid):
    """Fetch and parse one team's printable schedule page.

    iid is a csyba.com team_instance id. Returns a list of per-game dicts;
    on any fetch/HTTP failure the error is logged and an empty list is
    returned so the overall run can continue.
    """
    url = f"https://www.csyba.com/schedule/print/team_instance/{iid}?schedule_type=index&subseason={SUBSEASON_ID}"
    try:
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
    except Exception as e:
        # Best effort: skip this team rather than abort the whole scrape.
        logging.error(f"Failed to fetch team {iid}: {e}")
        return []
    soup = BeautifulSoup(r.text, "html.parser")
    games = []
    for tr in soup.select("table tr")[1:]:  # skip header row
        tds = tr.select("td")
        if len(tds) < 5:
            continue
        date_txt, result_txt, opp_txt, loc_txt, status_txt = [clean(td.get_text(" ")) for td in tds[:5]]
        # Normalize the date to ISO, falling back to the raw cell text.
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; dateutil raises ValueError/OverflowError on bad input.
        try:
            date_iso = dtp.parse(date_txt, fuzzy=True).date().isoformat()
        except (ValueError, OverflowError):
            date_iso = date_txt
        # W/L/T flag and a "N - M" score embedded in the result cell.
        m_res = re.search(r"\b(W|L|T)\b", result_txt, re.I)
        result = m_res.group(1).upper() if m_res else ""
        m_score = re.search(r"(\d+)\s*-\s*(\d+)", result_txt)
        # NOTE(review): assumes the first number is the home score — the
        # print page may list this team's score first instead; verify.
        hs, as_ = (m_score.group(1), m_score.group(2)) if m_score else ("","")
        away_flag = opp_txt.startswith("@")  # "@Opponent" marks an away game
        opponent = opp_txt.lstrip("@").strip()
        games.append({
            "team_instance": iid,
            "date": date_iso,
            "result": result,
            "score": f"{hs}-{as_}" if hs else "",
            "home_score": hs,
            "away_score": as_,
            "opponent": opponent,
            "is_away": away_flag,
            "location": loc_txt,
            "status": status_txt,
            "source_url": url
        })
    logging.info(f"Team {iid}: parsed {len(games)} games")
    return games
def main():
    """Scrape every team's schedule, dedupe shared games, write season_games.csv."""
    all_games = []
    for i, iid in enumerate(TEAM_INSTANCES, start=1):
        logging.info(f"[{i}/{len(TEAM_INSTANCES)}] Fetching schedule for team {iid}")
        all_games.extend(fetch_team_schedule(iid))
        time.sleep(0.5)  # be polite to the server between requests
    # Deduplicate on (date, sorted {opponent, team_instance}, score): first
    # occurrence wins.
    # NOTE(review): the key pairs an opponent *name* with our own numeric
    # instance id, so the two teams' views of the same game produce
    # different keys and are never collapsed — confirm that is intended.
    unique = {}
    for g in all_games:
        key = (g["date"], tuple(sorted([g["opponent"], g["team_instance"]])), g["score"])
        if key not in unique:
            unique[key] = g
    deduped_games = list(unique.values())
    out_file = "season_games.csv"
    # Guard the empty case: previously deduped_games[0] raised IndexError
    # when every fetch failed (e.g. network down).
    if not deduped_games:
        logging.warning(f"No games parsed; nothing written to {out_file}")
        return
    with open(out_file,"w",newline="",encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=deduped_games[0].keys())
        writer.writeheader()
        writer.writerows(deduped_games)
    logging.info(f"Finished. {len(all_games)} raw rows → {len(deduped_games)} unique games saved to {out_file}")
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()