Convert scripts to use Typer and add requirements.txt
- Replaced argparse with Typer for CLI argument parsing in both scripts.
- Updated function signatures and calls accordingly in build_season_schedule.py and compute_ratings.py.
- Added requirements.txt listing dependencies, including typer[all], pandas, numpy, and others.
This commit is contained in:
@@ -19,43 +19,27 @@ Defaults:
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import math
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import typer
|
||||
|
||||
def parse_args():
|
||||
p = argparse.ArgumentParser(description="Power ratings from season_schedule.csv")
|
||||
p.add_argument("--in", dest="inp", required=True, help="Input CSV (season_schedule.csv)")
|
||||
p.add_argument("--out", dest="out", required=True, help="Output ratings CSV")
|
||||
p.add_argument("--team-id", choices=["names","slugs"], default="names",
|
||||
help="Use team names or slugs as identifiers (default: names)")
|
||||
p.add_argument("--final-status", default=None,
|
||||
help="Only include games where status == this value (e.g., 'final'). If omitted, any row with scores is included.")
|
||||
# Tunables
|
||||
p.add_argument("--pyexp", type=float, default=1.83, help="Pythagorean exponent")
|
||||
p.add_argument("--massey-cap", type=float, default=8.0, help="Cap for run margins in Massey")
|
||||
p.add_argument("--no-massey-home-adj", action="store_true",
|
||||
help="Disable subtracting estimated home-field runs in Massey")
|
||||
p.add_argument("--elo-k", type=float, default=24.0, help="Elo K-factor")
|
||||
p.add_argument("--elo-home", type=float, default=30.0, help="Elo home bonus (points)")
|
||||
p.add_argument("--elo-mcap", type=float, default=2.0, help="Cap for margin factor ln(|m|+1)")
|
||||
p.add_argument("--elo-shuffles", type=int, default=20, help="Random shuffles to average Elo")
|
||||
p.add_argument("--elo-seed", type=int, default=42, help="RNG seed for shuffles")
|
||||
return p.parse_args()
|
||||
|
||||
def load_games(a) -> pd.DataFrame:
|
||||
df = pd.read_csv(a.inp)
|
||||
def load_games(
|
||||
inp: str,
|
||||
team_id: str = "names",
|
||||
final_status: str | None = None,
|
||||
) -> pd.DataFrame:
|
||||
df = pd.read_csv(inp)
|
||||
# Choose identifiers
|
||||
home_id_col = "home_name" if a.team_id == "names" else "home_slug"
|
||||
away_id_col = "away_name" if a.team_id == "names" else "away_slug"
|
||||
home_id_col = "home_name" if team_id == "names" else "home_slug"
|
||||
away_id_col = "away_name" if team_id == "names" else "away_slug"
|
||||
for c in [home_id_col, away_id_col, "home_runs", "away_runs"]:
|
||||
if c not in df.columns:
|
||||
raise ValueError(f"Missing required column: {c}")
|
||||
|
||||
# Optional status filter (helps exclude postponed/canceled)
|
||||
if a.final_status is not None and "status" in df.columns:
|
||||
df = df[df["status"].astype(str).str.lower() == str(a.final_status).lower()]
|
||||
if final_status is not None and "status" in df.columns:
|
||||
df = df[df["status"].astype(str).str.lower() == str(final_status).lower()]
|
||||
|
||||
# Keep only games with numeric scores
|
||||
df = df.copy()
|
||||
@@ -157,7 +141,6 @@ def elo_once(df: pd.DataFrame, K: float, H: float, mcap: float, init: dict[str,f
|
||||
def elo(df: pd.DataFrame, K=24.0, H=30.0, mcap=2.0, shuffles=20, seed=42) -> pd.Series:
|
||||
teams = sorted(set(df["HomeTeam"]).union(df["AwayTeam"]))
|
||||
base = {t: 1500.0 for t in teams}
|
||||
# baseline in chronological order (Date may be NaT; sort is stable)
|
||||
df0 = df.sort_values(["Date"]).reset_index(drop=True)
|
||||
r_first = elo_once(df0, K, H, mcap, base)
|
||||
rng = np.random.default_rng(seed)
|
||||
@@ -173,52 +156,71 @@ def zscore(s: pd.Series) -> pd.Series:
|
||||
mu, sd = s.mean(), s.std(ddof=0)
|
||||
return pd.Series(0.0, index=s.index) if (sd == 0 or np.isnan(sd)) else (s - mu) / sd
|
||||
|
||||
def main():
|
||||
a = parse_args()
|
||||
games = load_games(a)
|
||||
def main(
|
||||
inp: str = typer.Option(..., help="Input CSV (season_schedule.csv)"),
|
||||
out: str = typer.Option(..., help="Output ratings CSV"),
|
||||
team_id: str = typer.Option(
|
||||
"names",
|
||||
help="Use team names or slugs as identifiers (default: names)",
|
||||
show_default=True,
|
||||
case_sensitive=False,
|
||||
prompt=False,
|
||||
),
|
||||
final_status: str | None = typer.Option(None, help="Only include games where status == this value (e.g., 'final'). If omitted, any row with scores is included."),
|
||||
pyexp: float = typer.Option(1.83, help="Pythagorean exponent"),
|
||||
massey_cap: float = typer.Option(8.0, help="Cap for run margins in Massey"),
|
||||
no_massey_home_adj: bool = typer.Option(False, help="Disable subtracting estimated home-field runs in Massey"),
|
||||
elo_k: float = typer.Option(24.0, help="Elo K-factor"),
|
||||
elo_home: float = typer.Option(30.0, help="Elo home bonus (points)"),
|
||||
elo_mcap: float = typer.Option(2.0, help="Cap for margin factor ln(|m|+1)"),
|
||||
elo_shuffles: int = typer.Option(20, help="Random shuffles to average Elo"),
|
||||
elo_seed: int = typer.Option(42, help="RNG seed for shuffles")
|
||||
):
|
||||
team_id = team_id.lower()
|
||||
# Load games
|
||||
games = load_games(inp, team_id=team_id, final_status=final_status)
|
||||
|
||||
# Aggregates
|
||||
team = aggregate_team_stats(games)
|
||||
team["PythagoreanWinPct"] = pythagorean(team["RS"], team["RA"], a.pyexp)
|
||||
team["PythagoreanWinPct"] = pythagorean(team["RS"], team["RA"], pyexp)
|
||||
|
||||
# Ratings
|
||||
massey_r, h_runs = massey(games, cap=a.massey_cap, subtract_home=(not a.no_massey_home_adj))
|
||||
sos = (
|
||||
games.assign(OppTeam=np.where(True, games["AwayTeam"], games["AwayTeam"])) # placeholder
|
||||
)
|
||||
# Strength of schedule: avg opponent Massey rating faced
|
||||
massey_r, h_runs = massey(games, cap=massey_cap, subtract_home=not no_massey_home_adj)
|
||||
|
||||
# Strength of schedule
|
||||
opps = {t: [] for t in massey_r.index}
|
||||
for _, r in games.iterrows():
|
||||
opps[r["HomeTeam"]].append(r["AwayTeam"])
|
||||
opps[r["AwayTeam"]].append(r["HomeTeam"])
|
||||
sos_series = pd.Series({t: (float(massey_r[opps[t]].mean()) if opps[t] else 0.0) for t in opps})
|
||||
|
||||
elo_r = elo(games, K=a.elo_k, H=a.elo_home, mcap=a.elo_mcap, shuffles=a.elo_shuffles, seed=a.elo_seed)
|
||||
elo_r = elo(games, K=elo_k, H=elo_home, mcap=elo_mcap, shuffles=elo_shuffles, seed=elo_seed)
|
||||
|
||||
# Merge
|
||||
out = team.set_index("Team")
|
||||
out["MasseyRating"] = massey_r
|
||||
out["EloRating"] = elo_r
|
||||
out["StrengthOfSchedule"] = sos_series
|
||||
out_df = team.set_index("Team")
|
||||
out_df["MasseyRating"] = massey_r
|
||||
out_df["EloRating"] = elo_r
|
||||
out_df["StrengthOfSchedule"] = sos_series
|
||||
|
||||
# Composite
|
||||
Z_r, Z_e, Z_p = zscore(out["MasseyRating"]), zscore(out["EloRating"]), zscore(out["PythagoreanWinPct"])
|
||||
out["CompositeRating"] = 0.45*Z_r + 0.35*Z_e + 0.20*Z_p
|
||||
Z_r, Z_e, Z_p = zscore(out_df["MasseyRating"]), zscore(out_df["EloRating"]), zscore(out_df["PythagoreanWinPct"])
|
||||
out_df["CompositeRating"] = 0.45*Z_r + 0.35*Z_e + 0.20*Z_p
|
||||
|
||||
out = out.reset_index()
|
||||
out = out[[
|
||||
out_df = out_df.reset_index()
|
||||
out_df = out_df[[
|
||||
"Team","GP","W","L","T","WinPct","RS","RA","RunDiff",
|
||||
"PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"
|
||||
]].sort_values("CompositeRating", ascending=False)
|
||||
|
||||
# Round for readability
|
||||
for c in ["WinPct","PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"]:
|
||||
out[c] = out[c].astype(float).round(5)
|
||||
out_df[c] = out_df[c].astype(float).round(5)
|
||||
|
||||
out.to_csv(a.out, index=False)
|
||||
out_df.to_csv(out, index=False)
|
||||
print(f"Done. Estimated home-field (runs) used in Massey: {h_runs:.3f}")
|
||||
print(f"Teams ranked: {len(out)} | Games processed: {len(games)}")
|
||||
print(f"Output -> {a.out}")
|
||||
print(f"Teams ranked: {len(out_df)} | Games processed: {len(games)}")
|
||||
print(f"Output -> {out}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
typer.run(main)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user