"""Helpers for reading, scoring, aggregating and exporting game-by-game data."""
import csv
import datetime
import re
from io import StringIO, TextIOBase
from pathlib import Path
from typing import Dict, List, TextIO, Union

from dateutil import parser
from rich.console import Console
from rich.table import Table
from xlsx2csv import Xlsx2csv

from .normalize import (
    load_config,
    normalize_header_key,
    normalize_keyvalue,
    normalize_row,
    normalize_value,
)

# Optional leading text, the first "<digits>-<digits>" pair found anywhere in
# the string, then the remaining text. When no pair exists the whole string
# lands in "post".
_SCORE_PATTERN = re.compile(
    r"^(?:(?P<pre>.*?)(?P<runs_first>\d+)-(?P<runs_second>\d+))?(?P<post>.*)$"
)

# Per-team result keys (without the "home_"/"visitor_" prefix) that are joined
# with "|" into the "Results" column of the SportsPress export.
_RESULT_KEY_SUFFIXES = [f"runs_for_inning_{inning}" for inning in range(1, 11)] + [
    "runs_for",
    "errors",
    "hits",
]


def list_key_values(data: List[Dict], key):
    """Return the set of distinct, non-None values stored under ``key``.

    Any capitalisation of "team" is treated as the key ``"team"``. When the
    first row has no ``"team"`` column, the values of the ``"team"``,
    ``"home"`` and ``"visitor"`` columns of every row are combined instead.

    Args:
        data: Rows to inspect.
        key: Column name to collect values for.

    Returns:
        A set of values; empty when ``data`` is empty.
    """
    if not data:
        # Guard: the "team" fallback below peeks at data[0].
        return set()
    if key.lower() == "team":
        key = "team"
    if key == "team" and "team" not in data[0]:
        values = set()
        for row in data:
            values.update((row.get("team"), row.get("home"), row.get("visitor")))
    else:
        values = {row.get(key) for row in data}
    values.discard(None)
    return values


def _read_raw_rows(source: Union[TextIO, Path, str]) -> List[dict]:
    """Return the un-normalized rows of one .csv/.xlsx path or open text file."""
    if isinstance(source, str):
        source = Path(source)
    if not isinstance(source, Path):
        # Open file objects are read as CSV text.
        return list(csv.DictReader(source))
    suffix = source.suffix.lower()
    if suffix == ".csv":
        # Materialize the rows while the file is still open: DictReader is lazy.
        with source.open("r", encoding="utf-8", newline="") as handle:
            return list(csv.DictReader(handle))
    if suffix == ".xlsx":
        buffer = StringIO()
        Xlsx2csv(source, outputencoding="utf-8").convert(buffer)
        buffer.seek(0)
        return list(csv.DictReader(buffer))
    raise ValueError("File must be a .csv or .xlsx")


def read_and_normalize_csv_or_xlsx(
    input_file: Union[List[TextIO], List[Path], TextIO, Path]
) -> List[dict]:
    """Read one or more CSV/XLSX inputs and return their normalized rows.

    Args:
        input_file: A single path (``Path`` or ``str``), a single file object
            opened in text mode, or a list of either. Paths must end in
            ``.csv`` or ``.xlsx``; file objects are read as CSV.

    Returns:
        One dictionary per data row, in input order, each passed through
        ``normalize_row`` with the configuration from ``load_config``.

    Raises:
        ValueError: If a path has a suffix other than ``.csv`` or ``.xlsx``.
    """
    normalization_config = load_config()
    sources = input_file if isinstance(input_file, list) else [input_file]
    result_data = []
    for source in sources:
        for row in _read_raw_rows(source):
            result_data.append(normalize_row(row, normalization_config))
    return result_data


def personalize_data_for_team(data: List[dict], target_team: str):
    """Annotate each row in place from the point of view of ``target_team``.

    Rows where ``target_team`` is the home or visitor side get a
    ``"homevisitor"`` key (``"home"`` or ``"visitor"``) and an ``"opponent"``
    key holding the other side. Rows not involving the team are left as-is.

    Args:
        data: Rows with ``"home"`` and ``"visitor"`` keys.
        target_team: Team name to personalize for.

    Returns:
        The same list object that was passed in.
    """
    for row in data:
        if row.get("home") == target_team:
            row["homevisitor"] = "home"
            row["opponent"] = row.get("visitor")
        elif row.get("visitor") == target_team:
            row["homevisitor"] = "visitor"
            row["opponent"] = row.get("home")
    return data


def write_csv(file_path: Path, data: List[dict]) -> None:
    """Write ``data`` to ``file_path`` as UTF-8 CSV with a header row.

    The header is the union of all row keys in first-seen order, so rows that
    carry extra keys are written rather than rejected; missing cells are left
    empty. An empty ``data`` list produces an empty file.

    Args:
        file_path: Destination file; overwritten if it exists.
        data: Rows to write.
    """
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
        if not data:
            return
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(data)


def parse_score(score_str: str, reverse_order: bool = False) -> Dict[str, int]:
    """Parse a score string and extract home and visitor scores.

    The first ``<digits>-<digits>`` pair found anywhere in the string is the
    score. By convention the pair reads ``visitor-home``.

    Args:
        score_str: Text containing the score, e.g. ``"5-3"`` or
            ``"5-3 forfeit"``. ``None`` is treated as an empty string.
        reverse_order: If True the pair reads ``home-visitor`` instead.

    Returns:
        A dictionary that always contains ``"has_result"``. ``"pre"`` and
        ``"post"`` hold any non-empty text before/after the score. When a
        score was found it also contains ``home_runs_for``,
        ``home_runs_against``, ``visitor_runs_for``, ``visitor_runs_against``,
        ``home_outcome`` and ``visitor_outcome`` (``"win"``, ``"loss"``,
        ``"tie"``; the losing side gets ``"forfeit"`` when the trailing text
        contains "forfeit").

    Raises:
        ValueError: If the string cannot be matched (it spans several lines).
    """
    if score_str is None:
        score_str = ""
    match = _SCORE_PATTERN.match(score_str)
    if not match:
        raise ValueError("Invalid score format")

    score = {}
    if match.group("pre"):
        score["pre"] = match.group("pre")
    if match.group("post"):
        score["post"] = match.group("post")

    if not (match.group("runs_first") and match.group("runs_second")):
        score["has_result"] = False
        return score

    score["has_result"] = True
    runs_first = int(match.group("runs_first"))
    runs_second = int(match.group("runs_second"))
    if reverse_order:
        home_runs, visitor_runs = runs_first, runs_second
    else:
        home_runs, visitor_runs = runs_second, runs_first
    score.update(
        {
            "home_runs_for": home_runs,
            "visitor_runs_for": visitor_runs,
            "home_runs_against": visitor_runs,
            "visitor_runs_against": home_runs,
        }
    )

    forfeited = "forfeit" in score.get("post", "")
    if home_runs > visitor_runs:
        score["home_outcome"] = "win"
        score["visitor_outcome"] = "forfeit" if forfeited else "loss"
    elif home_runs < visitor_runs:
        score["home_outcome"] = "forfeit" if forfeited else "loss"
        score["visitor_outcome"] = "win"
    else:
        score["home_outcome"] = "tie"
        score["visitor_outcome"] = "tie"
    return score


def is_visitor_home_order_reversed(header: List[str]) -> bool:
    """Tell whether the header lists 'home' before 'visitor'.

    The convention is that 'home' comes second.

    Args:
        header: The list of header keys.

    Returns:
        True if 'home' precedes 'visitor'. False if the order is conventional
        or if either key is missing, i.e. nothing needs to be done.
    """
    if "visitor" in header and "home" in header:
        return header.index("visitor") > header.index("home")
    # Always return a real bool: returning the KeyError class would be truthy
    # and make callers treat the order as reversed.
    return False


def _combine_date_time(date_value, time_value) -> datetime.datetime:
    """Build a datetime from date/time objects or from their text form."""
    if isinstance(date_value, datetime.date) and isinstance(time_value, datetime.time):
        return datetime.datetime.combine(date_value, time_value)
    return parser.parse(f"{date_value} {time_value}")


def parse_datetime(data: List[Dict]):
    """Add a ``"datetime"`` key to every row from its ``"date"`` and ``"time"``.

    Date/time objects are combined directly; anything else is formatted as
    ``"<date> <time>"`` and handed to ``dateutil.parser.parse``.

    Args:
        data: Rows to update in place.

    Returns:
        The same list object that was passed in.

    Raises:
        dateutil.parser.ParserError: If the text form cannot be parsed.
    """
    for row in data:
        row["datetime"] = _combine_date_time(row.get("date"), row.get("time"))
    return data


def import_gamebygame(data: Union[List[Dict], TextIO, Path]) -> List[Dict]:
    """Load game rows and enrich them with parsed scores and a datetime.

    Args:
        data: Already-loaded rows, or a path / open text file that is read
            with ``read_and_normalize_csv_or_xlsx`` first.

    Returns:
        The rows, each updated in place with the keys produced by
        ``parse_score`` (from the ``"results"`` column) and a ``"datetime"``
        key built from ``"date"`` and ``"time"``. Empty input is returned
        unchanged.

    Raises:
        KeyError: If a row lacks ``"date"`` or ``"time"``.
        dateutil.parser.ParserError: If date and time cannot be parsed.
    """
    if isinstance(data, (TextIOBase, Path, str)):
        data = read_and_normalize_csv_or_xlsx(data)
    if not data:
        return data
    # Column order of the first row decides how "a-b" scores are read.
    reverse_order = is_visitor_home_order_reversed(list(data[0].keys()))
    for row in data:
        row.update(parse_score(row.get("results", ""), reverse_order))
        row["datetime"] = _combine_date_time(row["date"], row["time"])
    return data


def aggregate_teams(data: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """Aggregate data by team, summing wins, losses, ties, games and runs.

    Rows without a truthy ``"has_result"`` are skipped. Every counted row adds
    one game played to both teams; only ``"win"``, ``"loss"`` and ``"tie"``
    outcomes are tallied, so any other outcome (e.g. ``"forfeit"``) counts
    towards ``"gp"`` and runs only.

    Args:
        data: Rows as produced by ``import_gamebygame``.

    Returns:
        One dictionary per team with ``team``, ``win``, ``loss``, ``tie``,
        ``gp``, ``runs_for`` and ``runs_against``, sorted by wins, descending.
    """
    tallied_outcomes = ("win", "loss", "tie")
    team_stats = {}
    for row in data:
        if not row.get("has_result"):
            continue
        for side in ("home", "visitor"):
            stats = team_stats.setdefault(
                row[side],
                {"win": 0, "loss": 0, "tie": 0, "gp": 0, "runs_for": 0, "runs_against": 0},
            )
            stats["gp"] += 1
            outcome = row[f"{side}_outcome"]
            if outcome in tallied_outcomes:
                stats[outcome] += 1
            stats["runs_for"] += row[f"{side}_runs_for"]
            stats["runs_against"] += row[f"{side}_runs_against"]

    aggregated_data = [{"team": team, **stats} for team, stats in team_stats.items()]
    # Most wins first; the sort is stable, so ties keep first-seen order.
    return sorted(aggregated_data, key=lambda entry: entry["win"], reverse=True)


def aggregate_teams_by_season(data: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """Collect, for every team, the set of seasons it appears in.

    A team is any truthy value in the ``"home"``, ``"visitor"`` or ``"team"``
    column of a row that also has a truthy ``"season"``.

    Args:
        data: Rows to scan.

    Returns:
        One ``{"team": name, "seasons": set_of_seasons}`` dictionary per team,
        sorted by team name.
    """
    team_stats = {}
    for row in data:
        season = row.get("season")
        if not season:
            continue
        for team_key in ("home", "visitor", "team"):
            if team := row.get(team_key):
                team_stats.setdefault(team, {"seasons": set()})["seasons"].add(season)

    aggregated_data = [{"team": team, **stats} for team, stats in team_stats.items()]
    return sorted(aggregated_data, key=lambda entry: entry["team"])


def _sportspress_results(row: Dict, side: str) -> str:
    """Join the per-inning runs, total runs, errors and hits of one side with '|'."""
    return "|".join(str(row.get(f"{side}_{suffix}", "")) for suffix in _RESULT_KEY_SUFFIXES)


def write_sportspress_csv(data: List[Dict], file_path: Path, only_with_outcome: bool = False):
    """Write sports event data to a CSV file in the SportsPress layout.

    Each event produces two lines: the home team line carries format, season,
    date, time and venue; the visitor line leaves those cells blank.

    Args:
        data: Event rows with ``"datetime"``, ``"home"`` and ``"visitor"``.
        file_path: Destination file; overwritten if it exists.
        only_with_outcome: If True, events without a truthy ``"has_result"``
            are skipped. Default is False.

    Returns:
        None

    Example:
        >>> data = [...]  # List of dictionaries representing sports events
        >>> file_path = Path("output.csv")
        >>> write_sportspress_csv(data, file_path)
    """
    fieldnames = [
        "Format",  # Competitive or Friendly
        "Season",
        "Date",
        "Time",
        "Venue",
        "Team",
        "Results",
        "Outcome",
    ]
    # newline="" lets the csv module control line endings.
    with file_path.open("w", newline="", encoding="utf-8") as output_csv_file:
        writer = csv.writer(output_csv_file)
        writer.writerow(fieldnames)
        for row in data:
            if only_with_outcome and not row.get("has_result"):
                continue
            # Every line must have one cell per header column.
            # NOTE(review): nothing in this module sets a "format" key; the
            # column stays blank unless the rows provide one - confirm.
            writer.writerow(
                [
                    row.get("format", ""),
                    row.get("season", ""),
                    row["datetime"].strftime("%Y/%m/%d"),
                    row["datetime"].strftime("%H:%M"),
                    row.get("field", ""),
                    row["home"],
                    _sportspress_results(row, "home"),
                    row.get("home_outcome"),
                ]
            )
            writer.writerow(
                [
                    "",
                    "",
                    "",
                    "",
                    "",
                    row["visitor"],
                    _sportspress_results(row, "visitor"),
                    row.get("visitor_outcome"),
                ]
            )