initial commit
This commit is contained in:
0
convert_to_sportspress/__init__.py
Normal file
0
convert_to_sportspress/__init__.py
Normal file
2
convert_to_sportspress/__main__.py
Normal file
2
convert_to_sportspress/__main__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .convert_to_sportspress import app

# Entry point for ``python -m convert_to_sportspress``.
# The guard keeps a plain import of this module from launching the CLI;
# previously app() ran unconditionally at import time.
if __name__ == "__main__":
    app()
|
||||
238
convert_to_sportspress/convert_to_sportspress.py
Normal file
238
convert_to_sportspress/convert_to_sportspress.py
Normal file
@@ -0,0 +1,238 @@
|
||||
import csv
|
||||
import re
|
||||
from typing import List, Dict
|
||||
from dateutil import parser
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table, Column
|
||||
from rich.columns import Columns
|
||||
import typer
|
||||
from .utils import normalize_header_key, validate_csv_header, read_csv, is_visitor_home_order_reversed, process_data, aggregate_teams, write_sportspress_csv
|
||||
|
||||
# Root Typer application; commands are registered on it below.
app = typer.Typer()
|
||||
|
||||
|
||||
@app.command()
def standings(file_path: Path = typer.Argument(..., help="Path to the CSV file")):
    """Print an aggregated win/loss/tie standings table from a schedule CSV."""
    # Validate CSV header; the context manager closes the handle promptly
    # (the original left the file object dangling).
    with open(file_path, "r", newline="") as f:
        header = next(csv.reader(f))
    normalized_header = [normalize_header_key(key) for key in header]
    if not validate_csv_header(header):
        typer.echo("Error: Invalid CSV header. Make sure the CSV file contains the correct headers.")
        return

    # Read the CSV data and enrich it with parsed scores and datetimes.
    data = read_csv(file_path)
    visitor_home_order_reversed = is_visitor_home_order_reversed(normalized_header)
    processed_data = process_data(data, visitor_home_order_reversed)
    aggregate_team_data = aggregate_teams(processed_data)

    # Display aggregated data as a table.
    console = Console()
    table = Table(title="Aggregated Team Data")
    for column_name in ("Team", "Wins", "Losses", "Ties", "Runs For", "Runs Against"):
        table.add_column(column_name, style="bold")

    for team_stats in aggregate_team_data:
        table.add_row(
            team_stats["team"],
            str(team_stats["win"]),
            str(team_stats["loss"]),
            str(team_stats["tie"]),
            str(team_stats["runs_for"]),
            str(team_stats["runs_against"]),
        )

    console.print(table)
|
||||
|
||||
@app.command()
def sportspress_csv(file_path: Path = typer.Argument(..., help="Path to the CSV file"), file_output_path: Path = typer.Argument(..., help="Path to the output CSV file"), only_with_outcome: bool = typer.Option(default=True, is_flag=True, help="")):
    """Convert a schedule CSV into the SportsPress import format."""
    # Validate CSV header; the context manager closes the handle promptly
    # (the original left the file object dangling).
    with open(file_path, "r", newline="") as f:
        header = next(csv.reader(f))
    normalized_header = [normalize_header_key(key) for key in header]
    if not validate_csv_header(header):
        typer.echo("Error: Invalid CSV header. Make sure the CSV file contains the correct headers.")
        return

    # Read the CSV data and enrich it with parsed scores and datetimes.
    data = read_csv(file_path)
    visitor_home_order_reversed = is_visitor_home_order_reversed(normalized_header)
    processed_data = process_data(data, visitor_home_order_reversed)

    write_sportspress_csv(processed_data, file_output_path, only_with_outcome)
    typer.echo(f"Output to {file_output_path}")
|
||||
|
||||
def list_key_values(data: List[Dict], key):
    """Return the set of distinct values stored under *key* across all rows.

    "team" is treated as a virtual column: when no explicit team column
    exists, the home and visitor columns are combined.
    """
    normalized_key = "team" if key.lower() == "team" else normalize_header_key(key)

    if normalized_key == "team" and "team" not in data[0].keys():
        values = {row.get("home") for row in data}
        values |= {row.get("visitor") for row in data}
    else:
        values = {row.get(normalized_key) for row in data}
    return values
|
||||
|
||||
def replace_key_values(data: List[Dict], key, match: str, replace: str, is_regex: bool = False):
    """Replace values stored under *key* in every row, in place.

    Args:
        data: Rows to mutate.
        key: Column whose values are rewritten.
        match: Literal value (default) or regex pattern (``is_regex=True``).
        replace: Replacement text.
        is_regex: When True, *match* is a regex and substitution applies to
            every occurrence within the value.

    Returns:
        The same (mutated) list of rows.
    """
    if is_regex:
        regex = re.compile(match)
        for row in data:
            row[key] = regex.sub(replace, row[key])
    else:
        # The original interpolated the literal match into ^...$ without
        # re.escape, so metacharacters ('.', '(', '+', ...) were interpreted
        # as regex syntax. A plain equality test is the intended semantics.
        for row in data:
            if row[key] == match:
                row[key] = replace

    return data
|
||||
|
||||
def add_key_values(data: List[Dict], key, value: str):
    """Set *key* to the constant *value* on every row and return the list."""
    for record in data:
        record.update({key: value})

    return data
|
||||
|
||||
# Sub-application grouping the CSV clean-up commands (list/replace/add-key).
clean_app = typer.Typer()
|
||||
@clean_app.command("list")
|
||||
def print_values_for_key(file_path: Path = typer.Argument(..., help="Path to the CSV file"), key: str = typer.Argument(..., help="")):
|
||||
# Read CSV data
|
||||
data = read_csv(file_path)
|
||||
processed_data = list_key_values(data, key)
|
||||
|
||||
console = Console()
|
||||
table = Table(show_header=False)
|
||||
table.add_column("Values")
|
||||
|
||||
for value in sorted(processed_data):
|
||||
table.add_row(value)
|
||||
|
||||
console.print(table)
|
||||
|
||||
@clean_app.command("replace")
|
||||
def replace_values_for_key(
|
||||
file_path: Path = typer.Argument(..., help="Path to the CSV file"),
|
||||
key: str = typer.Argument(..., help=""),
|
||||
match: str = typer.Argument(..., help=""),
|
||||
replace: str = typer.Argument(..., help=""),
|
||||
in_place: bool = typer.Option(False, "--in-place", "-p", help="Modify file in place."),
|
||||
output_file: Path = typer.Option(None, "--output-file", "-o", help="Specify output file."),
|
||||
match_is_regex: bool = typer.Option(False, "--regex", "-p", help="Match is a regex pattern.")
|
||||
):
|
||||
|
||||
if in_place and output_file:
|
||||
typer.echo("Error: Only one of --in-place or --output-file should be provided, not both.")
|
||||
raise typer.Abort()
|
||||
|
||||
if key.lower() == "team":
|
||||
normalized_key = "team"
|
||||
else:
|
||||
normalized_key = normalize_header_key(key)
|
||||
|
||||
console = Console()
|
||||
|
||||
# Read CSV data
|
||||
data = read_csv(file_path)
|
||||
|
||||
before_table = Table(Column(), show_header=False, title="Before")
|
||||
for value in sorted(list_key_values(data, key)):
|
||||
before_table.add_row(value)
|
||||
|
||||
|
||||
after_table = Table( Column(), show_header=False, title="After")
|
||||
|
||||
if normalized_key != "team" or "team" in data[0].keys():
|
||||
data = replace_key_values(data, normalized_key, match, replace, match_is_regex)
|
||||
else:
|
||||
data=replace_key_values(data, "home", match, replace, match_is_regex)
|
||||
data=replace_key_values(data, "visitor", match, replace, match_is_regex)
|
||||
|
||||
for value in sorted(list_key_values(data, key)):
|
||||
after_table.add_row(value)
|
||||
|
||||
panel = Panel(
|
||||
Columns([before_table, after_table]),
|
||||
title="Replace"
|
||||
)
|
||||
|
||||
console.print(panel)
|
||||
|
||||
if in_place and typer.confirm("Perform Replacement in-place?"):
|
||||
with file_path.open('w') as f:
|
||||
fieldnames = data[0].keys()
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(data)
|
||||
|
||||
elif output_file:
|
||||
if output_file.is_dir():
|
||||
output_file = output_file.joinpath(file_path.name)
|
||||
if typer.confirm(f"Write to {output_file}?"):
|
||||
with output_file.open('w') as f:
|
||||
fieldnames = data[0].keys()
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(data)
|
||||
|
||||
@clean_app.command("add-key")
|
||||
def add_values_for_key(
|
||||
file_path: Path = typer.Argument(..., help="Path to the CSV file"),
|
||||
key: str = typer.Argument(..., help=""),
|
||||
value: str = typer.Argument("", help=""),
|
||||
in_place: bool = typer.Option(False, "--in-place", "-p", help="Modify file in place."),
|
||||
output_file: Path = typer.Option(None, "--output-file", "-o", help="Specify output file."),
|
||||
):
|
||||
|
||||
if in_place and output_file:
|
||||
typer.echo("Error: Only one of --in-place or --output-file should be provided, not both.")
|
||||
raise typer.Abort()
|
||||
|
||||
# Validate CSV header
|
||||
header = next(csv.reader(open(file_path, "r")))
|
||||
normalized_header = [normalize_header_key(key) for key in header]
|
||||
if key.lower() == "team":
|
||||
normalized_key = "team"
|
||||
else:
|
||||
normalized_key = normalize_header_key(key)
|
||||
if not validate_csv_header(header):
|
||||
typer.echo("Error: Invalid CSV header. Make sure the CSV file contains the correct headers.")
|
||||
return
|
||||
|
||||
console = Console()
|
||||
|
||||
# Read CSV data
|
||||
data = read_csv(file_path)
|
||||
|
||||
data = add_key_values(data, key, value)
|
||||
|
||||
if in_place and typer.confirm("Perform Replacement in-place?"):
|
||||
with file_path.open('w') as f:
|
||||
fieldnames = data[0].keys()
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(data)
|
||||
|
||||
elif output_file:
|
||||
if output_file.is_dir():
|
||||
output_file = output_file.joinpath(file_path.name)
|
||||
if typer.confirm(f"Write to {output_file}?"):
|
||||
with output_file.open('w') as f:
|
||||
fieldnames = data[0].keys()
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(data)
|
||||
|
||||
|
||||
# Expose the clean-up commands as the "clean" sub-command group.
app.add_typer(clean_app, name="clean")

if __name__ == "__main__":
    app()
|
||||
263
convert_to_sportspress/utils.py
Normal file
263
convert_to_sportspress/utils.py
Normal file
@@ -0,0 +1,263 @@
|
||||
import csv
|
||||
import re
|
||||
from typing import List, Dict
|
||||
from dateutil import parser
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
# Canonical header aliases: maps common schedule-CSV spellings to the
# normalized keys used throughout this module. Hoisted to module level so
# the dict is not rebuilt on every call (it was previously re-created per
# header cell, per row).
_KEY_MAPPING = {
    "away": "visitor",
    "results": "results",
    "final score": "results",
    "venue": "field",
    "location": "field",
    "result": "results",
    "w": "win",
    "l": "loss",
    "t": "tie",
    "div": "division",
    "rf": "runs_for",
    "runs": "runs_against",
}


def normalize_header_key(key: str) -> str:
    """Return the canonical form of a CSV header key.

    The key is lower-cased and stripped, then mapped through the alias
    table; unknown keys pass through in cleaned form.
    """
    cleaned = key.lower().strip()
    return _KEY_MAPPING.get(cleaned, cleaned)
|
||||
|
||||
def validate_csv_header(header: List[str]) -> bool:
    """Check that every required schedule column is present after normalization."""
    required = {"date", "time", "field", "visitor", "home", "results"}
    normalized = {normalize_header_key(key) for key in header}
    return required <= normalized
|
||||
|
||||
def read_csv(file_path: Path) -> List[dict]:
    """Read a schedule CSV into a list of dicts with normalized keys.

    Keys are normalized via normalize_header_key and values are stripped of
    surrounding whitespace.
    """
    data = []
    with open(file_path, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # DictReader yields None for cells missing from short rows; the
            # original crashed on .strip() in that case — treat them as "".
            normalized_row = {
                normalize_header_key(key): (value or "").strip()
                for key, value in row.items()
            }
            data.append(normalized_row)
    return data
|
||||
|
||||
def write_csv(file_path: Path, data: List[dict]) -> None:
    """Write *data* to *file_path*, using the first row's keys as the header."""
    with open(file_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)
|
||||
|
||||
def parse_score(score_str: str, reverse_order: bool = False) -> Dict[str, int]:
    """
    Parse a score string and extract home and visitor scores.

    Args:
        score_str (str): A string containing a "visitor-home" score somewhere.
        reverse_order (bool, optional): If True, the first number is the home
            score instead of the visitor score. Defaults to False.

    Returns:
        Dict[str, int]: Always contains "has_result". When a score was found
        it also holds the four runs_for/runs_against keys and the two
        outcome keys; any text surrounding the score is preserved under
        "pre"/"post".

    Raises:
        ValueError: Defensive guard only — every group in the pattern is
        optional, so the pattern matches any string in practice.
    """
    regex = re.compile(r"^(?P<pre>.*?)?(?:(?P<runs_first>\d+)-(?P<runs_second>\d+))?(?P<post>.*?)?$")
    match = regex.match(score_str)
    if match is None:
        raise ValueError("Invalid score format")

    score = {}
    if match.group("pre"):
        score["pre"] = match.group("pre")
    if match.group("post"):
        score["post"] = match.group("post")

    if not (match.group("runs_first") and match.group("runs_second")):
        score["has_result"] = False
        return score

    score["has_result"] = True
    runs_first, runs_second = int(match.group("runs_first")), int(match.group("runs_second"))
    # Convention: the score reads "visitor-home" unless reverse_order is set.
    # (The original duplicated the whole update in an if/elif pair.)
    if reverse_order:
        home_runs, visitor_runs = runs_first, runs_second
    else:
        home_runs, visitor_runs = runs_second, runs_first
    score.update({
        "home_runs_for": home_runs, "visitor_runs_for": visitor_runs,
        "home_runs_against": visitor_runs, "visitor_runs_against": home_runs,
    })

    if home_runs > visitor_runs:
        score["home_outcome"], score["visitor_outcome"] = "win", "loss"
        # A "forfeit" annotation in the trailing text overrides the loser.
        if "forfeit" in score.get("post", ""):
            score["visitor_outcome"] = "forfeit"
    elif home_runs < visitor_runs:
        score["home_outcome"], score["visitor_outcome"] = "loss", "win"
        if "forfeit" in score.get("post", ""):
            score["home_outcome"] = "forfeit"
    else:
        score["home_outcome"] = score["visitor_outcome"] = "tie"

    return score
|
||||
|
||||
def is_visitor_home_order_reversed(header: List[str]) -> bool:
    """
    Report whether 'home' appears before 'visitor' in the header.

    Convention is that home is listed second; if 'home' comes first, the
    score order in the results column is assumed to be reversed as well.

    Args:
        header (List[str]): The list of header keys.

    Returns:
        bool: True if the 'home' key precedes the 'visitor' key.
    """
    return header.index("home") < header.index("visitor")
|
||||
|
||||
def process_data(data: List[Dict], visitor_home_order_reversed=False) -> List[Dict]:
    """Enrich each row in place with parsed score fields and a 'datetime'.

    Args:
        data: Rows from read_csv; each must have 'results', 'date', 'time'.
        visitor_home_order_reversed: Passed through to parse_score.

    Returns:
        The same (mutated) list of rows.

    Raises:
        dateutil.parser.ParserError: If a row's date/time cannot be parsed.
        (The original wrapped this in a try/except that only re-raised, so
        the no-op wrapper was removed.)
    """
    for row in data:
        row.update(parse_score(row["results"], visitor_home_order_reversed))
        row['datetime'] = parser.parse(f"{row['date']} {row['time']}")

    return data
|
||||
|
||||
def aggregate_teams(data: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """
    Aggregate data by team, summing up wins, losses, ties, games played and runs.

    Args:
        data (List[Dict[str, str]]): A list of dictionaries representing the CSV data,
            already enriched by process_data (has_result, outcomes, run counts).

    Returns:
        List[Dict[str, int]]: A list of dictionaries containing aggregated data for
        each team, sorted by win count in descending order.
    """
    team_stats = {}

    for row in data:
        # Rows without a parsed score contribute nothing to the standings.
        if not row["has_result"]:
            continue
        home_team = row["home"]
        visitor_team = row["visitor"]
        team_stats.setdefault(home_team, {"win": 0, "loss": 0, "tie": 0, "gp": 0, "runs_for": 0, "runs_against":0})
        team_stats.setdefault(visitor_team, {"win": 0, "loss": 0, "tie": 0, "gp": 0, "runs_for": 0, "runs_against":0})

        team_stats[home_team]['gp'] += 1
        team_stats[visitor_team]['gp'] += 1

        # NOTE(review): a "forfeit" outcome (set by parse_score) is counted
        # in gp and runs but NOT in win/loss/tie — confirm this is intended.
        for outcome in ["win", "loss", "tie"]:
            if row["home_outcome"] == outcome:
                team_stats[home_team][outcome] += 1

            if row["visitor_outcome"] == outcome:
                team_stats[visitor_team][outcome] += 1

        team_stats[home_team]["runs_for"] += row["home_runs_for"]
        team_stats[home_team]["runs_against"] += row["home_runs_against"]

        team_stats[visitor_team]["runs_for"] += row["visitor_runs_for"]
        team_stats[visitor_team]["runs_against"] += row["visitor_runs_against"]


    # Convert team_stats dictionary to a list of dictionaries
    aggregated_data = [{"team": team, **stats} for team, stats in team_stats.items()]

    # Sort by win count, descending (not by team name, as a previous comment claimed)
    sorted_aggregated_data = sorted(aggregated_data, key=lambda x: x["win"], reverse=True)

    return sorted_aggregated_data
|
||||
|
||||
def write_sportspress_csv(data: List[Dict], file_path: Path, only_with_outcome: bool = False):
    """
    Writes sports event data to a CSV file in the SportsPress import format.

    Each event becomes two consecutive rows: the home team's row (carrying
    the date/time/venue) and the visitor team's row (those cells left blank).

    Parameters:
    - data (List[Dict]): List of dictionaries where each dictionary represents a sports event.
    - file_path (Path): The Path object representing the file path where the CSV file will be created.
    - only_with_outcome (bool, optional): If True, only events with outcomes will be included in the CSV. Default is False.

    Returns:
    None

    Example:
    >>> data = [...] # List of dictionaries representing sports events
    >>> file_path = Path("output.csv")
    >>> write_sportspress_csv(data, file_path)
    """

    def _results_cell(row: Dict, side: str) -> str:
        # SportsPress packs per-inning runs plus total/errors/hits into a
        # single pipe-separated cell; absent stats become empty fields.
        keys = [f"{side}_runs_for_inning_{i}" for i in range(1, 11)]
        keys += [f"{side}_runs_for", f"{side}_errors", f"{side}_hits"]
        return "|".join(str(row.get(k, "")) for k in keys)

    # newline="" is the csv-module convention for output files.
    with file_path.open('w', newline="") as output_csv_file:
        writer = csv.writer(output_csv_file)

        fieldnames = [
            "Format", #Competitive or Friendly
            # "Competition",
            "Season",
            # "Date Format",
            "Date",
            "Time",
            "Venue",
            "Team",
            "Results",
            "Outcome",
            # "Players",
            # "Performance",
        ]

        # Write the header
        writer.writerow(fieldnames)

        # Write the data. Leading blanks keep the rows aligned with the
        # declared header: previously the Format/Season columns were missing
        # from the data rows, shifting every value two columns left.
        for row in data:
            if only_with_outcome and not row['has_result']:
                continue
            writer.writerow([
                "",  # Format (left for the importer/user to fill in)
                "",  # Season
                row["datetime"].strftime("%Y/%m/%d"),
                row["datetime"].strftime("%H:%M"),
                row.get("field", ""),
                row["home"],
                _results_cell(row, "home"),
                row.get("home_outcome"),
            ])
            writer.writerow([
                "", "", "", "", "",
                row["visitor"],
                _results_cell(row, "visitor"),
                row.get("visitor_outcome"),
            ])
|
||||
|
||||
Reference in New Issue
Block a user