ncaa_stats_py.basketball

   1# Author: Joseph Armstrong (armstrongjoseph08@gmail.com)
   2# File Name: `basketball.py`
   3# Purpose: Houses functions that allow one to access NCAA basketball data
   4# Creation Date: 2024-09-20 08:15 PM EDT
   5# Update History:
   6# - 2024-09-20 08:15 PM EDT
   7# - 2024-11-01 12:10 AM EDT
   8# - 2024-11-25 07:45 PM EDT
   9# - 2025-01-04 03:00 PM EDT
  10# - 2025-01-18 02:40 PM EDT
  11# - 2025-02-01 02:40 PM EDT
  12# - 2025-02-05 08:50 PM EDT
  13
  14
  15import logging
  16import re
  17from datetime import date, datetime
  18from os import mkdir
  19from os.path import exists, expanduser, getmtime
  20
  21import numpy as np
  22import pandas as pd
  23from bs4 import BeautifulSoup
  24from dateutil import parser
  25from pytz import timezone
  26from tqdm import tqdm
  27
  28from ncaa_stats_py.utls import (
  29    _format_folder_str,
  30    _get_minute_formatted_time_from_seconds,
  31    _get_schools,
  32    _get_webpage,
  33)
  34
  35
  36def get_basketball_teams(
  37    season: int,
  38    level: str | int,
  39    get_wbb_data: bool = False
  40) -> pd.DataFrame:
  41    """
  42    Retrieves a list of basketball teams from the NCAA.
  43
  44    Parameters
  45    ----------
  46    `season` (int, mandatory):
  47        Required argument.
  48        Specifies the season you want NCAA basketball team information from.
  49
  50    `level` (int, mandatory):
  51        Required argument.
  52        Specifies the level/division you want
  53        NCAA basketball team information from.
  54        This can either be an integer (1-3) or a string ("I"-"III").
  55
  56    `get_wbb_data` (bool, optional):
  57        Optional argument.
  58        If you want women's basketball data instead of men's basketball data,
  59        set this to `True`.
  60
  61    Usage
  62    ----------
  63    ```python
  64
  65    from ncaa_stats_py.basketball import get_basketball_teams
  66
  67    ########################################
  68    #          Men's Basketball            #
  69    ########################################
  70
  71    # Get all D1 men's basketball teams for the 2024 season.
  72    print("Get all D1 men's basketball teams for the 2024 season.")
  73    df = get_basketball_teams(2024, 1)
  74    print(df)
  75
  76    # Get all D2 men's basketball teams for the 2023 season.
  77    print("Get all D2 men's basketball teams for the 2023 season.")
  78    df = get_basketball_teams(2023, 2)
  79    print(df)
  80
  81    # Get all D3 men's basketball teams for the 2022 season.
  82    print("Get all D3 men's basketball teams for the 2022 season.")
  83    df = get_basketball_teams(2022, 3)
  84    print(df)
  85
  86    # Get all D1 men's basketball teams for the 2021 season.
  87    print("Get all D1 men's basketball teams for the 2021 season.")
  88    df = get_basketball_teams(2021, "I")
  89    print(df)
  90
  91    # Get all D2 men's basketball teams for the 2020 season.
  92    print("Get all D2 men's basketball teams for the 2020 season.")
  93    df = get_basketball_teams(2020, "II")
  94    print(df)
  95
  96    # Get all D3 men's basketball teams for the 2019 season.
  97    print("Get all D3 men's basketball teams for the 2019 season.")
  98    df = get_basketball_teams(2019, "III")
  99    print(df)
 100
 101    ########################################
 102    #          Women's Basketball          #
 103    ########################################
 104
 105    # Get all D1 women's basketball teams for the 2024 season.
 106    print(
 107        "Get all D1 women's basketball teams for the 2024 season."
 108    )
 109    df = get_basketball_teams(2024, 1)
 110    print(df)
 111
 112    # Get all D2 women's basketball teams for the 2023 season.
 113    print(
 114        "Get all D2 women's basketball teams for the 2023 season."
 115    )
 116    df = get_basketball_teams(2023, 2)
 117    print(df)
 118
 119    # Get all D3 women's basketball teams for the 2022 season.
 120    print(
 121        "Get all D3 women's basketball teams for the 2022 season."
 122    )
 123    df = get_basketball_teams(2022, 3)
 124    print(df)
 125
 126    # Get all D1 women's basketball teams for the 2021 season.
 127    print(
 128        "Get all D1 women's basketball teams for the 2021 season."
 129    )
 130    df = get_basketball_teams(2021, "I")
 131    print(df)
 132
 133    # Get all D2 women's basketball teams for the 2020 season.
 134    print(
 135        "Get all D2 women's basketball teams for the 2020 season."
 136    )
 137    df = get_basketball_teams(2020, "II")
 138    print(df)
 139
 140    # Get all D3 women's basketball teams for the 2019 season.
 141    print(
 142        "Get all D3 women's basketball teams for the 2019 season."
 143    )
 144    df = get_basketball_teams(2019, "III")
 145    print(df)
 146
 147    ```
 148
 149    Returns
 150    ----------
 151    A pandas `DataFrame` object with a list of college basketball teams
 152    in that season and NCAA level.
 153    """
 154    # def is_comment(elem):
 155    #     return isinstance(elem, Comment)
 156    sport_id = ""
 157    # stat_sequence = 0
 158    load_from_cache = True
 159    home_dir = expanduser("~")
 160    home_dir = _format_folder_str(home_dir)
 161    teams_df = pd.DataFrame()
 162    teams_df_arr = []
 163    temp_df = pd.DataFrame()
 164    formatted_level = ""
 165    ncaa_level = 0
 166
 167    if get_wbb_data is True:
 168        sport_id = "WBB"
 169        stat_sequence = 169
 170    else:
 171        sport_id = "MBB"
 172        stat_sequence = 168
 173
 174    if isinstance(level, int) and level == 1:
 175        formatted_level = "I"
 176        ncaa_level = 1
 177    elif isinstance(level, int) and level == 2:
 178        formatted_level = "II"
 179        ncaa_level = 2
 180    elif isinstance(level, int) and level == 3:
 181        formatted_level = "III"
 182        ncaa_level = 3
 183    elif isinstance(level, str) and (
 184        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
 185    ):
 186        ncaa_level = 1
 187        formatted_level = level.upper()
 188    elif isinstance(level, str) and (
 189        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
 190    ):
 191        ncaa_level = 2
 192        formatted_level = level.upper()
 193    elif isinstance(level, str) and (
 194        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
 195    ):
 196        ncaa_level = 3
 197        formatted_level = level.upper()
 198
 199    if exists(f"{home_dir}/.ncaa_stats_py/"):
 200        pass
 201    else:
 202        mkdir(f"{home_dir}/.ncaa_stats_py/")
 203
 204    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
 205        pass
 206    else:
 207        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
 208
 209    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"):
 210        pass
 211    else:
 212        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}//teams/")
 213
 214    if exists(
 215        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
 216        + f"{season}_{formatted_level}_teams.csv"
 217    ):
 218        teams_df = pd.read_csv(
 219            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
 220            + f"{season}_{formatted_level}_teams.csv"
 221        )
 222        file_mod_datetime = datetime.fromtimestamp(
 223            getmtime(
 224                f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
 225                + f"{season}_{formatted_level}_teams.csv"
 226            )
 227        )
 228    else:
 229        file_mod_datetime = datetime.today()
 230        load_from_cache = False
 231
 232    now = datetime.today()
 233
 234    age = now - file_mod_datetime
 235
 236    if (
 237        age.days > 1 and
 238        season >= (now.year - 1) and
 239        now.month <= 7
 240    ):
 241        load_from_cache = False
 242    elif age.days >= 35:
 243        load_from_cache = False
 244
 245    if load_from_cache is True:
 246        return teams_df
 247
 248    logging.warning(
 249        f"Either we could not load {season} D{level} schools from cache, "
 250        + "or it's time to refresh the cached data."
 251    )
 252    schools_df = _get_schools()
 253    url = (
 254        "https://stats.ncaa.org/rankings/change_sport_year_div?"
 255        + f"academic_year={season}.0&division={ncaa_level}.0" +
 256        f"&sport_code={sport_id}"
 257    )
 258
 259    response = _get_webpage(url=url)
 260
 261    soup = BeautifulSoup(response.text, features="lxml")
 262    ranking_periods = soup.find("select", {"name": "rp", "id": "rp"})
 263    ranking_periods = ranking_periods.find_all("option")
 264
 265    rp_value = 0
 266    found_value = False
 267
 268    while found_value is False:
 269        # print("check")
 270        for rp in ranking_periods:
 271            if "final " in rp.text.lower():
 272                rp_value = rp.get("value")
 273                found_value = True
 274                break
 275            else:
 276                rp_value = rp.get("value")
 277                found_value = True
 278                break
 279
 280    url = (
 281        "https://stats.ncaa.org/rankings/institution_trends?"
 282        + f"academic_year={season}.0&division={ncaa_level}.0&"
 283        + f"ranking_period={rp_value}&sport_code={sport_id}"
 284        + f"&sport_code={sport_id}"
 285    )
 286
 287    best_method = True
 288    if (
 289        (season < 2015 and sport_id == "MBB")
 290    ):
 291        url = (
 292            "https://stats.ncaa.org/rankings/national_ranking?"
 293            + f"academic_year={season}.0&division={ncaa_level}.0&"
 294            + f"ranking_period={rp_value}&sport_code={sport_id}"
 295            + f"&stat_seq={stat_sequence}"
 296        )
 297        response = _get_webpage(url=url)
 298        best_method = False
 299    elif season < 2013:
 300        url = (
 301            "https://stats.ncaa.org/rankings/national_ranking?"
 302            + f"academic_year={season}.0&division={ncaa_level}.0&"
 303            + f"ranking_period={rp_value}&sport_code={sport_id}"
 304            + f"&stat_seq={stat_sequence}"
 305        )
 306        response = _get_webpage(url=url)
 307        best_method = False
 308    else:
 309        try:
 310            response = _get_webpage(url=url)
 311        except Exception as e:
 312            logging.info(f"Found exception when loading teams `{e}`")
 313            logging.info("Attempting backup method.")
 314            url = (
 315                "https://stats.ncaa.org/rankings/national_ranking?"
 316                + f"academic_year={season}.0&division={ncaa_level}.0&"
 317                + f"ranking_period={rp_value}&sport_code={sport_id}"
 318                + f"&stat_seq={stat_sequence}"
 319            )
 320            response = _get_webpage(url=url)
 321            best_method = False
 322
 323    soup = BeautifulSoup(response.text, features="lxml")
 324
 325    if best_method is True:
 326        soup = soup.find(
 327            "table",
 328            {"id": "stat_grid"},
 329        )
 330        soup = soup.find("tbody")
 331        t_rows = soup.find_all("tr")
 332
 333        for t in t_rows:
 334            team_id = t.find("a")
 335            team_id = team_id.get("href")
 336            team_id = team_id.replace("/teams/", "")
 337            team_id = int(team_id)
 338            team_name = t.find_all("td")[0].text
 339            team_conference_name = t.find_all("td")[1].text
 340            # del team
 341            temp_df = pd.DataFrame(
 342                {
 343                    "season": season,
 344                    "ncaa_division": ncaa_level,
 345                    "ncaa_division_formatted": formatted_level,
 346                    "team_conference_name": team_conference_name,
 347                    "team_id": team_id,
 348                    "school_name": team_name,
 349                    "sport_id": sport_id,
 350                },
 351                index=[0],
 352            )
 353            teams_df_arr.append(temp_df)
 354            del temp_df
 355    else:
 356        soup = soup.find(
 357            "table",
 358            {"id": "rankings_table"},
 359        )
 360        soup = soup.find("tbody")
 361        t_rows = soup.find_all("tr")
 362
 363        for t in t_rows:
 364            team_id = t.find("a")
 365            team_id = team_id.get("href")
 366            team_id = team_id.replace("/teams/", "")
 367            team_id = int(team_id)
 368            team = t.find_all("td")[1].get("data-order")
 369            team_name, team_conference_name = team.split(",")
 370            del team
 371            temp_df = pd.DataFrame(
 372                {
 373                    "season": season,
 374                    "ncaa_division": ncaa_level,
 375                    "ncaa_division_formatted": formatted_level,
 376                    "team_conference_name": team_conference_name,
 377                    "team_id": team_id,
 378                    "school_name": team_name,
 379                    "sport_id": sport_id,
 380                },
 381                index=[0],
 382            )
 383            teams_df_arr.append(temp_df)
 384            del temp_df
 385
 386    teams_df = pd.concat(teams_df_arr, ignore_index=True)
 387    teams_df = pd.merge(
 388        left=teams_df,
 389        right=schools_df,
 390        on=["school_name"],
 391        how="left"
 392    )
 393    teams_df.sort_values(by=["team_id"], inplace=True)
 394
 395    teams_df.to_csv(
 396        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
 397        + f"{season}_{formatted_level}_teams.csv",
 398        index=False,
 399    )
 400
 401    return teams_df
 402
 403
 404def load_basketball_teams(
 405    start_year: int = 2011,
 406    get_wbb_data: bool = False
 407) -> pd.DataFrame:
 408    """
 409    Compiles a list of known NCAA basketball teams in NCAA basketball history.
 410
 411    Parameters
 412    ----------
 413    `start_year` (int, optional):
 414        Optional argument.
 415        Specifies the first season you want
 416        NCAA basketball team information from.
 417
 418    `get_wbb_data` (bool, optional):
 419        Optional argument.
 420        If you want women's basketball data instead of men's basketball data,
 421        set this to `True`.
 422
 423    Usage
 424    ----------
 425    ```python
 426
 427    from ncaa_stats_py.basketball import load_basketball_teams
 428
 429    # WARNING: Running this script "as-is" for the first time may
 430    #          take some time.
 431    #          The *N*th time you run this script will be faster.
 432
 433    # Load in every women's basketball team
 434    # from 2011 to present day.
 435    print(
 436        "Load in every women's basketball team " +
 437        "from 2011 to present day."
 438    )
 439    df = load_basketball_teams(get_wbb_data=True)
 440    print(df)
 441
 442    # Load in every men's basketball team
 443    # from 2011 to present day.
 444    print(
 445        "Load in every men's basketball team " +
 446        "from 2011 to present day."
 447    )
 448    df = load_basketball_teams()
 449    print(df)
 450
 451    # Load in every men's basketball team
 452    # from 2020 to present day.
 453    print(
 454        "Load in every men's basketball team " +
 455        "from 2020 to present day."
 456    )
 457    df = load_basketball_teams(start_year=2020)
 458    print(df)
 459
 460    ```
 461
 462    Returns
 463    ----------
 464    A pandas `DataFrame` object with a list of
 465    all known college basketball teams.
 466
 467    """
 468    # start_year = 2008
 469
 470    # if get_wbb_data is True:
 471    #     sport_id = "WBB"
 472    # else:
 473    #     sport_id = "MBB"
 474
 475    teams_df = pd.DataFrame()
 476    teams_df_arr = []
 477    temp_df = pd.DataFrame()
 478
 479    now = datetime.now()
 480    ncaa_divisions = ["I", "II", "III"]
 481    if now.month > 5:
 482        ncaa_seasons = [x for x in range(start_year, (now.year + 2))]
 483    else:
 484        ncaa_seasons = [x for x in range(start_year, (now.year + 1))]
 485
 486    logging.info(
 487        "Loading in all NCAA basketball teams. "
 488        + "If this is the first time you're seeing this message, "
 489        + "it may take some time (3-10 minutes) for this to load."
 490    )
 491    for s in ncaa_seasons:
 492        logging.info(f"Loading in basketball teams for the {s} season.")
 493        for d in ncaa_divisions:
 494            try:
 495                temp_df = get_basketball_teams(season=s, level=d)
 496                teams_df_arr.append(temp_df)
 497                del temp_df
 498            except Exception as e:
 499                logging.warning(
 500                    "Unhandled exception when trying to " +
 501                    f"get the teams. Full exception: `{e}`"
 502                )
 503
 504
 505    teams_df = pd.concat(teams_df_arr, ignore_index=True)
 506    teams_df = teams_df.infer_objects()
 507    return teams_df
 508
 509
 510def get_basketball_team_schedule(team_id: int) -> pd.DataFrame:
 511    """
 512    Retrieves a team schedule, from a valid NCAA basketball team ID.
 513
 514    Parameters
 515    ----------
 516    `team_id` (int, mandatory):
 517        Required argument.
 518        Specifies the team you want a schedule from.
 519        This is separate from a school ID, which identifies the institution.
 520        A team ID should be unique to a school, and a season.
 521
 522    Usage
 523    ----------
 524    ```python
 525
 526    from ncaa_stats_py.basketball import get_basketball_team_schedule
 527
 528    ########################################
 529    #          Men's Basketball            #
 530    ########################################
 531
 532    # Get the team schedule for the
 533    # 2024 Wright St. MBB team (D1, ID: 561255).
 534    print(
 535        "Get the team schedule for the " +
 536        "2024 Wright St. MBB team (D1, ID: 561255)."
 537    )
 538    df = get_basketball_team_schedule(561255)
 539    print(df)
 540
 541    # Get the team schedule for the
 542    # 2023 Caldwell MBB team (D2, ID: 542813).
 543    print(
 544        "Get the team schedule for the " +
 545        "2023 Caldwell MBB team (D2, ID: 542813)."
 546    )
 547    df = get_basketball_team_schedule(542813)
 548    print(df)
 549
 550    # Get the team schedule for the
 551    # 2022 SUNY Maritime MBB team (D3, ID: 528097).
 552    print(
 553        "Get the team schedule for the " +
 554        "2022 SUNY Maritime MBB team (D3, ID: 528097)."
 555    )
 556    df = get_basketball_team_schedule(528097)
 557    print(df)
 558
 559    ########################################
 560    #          Women's Basketball          #
 561    ########################################
 562
 563    # Get the team schedule for the
 564    # 2021 Wake Forest WBB team (D1, ID: 506339).
 565    print(
 566        "Get the team schedule for the " +
 567        "2021 Wake Forest WBB team (D1, ID: 506339)."
 568    )
 569    df = get_basketball_team_schedule(506339)
 570    print(df)
 571
 572    # Get the team schedule for the
 573    # 2020 Trevecca Nazarene WBB team (D2, ID: 484527).
 574    print(
 575        "Get the team schedule for the " +
 576        "2020 Trevecca Nazarene WBB team (D2, ID: 484527)."
 577    )
 578    df = get_basketball_team_schedule(484527)
 579    print(df)
 580
 581    # Get the team schedule for the
 582    # 2019 Simpson WBB team (D3, ID: 452452).
 583    print(
 584        "Get the team schedule for the " +
 585        "2019 Simpson WBB team (D3, ID: 452452)."
 586    )
 587    df = get_basketball_team_schedule(452452)
 588    print(df)
 589
 590    ```
 591
 592    Returns
 593    ----------
 594    A pandas `DataFrame` object with an NCAA basketball team's schedule.
 595
 596    """
 597
 598    sport_id = ""
 599    schools_df = _get_schools()
 600    games_df = pd.DataFrame()
 601    games_df_arr = []
 602    season = 0
 603    temp_df = pd.DataFrame()
 604    load_from_cache = True
 605
 606    home_dir = expanduser("~")
 607    home_dir = _format_folder_str(home_dir)
 608
 609    url = f"https://stats.ncaa.org/teams/{team_id}"
 610
 611    try:
 612        team_df = load_basketball_teams()
 613        team_df = team_df[team_df["team_id"] == team_id]
 614        season = team_df["season"].iloc[0]
 615        ncaa_division = team_df["ncaa_division"].iloc[0]
 616        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 617        sport_id = "MBB"
 618    except Exception:
 619        team_df = load_basketball_teams(get_wbb_data=True)
 620        team_df = team_df[team_df["team_id"] == team_id]
 621        season = team_df["season"].iloc[0]
 622        ncaa_division = team_df["ncaa_division"].iloc[0]
 623        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 624        sport_id = "WBB"
 625    # team_conference_name = team_df["team_conference_name"].iloc[0]
 626    # school_name = team_df["school_name"].iloc[0]
 627    # school_id = int(team_df["school_id"].iloc[0])
 628
 629    del team_df
 630
 631    if exists(f"{home_dir}/.ncaa_stats_py/"):
 632        pass
 633    else:
 634        mkdir(f"{home_dir}/.ncaa_stats_py/")
 635
 636    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
 637        pass
 638    else:
 639        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
 640
 641    if exists(
 642        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 643    ):
 644        pass
 645    else:
 646        mkdir(
 647            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 648        )
 649
 650    if exists(
 651        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 652        + f"{team_id}_team_schedule.csv"
 653    ):
 654        games_df = pd.read_csv(
 655            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 656            + f"{team_id}_team_schedule.csv"
 657        )
 658        file_mod_datetime = datetime.fromtimestamp(
 659            getmtime(
 660                f"{home_dir}/.ncaa_stats_py/"
 661                + f"basketball_{sport_id}/team_schedule/"
 662                + f"{team_id}_team_schedule.csv"
 663            )
 664        )
 665    else:
 666        file_mod_datetime = datetime.today()
 667        load_from_cache = False
 668
 669    now = datetime.today()
 670
 671    age = now - file_mod_datetime
 672    if (
 673        age.days > 1 and
 674        season >= now.year
 675    ):
 676        load_from_cache = False
 677
 678    if load_from_cache is True:
 679        return games_df
 680
 681    response = _get_webpage(url=url)
 682    soup = BeautifulSoup(response.text, features="lxml")
 683
 684    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
 685    season_name = (
 686        soup.find("select", {"id": "year_list"})
 687        .find("option", {"selected": "selected"})
 688        .text
 689    )
 690    # For NCAA basketball, the season always starts in the fall semester,
 691    # and ends in the spring semester.
 692    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season,
 693    # because 2012 would encompass the fall and spring semesters
 694    # for NCAA member institutions.
 695    # season = f"{season_name[0:2]}{season_name[-2:]}"
 696    # season = int(season)
 697    soup = soup.find_all(
 698        "div",
 699        {"class": "col p-0"},
 700    )
 701
 702    # declaring it here to prevent potential problems down the road.
 703    table_data = ""
 704    for s in soup:
 705        try:
 706            temp_name = s.find("div", {"class": "card-header"})
 707            temp_name = temp_name.text
 708        except Exception as e:
 709            logging.warning(
 710                f"Could not parse card header. Full exception `{e}`. "
 711                + "Attempting alternate method."
 712            )
 713            temp_name = s.find("tr", {"class": "heading"}).find("td").text
 714
 715        if "schedule" in temp_name.lower():
 716            table_data = s.find("table")
 717
 718    t_rows = table_data.find_all("tr", {"class": "underline_rows"})
 719
 720    if len(t_rows) == 0:
 721        t_rows = table_data.find_all("tr")
 722
 723    for g in t_rows:
 724        is_valid_row = True
 725        game_num = 1
 726        ot_periods = 0
 727        is_home_game = True
 728        is_neutral_game = False
 729
 730        cells = g.find_all("td")
 731        if len(cells) <= 1:
 732            # Because of how *well* designed
 733            # stats.ncaa.org is, if we have to use execute
 734            # the `if len(t_rows) == 0:` code,
 735            # we need to catch any cases where every element in a
 736            # table row (`<tr>`) is a table header (`<th>`),
 737            # instead of a table data cell (`<td>`)
 738            continue
 739
 740        game_date = cells[0].text
 741
 742        # If "(" is in the same cell as the date,
 743        # this means that this game is an extra innings game.
 744        # The number encased in `()` is the actual number of innings.
 745        # We need to remove that from the date,
 746        # and move it into a separate variable.
 747        if "(" in game_date:
 748            game_date = game_date.replace(")", "")
 749            game_date, game_num = game_date.split("(")
 750            game_date = game_date.strip()
 751            game_num = int(game_num.strip())
 752
 753        game_date = datetime.strptime(game_date, "%m/%d/%Y").date()
 754
 755        try:
 756            opp_team_id = cells[1].find("a").get("href")
 757        except IndexError:
 758            logging.info(
 759                "Skipping row because it is clearly "
 760                + "not a row that has schedule data."
 761            )
 762            is_valid_row = False
 763        except AttributeError as e:
 764            logging.info(
 765                "Could not extract a team ID for this game. " +
 766                f"Full exception {e}"
 767            )
 768            opp_team_id = "-1"
 769        except Exception as e:
 770            logging.warning(
 771                "An unhandled exception has occurred when "
 772                + "trying to get the opposition team ID for this game. "
 773                f"Full exception `{e}`."
 774            )
 775            raise e
 776        if is_valid_row is True:
 777            if opp_team_id is not None:
 778                opp_team_id = opp_team_id.replace("/teams/", "")
 779                opp_team_id = int(opp_team_id)
 780
 781                try:
 782                    opp_team_name = cells[1].find("img").get("alt")
 783                except AttributeError:
 784                    logging.info(
 785                        "Couldn't find the opposition team name "
 786                        + "for this row from an image element. "
 787                        + "Attempting a backup method"
 788                    )
 789                    opp_team_name = cells[1].text
 790                except Exception as e:
 791                    logging.info(
 792                        "Unhandled exception when trying to get the "
 793                        + "opposition team name from this game. "
 794                        + f"Full exception `{e}`"
 795                    )
 796                    raise e
 797            else:
 798                opp_team_name = cells[1].text
 799
 800            if opp_team_name[0] == "@":
 801                # The logic for determining if this game was a
 802                # neutral site game doesn't care if that info is in
 803                # `opp_team_name`.
 804                opp_team_name = opp_team_name.strip().replace("@", "")
 805            elif "@" in opp_team_name:
 806                opp_team_name = opp_team_name.strip().split("@")[0]
 807            # opp_team_show_name = cells[1].text.strip()
 808
 809            opp_text = cells[1].text
 810            opp_text = opp_text.strip()
 811            if "@" in opp_text and opp_text[0] == "@":
 812                is_home_game = False
 813            elif "@" in opp_text and opp_text[0] != "@":
 814                is_neutral_game = True
 815                is_home_game = False
 816            # This is just to cover conference and NCAA championship
 817            # tournaments.
 818            elif "championship" in opp_text.lower():
 819                is_neutral_game = True
 820                is_home_game = False
 821            elif "ncaa" in opp_text.lower():
 822                is_neutral_game = True
 823                is_home_game = False
 824
 825            del opp_text
 826
 827            score = cells[2].text.strip()
 828            if len(score) == 0:
 829                score_1 = 0
 830                score_2 = 0
 831            elif (
 832                "canceled" not in score.lower() and
 833                "ppd" not in score.lower()
 834            ):
 835                score_1, score_2 = score.split("-")
 836
 837                # `score_1` should be "W `n`", "L `n`", or "T `n`",
 838                # with `n` representing the number of runs this team
 839                # scored in this game.
 840                # Let's remove the "W", "L", or "T" from `score_1`,
 841                # and determine which team won later on in this code.
 842                if any(x in score_1 for x in ["W", "L", "T"]):
 843                    score_1 = score_1.split(" ")[1]
 844
 845                if "(" in score_2:
 846                    score_2 = score_2.replace(")", "")
 847                    score_2, ot_periods = score_2.split("(")
 848                    ot_periods = ot_periods.replace("OT", "")
 849                    ot_periods = ot_periods.replace(" ", "")
 850                    ot_periods = int(ot_periods)
 851
 852                if ot_periods is None:
 853                    ot_periods = 0
 854                score_1 = int(score_1)
 855                score_2 = int(score_2)
 856            else:
 857                score_1 = None
 858                score_2 = None
 859
 860            try:
 861                game_id = cells[2].find("a").get("href")
 862                game_id = game_id.replace("/contests", "")
 863                game_id = game_id.replace("/box_score", "")
 864                game_id = game_id.replace("/", "")
 865                game_id = int(game_id)
 866                game_url = (
 867                    f"https://stats.ncaa.org/contests/{game_id}/box_score"
 868                )
 869
 870            except AttributeError as e:
 871                logging.info(
 872                    "Could not parse a game ID for this game. "
 873                    + f"Full exception `{e}`."
 874                )
 875                game_id = None
 876                game_url = None
 877            except Exception as e:
 878                logging.info(
 879                    "An unhandled exception occurred when trying "
 880                    + "to find a game ID for this game. "
 881                    + f"Full exception `{e}`."
 882                )
 883                raise e
 884            try:
 885                attendance = cells[3].text
 886                attendance = attendance.replace(",", "")
 887                attendance = attendance.replace("\n", "")
 888                attendance = int(attendance)
 889            except IndexError as e:
 890                logging.info(
 891                    "It doesn't appear as if there is an attendance column "
 892                    + "for this team's schedule table."
 893                    f"Full exception `{e}`."
 894                )
 895                attendance = None
 896            except ValueError as e:
 897                logging.info(
 898                    "There doesn't appear as if "
 899                    + "there is a recorded attendance. "
 900                    + "for this game/row. "
 901                    f"Full exception `{e}`."
 902                )
 903                attendance = None
 904
 905            except Exception as e:
 906                logging.info(
 907                    "An unhandled exception occurred when trying "
 908                    + "to find this game's attendance. "
 909                    + f"Full exception `{e}`."
 910                )
 911                raise e
 912
 913            if is_home_game is True:
 914                temp_df = pd.DataFrame(
 915                    {
 916                        "season": season,
 917                        "season_name": season_name,
 918                        "game_id": game_id,
 919                        "game_date": game_date,
 920                        "game_num": game_num,
 921                        "ot_periods": ot_periods,
 922                        "home_team_id": team_id,
 923                        "home_team_name": school_name,
 924                        "away_team_id": opp_team_id,
 925                        "away_team_name": opp_team_name,
 926                        "home_team_score": score_1,
 927                        "away_team_score": score_2,
 928                        "is_neutral_game": is_neutral_game,
 929                        "game_url": game_url,
 930                    },
 931                    index=[0],
 932                )
 933                games_df_arr.append(temp_df)
 934                del temp_df
 935            elif is_neutral_game is True:
 936                # For the sake of simplicity,
 937                # order both team ID's,
 938                # and set the lower number of the two as
 939                # the "away" team in this neutral site game,
 940                # just so there's no confusion if someone
 941                # combines a ton of these team schedule `DataFrame`s,
 942                # and wants to remove duplicates afterwards.
 943                t_ids = [opp_team_id, team_id]
 944                t_ids.sort()
 945
 946                if t_ids[0] == team_id:
 947                    # home
 948                    temp_df = pd.DataFrame(
 949                        {
 950                            "season": season,
 951                            "season_name": season_name,
 952                            "game_id": game_id,
 953                            "game_date": game_date,
 954                            "game_num": game_num,
 955                            "ot_periods": ot_periods,
 956                            "home_team_id": team_id,
 957                            "home_team_name": school_name,
 958                            "away_team_id": opp_team_id,
 959                            "away_team_name": opp_team_name,
 960                            "home_team_score": score_1,
 961                            "away_team_score": score_2,
 962                            "is_neutral_game": is_neutral_game,
 963                            "game_url": game_url,
 964                        },
 965                        index=[0],
 966                    )
 967
 968                else:
 969                    # away
 970                    temp_df = pd.DataFrame(
 971                        {
 972                            "season": season,
 973                            "season_name": season_name,
 974                            "game_id": game_id,
 975                            "game_date": game_date,
 976                            "game_num": game_num,
 977                            "ot_periods": ot_periods,
 978                            "home_team_id": opp_team_id,
 979                            "home_team_name": opp_team_name,
 980                            "away_team_id": team_id,
 981                            "away_team_name": school_name,
 982                            "home_team_score": score_2,
 983                            "away_team_score": score_1,
 984                            "is_neutral_game": is_neutral_game,
 985                            "game_url": game_url,
 986                        },
 987                        index=[0],
 988                    )
 989
 990                games_df_arr.append(temp_df)
 991                del temp_df
 992            else:
 993                temp_df = pd.DataFrame(
 994                    {
 995                        "season": season,
 996                        "season_name": season_name,
 997                        "game_id": game_id,
 998                        "game_date": game_date,
 999                        "game_num": game_num,
1000                        "ot_periods": ot_periods,
1001                        "home_team_id": opp_team_id,
1002                        "home_team_name": opp_team_name,
1003                        "away_team_id": team_id,
1004                        "away_team_name": school_name,
1005                        "home_team_score": score_2,
1006                        "away_team_score": score_1,
1007                        "is_neutral_game": is_neutral_game,
1008                        "game_url": game_url,
1009                    },
1010                    index=[0],
1011                )
1012
1013                games_df_arr.append(temp_df)
1014                del temp_df
1015
1016        # team_photo = team_id.find("img").get("src")
1017
1018    games_df = pd.concat(games_df_arr, ignore_index=True)
1019
1020    temp_df = schools_df.rename(
1021        columns={
1022            "school_name": "home_team_name",
1023            "school_id": "home_school_id"
1024        }
1025    )
1026    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")
1027
1028    temp_df = schools_df.rename(
1029        columns={
1030            "school_name": "away_team_name",
1031            "school_id": "away_school_id"
1032        }
1033    )
1034    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
1035    games_df["ncaa_division"] = ncaa_division
1036    games_df["ncaa_division_formatted"] = ncaa_division_formatted
1037
1038    # games_df["game_url"] = games_df["game_url"].str.replace("/box_score", "")
1039    games_df.to_csv(
1040        f"{home_dir}/.ncaa_stats_py/"
1041        + f"basketball_{sport_id}/team_schedule/"
1042        + f"{team_id}_team_schedule.csv",
1043        index=False,
1044    )
1045
1046    return games_df
1047
1048
1049def get_basketball_day_schedule(
1050    game_date: str | date | datetime,
1051    level: str | int = "I",
1052    get_wbb_data: bool = False
1053):
1054    """
1055    Given a date and NCAA level, this function retrieves basketball every game
1056    for that date.
1057
1058    Parameters
1059    ----------
1060    `game_date` (int, mandatory):
1061        Required argument.
1062        Specifies the date you want a basketball schedule from.
1063        For best results, pass a string formatted as "YYYY-MM-DD".
1064
1065    `level` (int, mandatory):
1066        Required argument.
1067        Specifies the level/division you want a
1068        NCAA basketball schedule from.
1069        This can either be an integer (1-3) or a string ("I"-"III").
1070
1071    `get_wbb_data` (bool, optional):
1072        Optional argument.
1073        If you want women's basketball data instead of men's basketball data,
1074        set this to `True`.
1075
1076    Usage
1077    ----------
1078    ```python
1079
1080    from ncaa_stats_py.basketball import get_basketball_day_schedule
1081
1082
1083    # Get all DI games that will be played on April 22th, 2025.
1084    print("Get all games that will be played on April 22th, 2025.")
1085    df = get_basketball_day_schedule("2025-04-22", level=1)
1086    print(df)
1087
1088    # Get all division II games that were played on February 14th, 2025.
1089    print("Get all division II games that were played on February 14th, 2025.")
1090    df = get_basketball_day_schedule("2025-02-14", level="I")
1091    print(df)
1092
1093    # Get all DI games that were played on December 10th, 2024.
1094    print("Get all games that were played on December 10th, 2024.")
1095    df = get_basketball_day_schedule("2024-12-10", level="I")
1096    print(df)
1097
1098    # Get all DI games (if any) that were played on December 12th, 2024.
1099    print("Get all DI games (if any) that were played on December 12th, 2024.")
1100    df = get_basketball_day_schedule("2024-12-12")
1101    print(df)
1102
1103    # Get all DII games played on January 14th, 2024.
1104    print("Get all DI games played on January 14th, 2024.")
1105    df = get_basketball_day_schedule("2024-01-14")
1106    print(df)
1107
1108    # Get all division III games played on December 16th, 2023.
1109    print("Get all division III games played on December 16th, 2023.")
1110    df = get_basketball_day_schedule("2023-12-16")
1111    print(df)
1112
1113    ```
1114
1115    Returns
1116    ----------
1117    A pandas `DataFrame` object with all basketball games played on that day,
1118    for that NCAA division/level.
1119
1120    """
1121
1122    season = 0
1123    sport_id = "MBB"
1124
1125    schedule_df = pd.DataFrame()
1126    schedule_df_arr = []
1127
1128    if isinstance(game_date, date):
1129        game_datetime = datetime.combine(
1130            game_date, datetime.min.time()
1131        )
1132    elif isinstance(game_date, datetime):
1133        game_datetime = game_date
1134    elif isinstance(game_date, str):
1135        game_datetime = parser.parse(
1136            game_date
1137        )
1138    else:
1139        unhandled_datatype = type(game_date)
1140        raise ValueError(
1141            f"Unhandled datatype for `game_date`: `{unhandled_datatype}`"
1142        )
1143
1144    if isinstance(level, int) and level == 1:
1145        formatted_level = "I"
1146        ncaa_level = 1
1147    elif isinstance(level, int) and level == 2:
1148        formatted_level = "II"
1149        ncaa_level = 2
1150    elif isinstance(level, int) and level == 3:
1151        formatted_level = "III"
1152        ncaa_level = 3
1153    elif isinstance(level, str) and (
1154        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1155    ):
1156        ncaa_level = 1
1157        formatted_level = level.upper()
1158    elif isinstance(level, str) and (
1159        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1160    ):
1161        ncaa_level = 2
1162        formatted_level = level.upper()
1163    elif isinstance(level, str) and (
1164        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1165    ):
1166        ncaa_level = 3
1167        formatted_level = level.upper()
1168
1169    del level
1170
1171    if get_wbb_data is True:
1172        sport_id = "WBB"
1173    elif get_wbb_data is False:
1174        sport_id = "MBB"
1175    else:
1176        raise ValueError(
1177            f"Unhandled value for `get_wbb_data`: `{get_wbb_data}`"
1178        )
1179
1180    season = game_datetime.year
1181    game_month = game_datetime.month
1182    game_day = game_datetime.day
1183    game_year = game_datetime.year
1184
1185    if game_month > 7:
1186        season += 1
1187        url = (
1188            "https://stats.ncaa.org/contests/" +
1189            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1190            f"&academic_year={season}&division={ncaa_level}" +
1191            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1192            "&commit=Submit"
1193        )
1194    else:
1195        url = (
1196            "https://stats.ncaa.org/contests/" +
1197            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1198            f"&academic_year={season}&division={ncaa_level}" +
1199            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1200            "&commit=Submit"
1201        )
1202
1203    response = _get_webpage(url=url)
1204    soup = BeautifulSoup(response.text, features="lxml")
1205
1206    game_boxes = soup.find_all("div", {"class": "table-responsive"})
1207
1208    for box in game_boxes:
1209        game_id = None
1210        game_alt_text = None
1211        game_num = 1
1212        # t_box = box.find("table")
1213        table_box = box.find("table")
1214        table_rows = table_box.find_all("tr")
1215
1216        # Date/attendance
1217        game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text
1218        game_date_str = game_date_str.replace("\n", "")
1219        game_date_str = game_date_str.strip()
1220        game_date_str = game_date_str.replace("TBA ", "TBA")
1221        game_date_str = game_date_str.replace("TBD ", "TBD")
1222        game_date_str = game_date_str.replace("PM ", "PM")
1223        game_date_str = game_date_str.replace("AM ", "AM")
1224        game_date_str = game_date_str.strip()
1225        attendance_str = table_rows[0].find(
1226            "div",
1227            {"class": "col p-0 text-right"}
1228        ).text
1229
1230        attendance_str = attendance_str.replace("Attend:", "")
1231        attendance_str = attendance_str.replace(",", "")
1232        attendance_str = attendance_str.replace("\n", "")
1233        if (
1234            "st" in attendance_str.lower() or
1235            "nd" in attendance_str.lower() or
1236            "rd" in attendance_str.lower() or
1237            "th" in attendance_str.lower()
1238        ):
1239            # This is not an attendance,
1240            # this is whatever quarter/half/inning this game is in.
1241            attendance_num = None
1242        elif "final" in attendance_str.lower():
1243            attendance_num = None
1244        elif len(attendance_str) > 0:
1245            attendance_num = int(attendance_str)
1246        else:
1247            attendance_num = None
1248
1249        if "(" in game_date_str:
1250            game_date_str = game_date_str.replace(")", "")
1251            game_date_str, game_num = game_date_str.split("(")
1252            game_num = int(game_num)
1253
1254        if "TBA" in game_date_str:
1255            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
1256        elif "tba" in game_date_str:
1257            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
1258        elif "TBD" in game_date_str:
1259            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
1260        elif "tbd" in game_date_str:
1261            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
1262        elif (
1263            "tbd" not in game_date_str.lower() and
1264            ":" not in game_date_str.lower()
1265        ):
1266            game_date_str = game_date_str.replace(" ", "")
1267            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
1268        else:
1269            game_datetime = datetime.strptime(
1270                game_date_str,
1271                '%m/%d/%Y %I:%M %p'
1272            )
1273        game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
1274
1275        game_alt_text = table_rows[1].find_all("td")[0].text
1276        if game_alt_text is not None and len(game_alt_text) > 0:
1277            game_alt_text = game_alt_text.replace("\n", "")
1278            game_alt_text = game_alt_text.strip()
1279
1280        if len(game_alt_text) == 0:
1281            game_alt_text = None
1282
1283        urls_arr = box.find_all("a")
1284
1285        for u in urls_arr:
1286            url_temp = u.get("href")
1287            if "contests" in url_temp:
1288                game_id = url_temp
1289                del url_temp
1290
1291        if game_id is None:
1292            for r in range(0, len(table_rows)):
1293                temp = table_rows[r]
1294                temp_id = temp.get("id")
1295
1296                if temp_id is not None and len(temp_id) > 0:
1297                    game_id = temp_id
1298
1299        del urls_arr
1300
1301        game_id = game_id.replace("/contests", "")
1302        game_id = game_id.replace("/box_score", "")
1303        game_id = game_id.replace("/livestream_scoreboards", "")
1304        game_id = game_id.replace("/", "")
1305        game_id = game_id.replace("contest_", "")
1306        game_id = int(game_id)
1307
1308        table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"})
1309        away_team_row = table_rows[0]
1310        home_team_row = table_rows[1]
1311
1312        # Away team
1313        td_arr = away_team_row.find_all("td")
1314
1315        try:
1316            away_team_name = td_arr[0].find("img").get("alt")
1317        except Exception:
1318            away_team_name = td_arr[1].text
1319        away_team_name = away_team_name.replace("\n", "")
1320        away_team_name = away_team_name.strip()
1321
1322        try:
1323            away_team_id = td_arr[1].find("a").get("href")
1324            away_team_id = away_team_id.replace("/teams/", "")
1325            away_team_id = int(away_team_id)
1326        except AttributeError:
1327            away_team_id = None
1328            logging.info("No team ID found for the away team")
1329        except Exception as e:
1330            raise e
1331
1332        away_points_scored = td_arr[-1].text
1333        away_points_scored = away_points_scored.replace("\n", "")
1334        away_points_scored = away_points_scored.replace("\xa0", "")
1335        if len(away_points_scored) > 0:
1336            away_points_scored = int(away_points_scored)
1337        else:
1338            away_points_scored = 0
1339
1340        del td_arr
1341
1342        # Home team
1343        td_arr = home_team_row.find_all("td")
1344
1345        try:
1346            home_team_name = td_arr[0].find("img").get("alt")
1347        except Exception:
1348            home_team_name = td_arr[1].text
1349        home_team_name = home_team_name.replace("\n", "")
1350        home_team_name = home_team_name.strip()
1351
1352        try:
1353            home_team_id = td_arr[1].find("a").get("href")
1354            home_team_id = home_team_id.replace("/teams/", "")
1355            home_team_id = int(home_team_id)
1356        except AttributeError:
1357            home_team_id = None
1358            logging.info("No team ID found for the home team")
1359        except Exception as e:
1360            raise e
1361
1362        home_points_scored = td_arr[-1].text
1363        home_points_scored = home_points_scored.replace("\n", "")
1364        home_points_scored = home_points_scored.replace("\xa0", "")
1365        if len(home_points_scored) > 0:
1366            home_points_scored = int(home_points_scored)
1367        else:
1368            home_points_scored = 0
1369
1370        temp_df = pd.DataFrame(
1371            {
1372                "season": season,
1373                "sport_id": sport_id,
1374                "game_date": game_datetime.strftime("%Y-%m-%d"),
1375                "game_datetime": game_datetime.isoformat(),
1376                "game_id": game_id,
1377                "formatted_level": formatted_level,
1378                "ncaa_level": ncaa_level,
1379                "game_alt_text": game_alt_text,
1380                "away_team_id": away_team_id,
1381                "away_team_name": away_team_name,
1382                "home_team_id": home_team_id,
1383                "home_team_name": home_team_name,
1384                "home_points_scored": home_points_scored,
1385                "away_points_scored": away_points_scored,
1386                "attendance": attendance_num
1387            },
1388            index=[0]
1389        )
1390        schedule_df_arr.append(temp_df)
1391
1392        del temp_df
1393
1394    if len(schedule_df_arr) >= 1:
1395        schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1396    else:
1397        logging.warning(
1398            "Could not find any game(s) for "
1399            + f"{game_datetime.year:00d}-{game_datetime.month:00d}"
1400            + f"-{game_datetime.day:00d}. "
1401            + "If you believe this is an error, "
1402            + "please raise an issue at "
1403            + "\n https://github.com/armstjc/ncaa_stats_py/issues \n"
1404        )
1405    return schedule_df
1406
1407
1408def get_full_basketball_schedule(
1409    season: int,
1410    level: str | int = "I",
1411    get_wbb_data: bool = False
1412) -> pd.DataFrame:
1413    """
1414    Retrieves a full basketball schedule,
1415    from an NCAA level (`"I"`, `"II"`, `"III"`).
1416    The way this is done is by going through every team in a division,
1417    and parsing the schedules of every team in a division.
1418
1419    This function will take time when first run (30-60 minutes)!
1420    You have been warned.
1421
1422    Parameters
1423    ----------
1424    `season` (int, mandatory):
1425        Specifies the season you want a schedule from.
1426
1427    `level` (int | str, mandatory):
1428        Specifies the team you want a schedule from.
1429
1430    `get_wbb_data` (bool, optional):
1431        Optional argument.
1432        If you want women's basketball data instead of men's basketball data,
1433        set this to `True`.
1434
1435    Usage
1436    ----------
1437    ```python
1438
1439    from ncaa_stats_py.basketball import get_full_basketball_schedule
1440
1441    # Get the entire 2024 schedule for the 2024 D1 basketball season.
1442    print("Get the entire 2024 schedule for the 2024 D1 basketball season.")
1443    df = get_full_basketball_schedule(season=2024, level="I")
1444    print(df)
1445
1446    # You can also input `level` as an integer.
1447    # In addition, this and other functions cache data,
1448    # so this should load very quickly
1449    # compared to the first run of this function.
1450    print("You can also input `level` as an integer.")
1451    print(
1452        "In addition, this and other functions cache data, "
1453        + "so this should load very quickly "
1454        + "compared to the first run of this function."
1455    )
1456    df = get_full_basketball_schedule(season=2024, level=1)
1457    print(df)
1458
1459    ```
1460
1461    Returns
1462    ----------
1463    A pandas `DataFrame` object with an NCAA basketball
1464    schedule for a specific season and level.
1465    """
1466
1467    sport_id = ""
1468    load_from_cache = True
1469    home_dir = expanduser("~")
1470    home_dir = _format_folder_str(home_dir)
1471    schedule_df = pd.DataFrame()
1472    schedule_df_arr = []
1473    temp_df = pd.DataFrame()
1474    formatted_level = ""
1475    ncaa_level = 0
1476
1477    if get_wbb_data is True:
1478        sport_id = "WBB"
1479    else:
1480        sport_id = "MBB"
1481
1482    if isinstance(level, int) and level == 1:
1483        formatted_level = "I"
1484        ncaa_level = 1
1485    elif isinstance(level, int) and level == 2:
1486        formatted_level = "II"
1487        ncaa_level = 2
1488    elif isinstance(level, int) and level == 3:
1489        formatted_level = "III"
1490        ncaa_level = 3
1491    elif isinstance(level, str) and (
1492        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1493    ):
1494        ncaa_level = 1
1495        formatted_level = level.upper()
1496    elif isinstance(level, str) and (
1497        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1498    ):
1499        ncaa_level = 2
1500        formatted_level = level.upper()
1501    elif isinstance(level, str) and (
1502        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1503    ):
1504        ncaa_level = 3
1505        formatted_level = level.upper()
1506
1507    del level
1508
1509    if exists(f"{home_dir}/.ncaa_stats_py/"):
1510        pass
1511    else:
1512        mkdir(f"{home_dir}/.ncaa_stats_py/")
1513
1514    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
1515        pass
1516    else:
1517        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
1518
1519    if exists(
1520        f"{home_dir}/.ncaa_stats_py/" +
1521        f"basketball_{sport_id}/full_schedule/"
1522    ):
1523        pass
1524    else:
1525        mkdir(
1526            f"{home_dir}/.ncaa_stats_py/" +
1527            f"basketball_{sport_id}/full_schedule/"
1528        )
1529
1530    if exists(
1531        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/full_schedule/"
1532        + f"{season}_{formatted_level}_full_schedule.csv"
1533    ):
1534        teams_df = pd.read_csv(
1535            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/full_schedule/"
1536            + f"{season}_{formatted_level}_full_schedule.csv"
1537        )
1538        file_mod_datetime = datetime.fromtimestamp(
1539            getmtime(
1540                f"{home_dir}/.ncaa_stats_py/" +
1541                f"basketball_{sport_id}/full_schedule/"
1542                + f"{season}_{formatted_level}_full_schedule.csv"
1543            )
1544        )
1545    else:
1546        file_mod_datetime = datetime.today()
1547        load_from_cache = False
1548
1549    now = datetime.today()
1550
1551    age = now - file_mod_datetime
1552
1553    if (
1554        age.days > 1 and
1555        season >= now.year
1556    ):
1557        load_from_cache = False
1558
1559    if load_from_cache is True:
1560        return teams_df
1561
1562    teams_df = load_basketball_teams()
1563    teams_df = teams_df[
1564        (teams_df["season"] == season) &
1565        (teams_df["ncaa_division"] == ncaa_level)
1566    ]
1567    team_ids_arr = teams_df["team_id"].to_numpy()
1568
1569    for team_id in tqdm(team_ids_arr):
1570        temp_df = get_basketball_team_schedule(team_id=team_id)
1571        schedule_df_arr.append(temp_df)
1572
1573    schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1574    schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first")
1575    schedule_df.to_csv(
1576        f"{home_dir}/.ncaa_stats_py/"
1577        + f"basketball_{sport_id}/full_schedule/"
1578        + f"{season}_{formatted_level}_full_schedule.csv",
1579        index=False,
1580    )
1581    return schedule_df
1582
1583
1584def get_basketball_team_roster(team_id: int) -> pd.DataFrame:
1585    """
1586    Retrieves a basketball team's roster from a given team ID.
1587
1588    Parameters
1589    ----------
1590    `team_id` (int, mandatory):
1591        Required argument.
1592        Specifies the team you want a roster from.
1593        This is separate from a school ID, which identifies the institution.
1594        A team ID should be unique to a school, and a season.
1595
1596    Usage
1597    ----------
1598    ```python
1599
1600    from ncaa_stats_py.basketball import get_basketball_team_roster
1601
1602    ########################################
1603    #          Men's Basketball            #
1604    ########################################
1605
1606    # Get the basketball roster for the
1607    # 2024 Alabama St. MBB team (D1, ID: 560655).
1608    print(
1609        "Get the basketball roster for the " +
1610        "2024 Alabama St. MBB team (D1, ID: 560655)."
1611    )
1612    df = get_basketball_team_roster(560655)
1613    print(df)
1614
1615    # Get the basketball roster for the
1616    # 2023 Roberts Wesleyan MBB team (D2, ID: 542994).
1617    print(
1618        "Get the basketball roster for the " +
1619        "2023 Roberts Wesleyan MBB team (D2, ID: 542994)."
1620    )
1621    df = get_basketball_team_roster(542994)
1622    print(df)
1623
1624    # Get the basketball roster for the
1625    # 2022 Pacific Lutheran MBB team (D3, ID: 528255).
1626    print(
1627        "Get the basketball roster for the " +
1628        "2022 Pacific Lutheran MBB team (D3, ID: 528255)."
1629    )
1630    df = get_basketball_team_roster(528255)
1631    print(df)
1632
1633    ########################################
1634    #          Women's Basketball          #
1635    ########################################
1636
1637    # Get the basketball roster for the
1638    # 2021 Michigan St. WBB team (D1, ID: 506069).
1639    print(
1640        "Get the basketball roster for the " +
1641        "2021 Michigan St. WBB team (D1, ID: 506069)."
1642    )
1643    df = get_basketball_team_roster(506069)
1644    print(df)
1645
1646    # Get the basketball roster for the
1647    # 2020 Shippensburg WBB team (D2, ID: 484864).
1648    print(
1649        "Get the basketball roster for the " +
1650        "2020 Shippensburg WBB team (D2, ID: 484864)."
1651    )
1652    df = get_basketball_team_roster(484864)
1653    print(df)
1654
1655    # Get the basketball roster for the
1656    # 2019 Maranatha Baptist team (D3, ID: 452546).
1657    print(
1658        "Get the basketball roster for the " +
1659        "2019 Maranatha Baptist team (D3, ID: 452546)."
1660    )
1661    df = get_basketball_team_roster(452546)
1662    print(df)
1663
1664    ```
1665
1666    Returns
1667    ----------
1668    A pandas `DataFrame` object with
1669    an NCAA basketball team's roster for that season.
1670    """
1671    sport_id = ""
1672    roster_df = pd.DataFrame()
1673    roster_df_arr = []
1674    temp_df = pd.DataFrame()
1675    url = f"https://stats.ncaa.org/teams/{team_id}/roster"
1676    load_from_cache = True
1677    home_dir = expanduser("~")
1678    home_dir = _format_folder_str(home_dir)
1679
1680    stat_columns = [
1681        "season",
1682        "season_name",
1683        "sport_id",
1684        "ncaa_division",
1685        "ncaa_division_formatted",
1686        "team_conference_name",
1687        "school_id",
1688        "school_name",
1689        "player_id",
1690        "player_jersey_num",
1691        "player_full_name",
1692        "player_first_name",
1693        "player_last_name",
1694        "player_class",
1695        "player_positions",
1696        "player_height_string",
1697        "player_weight",
1698        "player_hometown",
1699        "player_high_school",
1700        "player_G",
1701        "player_GS",
1702        "player_url",
1703    ]
1704
1705    try:
1706        team_df = load_basketball_teams()
1707        team_df = team_df[team_df["team_id"] == team_id]
1708
1709        season = team_df["season"].iloc[0]
1710        ncaa_division = team_df["ncaa_division"].iloc[0]
1711        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
1712        team_conference_name = team_df["team_conference_name"].iloc[0]
1713        school_name = team_df["school_name"].iloc[0]
1714        school_id = int(team_df["school_id"].iloc[0])
1715        sport_id = "MBB"
1716    except Exception:
1717        team_df = load_basketball_teams(get_wbb_data=True)
1718        team_df = team_df[team_df["team_id"] == team_id]
1719
1720        season = team_df["season"].iloc[0]
1721        ncaa_division = team_df["ncaa_division"].iloc[0]
1722        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
1723        team_conference_name = team_df["team_conference_name"].iloc[0]
1724        school_name = team_df["school_name"].iloc[0]
1725        school_id = int(team_df["school_id"].iloc[0])
1726        school_id = int(team_df["school_id"].iloc[0])
1727        sport_id = "WBB"
1728
1729    del team_df
1730
1731    if exists(f"{home_dir}/.ncaa_stats_py/"):
1732        pass
1733    else:
1734        mkdir(f"{home_dir}/.ncaa_stats_py/")
1735
1736    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
1737        pass
1738    else:
1739        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
1740
1741    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/"):
1742        pass
1743    else:
1744        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/")
1745
1746    if exists(
1747        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/" +
1748        f"{team_id}_roster.csv"
1749    ):
1750        teams_df = pd.read_csv(
1751            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/" +
1752            f"{team_id}_roster.csv"
1753        )
1754        file_mod_datetime = datetime.fromtimestamp(
1755            getmtime(
1756                f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/" +
1757                f"{team_id}_roster.csv"
1758            )
1759        )
1760    else:
1761        file_mod_datetime = datetime.today()
1762        load_from_cache = False
1763
1764    now = datetime.today()
1765
1766    age = now - file_mod_datetime
1767
1768    if (
1769        age.days >= 14 and
1770        season >= now.year
1771    ):
1772        load_from_cache = False
1773
1774    if load_from_cache is True:
1775        return teams_df
1776
1777    response = _get_webpage(url=url)
1778    soup = BeautifulSoup(response.text, features="lxml")
1779    try:
1780        school_name = soup.find(
1781            "div",
1782            {"class": "card"}
1783        ).find("img").get("alt")
1784    except Exception:
1785        school_name = soup.find("div", {"class": "card"}).find("a").text
1786        school_name = school_name.rsplit(" ", maxsplit=1)[0]
1787
1788    season_name = (
1789        soup.find("select", {"id": "year_list"})
1790        .find("option", {"selected": "selected"})
1791        .text
1792    )
    # For NCAA basketball, the season always starts in the fall semester,
    # and ends in the spring semester.
    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season,
    # because 2012 would encompass the fall and spring semesters
    # for NCAA member institutions.
1798    season = f"{season_name[0:2]}{season_name[-2:]}"
1799    season = int(season)
1800
1801    try:
1802        table = soup.find(
1803            "table",
1804            {"class": "dataTable small_font"},
1805        )
1806
1807        table_headers = table.find("thead").find_all("th")
1808    except Exception:
1809        table = soup.find(
1810            "table",
1811            {"class": "dataTable small_font no_padding"},
1812        )
1813
1814        table_headers = table.find("thead").find_all("th")
1815    table_headers = [x.text for x in table_headers]
1816
1817    t_rows = table.find("tbody").find_all("tr")
1818
1819    for t in t_rows:
1820        t_cells = t.find_all("td")
1821        t_cells = [x.text for x in t_cells]
1822
1823        temp_df = pd.DataFrame(
1824            data=[t_cells],
1825            columns=table_headers,
1826            # index=[0]
1827        )
1828
1829        player_id = t.find("a").get("href")
1830        # temp_df["school_name"] = school_name
1831        temp_df["player_url"] = f"https://stats.ncaa.org{player_id}"
1832
1833        player_id = player_id.replace("/players", "").replace("/", "")
1834        player_id = int(player_id)
1835
1836        temp_df["player_id"] = player_id
1837
1838        roster_df_arr.append(temp_df)
1839        del temp_df
1840
1841    roster_df = pd.concat(roster_df_arr, ignore_index=True)
1842    roster_df = roster_df.infer_objects()
1843    roster_df["season"] = season
1844    roster_df["season_name"] = season_name
1845    roster_df["ncaa_division"] = ncaa_division
1846    roster_df["ncaa_division_formatted"] = ncaa_division_formatted
1847    roster_df["team_conference_name"] = team_conference_name
1848    roster_df["school_id"] = school_id
1849    roster_df["school_name"] = school_name
1850    roster_df["sport_id"] = sport_id
1851
1852    roster_df.rename(
1853        columns={
1854            "GP": "player_G",
1855            "GS": "player_GS",
1856            "#": "player_jersey_num",
1857            "Name": "player_full_name",
1858            "Class": "player_class",
1859            "Position": "player_positions",
1860            "Height": "player_height_string",
1861            "Hometown": "player_hometown",
1862            "High School": "player_high_school",
1863        },
1864        inplace=True
1865    )
1866
1867    roster_df[["player_first_name", "player_last_name"]] = roster_df[
1868        "player_full_name"
1869    ].str.split(" ", n=1, expand=True)
1870    roster_df = roster_df.infer_objects()
1871
1872    for i in roster_df.columns:
1873        if i in stat_columns:
1874            pass
1875        else:
1876            raise ValueError(
1877                f"Unhandled column name {i}"
1878            )
1879
1880    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)
1881
1882    roster_df.to_csv(
1883        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/rosters/" +
1884        f"{team_id}_roster.csv",
1885        index=False,
1886    )
1887    return roster_df
1888
1889
def get_basketball_player_season_stats(
    team_id: int,
) -> pd.DataFrame:
    """
    Given a team ID, this function retrieves and parses
    the season stats for all of the players in a given basketball team.

    Results are cached as a CSV file inside of
    `~/.ncaa_stats_py/basketball_{MBB|WBB}/player_season_stats/`.
    A cached file is returned as-is, unless it is more than a day old
    and the team's season is the current calendar year or later,
    in which case the data is downloaded again.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want basketball stats from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_player_season_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the season stats for the
    # 2024 Illinois MBB team (D1, ID: 560955).
    print(
        "Get the season stats for the " +
        "2024 Illinois MBB team (D1, ID: 560955)."
    )
    df = get_basketball_player_season_stats(560955)
    print(df)

    # Get the season stats for the
    # 2023 Chico St. MBB team (D2, ID: 542605).
    print(
        "Get the season stats for the " +
        "2023 Chico St. MBB team (D2, ID: 542605)."
    )
    df = get_basketball_player_season_stats(542605)
    print(df)

    # Get the season stats for the
    # 2022 Maine Maritime MBB team (D3, ID: 528070).
    print(
        "Get the season stats for the " +
        "2022 Maine Maritime MBB team (D3, ID: 528070)."
    )
    df = get_basketball_player_season_stats(528070)
    print(df)

    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the season stats for the
    # 2021 Louisville WBB team (D1, ID: 506050).
    print(
        "Get the season stats for the " +
        "2021 Louisville WBB team (D1, ID: 506050)."
    )
    df = get_basketball_player_season_stats(506050)
    print(df)

    # Get the season stats for the
    # 2020 Paine WBB team (D2, ID: 484830).
    print(
        "Get the season stats for the " +
        "2020 Paine WBB team (D2, ID: 484830)."
    )
    df = get_basketball_player_season_stats(484830)
    print(df)

    # Get the season stats for the
    # 2019 Pomona-Pitzer team (D3, ID: 452413).
    print(
        "Get the season stats for the " +
        "2019 Pomona-Pitzer team (D3, ID: 452413)."
    )
    df = get_basketball_player_season_stats(452413)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the season stats for
    all players with a given NCAA basketball team.
    """

    load_from_cache = True
    stats_df_arr = []

    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "team_id",
        "team_conference_name",
        "school_id",
        "school_name",
        "ncaa_division",
        "ncaa_division_formatted",
        "player_id",
        "player_jersey_number",
        "player_last_name",
        "player_first_name",
        "player_full_name",
        "player_class",
        "player_position",
        "player_height",
        "GP",
        "GS",
        "MP_str",
        "MP_minutes",
        "MP_seconds",
        "MP_total_seconds",
        "FGM",
        "FGA",
        "FG%",
        "eFG%",
        "TSA",
        "TS%",
        "2PM",
        "2PA",
        "2FG%",
        "3PM",
        "3PA",
        "3FG%",
        "FT",
        "FTA",
        "FT%",
        "PTS",
        "ORB",
        "DRB",
        "TRB",
        "Avg",
        "AST",
        "TOV",
        "TOV%",
        "STL",
        "BLK",
        "PF",
        "DBL_DBL",
        "TRP_DBL",
        "DQ",
        "TF",
    ]

    # The team is looked up in the men's team list first.
    # Any failure in that lookup (including the team ID not being in the
    # list, which makes `.iloc[0]` raise) falls through to the women's
    # team list. A failure in the second lookup is propagated to the caller.
    try:
        team_df = load_basketball_teams()

        team_df = team_df[team_df["team_id"] == team_id]

        season = team_df["season"].iloc[0]
        ncaa_division = int(team_df["ncaa_division"].iloc[0])
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        team_conference_name = team_df["team_conference_name"].iloc[0]
        school_name = team_df["school_name"].iloc[0]
        school_id = int(team_df["school_id"].iloc[0])
        sport_id = "MBB"
    except Exception:
        team_df = load_basketball_teams(get_wbb_data=True)

        team_df = team_df[team_df["team_id"] == team_id]

        season = team_df["season"].iloc[0]
        ncaa_division = int(team_df["ncaa_division"].iloc[0])
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        team_conference_name = team_df["team_conference_name"].iloc[0]
        school_name = team_df["school_name"].iloc[0]
        school_id = int(team_df["school_id"].iloc[0])
        sport_id = "WBB"

    del team_df

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats"

    cache_dir = (
        f"{home_dir}/.ncaa_stats_py/" +
        f"basketball_{sport_id}/player_season_stats/"
    )

    # `mkdir` is not recursive, so every level of the cache folder
    # is created in order, from the outermost folder inwards.
    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/",
        cache_dir,
    ):
        if not exists(folder):
            mkdir(folder)

    # The cache lookup uses the season found in the team list.
    cache_file = (
        cache_dir +
        f"{season:00d}_{school_id:00d}_player_season_stats.csv"
    )

    if exists(cache_file):
        cached_df = pd.read_csv(cache_file)
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = datetime.today()
        load_from_cache = False

    now = datetime.today()

    age = now - file_mod_datetime

    if (
        age.days > 1 and
        season >= now.year
    ):
        load_from_cache = False

    if load_from_cache is True:
        return cached_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )
    # For NCAA basketball, the season always starts in the fall semester,
    # and ends in the spring semester.
    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season,
    # because 2012 would encompass the fall and spring semesters
    # for NCAA member institutions.
    season = f"{season_name[0:2]}{season_name[-2:]}"
    season = int(season)

    # Gluing the century of the starting year onto the ending year
    # is wrong when the season crosses a century ("1999-00" -> 1900).
    # If the result is before the starting year, move it up a century.
    start_year = season_name[0:4]
    if start_year.isdigit() and season < int(start_year):
        season += 100

    table_data = soup.find(
        "table",
        {"id": "stat_grid", "class": "small_font dataTable table-bordered"},
    )

    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]

    t_rows = table_data.find("tbody").find_all("tr", {"class": "text"})
    for t in t_rows:
        p_last = ""
        p_first = ""
        t_cells = t.find_all("td")
        # Rows where the name cell mentions "team" are skipped,
        # so that only player rows are kept.
        if "team" in t_cells[1].text.lower():
            continue

        # The name cell's `data-order` attribute is split on commas.
        # The check is on the number of name parts,
        # and not on the length of the raw string.
        # NOTE(review): presumably formatted "Last,First" or
        # "Last,Suffix,First" - confirm against the live page.
        name_parts = (t_cells[1].get("data-order") or "").split(",")
        if len(name_parts) == 2:
            p_last, p_first = name_parts
        elif len(name_parts) == 3:
            p_last, temp_name, p_first = name_parts
            p_last = f"{p_last} {temp_name}"

        t_cells = [x.text.strip() for x in t_cells]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The player ID is the numeric part of the first link in the row
        # (`/players/{player_id}`).
        player_id = t.find("a").get("href")
        player_id = player_id.replace("/players", "").replace("/", "")
        player_id = int(player_id)

        temp_df["player_id"] = player_id
        temp_df["player_last_name"] = p_last.strip()
        temp_df["player_first_name"] = p_first.strip()

        stats_df_arr.append(temp_df)

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("", None)

    stats_df["season"] = season
    stats_df["season_name"] = season_name
    stats_df["school_id"] = school_id
    stats_df["school_name"] = school_name
    stats_df["ncaa_division"] = ncaa_division
    stats_df["ncaa_division_formatted"] = ncaa_division_formatted
    stats_df["team_conference_name"] = team_conference_name
    stats_df["sport_id"] = sport_id
    stats_df["team_id"] = team_id

    stats_df = stats_df.infer_objects()

    # Several source headers map to the same output column,
    # because the header names are not the same in every season.
    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "3FG": "3PM",
            "3FGA": "3PA",
            "ORebs": "ORB",
            "DRebs": "DRB",
            "Tot Reb": "TRB",
            "TO": "TOV",
            "Dbl Dbl": "DBL_DBL",
            "Trpl Dbl": "TRP_DBL",
            "Fouls": "PF",
            'Tech Fouls': "TF",
            'Effective FG Pct.': "eFG%",
            "MP": "MP_str",
            "Min": "MP_str",
            "Off Reb": "ORB",
            "Def Reb": "DRB",
            "ST": "STL",
            "BLKS": "BLK"
        },
        inplace=True,
    )
    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "GS": "uint16",
            "FGM": "uint16",
            "FGA": "uint16",
            "3PM": "uint16",
            "3PA": "uint16",
            "FT": "uint16",
            "FTA": "uint16",
            "PTS": "uint16",
            "ORB": "uint16",
            "DRB": "uint16",
            "TRB": "uint16",
            "AST": "uint16",
            "TOV": "uint16",
            "STL": "uint16",
            "BLK": "uint16",
            "PF": "uint16",
            "DBL_DBL": "uint16",
            "TRP_DBL": "uint16",
            "school_id": "uint32",
        }
    )

    # This is a separate function call because these stats
    # *don't* exist in every season.
    if "DQ" not in stats_df.columns:
        stats_df["DQ"] = None

    if "TF" not in stats_df.columns:
        stats_df["TF"] = None

    stats_df = stats_df.astype(
        {
            "DQ": "uint16",
            "TF": "uint16",
        },
        errors="ignore"
    )

    # Minutes played arrive as a "minutes:seconds" string.
    # A blank cell was replaced with `0` by the `fillna()` call above,
    # so the column is cast to `str` before it is split, and any part
    # that is missing or is not a number is counted as 0
    # instead of raising an exception.
    mp_parts = (
        stats_df["MP_str"]
        .astype(str)
        .str.split(":", expand=True)
        .reindex(columns=[0, 1])
    )
    for mp_column, mp_part in (("MP_minutes", 0), ("MP_seconds", 1)):
        stats_df[mp_column] = (
            pd.to_numeric(mp_parts[mp_part], errors="coerce")
            .fillna(0)
            .astype("uint64")
        )
    stats_df["MP_total_seconds"] = (
        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
    )

    # All of the percentages below are recalculated from the counting stats,
    # and are stored as fractions rounded to 4 decimal places.
    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"])
    stats_df["FG%"] = stats_df["FG%"].round(4)

    # The 2-point and 3-point percentages are stored as "2FG%" and "3FG%",
    # because those are the names listed in `stat_columns`.
    # Any other name would be thrown out by the final `reindex()` call.
    stats_df["3FG%"] = (stats_df["3PM"] / stats_df["3PA"])
    stats_df["3FG%"] = stats_df["3FG%"].round(4)

    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"])
    stats_df["FT%"] = stats_df["FT%"].round(4)

    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
    stats_df["2FG%"] = (stats_df["2PM"] / stats_df["2PA"])
    stats_df["2FG%"] = stats_df["2FG%"].round(4)

    stats_df["eFG%"] = (
        (
            stats_df["FGM"] +
            (stats_df["3PM"] * 0.5)
        ) /
        stats_df["FGA"]
    )
    stats_df["eFG%"] = stats_df["eFG%"].round(4)

    stats_df["TSA"] = (
        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
    )
    stats_df["TS%"] = stats_df["PTS"] / (2 * stats_df["TSA"])
    stats_df["TS%"] = stats_df["TS%"].round(4)

    stats_df["TOV%"] = (
        stats_df["TOV"] /
        (
            stats_df["FGA"] +
            (stats_df["FTA"] * 0.44) +
            stats_df["TOV"]
        )
    )
    stats_df["TOV%"] = stats_df["TOV%"].round(4)

    # In many seasons, there is an ["Avg"] column
    # that would otherwise completely screw up
    # any attempts to use the final DataFrame,
    # because it would be a duplicate column
    # that pandas wouldn't complain about
    # until it's too late.
    # Dropping by label removes every column that shares a duplicated name.
    duplicate_cols = stats_df.columns[stats_df.columns.duplicated()]
    stats_df.drop(columns=duplicate_cols, inplace=True)
    stats_df = stats_df.reindex(columns=stat_columns)

    # The file is saved with the season parsed from the webpage,
    # which is also the season stored in the "season" column.
    stats_df.to_csv(
        cache_dir +
        f"{season:00d}_{school_id:00d}_player_season_stats.csv",
        index=False,
    )

    return stats_df
2379
2380
2381def get_basketball_player_game_stats(
2382    player_id: int,
2383    season: int
2384) -> pd.DataFrame:
2385    """
2386    Given a valid player ID and season,
2387    this function retrieves the game stats for this player at a game level.
2388
2389    Parameters
2390    ----------
2391    `player_id` (int, mandatory):
2392        Required argument.
2393        Specifies the player you want game stats from.
2394
2395    `season` (int, mandatory):
2396        Required argument.
2397        Specifies the season you want game stats from.
2398
2399    Usage
2400    ----------
2401    ```python
2402
2403    from ncaa_stats_py.basketball import (
2404        get_basketball_player_game_stats
2405    )
2406
2407    # Get the batting stats of Jacob Berry in 2022 (LSU).
2408    print(
2409        "Get the batting stats of Jacob Berry in 2022 (LSU)."
2410    )
2411    df = get_basketball_player_game_stats(player_id=7579336, season=2022)
2412    print(df)
2413
2414    # Get the batting stats of Alec Burleson in 2019 (ECU).
2415    print(
2416        "Get the batting stats of Alec Burleson in 2019 (ECU)."
2417    )
2418    df = get_basketball_player_game_stats(player_id=6015715, season=2019)
2419    print(df)
2420
2421    # Get the batting stats of Hunter Bishop in 2018 (Arizona St.).
2422    print(
2423        "Get the batting stats of Hunter Bishop in 2018 (Arizona St.)."
2424    )
2425    df = get_basketball_player_game_stats(player_id=6014052, season=2019)
2426    print(df)
2427
2428    ```
2429
2430    Returns
2431    ----------
2432    A pandas `DataFrame` object with a player's batting game logs
2433    in a given season.
2434    """
2435    sport_id = ""
2436
2437    stat_columns = [
2438        "season",
2439        "game_id",
2440        "game_num",
2441        "player_id",
2442        "date",
2443        "opponent",
2444        "Result",
2445        "team_score",
2446        "opponent_score",
2447        "MP_str",
2448        "MP_minutes",
2449        "MP_seconds",
2450        "MP_total_seconds",
2451        "GP",
2452        "GS",
2453        "FGM",
2454        "FGA",
2455        "FG%",
2456        "eFG%",
2457        "2PM",
2458        "2PA",
2459        "2P%",
2460        "3PM",
2461        "3PA",
2462        "3P%",
2463        "FT",
2464        "FTA",
2465        "FT%",
2466        "ORB",
2467        "DRB",
2468        "TRB",
2469        "AST",
2470        "TOV",
2471        "TOV%",
2472        "STL",
2473        "BLK",
2474        "PF",
2475        "DQ",
2476        "TF",
2477        "TSA",
2478        "TS%",
2479        "PTS",
2480        "DBL_DBL",
2481        "TRP_DBL",
2482    ]
2483    load_from_cache = True
2484    stats_df = pd.DataFrame()
2485    stats_df_arr = []
2486    temp_df = pd.DataFrame()
2487    home_dir = expanduser("~")
2488    home_dir = _format_folder_str(home_dir)
2489
2490    # stat_id = _get_stat_id(
2491    #     sport="basketball",
2492    #     season=season,
2493    #     stat_type="batting"
2494    # )
2495    url = f"https://stats.ncaa.org/players/{player_id}"
2496
2497    if exists(f"{home_dir}/.ncaa_stats_py/"):
2498        pass
2499    else:
2500        mkdir(f"{home_dir}/.ncaa_stats_py/")
2501
2502    if exists(f"{home_dir}/.ncaa_stats_py/basketball_MBB/"):
2503        pass
2504    else:
2505        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_MBB/")
2506
2507    if exists(f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"):
2508        pass
2509    else:
2510        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/")
2511
2512    if exists(
2513        f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"
2514        + f"{season}_{player_id}_player_game_stats.csv"
2515    ):
2516        games_df = pd.read_csv(
2517            f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"
2518            + f"{season}_{player_id}_player_game_stats.csv"
2519        )
2520        file_mod_datetime = datetime.fromtimestamp(
2521            getmtime(
2522                f"{home_dir}/.ncaa_stats_py/basketball_MBB/"
2523                + "player_game_stats/"
2524                + f"{season}_{player_id}_player_game_stats.csv"
2525            )
2526        )
2527        games_df = games_df.infer_objects()
2528        load_from_cache = True
2529    else:
2530        file_mod_datetime = datetime.today()
2531        load_from_cache = False
2532
2533    if exists(f"{home_dir}/.ncaa_stats_py/"):
2534        pass
2535    else:
2536        mkdir(f"{home_dir}/.ncaa_stats_py/")
2537
2538    if exists(f"{home_dir}/.ncaa_stats_py/basketball_WBB/"):
2539        pass
2540    else:
2541        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_WBB/")
2542
2543    if exists(f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"):
2544        pass
2545    else:
2546        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/")
2547
2548    if exists(
2549        f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"
2550        + f"{season}_{player_id}_player_game_stats.csv"
2551    ):
2552        games_df = pd.read_csv(
2553            f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"
2554            + f"{season}_{player_id}_player_game_stats.csv"
2555        )
2556        file_mod_datetime = datetime.fromtimestamp(
2557            getmtime(
2558                f"{home_dir}/.ncaa_stats_py/basketball_WBB/"
2559                + "player_game_stats/"
2560                + f"{season}_{player_id}_player_game_stats.csv"
2561            )
2562        )
2563        games_df = games_df.infer_objects()
2564        load_from_cache = True
2565    else:
2566        logging.info("Could not find a WBB player game stats file")
2567
2568    now = datetime.today()
2569
2570    age = now - file_mod_datetime
2571
2572    if (
2573        age.days > 1 and
2574        (season - 1) >= now.year
2575    ):
2576        load_from_cache = False
2577
2578    if load_from_cache is True:
2579        return games_df
2580
2581    # team_df = load_basketball_teams()
2582
2583    # team_df = team_df[team_df["team_id"] == team_id]
2584
2585    # season = team_df["season"].iloc[0]
2586    # ncaa_division = team_df["ncaa_division"].iloc[0]
2587    # ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
2588    # team_conference_name = team_df["team_conference_name"].iloc[0]
2589    # school_name = team_df["school_name"].iloc[0]
2590    # school_id = int(team_df["school_id"].iloc[0])
2591
2592    # del team_df
2593    response = _get_webpage(url=url)
2594    soup = BeautifulSoup(response.text, features="lxml")
2595
2596    table_navigation = soup.find("ul", {"class": "nav nav-tabs padding-nav"})
2597    table_nav_card = table_navigation.find_all("a")
2598
2599    for u in table_nav_card:
2600        url_str = u.get("href")
2601        if "MBB" in url_str.upper():
2602            sport_id = "MBB"
2603        elif "WBB" in url_str.upper():
2604            sport_id = "WBB"
2605
2606    if sport_id is None or len(sport_id) == 0:
2607        # This should **never** be the case IRL,
2608        # but in case something weird happened and
2609        # we can't make a determination of if this is a
2610        # MBB player or a WBB player, and we somehow haven't
2611        # crashed by this point, set the sport ID to
2612        # "MBB" by default so we don't have other weirdness.
2613        logging.error(
2614            f"Could not determine if player ID {player_id} " +
2615            "is a MBB or a WBB player. " +
2616            "Because this cannot be determined, " +
2617            "we will make the automatic assumption that this is a MBB player."
2618        )
2619        sport_id = "MBB"
2620
2621    table_data = soup.find_all(
2622        "table", {"class": "small_font dataTable table-bordered"}
2623    )[1]
2624
2625    temp_table_headers = table_data.find("thead").find("tr").find_all("th")
2626    table_headers = [x.text for x in temp_table_headers]
2627
2628    del temp_table_headers
2629
2630    temp_t_rows = table_data.find("tbody")
2631    temp_t_rows = temp_t_rows.find_all("tr")
2632
2633    for t in temp_t_rows:
2634        game_num = 1
2635        ot_periods = 0
2636        # innings = 9
2637        row_id = t.get("id")
2638        opp_team_name = ""
2639
2640        if "contest" not in row_id:
2641            continue
2642        del row_id
2643
2644        t_cells = t.find_all("td")
2645        t_cells = [x.text.strip() for x in t_cells]
2646
2647        g_date = t_cells[0]
2648
2649        if "(" in g_date:
2650            g_date, game_num = g_date.split("(")
2651            g_date = g_date.strip()
2652
2653            game_num = game_num.replace(")", "")
2654            game_num = int(game_num)
2655
2656        try:
2657            opp_team_id = t.find_all("td")[1].find("a").get("href")
2658        except AttributeError as e:
2659            logging.info(
2660                "Could not extract a team ID for this game. " +
2661                f"Full exception {e}"
2662            )
2663        except Exception as e:
2664            logging.warning(
2665                "An unhandled exception has occurred when "
2666                + "trying to get the opposition team ID for this game. "
2667                f"Full exception `{e}`."
2668            )
2669            raise e
2670
2671        try:
2672            opp_team_id = opp_team_id.replace("/teams/", "")
2673            opp_team_id = opp_team_id.replace(
2674                "javascript:toggleDefensiveStats(", ""
2675            )
2676            opp_team_id = opp_team_id.replace(");", "")
2677            opp_team_id = int(opp_team_id)
2678
2679            temp_df["opponent_team_id"] = opp_team_id
2680        except Exception:
2681            logging.info(
2682                "Couldn't find the opposition team naIDme "
2683                + "for this row. "
2684            )
2685            opp_team_id = None
2686        # print(i.find("td").text)
2687        try:
2688            opp_team_name = t.find_all("td")[1].find_all("img")[1].get("alt")
2689        except AttributeError:
2690            logging.info(
2691                "Couldn't find the opposition team name "
2692                + "for this row from an image element. "
2693                + "Attempting a backup method"
2694            )
2695            opp_team_name = t_cells[1]
2696        except IndexError:
2697            logging.info(
2698                "Couldn't find the opposition team name "
2699                + "for this row from an image element. "
2700                + "Attempting a backup method"
2701            )
2702            opp_team_name = t_cells[1]
2703        except Exception as e:
2704            logging.warning(
2705                "Unhandled exception when trying to get the "
2706                + "opposition team name from this game. "
2707                + f"Full exception `{e}`"
2708            )
2709            raise e
2710
2711        if opp_team_name == "Defensive Stats":
2712            opp_team_name = t_cells[1]
2713
2714        if "@" in opp_team_name:
2715            opp_team_name = opp_team_name.split("@")[0]
2716
2717        result_str = t_cells[2]
2718
2719        result_str = (
2720            result_str.lower().replace("w", "").replace("l", "").replace(
2721                "t", ""
2722            )
2723        )
2724
2725        if (
2726            result_str.lower() == "ppd" or
2727            result_str.lower() == "" or
2728            result_str.lower() == "canceed"
2729        ):
2730            continue
2731
2732        result_str = result_str.replace("\n", "")
2733        result_str = result_str.replace("*", "")
2734
2735        tm_score, opp_score = result_str.split("-")
2736        t_cells = [x.replace("*", "") for x in t_cells]
2737        t_cells = [x.replace("/", "") for x in t_cells]
2738        t_cells = [x.replace("\\", "") for x in t_cells]
2739
2740        temp_df = pd.DataFrame(
2741            data=[t_cells],
2742            columns=table_headers,
2743            # index=[0]
2744        )
2745
2746        tm_score = int(tm_score)
2747        if "(" in opp_score:
2748            opp_score = opp_score.replace(")", "")
2749            opp_score, ot_periods = opp_score.split("(")
2750            temp_df["ot_periods"] = ot_periods
2751
2752        if "\n" in opp_score:
2753            opp_score = opp_score.strip()
2754            # opp_score = opp_score
2755        opp_score = int(opp_score)
2756
2757        temp_df["team_score"] = tm_score
2758        temp_df["opponent_score"] = opp_score
2759
2760        del tm_score
2761        del opp_score
2762
2763        g_id = t.find_all("td")[2].find("a").get("href")
2764
2765        g_id = g_id.replace("/contests", "")
2766        g_id = g_id.replace("/box_score", "")
2767        g_id = g_id.replace("/", "")
2768
2769        g_id = int(g_id)
2770        temp_df["game_id"] = g_id
2771
2772        del g_id
2773        temp_df.rename(
2774            columns={"Opponent": "opponent", "Date": "date"},
2775            inplace=True,
2776        )
2777        game_date = datetime.strptime(g_date, "%m/%d/%Y").date()
2778
2779        temp_df["date"] = game_date
2780        temp_df["game_num"] = game_num
2781        # temp_df["game_innings"] = innings
2782
2783        if len(opp_team_name) > 0:
2784            temp_df["opponent"] = opp_team_name
2785        del opp_team_name
2786
2787        duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
2788        temp_df.drop(columns=duplicate_cols, inplace=True)
2789
2790        stats_df_arr.append(temp_df)
2791        del temp_df
2792
2793    stats_df = pd.concat(stats_df_arr, ignore_index=True)
2794    stats_df = stats_df.replace("/", "", regex=True)
2795    stats_df = stats_df.replace("", np.nan)
2796    stats_df = stats_df.infer_objects()
2797
2798    stats_df["player_id"] = player_id
2799    stats_df["season"] = season
2800    # In many seasons, there is an ["Avg"] column
2801    # that would otherwise completely screw up
2802    # any attempts to use the final DataFrame,
2803    # because it would be a duplicate column
2804    # that pandas wouldn't complain about
2805    # until it's too late.
2806
2807    duplicate_cols = stats_df.columns[stats_df.columns.duplicated()]
2808    stats_df.drop(columns=duplicate_cols, inplace=True)
2809
2810    stats_df.rename(
2811        columns={
2812            "#": "player_jersey_number",
2813            "Player": "player_full_name",
2814            "Yr": "player_class",
2815            "Pos": "player_position",
2816            "Ht": "player_height",
2817            "B/T": "player_bats_throws",
2818            "3FG": "3PM",
2819            "3FGA": "3PA",
2820            "ORebs": "ORB",
2821            "DRebs": "DRB",
2822            "Tot Reb": "TRB",
2823            "TO": "TOV",
2824            "Dbl Dbl": "DBL_DBL",
2825            "Trpl Dbl": "TRP_DBL",
2826            "Fouls": "PF",
2827            'Tech Fouls': "TF",
2828            'Effective FG Pct.': "eFG%",
2829            "MP": "MP_str",
2830            "Min": "MP_str",
2831            "Off Reb": "ORB",
2832            "Def Reb": "DRB",
2833            "ST": "STL",
2834            "3FG%": "3P%",
2835            "BLKS": "BLK"
2836        },
2837        inplace=True,
2838    )
2839
2840    # This is a separate function call because these stats
2841    # *don't* exist in every season.
2842    if "GS" not in stats_df.columns:
2843        stats_df["GS"] = None
2844
2845    if "DQ" not in stats_df.columns:
2846        stats_df["DQ"] = None
2847
2848    if "TF" not in stats_df.columns:
2849        stats_df["TF"] = None
2850
2851    if "DBL_DBL" not in stats_df.columns:
2852        stats_df["DBL_DBL"] = None
2853
2854    if "TRP_DBL" not in stats_df.columns:
2855        stats_df["TRP_DBL"] = None
2856
2857    stats_df = stats_df.astype(
2858        {
2859            "DQ": "uint16",
2860            "TF": "uint16",
2861        },
2862        errors="ignore"
2863    )
2864
2865    stats_df = stats_df.infer_objects().fillna(0)
2866    stats_df = stats_df.astype(
2867        {
2868            "GP": "uint16",
2869            "GS": "uint16",
2870            "FGM": "uint16",
2871            "FGA": "uint16",
2872            "3PM": "uint16",
2873            "3PA": "uint16",
2874            "FT": "uint16",
2875            "FTA": "uint16",
2876            "PTS": "uint16",
2877            "ORB": "uint16",
2878            "DRB": "uint16",
2879            "TRB": "uint16",
2880            "AST": "uint16",
2881            "TOV": "uint16",
2882            "STL": "uint16",
2883            "BLK": "uint16",
2884            "PF": "uint16",
2885            "DBL_DBL": "uint16",
2886            "TRP_DBL": "uint16",
2887            # "school_id": "uint32",
2888        }
2889    )
2890
2891    stats_df[["MP_minutes", "MP_seconds"]] = stats_df["MP_str"].str.split(
2892        ":", expand=True
2893    )
2894    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
2895        "MP_minutes", "MP_seconds"
2896    ]].fillna(0)
2897    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
2898        "MP_minutes", "MP_seconds"
2899    ]].astype("uint16")
2900    stats_df["MP_total_seconds"] = (
2901        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
2902    )
2903
2904    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"])
2905    stats_df["FG%"] = stats_df["FG%"].round(4)
2906
2907    stats_df["3P%"] = (stats_df["3PM"] / stats_df["3PA"])
2908    stats_df["3P%"] = stats_df["3P%"].round(4)
2909
2910    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"])
2911    stats_df["FT%"] = stats_df["FT%"].round(4)
2912
2913    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
2914    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
2915    stats_df["2P%"] = (stats_df["2PM"] / stats_df["2PA"])
2916    stats_df["2P%"] = stats_df["2P%"].round(4)
2917
2918    stats_df["eFG%"] = (
2919        (
2920            stats_df["FGM"] +
2921            (stats_df["3PM"] * 0.5)
2922        ) /
2923        stats_df["FGA"]
2924    )
2925    stats_df["eFG%"] = stats_df["eFG%"].round(4)
2926
2927    stats_df["TSA"] = (
2928        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
2929    )
2930    stats_df["TS%"] = stats_df["PTS"] / (2 * stats_df["TSA"])
2931    stats_df["TS%"] = stats_df["TS%"].round(4)
2932
2933    stats_df["TOV%"] = (
2934        stats_df["TOV"] /
2935        (
2936            stats_df["FGA"] +
2937            (stats_df["FTA"] * 0.44) +
2938            stats_df["TOV"]
2939        )
2940    )
2941    stats_df["TOV%"] = stats_df["TOV%"].round(4)
2942    stats_df = stats_df.reindex(
2943        columns=stat_columns
2944    )
2945    # print(stats_df.columns)
2946    stats_df.to_csv(
2947        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"
2948        + "player_game_stats/"
2949        + f"{season}_{player_id}_player_game_stats.csv",
2950        index=False,
2951    )
2952    return stats_df
2953
2954
def get_basketball_game_player_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all player game stats, if possible.

    A cached copy of the game is looked for first, in the
    `.ncaa_stats_py/basketball_MBB/game_stats/player/` and
    `.ncaa_stats_py/basketball_WBB/game_stats/player/` folders
    of the user's home directory. If a cached file exists and is
    less than 35 days old, it is returned without touching the network.
    Otherwise, the box score is downloaded from
    `https://stats.ncaa.org/contests/{game_id}/individual_stats`,
    parsed, saved to the cache folder of the detected sport,
    and returned.

    NOTE: Due to an issue with [stats.ncaa.org](stats.ncaa.org),
    full player game stats may not be loaded in through this function.

    This is a known issue, however you should be able to get position
    data and starters information through this function

    NOTE: As currently written, `GP` is always `1`
    and `GS` is always `0` for every row that is parsed.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want player game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_game_player_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_game_player_stats(5254137)
    print(df)

    # Get the game stats of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the game stats of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_game_player_stats(5254126)
    print(df)

    # Get the game stats of a St. Patrick's Day
    # game between the Duquesne Dukes and VCU Rams (D1).
    print(
        "Get the game stats of a St. Patrick's Day "
        + "game between the Duquesne Dukes and VCU Rams (D1)."
    )
    df = get_basketball_game_player_stats(5252318)
    print(df)

    # Get the game stats of a December 17th, 2023
    # game between the Barry Buccaneers and Findlay Oilers (D2).
    print(
        "Get the game stats of a December 17th, 2023 "
        + "game between the Barry Buccaneers and Findlay Oilers (D2)."
    )
    df = get_basketball_game_player_stats(3960610)
    print(df)

    # Get the game stats of a Valentine's Day
    # game between the Kalamazoo Hornets and the Trine Thunder (D2).
    print(
        "Get the game stats of a Valentine's Day "
        + "game between the Kalamazoo Hornets and the Trine Thunder (D2)."
    )
    df = get_basketball_game_player_stats(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    # NOTE(review): this game ID is the same one used for the men's
    # championship example above; confirm the women's game ID.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's Basketball National Championship game"
    )
    df = get_basketball_game_player_stats(5254137)
    print(df)

    # Get the game stats of a March 3rd, 2024
    # game between Duke and the North Carolina Tar Heels.
    print(
        "Get the game stats of a March 3rd, 2024 "
        + "game between Duke and the North Carolina Tar Heels"
    )
    df = get_basketball_game_player_stats(3984600)
    print(df)

    # Get the game stats of a Thanksgiving Day
    # game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
    print(
        "Get the game stats of a Thanksgiving Day "
        + "game between the Sacred Heart Pioneers and "
        + "the P.R.-Mayaguez Janes (D2)."
    )
    df = get_basketball_game_player_stats(3972687)
    print(df)

    # Get the game stats of a January 21st, 2024
    # game between the Puget Sound Loggers
    # and the Whitworth Pirates (D3).
    print(
        "Get the game stats of a January 21st, 2024 "
        + "game between the Puget Sound Loggers and "
        + "the Whitworth Pirates (D3)."
    )
    df = get_basketball_game_player_stats(3979051)
    print(df)
    ```

    Returns
    ----------
    A pandas `DataFrame` object with player game stats in a given game.

    Raises
    ----------
    `ValueError`:
        If the downloaded page contains no player stats tables,
        or if a team on the page is not found in either the
        men's or women's basketball team lists.

    """
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    # Columns (and column order) of the returned and cached DataFrame.
    stat_columns = [
        "season",
        "game_id",
        "team_id",
        "team_name",
        "player_id",
        "player_num",
        "player_full_name",
        "player_position",
        "GP",
        "GS",
        "MP_str",
        "MP_minutes",
        "MP_seconds",
        "MP_total_seconds",
        "FGM",
        "FGA",
        "FG%",
        "3PM",
        "3PA",
        "3P%",
        "2PM",
        "2PA",
        "2P%",
        "eFG%",
        "FT",
        "FTA",
        "FT%",
        "TSA",
        "TS%",
        "ORB",
        "DRB",
        "TRB",
        "AST",
        "STL",
        "BLK",
        "TOV",
        "TOV%",
        "PF",
        "TF",
        "PTS",
        "DQ",
        "DBL_DBL",
        "TRP_DBL",
    ]

    url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats"

    if not exists(f"{home_dir}/.ncaa_stats_py/"):
        mkdir(f"{home_dir}/.ncaa_stats_py/")

    # The sport of a game is not known from its ID alone,
    # so both the men's and women's cache folders are checked.
    # If both hold a file for this game, the WBB file wins
    # because it is read last.
    games_df = None
    load_from_cache = False
    file_mod_datetime = datetime.today()

    for cache_sport_id in ("MBB", "WBB"):
        sport_dir = f"{home_dir}/.ncaa_stats_py/basketball_{cache_sport_id}/"
        player_dir = sport_dir + "game_stats/player/"

        # `mkdir` only creates one level at a time,
        # so the folders are created from the outside in.
        for folder in (sport_dir, sport_dir + "game_stats/", player_dir):
            if not exists(folder):
                mkdir(folder)

        cache_file = player_dir + f"{game_id}_player_game_stats.csv"
        if exists(cache_file):
            games_df = pd.read_csv(cache_file)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        elif cache_sport_id == "WBB":
            logging.info("Could not find a WBB player game stats file")

    age = datetime.today() - file_mod_datetime

    # Cached files that are 35 days old (or older) are refreshed.
    if age.days >= 35:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    # The team lists are only needed to identify the sport and season
    # of a freshly downloaded game, so they are loaded after the
    # cache check instead of on every call.
    mbb_teams_df = load_basketball_teams(get_wbb_data=False)
    mbb_team_ids_arr = mbb_teams_df["team_id"].to_list()

    wbb_teams_df = load_basketball_teams(get_wbb_data=True)
    wbb_team_ids_arr = wbb_teams_df["team_id"].to_list()

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # Each of these boxes is handled as the stats table of one team.
    table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"})

    sport_id = ""
    season = 0
    stats_df_arr = []

    # Placeholder used when a row has no player link to read an ID from.
    # This is the same value the blanket `fillna(0)` further down
    # gives to any other missing value.
    missing_player_id = 0

    for box in table_boxes:
        t_header = box.find(
            "div", {"class": "card-header"}
        ).find(
            "div", {"class": "row"}
        )

        # The header text, minus the "Period Stats" label,
        # is used as the team name.
        t_header_str = t_header.text
        t_header_str = t_header_str.replace("Period Stats", "")
        t_header_str = t_header_str.replace("\n", "")
        t_header_str = t_header_str.strip()

        # The team ID is what is left of the first link in the header
        # once "/teams" and any slashes are removed.
        team_id = t_header.find("a").get("href")
        team_id = team_id.replace("/teams", "")
        team_id = team_id.replace("/", "")
        team_id = int(team_id)

        table_data = box.find(
            "table",
            {"class": "display dataTable small_font"}
        )
        table_headers = [x.text for x in box.find("thead").find_all("th")]
        temp_t_rows = table_data.find("tbody").find_all("tr")

        spec_stats_df_arr = []
        for t in temp_t_rows:
            game_played = 1
            game_started = 0

            # Reset for every row, so that a row without a player link
            # can never inherit the ID of the row before it.
            player_id = None
            try:
                player_id = t.find("a").get("href")
                player_id = player_id.replace("/players", "")
                player_id = player_id.replace("/player", "")
                player_id = player_id.replace("/", "")
            except Exception as e:
                logging.debug(
                    "Could not replace player IDs. " +
                    f"Full exception: `{e}`"
                )
                player_id = None

            t_cells = t.find_all("td")
            p_name = t_cells[1].text.replace("\n", "")
            p_name = p_name.strip()

            # Rows named after the team, and "team" rows,
            # are not individual players.
            if t_header_str in p_name:
                continue
            elif p_name.lower() == "team":
                continue

            # NOTE(review): `game_started` is never set to anything
            # other than 0, and `strip()` above has already removed
            # any leading/trailing non-breaking spaces, so this check
            # has no effect on the output. The intended starter rule
            # needs to be confirmed before this is changed.
            if "\xa0" in p_name:
                game_started = 0

            t_cells = [x.text.strip() for x in t_cells]

            if player_id is None:
                logging.warning(
                    f"Could not find a player ID for `{p_name}` "
                    + f"in game ID {game_id}."
                )
                player_id = missing_player_id
            else:
                player_id = int(player_id)

            temp_df = pd.DataFrame(
                data=[t_cells],
                columns=table_headers
            )

            # Every column whose name appears more than once is dropped.
            duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
            temp_df.drop(columns=duplicate_cols, inplace=True)

            temp_df["player_id"] = player_id
            temp_df["GP"] = game_played
            temp_df["GS"] = game_started

            spec_stats_df_arr.append(temp_df)

        spec_stats_df = pd.concat(
            spec_stats_df_arr,
            ignore_index=True
        )

        # The sport and season of the game are taken from whichever
        # team list contains this team ID.
        if team_id in mbb_team_ids_arr:
            sport_id = "MBB"
            df = mbb_teams_df[mbb_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        elif team_id in wbb_team_ids_arr:
            sport_id = "WBB"
            df = wbb_teams_df[wbb_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        else:
            raise ValueError(
                f"Unhandled team ID {team_id}"
            )
        spec_stats_df["team_id"] = team_id
        spec_stats_df["team_name"] = t_header_str
        stats_df_arr.append(spec_stats_df)

    if not stats_df_arr:
        # `pd.concat()` would raise a `ValueError` here anyway,
        # but with a message that does not mention the game.
        raise ValueError(
            f"Could not find any player stats tables for game ID {game_id}."
        )

    # `ignore_index=True` keeps the index unique across both teams,
    # which matches what a later read of the cached CSV returns.
    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df["season"] = season
    stats_df.rename(
        columns={
            "#": "player_num",
            "Name": "player_full_name",
            "P": "player_position",
            "MP": "MP_str",
            "3FG": "3PM",
            "3FGA": "3PA",
            "ORebs": "ORB",
            "DRebs": "DRB",
            "TotReb": "TRB",
            "TO": "TOV",
            "TechFouls": "TF",
            "Fouls": "PF"
        },
        inplace=True,
    )

    # These columns are added if the page did not provide them,
    # so the type conversions below do not fail on a missing key.
    for optional_col in ("GS", "DQ", "TF", "DBL_DBL", "TRP_DBL"):
        if optional_col not in stats_df.columns:
            stats_df[optional_col] = None

    stats_df = stats_df.astype(
        {
            "DQ": "uint16",
            "TF": "uint16",
        },
        errors="ignore"
    )

    stats_df = stats_df.infer_objects().fillna(0)

    counting_stats = [
        "GP",
        "GS",
        "FGM",
        "FGA",
        "3PM",
        "3PA",
        "FT",
        "FTA",
        "PTS",
        "ORB",
        "DRB",
        "TRB",
        "AST",
        "TOV",
        "STL",
        "BLK",
        "PF",
        "DBL_DBL",
        "TRP_DBL",
    ]
    stats_df = stats_df.astype(dict.fromkeys(counting_stats, "uint16"))

    # "MM:SS" strings are split into minutes and seconds.
    stats_df[["MP_minutes", "MP_seconds"]] = stats_df["MP_str"].str.split(
        ":", expand=True
    )
    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
        "MP_minutes", "MP_seconds"
    ]].fillna(0)
    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
        "MP_minutes", "MP_seconds"
    ]].astype("uint16")
    stats_df["MP_total_seconds"] = (
        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
    )

    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"]).round(4)
    stats_df["3P%"] = (stats_df["3PM"] / stats_df["3PA"]).round(4)
    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"]).round(4)

    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
    stats_df["2P%"] = (stats_df["2PM"] / stats_df["2PA"]).round(4)

    stats_df["eFG%"] = (
        (
            stats_df["FGM"] +
            (stats_df["3PM"] * 0.5)
        ) /
        stats_df["FGA"]
    ).round(4)

    stats_df["TSA"] = (
        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
    )
    stats_df["TS%"] = (stats_df["PTS"] / (2 * stats_df["TSA"])).round(4)

    stats_df["TOV%"] = (
        stats_df["TOV"] /
        (
            stats_df["FGA"] +
            (stats_df["FTA"] * 0.44) +
            stats_df["TOV"]
        )
    ).round(4)

    # A double-double (triple-double) is two (three) or more
    # of these categories at 10 or higher.
    double_double_stats = ["PTS", "TRB", "AST", "BLK", "STL"]
    double_digit_counts = (stats_df[double_double_stats] >= 10).sum(axis=1)
    stats_df["DBL_DBL"] = double_digit_counts >= 2
    stats_df["TRP_DBL"] = double_digit_counts >= 3

    stats_df = stats_df.astype(
        {
            "DBL_DBL": "uint16",
            "TRP_DBL": "uint16",
        },
        errors="ignore"
    )
    stats_df = stats_df.reindex(
        columns=stat_columns
    )
    stats_df["game_id"] = game_id

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/game_stats/player/"
        + f"{game_id}_player_game_stats.csv",
        index=False
    )
    return stats_df
3480
3481
def get_basketball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all team game stats, if possible.

    Team totals are built by summing the player rows returned by
    `get_basketball_game_player_stats()` for each team in the game,
    and then re-calculating the percentage stats from those totals.

    NOTE: Due to an issue with [stats.ncaa.org](stats.ncaa.org),
    full team game stats may not be loaded in through this function.
    Because the totals come from the player game stats,
    anything missing there will be missing here as well.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_game_team_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_game_team_stats(5254137)
    print(df)

    # Get the game stats of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the game stats of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_game_team_stats(5254126)
    print(df)

    # Get the game stats of a St. Patrick's Day
    # game between the Duquesne Dukes and VCU Rams (D1).
    print(
        "Get the game stats of a St. Patrick's Day "
        + "game between the Duquesne Dukes and VCU Rams (D1)."
    )
    df = get_basketball_game_team_stats(5252318)
    print(df)

    # Get the game stats of a December 17th, 2023
    # game between the Barry Buccaneers and Findlay Oilers (D2).
    print(
        "Get the game stats of a December 17th, 2023 "
        + "game between the Barry Buccaneers and Findlay Oilers (D2)."
    )
    df = get_basketball_game_team_stats(3960610)
    print(df)

    # Get the game stats of a Valentine's Day
    # game between the Kalamazoo Hornets and the Trine Thunder (D2).
    print(
        "Get the game stats of a Valentine's Day "
        + "game between the Kalamazoo Hornets and the Trine Thunder (D2)."
    )
    df = get_basketball_game_team_stats(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    # NOTE(review): this game ID is the same one used for the men's
    # championship example above; confirm the women's game ID.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's Basketball National Championship game"
    )
    df = get_basketball_game_team_stats(5254137)
    print(df)

    # Get the game stats of a March 3rd, 2024
    # game between Duke and the North Carolina Tar Heels.
    print(
        "Get the game stats of a March 3rd, 2024 "
        + "game between Duke and the North Carolina Tar Heels"
    )
    df = get_basketball_game_team_stats(3984600)
    print(df)

    # Get the game stats of a Thanksgiving Day
    # game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
    print(
        "Get the game stats of a Thanksgiving Day "
        + "game between the Sacred Heart Pioneers and "
        + "the P.R.-Mayaguez Janes (D2)."
    )
    df = get_basketball_game_team_stats(3972687)
    print(df)

    # Get the game stats of a January 21st, 2024
    # game between the Puget Sound Loggers
    # and the Whitworth Pirates (D3).
    print(
        "Get the game stats of a January 21st, 2024 "
        + "game between the Puget Sound Loggers and "
        + "the Whitworth Pirates (D3)."
    )
    df = get_basketball_game_team_stats(3979051)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.

    """
    # One output row is produced per unique combination of these columns.
    team_keys = ["season", "game_id", "team_id", "team_name"]

    # Counting stats that are added up across every player of a team.
    summed_stats = [
        "MP_total_seconds",
        "FGM",
        "FGA",
        "3PM",
        "3PA",
        "2PM",
        "2PA",
        "FT",
        "FTA",
        "ORB",
        "DRB",
        "TRB",
        "AST",
        "STL",
        "BLK",
        "TOV",
        "PF",
        "TF",
        "PTS",
        "DQ",
        "DBL_DBL",
        "TRP_DBL",
    ]

    player_stats_df = get_basketball_game_player_stats(game_id=game_id)
    player_stats_df = player_stats_df.infer_objects()

    stats_df = player_stats_df.groupby(team_keys, as_index=False).agg(
        dict.fromkeys(summed_stats, "sum")
    )

    # Total seconds played, formatted back into a minutes string.
    stats_df["MP_str"] = stats_df["MP_total_seconds"].map(
        _get_minute_formatted_time_from_seconds
    )

    # Simple "made / attempted" percentages, rounded to 4 decimal places.
    simple_percentages = (
        ("FG%", "FGM", "FGA"),
        ("3P%", "3PM", "3PA"),
        ("FT%", "FT", "FTA"),
    )
    for pct_col, made_col, attempt_col in simple_percentages:
        made_rate = stats_df[made_col] / stats_df[attempt_col]
        stats_df[pct_col] = made_rate.round(4)

    # Two-point totals are re-derived from the summed
    # field goal and three-point totals.
    stats_df["2PM"] = stats_df["FGM"] - stats_df["3PM"]
    stats_df["2PA"] = stats_df["FGA"] - stats_df["3PA"]
    two_point_rate = stats_df["2PM"] / stats_df["2PA"]
    stats_df["2P%"] = two_point_rate.round(4)

    # Effective field goal percentage: a made three counts as 1.5 makes.
    weighted_makes = stats_df["FGM"] + (stats_df["3PM"] * 0.5)
    stats_df["eFG%"] = (weighted_makes / stats_df["FGA"]).round(4)

    # True shooting attempts and true shooting percentage.
    stats_df["TSA"] = stats_df["FGA"] + (stats_df["FTA"] * 0.44)
    true_shooting = stats_df["PTS"] / (2 * stats_df["TSA"])
    stats_df["TS%"] = true_shooting.round(4)

    # Turnover percentage.
    tov_denominator = (
        stats_df["FGA"] + (stats_df["FTA"] * 0.44) + stats_df["TOV"]
    )
    stats_df["TOV%"] = (stats_df["TOV"] / tov_denominator).round(4)

    return stats_df
3682
3683
def get_basketball_raw_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get the raw play-by-play (PBP)
    data for that game.

    The result is cached as a CSV file inside
    `~/.ncaa_stats_py/basketball_{MBB|WBB}/raw_pbp/`.
    A cached copy younger than 35 days is returned as-is,
    without downloading anything.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_raw_pbp

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_raw_pbp(5254137)
    print(df)

    # Get the play-by-play data of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the play-by-play data "
        + "of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_raw_pbp(5254126)
    print(df)

    # Get the play-by-play data of a February 28th
    # game between the Winthrop Eagles and High Point Panthers.
    print(
        "Get the play-by-play data of a February 28th "
        + "game between the Winthrop Eagles and High Point Panthers."
    )
    df = get_basketball_raw_pbp(3969302)
    print(df)

    # Get the play-by-play data of a December 19th, 2022
    # game between the San Francisco St. Gators and
    # the Cal St. Monterey Bay Otters (D2).
    print(
        "Get the play-by-play data of a December 19th, 2022 "
        + "game between the San Francisco St. Gators and " +
        "the Cal St. Monterey Bay Otters (D2)."
    )
    df = get_basketball_raw_pbp(2341500)
    print(df)

    # Get the play-by-play data of a January 3rd, 2022
    # game between the Hamline Pipers and the St. Olaf Oles (D3).
    print(
        "Get the play-by-play data of a January 3rd, 2022 "
        + "game between the Hamline Pipers and the St. Olaf Oles (D3)."
    )
    df = get_basketball_raw_pbp(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Women's Basketball National Championship game."
    )
    df = get_basketball_raw_pbp(5254137)
    print(df)

    # Get the play-by-play data of a March 12th, 2021
    # game between the La Salle Explorers and the Dayton Flyers.
    print(
        "Get the play-by-play data of a March 12th, 2021 "
        + "game between the La Salle Explorers and the Dayton Flyers."
    )
    df = get_basketball_raw_pbp(2055636)
    print(df)

    # Get the play-by-play data of a February 6th, 2020
    # game between Purdue Northwest and the Michigan Tech Huskies (D2).
    print(
        "Get the play-by-play data of a Thanksgiving Day "
        + "game between the Sacred Heart Pioneers and "
        + "the P.R.-Mayaguez Janes (D2)."
    )
    df = get_basketball_raw_pbp(1793405)
    print(df)

    # Get the play-by-play data of a January 5th, 2019
    # game between the Puget Sound Loggers
    # and the Whitworth Pirates (D3).
    print(
        "Get the play-by-play data of a January 5th, 2019 "
        + "game between the Simpson Storm and "
        + "the Dubuque Spartans (D3)."
    )
    df = get_basketball_raw_pbp(1625974)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a play-by-play (PBP) data in a given game.

    Raises
    ----------
    `ValueError`:
        If neither team in the game can be matched to a known
        men's (MBB) or women's (WBB) basketball team ID,
        or if the page does not contain any play-by-play events.

    """
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)
    cache_root = f"{home_dir}/.ncaa_stats_py/"

    stat_columns = [
        "season",
        "game_id",
        "sport_id",
        "game_datetime",
        "half_num",
        "event_num",
        "game_time_str",
        "game_time_seconds",
        "game_time_milliseconds",
        "event_team",
        "event_text",
        "is_overtime",
        "stadium_name",
        "attendance",
        "away_team_id",
        "away_team_name",
        "home_team_id",
        "home_team_name",
    ]

    url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play"

    # The sport of a game is unknown until its page has been parsed,
    # so look for a cached copy in both the MBB and the WBB folders.
    # If (somehow) both exist, the WBB copy wins because it is read last.
    load_from_cache = False
    file_mod_datetime = datetime.today()
    games_df = pd.DataFrame()

    for cached_sport_id in ("MBB", "WBB"):
        sport_dir = f"{cache_root}basketball_{cached_sport_id}/"
        pbp_dir = f"{sport_dir}raw_pbp/"

        for folder in (cache_root, sport_dir, pbp_dir):
            if not exists(folder):
                mkdir(folder)

        cache_file = f"{pbp_dir}{game_id}_raw_pbp.csv"
        if exists(cache_file):
            games_df = pd.read_csv(cache_file)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        elif cached_sport_id == "WBB":
            logging.info("Could not find a WBB raw play-by-play file")

    age = datetime.today() - file_mod_datetime

    # Cached files that are 35 days old (or older) get refreshed.
    if age.days >= 35:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    # The team lists are only needed to identify the sport and season
    # of a freshly downloaded game, so they are loaded after the cache check.
    mbb_teams_df = load_basketball_teams(get_wbb_data=False)
    mbb_team_ids_arr = mbb_teams_df["team_id"].to_list()

    wbb_teams_df = load_basketball_teams(get_wbb_data=True)
    wbb_team_ids_arr = wbb_teams_df["team_id"].to_list()

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    info_table = soup.find(
        "td",
        {
            "style": "padding: 0px 30px 0px 30px",
            "class": "d-none d-md-table-cell"
        }
    ).find(
        "table",
        {"style": "border-collapse: collapse"}
    )

    info_table_rows = info_table.find_all("tr")

    # Row 3 holds the game date, row 4 the stadium, row 5 the attendance.
    game_date_str = info_table_rows[3].find("td").text
    if "TBA" in game_date_str:
        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
    elif "tba" in game_date_str:
        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
    elif "TBD" in game_date_str:
        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
    elif "tbd" in game_date_str:
        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
    elif (
        "tbd" not in game_date_str.lower() and
        ":" not in game_date_str.lower()
    ):
        # Date only, no tip-off time.
        game_date_str = game_date_str.replace(" ", "")
        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
    else:
        game_datetime = datetime.strptime(
            game_date_str,
            '%m/%d/%Y %I:%M %p'
        )
    # NOTE(review): `game_datetime` is naive here, so `astimezone()`
    # treats it as system-local time before converting to US/Eastern.
    # Confirm that this is the intended interpretation.
    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
    game_date_str = game_datetime.isoformat()
    del game_datetime

    stadium_str = info_table_rows[4].find("td").text

    attendance_str = info_table_rows[5].find("td").text
    attendance_int = re.findall(
        r"([0-9\,]+)",
        attendance_str
    )[0]
    attendance_int = attendance_int.replace(",", "")
    attendance_int = int(attendance_int)
    del attendance_str

    team_cards = soup.find_all(
        "td",
        {
            "valign": "center",
            "class": "grey_text d-none d-sm-table-cell"
        }
    )

    # The away team is listed first, the home team second.
    away_url = team_cards[0].find_all("a")[0]
    home_url = team_cards[1].find_all("a")[0]

    away_team_name = away_url.text
    home_team_name = home_url.text

    away_team_id = away_url.get("href")
    home_team_id = home_url.get("href")

    away_team_id = away_team_id.replace("/teams", "")
    away_team_id = away_team_id.replace("/team", "")
    away_team_id = away_team_id.replace("/", "")
    away_team_id = int(away_team_id)

    home_team_id = home_team_id.replace("/teams", "")
    home_team_id = home_team_id.replace("/team", "")
    home_team_id = home_team_id.replace("/", "")
    home_team_id = int(home_team_id)

    # Identify the sport and season from the team IDs.
    # The home team is checked first. The away team is only a fallback,
    # and its season is looked up with its own team ID.
    sport_lookups = (
        (home_team_id, "MBB", mbb_team_ids_arr, mbb_teams_df),
        (home_team_id, "WBB", wbb_team_ids_arr, wbb_teams_df),
        (away_team_id, "MBB", mbb_team_ids_arr, mbb_teams_df),
        (away_team_id, "WBB", wbb_team_ids_arr, wbb_teams_df),
    )
    sport_id = ""
    season = 0
    for lookup_id, lookup_sport, lookup_ids, lookup_df in sport_lookups:
        if lookup_id in lookup_ids:
            sport_id = lookup_sport
            season = lookup_df.loc[
                lookup_df["team_id"] == lookup_id, "season"
            ].iloc[0]
            break
    else:
        raise ValueError(
            "Could not identify if this is a " +
            "MBB or WBB game based on team IDs. "
        )

    section_cards = soup.find_all(
        "div",
        {"class": "row justify-content-md-center w-100"}
    )

    is_overtime = False
    pbp_rows = []

    # One card per period (halves first, then any overtime periods).
    for card in section_cards:
        event_text = ""
        half_str = card.find(
            "div",
            {"class": "card-header"}
        ).text
        half_num = int(re.findall(r"([0-9]+)", half_str)[0])

        if "ot" in half_str.lower():
            # Overtime periods are numbered after the two halves.
            is_overtime = True
            half_num += 2
        table_body = card.find("table").find("tbody").find_all("tr")

        for row in table_body:
            t_cells = [x.text.strip() for x in row.find_all("td")]
            game_time_str = t_cells[0]

            # Column 1 holds away-team events, column 3 home-team events.
            # Column 2 (the score) is not part of the returned data.
            if len(t_cells[1]) > 0:
                event_team = away_team_id
                event_text = t_cells[1]
            elif len(t_cells[3]) > 0:
                event_team = home_team_id
                event_text = t_cells[3]

            time_parts = game_time_str.split(":")
            if len(time_parts) == 3:
                temp_time_minutes, temp_time_seconds, game_time_ms = \
                    time_parts
            elif len(time_parts) == 2:
                temp_time_minutes, temp_time_seconds = time_parts
                game_time_ms = 0

            temp_time_minutes = int(temp_time_minutes)
            temp_time_seconds = int(temp_time_seconds)
            game_time_ms = int(game_time_ms)
            game_time_seconds = temp_time_seconds + (temp_time_minutes * 60)

            half_seconds_remaining = game_time_seconds
            half_ms_remaining = game_time_ms
            if half_num == 1:
                # A full 20 minute second half is still to be played.
                game_time_seconds += 1200

            pbp_rows.append(
                {
                    "game_time_str": game_time_str,
                    "half_seconds_remaining": half_seconds_remaining,
                    "half_milliseconds_remaining": half_ms_remaining,
                    "game_seconds_remaining": game_time_seconds,
                    "game_milliseconds_remaining": game_time_ms,
                    "half_num": half_num,
                    "event_team": event_team,
                    "event_text": event_text,
                    "is_overtime": is_overtime
                }
            )

    if len(pbp_rows) == 0:
        raise ValueError(
            f"Could not find any play-by-play events for game ID {game_id}"
        )

    pbp_df = pd.DataFrame(pbp_rows)
    pbp_df["event_num"] = pbp_df.index + 1
    pbp_df["game_datetime"] = game_date_str
    pbp_df["season"] = season
    pbp_df["game_id"] = game_id
    pbp_df["sport_id"] = sport_id
    pbp_df["stadium_name"] = stadium_str
    pbp_df["attendance"] = attendance_int
    pbp_df["away_team_id"] = away_team_id
    pbp_df["away_team_name"] = away_team_name
    pbp_df["home_team_id"] = home_team_id
    pbp_df["home_team_name"] = home_team_name

    # NOTE(review): `stat_columns` asks for `game_time_seconds` and
    # `game_time_milliseconds`, which are never populated above, and it
    # omits the `*_remaining` columns. After this reindex those two
    # columns are all-NaN and the `*_remaining` columns are dropped.
    # Confirm the intended output schema before changing it.
    pbp_df = pbp_df.reindex(columns=stat_columns)
    pbp_df = pbp_df.infer_objects()

    # `sport_id` is guaranteed to be "MBB" or "WBB" at this point.
    pbp_df.to_csv(
        f"{cache_root}basketball_{sport_id}/raw_pbp/"
        + f"{game_id}_raw_pbp.csv",
        index=False
    )

    return pbp_df
4176
4177
def get_basketball_game_starters(game_id: int) -> list:
    """
    Given a valid game ID, this function will attempt to
    get the starting lineup out of the raw play-by-play data
    from the game.

    The lineup is inferred from the play-by-play text:
    for each team, the first five distinct players that appear in a
    "substitution out" event are treated as that team's starters.
    Whether a player subbed in earlier is not checked.

    NOTE #1: The layout of the list will be as follows:

    > | Index |   **Away players**   |
    > | :---: | :------------------: |
    > |   0   | Away team starter #1 |
    > |   1   | Away team starter #2 |
    > |   2   | Away team starter #3 |
    > |   3   | Away team starter #4 |
    > |   4   | Away team starter #5 |

    > | Index |   **Home players**   |
    > | :---: | :------------------: |
    > |   5   | Home team starter #1 |
    > |   6   | Home team starter #2 |
    > |   7   | Home team starter #3 |
    > |   8   | Home team starter #4 |
    > |   9   | Home team starter #5 |

    NOTE #2: Starters are listed in order of when they first sub out.
    Do not assume that starter #5 for a team is a center,
    or that starter #1 is a PG!

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want the starting lineups from.

    Returns
    ----------
    A list of starters from a specific basketball game ID.

    Raises
    ----------
    `ValueError`:
        If a "substitution out" event has an empty player name,
        or if fewer than 5 starters could be found for a team.

    """
    starters_list = []
    pbp_df = get_basketball_raw_pbp(game_id=game_id)
    away_team_id = pbp_df["away_team_id"].iloc[0]
    home_team_id = pbp_df["home_team_id"].iloc[0]

    for team_id in [away_team_id, home_team_id]:
        temp_starters_list = []

        temp_df = pbp_df[pbp_df["event_team"] == team_id]

        for play_txt in temp_df["event_text"].to_list():
            if len(temp_starters_list) == 5:
                break
            # Event text loaded from a cached CSV can be NaN (a float).
            if not isinstance(play_txt, str):
                continue
            if "substitution out" not in play_txt:
                continue

            # The player name is the text before the first comma.
            player_txt = play_txt.split(",")[0]
            if player_txt in temp_starters_list:
                # Already counted; a player can sub out more than once.
                continue
            if player_txt.lower() == "team":
                continue
            if len(player_txt) == 0:
                raise ValueError(
                    "Player cannot be NULL."
                )
            temp_starters_list.append(player_txt)

        if len(temp_starters_list) < 5:
            raise ValueError(
                f"Could not find all 5 starters for team ID {team_id} " +
                f"in game ID {game_id}"
            )
        starters_list.extend(temp_starters_list)
    return starters_list
4248
4249
def get_basketball_game_shot_locations(game_id: int) -> pd.DataFrame:
    """
    Placeholder for a function that is not implemented yet.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Currently unused, because this function always raises.

    Raises
    ----------
    `NotImplementedError`:
        Always raised when this function is called.

    """
    not_ready_message = "It's not implemented yet."
    raise NotImplementedError(not_ready_message)
def get_basketball_teams( season: int, level: str | int, get_wbb_data: bool = False) -> pandas.core.frame.DataFrame:
 37def get_basketball_teams(
 38    season: int,
 39    level: str | int,
 40    get_wbb_data: bool = False
 41) -> pd.DataFrame:
 42    """
 43    Retrieves a list of basketball teams from the NCAA.
 44
 45    Parameters
 46    ----------
 47    `season` (int, mandatory):
 48        Required argument.
 49        Specifies the season you want NCAA basketball team information from.
 50
 51    `level` (int, mandatory):
 52        Required argument.
 53        Specifies the level/division you want
 54        NCAA basketball team information from.
 55        This can either be an integer (1-3) or a string ("I"-"III").
 56
 57    `get_wbb_data` (bool, optional):
 58        Optional argument.
 59        If you want women's basketball data instead of men's basketball data,
 60        set this to `True`.
 61
 62    Usage
 63    ----------
 64    ```python
 65
 66    from ncaa_stats_py.basketball import get_basketball_teams
 67
 68    ########################################
 69    #          Men's Basketball            #
 70    ########################################
 71
 72    # Get all D1 men's basketball teams for the 2024 season.
 73    print("Get all D1 men's basketball teams for the 2024 season.")
 74    df = get_basketball_teams(2024, 1)
 75    print(df)
 76
 77    # Get all D2 men's basketball teams for the 2023 season.
 78    print("Get all D2 men's basketball teams for the 2023 season.")
 79    df = get_basketball_teams(2023, 2)
 80    print(df)
 81
 82    # Get all D3 men's basketball teams for the 2022 season.
 83    print("Get all D3 men's basketball teams for the 2022 season.")
 84    df = get_basketball_teams(2022, 3)
 85    print(df)
 86
 87    # Get all D1 men's basketball teams for the 2021 season.
 88    print("Get all D1 men's basketball teams for the 2021 season.")
 89    df = get_basketball_teams(2021, "I")
 90    print(df)
 91
 92    # Get all D2 men's basketball teams for the 2020 season.
 93    print("Get all D2 men's basketball teams for the 2020 season.")
 94    df = get_basketball_teams(2020, "II")
 95    print(df)
 96
 97    # Get all D3 men's basketball teams for the 2019 season.
 98    print("Get all D3 men's basketball teams for the 2019 season.")
 99    df = get_basketball_teams(2019, "III")
100    print(df)
101
102    ########################################
103    #          Women's Basketball          #
104    ########################################
105
106    # Get all D1 women's basketball teams for the 2024 season.
107    print(
108        "Get all D1 women's basketball teams for the 2024 season."
109    )
110    df = get_basketball_teams(2024, 1)
111    print(df)
112
113    # Get all D2 women's basketball teams for the 2023 season.
114    print(
115        "Get all D2 women's basketball teams for the 2023 season."
116    )
117    df = get_basketball_teams(2023, 2)
118    print(df)
119
120    # Get all D3 women's basketball teams for the 2022 season.
121    print(
122        "Get all D3 women's basketball teams for the 2022 season."
123    )
124    df = get_basketball_teams(2022, 3)
125    print(df)
126
127    # Get all D1 women's basketball teams for the 2021 season.
128    print(
129        "Get all D1 women's basketball teams for the 2021 season."
130    )
131    df = get_basketball_teams(2021, "I")
132    print(df)
133
134    # Get all D2 women's basketball teams for the 2020 season.
135    print(
136        "Get all D2 women's basketball teams for the 2020 season."
137    )
138    df = get_basketball_teams(2020, "II")
139    print(df)
140
141    # Get all D3 women's basketball teams for the 2019 season.
142    print(
143        "Get all D3 women's basketball teams for the 2019 season."
144    )
145    df = get_basketball_teams(2019, "III")
146    print(df)
147
148    ```
149
150    Returns
151    ----------
152    A pandas `DataFrame` object with a list of college basketball teams
153    in that season and NCAA level.
154    """
155    # def is_comment(elem):
156    #     return isinstance(elem, Comment)
157    sport_id = ""
158    # stat_sequence = 0
159    load_from_cache = True
160    home_dir = expanduser("~")
161    home_dir = _format_folder_str(home_dir)
162    teams_df = pd.DataFrame()
163    teams_df_arr = []
164    temp_df = pd.DataFrame()
165    formatted_level = ""
166    ncaa_level = 0
167
168    if get_wbb_data is True:
169        sport_id = "WBB"
170        stat_sequence = 169
171    else:
172        sport_id = "MBB"
173        stat_sequence = 168
174
175    if isinstance(level, int) and level == 1:
176        formatted_level = "I"
177        ncaa_level = 1
178    elif isinstance(level, int) and level == 2:
179        formatted_level = "II"
180        ncaa_level = 2
181    elif isinstance(level, int) and level == 3:
182        formatted_level = "III"
183        ncaa_level = 3
184    elif isinstance(level, str) and (
185        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
186    ):
187        ncaa_level = 1
188        formatted_level = level.upper()
189    elif isinstance(level, str) and (
190        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
191    ):
192        ncaa_level = 2
193        formatted_level = level.upper()
194    elif isinstance(level, str) and (
195        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
196    ):
197        ncaa_level = 3
198        formatted_level = level.upper()
199
200    if exists(f"{home_dir}/.ncaa_stats_py/"):
201        pass
202    else:
203        mkdir(f"{home_dir}/.ncaa_stats_py/")
204
205    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
206        pass
207    else:
208        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
209
210    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"):
211        pass
212    else:
213        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}//teams/")
214
215    if exists(
216        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
217        + f"{season}_{formatted_level}_teams.csv"
218    ):
219        teams_df = pd.read_csv(
220            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
221            + f"{season}_{formatted_level}_teams.csv"
222        )
223        file_mod_datetime = datetime.fromtimestamp(
224            getmtime(
225                f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
226                + f"{season}_{formatted_level}_teams.csv"
227            )
228        )
229    else:
230        file_mod_datetime = datetime.today()
231        load_from_cache = False
232
233    now = datetime.today()
234
235    age = now - file_mod_datetime
236
237    if (
238        age.days > 1 and
239        season >= (now.year - 1) and
240        now.month <= 7
241    ):
242        load_from_cache = False
243    elif age.days >= 35:
244        load_from_cache = False
245
246    if load_from_cache is True:
247        return teams_df
248
249    logging.warning(
250        f"Either we could not load {season} D{level} schools from cache, "
251        + "or it's time to refresh the cached data."
252    )
253    schools_df = _get_schools()
254    url = (
255        "https://stats.ncaa.org/rankings/change_sport_year_div?"
256        + f"academic_year={season}.0&division={ncaa_level}.0" +
257        f"&sport_code={sport_id}"
258    )
259
260    response = _get_webpage(url=url)
261
262    soup = BeautifulSoup(response.text, features="lxml")
263    ranking_periods = soup.find("select", {"name": "rp", "id": "rp"})
264    ranking_periods = ranking_periods.find_all("option")
265
266    rp_value = 0
267    found_value = False
268
269    while found_value is False:
270        # print("check")
271        for rp in ranking_periods:
272            if "final " in rp.text.lower():
273                rp_value = rp.get("value")
274                found_value = True
275                break
276            else:
277                rp_value = rp.get("value")
278                found_value = True
279                break
280
281    url = (
282        "https://stats.ncaa.org/rankings/institution_trends?"
283        + f"academic_year={season}.0&division={ncaa_level}.0&"
284        + f"ranking_period={rp_value}&sport_code={sport_id}"
285        + f"&sport_code={sport_id}"
286    )
287
288    best_method = True
289    if (
290        (season < 2015 and sport_id == "MBB")
291    ):
292        url = (
293            "https://stats.ncaa.org/rankings/national_ranking?"
294            + f"academic_year={season}.0&division={ncaa_level}.0&"
295            + f"ranking_period={rp_value}&sport_code={sport_id}"
296            + f"&stat_seq={stat_sequence}"
297        )
298        response = _get_webpage(url=url)
299        best_method = False
300    elif season < 2013:
301        url = (
302            "https://stats.ncaa.org/rankings/national_ranking?"
303            + f"academic_year={season}.0&division={ncaa_level}.0&"
304            + f"ranking_period={rp_value}&sport_code={sport_id}"
305            + f"&stat_seq={stat_sequence}"
306        )
307        response = _get_webpage(url=url)
308        best_method = False
309    else:
310        try:
311            response = _get_webpage(url=url)
312        except Exception as e:
313            logging.info(f"Found exception when loading teams `{e}`")
314            logging.info("Attempting backup method.")
315            url = (
316                "https://stats.ncaa.org/rankings/national_ranking?"
317                + f"academic_year={season}.0&division={ncaa_level}.0&"
318                + f"ranking_period={rp_value}&sport_code={sport_id}"
319                + f"&stat_seq={stat_sequence}"
320            )
321            response = _get_webpage(url=url)
322            best_method = False
323
324    soup = BeautifulSoup(response.text, features="lxml")
325
326    if best_method is True:
327        soup = soup.find(
328            "table",
329            {"id": "stat_grid"},
330        )
331        soup = soup.find("tbody")
332        t_rows = soup.find_all("tr")
333
334        for t in t_rows:
335            team_id = t.find("a")
336            team_id = team_id.get("href")
337            team_id = team_id.replace("/teams/", "")
338            team_id = int(team_id)
339            team_name = t.find_all("td")[0].text
340            team_conference_name = t.find_all("td")[1].text
341            # del team
342            temp_df = pd.DataFrame(
343                {
344                    "season": season,
345                    "ncaa_division": ncaa_level,
346                    "ncaa_division_formatted": formatted_level,
347                    "team_conference_name": team_conference_name,
348                    "team_id": team_id,
349                    "school_name": team_name,
350                    "sport_id": sport_id,
351                },
352                index=[0],
353            )
354            teams_df_arr.append(temp_df)
355            del temp_df
356    else:
357        soup = soup.find(
358            "table",
359            {"id": "rankings_table"},
360        )
361        soup = soup.find("tbody")
362        t_rows = soup.find_all("tr")
363
364        for t in t_rows:
365            team_id = t.find("a")
366            team_id = team_id.get("href")
367            team_id = team_id.replace("/teams/", "")
368            team_id = int(team_id)
369            team = t.find_all("td")[1].get("data-order")
370            team_name, team_conference_name = team.split(",")
371            del team
372            temp_df = pd.DataFrame(
373                {
374                    "season": season,
375                    "ncaa_division": ncaa_level,
376                    "ncaa_division_formatted": formatted_level,
377                    "team_conference_name": team_conference_name,
378                    "team_id": team_id,
379                    "school_name": team_name,
380                    "sport_id": sport_id,
381                },
382                index=[0],
383            )
384            teams_df_arr.append(temp_df)
385            del temp_df
386
387    teams_df = pd.concat(teams_df_arr, ignore_index=True)
388    teams_df = pd.merge(
389        left=teams_df,
390        right=schools_df,
391        on=["school_name"],
392        how="left"
393    )
394    teams_df.sort_values(by=["team_id"], inplace=True)
395
396    teams_df.to_csv(
397        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/teams/"
398        + f"{season}_{formatted_level}_teams.csv",
399        index=False,
400    )
401
402    return teams_df

Retrieves a list of basketball teams from the NCAA.

Parameters

season (int, mandatory): Required argument. Specifies the season you want NCAA basketball team information from.

level (int | str, mandatory): Required argument. Specifies the level/division you want NCAA basketball team information from. This can either be an integer (1-3) or a string ("I"-"III").

get_wbb_data (bool, optional): Optional argument. If you want women's basketball data instead of men's basketball data, set this to True.

Usage

from ncaa_stats_py.basketball import get_basketball_teams

########################################
#          Men's Basketball            #
########################################

# Get all D1 men's basketball teams for the 2024 season.
print("Get all D1 men's basketball teams for the 2024 season.")
df = get_basketball_teams(2024, 1)
print(df)

# Get all D2 men's basketball teams for the 2023 season.
print("Get all D2 men's basketball teams for the 2023 season.")
df = get_basketball_teams(2023, 2)
print(df)

# Get all D3 men's basketball teams for the 2022 season.
print("Get all D3 men's basketball teams for the 2022 season.")
df = get_basketball_teams(2022, 3)
print(df)

# Get all D1 men's basketball teams for the 2021 season.
print("Get all D1 men's basketball teams for the 2021 season.")
df = get_basketball_teams(2021, "I")
print(df)

# Get all D2 men's basketball teams for the 2020 season.
print("Get all D2 men's basketball teams for the 2020 season.")
df = get_basketball_teams(2020, "II")
print(df)

# Get all D3 men's basketball teams for the 2019 season.
print("Get all D3 men's basketball teams for the 2019 season.")
df = get_basketball_teams(2019, "III")
print(df)

########################################
#          Women's Basketball          #
########################################

# Get all D1 women's basketball teams for the 2024 season.
print(
    "Get all D1 women's basketball teams for the 2024 season."
)
df = get_basketball_teams(2024, 1, get_wbb_data=True)
print(df)

# Get all D2 women's basketball teams for the 2023 season.
print(
    "Get all D2 women's basketball teams for the 2023 season."
)
df = get_basketball_teams(2023, 2, get_wbb_data=True)
print(df)

# Get all D3 women's basketball teams for the 2022 season.
print(
    "Get all D3 women's basketball teams for the 2022 season."
)
df = get_basketball_teams(2022, 3, get_wbb_data=True)
print(df)

# Get all D1 women's basketball teams for the 2021 season.
print(
    "Get all D1 women's basketball teams for the 2021 season."
)
df = get_basketball_teams(2021, "I", get_wbb_data=True)
print(df)

# Get all D2 women's basketball teams for the 2020 season.
print(
    "Get all D2 women's basketball teams for the 2020 season."
)
df = get_basketball_teams(2020, "II", get_wbb_data=True)
print(df)

# Get all D3 women's basketball teams for the 2019 season.
print(
    "Get all D3 women's basketball teams for the 2019 season."
)
df = get_basketball_teams(2019, "III", get_wbb_data=True)
print(df)

Returns

A pandas DataFrame object with a list of college basketball teams in that season and NCAA level.

def load_basketball_teams( start_year: int = 2011, get_wbb_data: bool = False) -> pandas.core.frame.DataFrame:
def load_basketball_teams(
    start_year: int = 2011,
    get_wbb_data: bool = False
) -> pd.DataFrame:
    """
    Compiles a list of known NCAA basketball teams in NCAA basketball history.

    Every season from `start_year` through the most recent season is
    requested for each NCAA division ("I", "II", "III") by calling
    `get_basketball_teams()`, and the results are concatenated.
    A season/division combination that fails to load is logged as a warning
    and skipped.

    Parameters
    ----------
    `start_year` (int, optional):
        Optional argument.
        Specifies the first season you want
        NCAA basketball team information from.

    `get_wbb_data` (bool, optional):
        Optional argument.
        If you want women's basketball data instead of men's basketball data,
        set this to `True`.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import load_basketball_teams

    # WARNING: Running this script "as-is" for the first time may
    #          take some time.
    #          The *N*th time you run this script will be faster.

    # Load in every women's basketball team
    # from 2011 to present day.
    print(
        "Load in every women's basketball team " +
        "from 2011 to present day."
    )
    df = load_basketball_teams(get_wbb_data=True)
    print(df)

    # Load in every men's basketball team
    # from 2011 to present day.
    print(
        "Load in every men's basketball team " +
        "from 2011 to present day."
    )
    df = load_basketball_teams()
    print(df)

    # Load in every men's basketball team
    # from 2020 to present day.
    print(
        "Load in every men's basketball team " +
        "from 2020 to present day."
    )
    df = load_basketball_teams(start_year=2020)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a list of
    all known college basketball teams.

    NOTE: if not a single season/division could be loaded,
    `pd.concat()` is called on an empty list, which raises a `ValueError`.

    """
    teams_df_arr = []

    now = datetime.now()
    ncaa_divisions = ["I", "II", "III"]

    # `range()` excludes its end value, so from June onward the last season
    # requested is `now.year + 1`; before June it is `now.year`.
    if now.month > 5:
        ncaa_seasons = range(start_year, now.year + 2)
    else:
        ncaa_seasons = range(start_year, now.year + 1)

    logging.info(
        "Loading in all NCAA basketball teams. "
        + "If this is the first time you're seeing this message, "
        + "it may take some time (3-10 minutes) for this to load."
    )
    for s in ncaa_seasons:
        logging.info(f"Loading in basketball teams for the {s} season.")
        for d in ncaa_divisions:
            try:
                # `get_wbb_data` must be forwarded here, otherwise
                # men's basketball teams are always returned.
                temp_df = get_basketball_teams(
                    season=s,
                    level=d,
                    get_wbb_data=get_wbb_data,
                )
                teams_df_arr.append(temp_df)
            except Exception as e:
                # Best effort: a missing season/division should not
                # prevent the remaining ones from being loaded.
                logging.warning(
                    "Unhandled exception when trying to " +
                    f"get the teams. Full exception: `{e}`"
                )

    teams_df = pd.concat(teams_df_arr, ignore_index=True)
    teams_df = teams_df.infer_objects()
    return teams_df

Compiles a list of known NCAA basketball teams in NCAA basketball history.

Parameters

start_year (int, optional): Optional argument. Specifies the first season you want NCAA basketball team information from.

get_wbb_data (bool, optional): Optional argument. If you want women's basketball data instead of men's basketball data, set this to True.

Usage

from ncaa_stats_py.basketball import load_basketball_teams

# WARNING: Running this script "as-is" for the first time may
#          take some time.
#          The *N*th time you run this script will be faster.

# Load in every women's basketball team
# from 2011 to present day.
print(
    "Load in every women's basketball team " +
    "from 2011 to present day."
)
df = load_basketball_teams(get_wbb_data=True)
print(df)

# Load in every men's basketball team
# from 2011 to present day.
print(
    "Load in every men's basketball team " +
    "from 2011 to present day."
)
df = load_basketball_teams()
print(df)

# Load in every men's basketball team
# from 2020 to present day.
print(
    "Load in every men's basketball team " +
    "from 2020 to present day."
)
df = load_basketball_teams(start_year=2020)
print(df)

Returns

A pandas DataFrame object with a list of all known college basketball teams.

def get_basketball_team_schedule(team_id: int) -> pandas.core.frame.DataFrame:
 511def get_basketball_team_schedule(team_id: int) -> pd.DataFrame:
 512    """
 513    Retrieves a team schedule, from a valid NCAA basketball team ID.
 514
 515    Parameters
 516    ----------
 517    `team_id` (int, mandatory):
 518        Required argument.
 519        Specifies the team you want a schedule from.
 520        This is separate from a school ID, which identifies the institution.
 521        A team ID should be unique to a school, and a season.
 522
 523    Usage
 524    ----------
 525    ```python
 526
 527    from ncaa_stats_py.basketball import get_basketball_team_schedule
 528
 529    ########################################
 530    #          Men's Basketball            #
 531    ########################################
 532
 533    # Get the team schedule for the
 534    # 2024 Wright St. MBB team (D1, ID: 561255).
 535    print(
 536        "Get the team schedule for the " +
 537        "2024 Wright St. MBB team (D1, ID: 561255)."
 538    )
 539    df = get_basketball_team_schedule(561255)
 540    print(df)
 541
 542    # Get the team schedule for the
 543    # 2023 Caldwell MBB team (D2, ID: 542813).
 544    print(
 545        "Get the team schedule for the " +
 546        "2023 Caldwell MBB team (D2, ID: 542813)."
 547    )
 548    df = get_basketball_team_schedule(542813)
 549    print(df)
 550
 551    # Get the team schedule for the
 552    # 2022 SUNY Maritime MBB team (D3, ID: 528097).
 553    print(
 554        "Get the team schedule for the " +
 555        "2022 SUNY Maritime MBB team (D3, ID: 528097)."
 556    )
 557    df = get_basketball_team_schedule(528097)
 558    print(df)
 559
 560    ########################################
 561    #          Women's Basketball          #
 562    ########################################
 563
 564    # Get the team schedule for the
 565    # 2021 Wake Forest WBB team (D1, ID: 506339).
 566    print(
 567        "Get the team schedule for the " +
 568        "2021 Wake Forest WBB team (D1, ID: 506339)."
 569    )
 570    df = get_basketball_team_schedule(506339)
 571    print(df)
 572
 573    # Get the team schedule for the
 574    # 2020 Trevecca Nazarene WBB team (D2, ID: 484527).
 575    print(
 576        "Get the team schedule for the " +
 577        "2020 Trevecca Nazarene WBB team (D2, ID: 484527)."
 578    )
 579    df = get_basketball_team_schedule(484527)
 580    print(df)
 581
 582    # Get the team schedule for the
 583    # 2019 Simpson WBB team (D3, ID: 452452).
 584    print(
 585        "Get the team schedule for the " +
 586        "2019 Simpson WBB team (D3, ID: 452452)."
 587    )
 588    df = get_basketball_team_schedule(452452)
 589    print(df)
 590
 591    ```
 592
 593    Returns
 594    ----------
 595    A pandas `DataFrame` object with an NCAA basketball team's schedule.
 596
 597    """
 598
 599    sport_id = ""
 600    schools_df = _get_schools()
 601    games_df = pd.DataFrame()
 602    games_df_arr = []
 603    season = 0
 604    temp_df = pd.DataFrame()
 605    load_from_cache = True
 606
 607    home_dir = expanduser("~")
 608    home_dir = _format_folder_str(home_dir)
 609
 610    url = f"https://stats.ncaa.org/teams/{team_id}"
 611
 612    try:
 613        team_df = load_basketball_teams()
 614        team_df = team_df[team_df["team_id"] == team_id]
 615        season = team_df["season"].iloc[0]
 616        ncaa_division = team_df["ncaa_division"].iloc[0]
 617        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 618        sport_id = "MBB"
 619    except Exception:
 620        team_df = load_basketball_teams(get_wbb_data=True)
 621        team_df = team_df[team_df["team_id"] == team_id]
 622        season = team_df["season"].iloc[0]
 623        ncaa_division = team_df["ncaa_division"].iloc[0]
 624        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 625        sport_id = "WBB"
 626    # team_conference_name = team_df["team_conference_name"].iloc[0]
 627    # school_name = team_df["school_name"].iloc[0]
 628    # school_id = int(team_df["school_id"].iloc[0])
 629
 630    del team_df
 631
 632    if exists(f"{home_dir}/.ncaa_stats_py/"):
 633        pass
 634    else:
 635        mkdir(f"{home_dir}/.ncaa_stats_py/")
 636
 637    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
 638        pass
 639    else:
 640        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
 641
 642    if exists(
 643        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 644    ):
 645        pass
 646    else:
 647        mkdir(
 648            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 649        )
 650
 651    if exists(
 652        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 653        + f"{team_id}_team_schedule.csv"
 654    ):
 655        games_df = pd.read_csv(
 656            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/team_schedule/"
 657            + f"{team_id}_team_schedule.csv"
 658        )
 659        file_mod_datetime = datetime.fromtimestamp(
 660            getmtime(
 661                f"{home_dir}/.ncaa_stats_py/"
 662                + f"basketball_{sport_id}/team_schedule/"
 663                + f"{team_id}_team_schedule.csv"
 664            )
 665        )
 666    else:
 667        file_mod_datetime = datetime.today()
 668        load_from_cache = False
 669
 670    now = datetime.today()
 671
 672    age = now - file_mod_datetime
 673    if (
 674        age.days > 1 and
 675        season >= now.year
 676    ):
 677        load_from_cache = False
 678
 679    if load_from_cache is True:
 680        return games_df
 681
 682    response = _get_webpage(url=url)
 683    soup = BeautifulSoup(response.text, features="lxml")
 684
 685    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
 686    season_name = (
 687        soup.find("select", {"id": "year_list"})
 688        .find("option", {"selected": "selected"})
 689        .text
 690    )
 691    # For NCAA basketball, the season always starts in the fall semester,
 692    # and ends in the spring semester.
 693    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season,
 694    # because 2012 would encompass the fall and spring semesters
 695    # for NCAA member institutions.
 696    # season = f"{season_name[0:2]}{season_name[-2:]}"
 697    # season = int(season)
 698    soup = soup.find_all(
 699        "div",
 700        {"class": "col p-0"},
 701    )
 702
 703    # declaring it here to prevent potential problems down the road.
 704    table_data = ""
 705    for s in soup:
 706        try:
 707            temp_name = s.find("div", {"class": "card-header"})
 708            temp_name = temp_name.text
 709        except Exception as e:
 710            logging.warning(
 711                f"Could not parse card header. Full exception `{e}`. "
 712                + "Attempting alternate method."
 713            )
 714            temp_name = s.find("tr", {"class": "heading"}).find("td").text
 715
 716        if "schedule" in temp_name.lower():
 717            table_data = s.find("table")
 718
 719    t_rows = table_data.find_all("tr", {"class": "underline_rows"})
 720
 721    if len(t_rows) == 0:
 722        t_rows = table_data.find_all("tr")
 723
 724    for g in t_rows:
 725        is_valid_row = True
 726        game_num = 1
 727        ot_periods = 0
 728        is_home_game = True
 729        is_neutral_game = False
 730
 731        cells = g.find_all("td")
 732        if len(cells) <= 1:
 733            # Because of how *well* designed
 734            # stats.ncaa.org is, if we have to use execute
 735            # the `if len(t_rows) == 0:` code,
 736            # we need to catch any cases where every element in a
 737            # table row (`<tr>`) is a table header (`<th>`),
 738            # instead of a table data cell (`<td>`)
 739            continue
 740
 741        game_date = cells[0].text
 742
 743        # If "(" is in the same cell as the date,
 744        # this means that this game is an extra innings game.
 745        # The number encased in `()` is the actual number of innings.
 746        # We need to remove that from the date,
 747        # and move it into a separate variable.
 748        if "(" in game_date:
 749            game_date = game_date.replace(")", "")
 750            game_date, game_num = game_date.split("(")
 751            game_date = game_date.strip()
 752            game_num = int(game_num.strip())
 753
 754        game_date = datetime.strptime(game_date, "%m/%d/%Y").date()
 755
 756        try:
 757            opp_team_id = cells[1].find("a").get("href")
 758        except IndexError:
 759            logging.info(
 760                "Skipping row because it is clearly "
 761                + "not a row that has schedule data."
 762            )
 763            is_valid_row = False
 764        except AttributeError as e:
 765            logging.info(
 766                "Could not extract a team ID for this game. " +
 767                f"Full exception {e}"
 768            )
 769            opp_team_id = "-1"
 770        except Exception as e:
 771            logging.warning(
 772                "An unhandled exception has occurred when "
 773                + "trying to get the opposition team ID for this game. "
 774                f"Full exception `{e}`."
 775            )
 776            raise e
 777        if is_valid_row is True:
 778            if opp_team_id is not None:
 779                opp_team_id = opp_team_id.replace("/teams/", "")
 780                opp_team_id = int(opp_team_id)
 781
 782                try:
 783                    opp_team_name = cells[1].find("img").get("alt")
 784                except AttributeError:
 785                    logging.info(
 786                        "Couldn't find the opposition team name "
 787                        + "for this row from an image element. "
 788                        + "Attempting a backup method"
 789                    )
 790                    opp_team_name = cells[1].text
 791                except Exception as e:
 792                    logging.info(
 793                        "Unhandled exception when trying to get the "
 794                        + "opposition team name from this game. "
 795                        + f"Full exception `{e}`"
 796                    )
 797                    raise e
 798            else:
 799                opp_team_name = cells[1].text
 800
 801            if opp_team_name[0] == "@":
 802                # The logic for determining if this game was a
 803                # neutral site game doesn't care if that info is in
 804                # `opp_team_name`.
 805                opp_team_name = opp_team_name.strip().replace("@", "")
 806            elif "@" in opp_team_name:
 807                opp_team_name = opp_team_name.strip().split("@")[0]
 808            # opp_team_show_name = cells[1].text.strip()
 809
 810            opp_text = cells[1].text
 811            opp_text = opp_text.strip()
 812            if "@" in opp_text and opp_text[0] == "@":
 813                is_home_game = False
 814            elif "@" in opp_text and opp_text[0] != "@":
 815                is_neutral_game = True
 816                is_home_game = False
 817            # This is just to cover conference and NCAA championship
 818            # tournaments.
 819            elif "championship" in opp_text.lower():
 820                is_neutral_game = True
 821                is_home_game = False
 822            elif "ncaa" in opp_text.lower():
 823                is_neutral_game = True
 824                is_home_game = False
 825
 826            del opp_text
 827
 828            score = cells[2].text.strip()
 829            if len(score) == 0:
 830                score_1 = 0
 831                score_2 = 0
 832            elif (
 833                "canceled" not in score.lower() and
 834                "ppd" not in score.lower()
 835            ):
 836                score_1, score_2 = score.split("-")
 837
 838                # `score_1` should be "W `n`", "L `n`", or "T `n`",
 839                # with `n` representing the number of runs this team
 840                # scored in this game.
 841                # Let's remove the "W", "L", or "T" from `score_1`,
 842                # and determine which team won later on in this code.
 843                if any(x in score_1 for x in ["W", "L", "T"]):
 844                    score_1 = score_1.split(" ")[1]
 845
 846                if "(" in score_2:
 847                    score_2 = score_2.replace(")", "")
 848                    score_2, ot_periods = score_2.split("(")
 849                    ot_periods = ot_periods.replace("OT", "")
 850                    ot_periods = ot_periods.replace(" ", "")
 851                    ot_periods = int(ot_periods)
 852
 853                if ot_periods is None:
 854                    ot_periods = 0
 855                score_1 = int(score_1)
 856                score_2 = int(score_2)
 857            else:
 858                score_1 = None
 859                score_2 = None
 860
 861            try:
 862                game_id = cells[2].find("a").get("href")
 863                game_id = game_id.replace("/contests", "")
 864                game_id = game_id.replace("/box_score", "")
 865                game_id = game_id.replace("/", "")
 866                game_id = int(game_id)
 867                game_url = (
 868                    f"https://stats.ncaa.org/contests/{game_id}/box_score"
 869                )
 870
 871            except AttributeError as e:
 872                logging.info(
 873                    "Could not parse a game ID for this game. "
 874                    + f"Full exception `{e}`."
 875                )
 876                game_id = None
 877                game_url = None
 878            except Exception as e:
 879                logging.info(
 880                    "An unhandled exception occurred when trying "
 881                    + "to find a game ID for this game. "
 882                    + f"Full exception `{e}`."
 883                )
 884                raise e
 885            try:
 886                attendance = cells[3].text
 887                attendance = attendance.replace(",", "")
 888                attendance = attendance.replace("\n", "")
 889                attendance = int(attendance)
 890            except IndexError as e:
 891                logging.info(
 892                    "It doesn't appear as if there is an attendance column "
 893                    + "for this team's schedule table."
 894                    f"Full exception `{e}`."
 895                )
 896                attendance = None
 897            except ValueError as e:
 898                logging.info(
 899                    "There doesn't appear as if "
 900                    + "there is a recorded attendance. "
 901                    + "for this game/row. "
 902                    f"Full exception `{e}`."
 903                )
 904                attendance = None
 905
 906            except Exception as e:
 907                logging.info(
 908                    "An unhandled exception occurred when trying "
 909                    + "to find this game's attendance. "
 910                    + f"Full exception `{e}`."
 911                )
 912                raise e
 913
 914            if is_home_game is True:
 915                temp_df = pd.DataFrame(
 916                    {
 917                        "season": season,
 918                        "season_name": season_name,
 919                        "game_id": game_id,
 920                        "game_date": game_date,
 921                        "game_num": game_num,
 922                        "ot_periods": ot_periods,
 923                        "home_team_id": team_id,
 924                        "home_team_name": school_name,
 925                        "away_team_id": opp_team_id,
 926                        "away_team_name": opp_team_name,
 927                        "home_team_score": score_1,
 928                        "away_team_score": score_2,
 929                        "is_neutral_game": is_neutral_game,
 930                        "game_url": game_url,
 931                    },
 932                    index=[0],
 933                )
 934                games_df_arr.append(temp_df)
 935                del temp_df
 936            elif is_neutral_game is True:
 937                # For the sake of simplicity,
 938                # order both team ID's,
 939                # and set the lower number of the two as
 940                # the "away" team in this neutral site game,
 941                # just so there's no confusion if someone
 942                # combines a ton of these team schedule `DataFrame`s,
 943                # and wants to remove duplicates afterwards.
 944                t_ids = [opp_team_id, team_id]
 945                t_ids.sort()
 946
 947                if t_ids[0] == team_id:
 948                    # home
 949                    temp_df = pd.DataFrame(
 950                        {
 951                            "season": season,
 952                            "season_name": season_name,
 953                            "game_id": game_id,
 954                            "game_date": game_date,
 955                            "game_num": game_num,
 956                            "ot_periods": ot_periods,
 957                            "home_team_id": team_id,
 958                            "home_team_name": school_name,
 959                            "away_team_id": opp_team_id,
 960                            "away_team_name": opp_team_name,
 961                            "home_team_score": score_1,
 962                            "away_team_score": score_2,
 963                            "is_neutral_game": is_neutral_game,
 964                            "game_url": game_url,
 965                        },
 966                        index=[0],
 967                    )
 968
 969                else:
 970                    # away
 971                    temp_df = pd.DataFrame(
 972                        {
 973                            "season": season,
 974                            "season_name": season_name,
 975                            "game_id": game_id,
 976                            "game_date": game_date,
 977                            "game_num": game_num,
 978                            "ot_periods": ot_periods,
 979                            "home_team_id": opp_team_id,
 980                            "home_team_name": opp_team_name,
 981                            "away_team_id": team_id,
 982                            "away_team_name": school_name,
 983                            "home_team_score": score_2,
 984                            "away_team_score": score_1,
 985                            "is_neutral_game": is_neutral_game,
 986                            "game_url": game_url,
 987                        },
 988                        index=[0],
 989                    )
 990
 991                games_df_arr.append(temp_df)
 992                del temp_df
 993            else:
 994                temp_df = pd.DataFrame(
 995                    {
 996                        "season": season,
 997                        "season_name": season_name,
 998                        "game_id": game_id,
 999                        "game_date": game_date,
1000                        "game_num": game_num,
1001                        "ot_periods": ot_periods,
1002                        "home_team_id": opp_team_id,
1003                        "home_team_name": opp_team_name,
1004                        "away_team_id": team_id,
1005                        "away_team_name": school_name,
1006                        "home_team_score": score_2,
1007                        "away_team_score": score_1,
1008                        "is_neutral_game": is_neutral_game,
1009                        "game_url": game_url,
1010                    },
1011                    index=[0],
1012                )
1013
1014                games_df_arr.append(temp_df)
1015                del temp_df
1016
1017        # team_photo = team_id.find("img").get("src")
1018
1019    games_df = pd.concat(games_df_arr, ignore_index=True)
1020
1021    temp_df = schools_df.rename(
1022        columns={
1023            "school_name": "home_team_name",
1024            "school_id": "home_school_id"
1025        }
1026    )
1027    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")
1028
1029    temp_df = schools_df.rename(
1030        columns={
1031            "school_name": "away_team_name",
1032            "school_id": "away_school_id"
1033        }
1034    )
1035    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
1036    games_df["ncaa_division"] = ncaa_division
1037    games_df["ncaa_division_formatted"] = ncaa_division_formatted
1038
1039    # games_df["game_url"] = games_df["game_url"].str.replace("/box_score", "")
1040    games_df.to_csv(
1041        f"{home_dir}/.ncaa_stats_py/"
1042        + f"basketball_{sport_id}/team_schedule/"
1043        + f"{team_id}_team_schedule.csv",
1044        index=False,
1045    )
1046
1047    return games_df

Retrieves a team schedule, from a valid NCAA basketball team ID.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want a schedule from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.basketball import get_basketball_team_schedule

########################################
#          Men's Basketball            #
########################################

# Get the team schedule for the
# 2024 Wright St. MBB team (D1, ID: 561255).
print(
    "Get the team schedule for the " +
    "2024 Wright St. MBB team (D1, ID: 561255)."
)
df = get_basketball_team_schedule(561255)
print(df)

# Get the team schedule for the
# 2023 Caldwell MBB team (D2, ID: 542813).
print(
    "Get the team schedule for the " +
    "2023 Caldwell MBB team (D2, ID: 542813)."
)
df = get_basketball_team_schedule(542813)
print(df)

# Get the team schedule for the
# 2022 SUNY Maritime MBB team (D3, ID: 528097).
print(
    "Get the team schedule for the " +
    "2022 SUNY Maritime MBB team (D3, ID: 528097)."
)
df = get_basketball_team_schedule(528097)
print(df)

########################################
#          Women's Basketball          #
########################################

# Get the team schedule for the
# 2021 Wake Forest WBB team (D1, ID: 506339).
print(
    "Get the team schedule for the " +
    "2021 Wake Forest WBB team (D1, ID: 506339)."
)
df = get_basketball_team_schedule(506339)
print(df)

# Get the team schedule for the
# 2020 Trevecca Nazarene WBB team (D2, ID: 484527).
print(
    "Get the team schedule for the " +
    "2020 Trevecca Nazarene WBB team (D2, ID: 484527)."
)
df = get_basketball_team_schedule(484527)
print(df)

# Get the team schedule for the
# 2019 Simpson WBB team (D3, ID: 452452).
print(
    "Get the team schedule for the " +
    "2019 Simpson WBB team (D3, ID: 452452)."
)
df = get_basketball_team_schedule(452452)
print(df)

Returns

A pandas DataFrame object with an NCAA basketball team's schedule.

def get_basketball_day_schedule( game_date: str | datetime.date | datetime.datetime, level: str | int = 'I', get_wbb_data: bool = False):
1050def get_basketball_day_schedule(
1051    game_date: str | date | datetime,
1052    level: str | int = "I",
1053    get_wbb_data: bool = False
1054):
1055    """
1056    Given a date and NCAA level, this function retrieves basketball every game
1057    for that date.
1058
1059    Parameters
1060    ----------
1061    `game_date` (int, mandatory):
1062        Required argument.
1063        Specifies the date you want a basketball schedule from.
1064        For best results, pass a string formatted as "YYYY-MM-DD".
1065
1066    `level` (int, mandatory):
1067        Required argument.
1068        Specifies the level/division you want a
1069        NCAA basketball schedule from.
1070        This can either be an integer (1-3) or a string ("I"-"III").
1071
1072    `get_wbb_data` (bool, optional):
1073        Optional argument.
1074        If you want women's basketball data instead of men's basketball data,
1075        set this to `True`.
1076
1077    Usage
1078    ----------
1079    ```python
1080
1081    from ncaa_stats_py.basketball import get_basketball_day_schedule
1082
1083
1084    # Get all DI games that will be played on April 22th, 2025.
1085    print("Get all games that will be played on April 22th, 2025.")
1086    df = get_basketball_day_schedule("2025-04-22", level=1)
1087    print(df)
1088
1089    # Get all division II games that were played on February 14th, 2025.
1090    print("Get all division II games that were played on February 14th, 2025.")
1091    df = get_basketball_day_schedule("2025-02-14", level="I")
1092    print(df)
1093
1094    # Get all DI games that were played on December 10th, 2024.
1095    print("Get all games that were played on December 10th, 2024.")
1096    df = get_basketball_day_schedule("2024-12-10", level="I")
1097    print(df)
1098
1099    # Get all DI games (if any) that were played on December 12th, 2024.
1100    print("Get all DI games (if any) that were played on December 12th, 2024.")
1101    df = get_basketball_day_schedule("2024-12-12")
1102    print(df)
1103
1104    # Get all DII games played on January 14th, 2024.
1105    print("Get all DI games played on January 14th, 2024.")
1106    df = get_basketball_day_schedule("2024-01-14")
1107    print(df)
1108
1109    # Get all division III games played on December 16th, 2023.
1110    print("Get all division III games played on December 16th, 2023.")
1111    df = get_basketball_day_schedule("2023-12-16")
1112    print(df)
1113
1114    ```
1115
1116    Returns
1117    ----------
1118    A pandas `DataFrame` object with all basketball games played on that day,
1119    for that NCAA division/level.
1120
1121    """
1122
1123    season = 0
1124    sport_id = "MBB"
1125
1126    schedule_df = pd.DataFrame()
1127    schedule_df_arr = []
1128
1129    if isinstance(game_date, date):
1130        game_datetime = datetime.combine(
1131            game_date, datetime.min.time()
1132        )
1133    elif isinstance(game_date, datetime):
1134        game_datetime = game_date
1135    elif isinstance(game_date, str):
1136        game_datetime = parser.parse(
1137            game_date
1138        )
1139    else:
1140        unhandled_datatype = type(game_date)
1141        raise ValueError(
1142            f"Unhandled datatype for `game_date`: `{unhandled_datatype}`"
1143        )
1144
1145    if isinstance(level, int) and level == 1:
1146        formatted_level = "I"
1147        ncaa_level = 1
1148    elif isinstance(level, int) and level == 2:
1149        formatted_level = "II"
1150        ncaa_level = 2
1151    elif isinstance(level, int) and level == 3:
1152        formatted_level = "III"
1153        ncaa_level = 3
1154    elif isinstance(level, str) and (
1155        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1156    ):
1157        ncaa_level = 1
1158        formatted_level = level.upper()
1159    elif isinstance(level, str) and (
1160        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1161    ):
1162        ncaa_level = 2
1163        formatted_level = level.upper()
1164    elif isinstance(level, str) and (
1165        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1166    ):
1167        ncaa_level = 3
1168        formatted_level = level.upper()
1169
1170    del level
1171
1172    if get_wbb_data is True:
1173        sport_id = "WBB"
1174    elif get_wbb_data is False:
1175        sport_id = "MBB"
1176    else:
1177        raise ValueError(
1178            f"Unhandled value for `get_wbb_data`: `{get_wbb_data}`"
1179        )
1180
1181    season = game_datetime.year
1182    game_month = game_datetime.month
1183    game_day = game_datetime.day
1184    game_year = game_datetime.year
1185
1186    if game_month > 7:
1187        season += 1
1188        url = (
1189            "https://stats.ncaa.org/contests/" +
1190            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1191            f"&academic_year={season}&division={ncaa_level}" +
1192            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1193            "&commit=Submit"
1194        )
1195    else:
1196        url = (
1197            "https://stats.ncaa.org/contests/" +
1198            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1199            f"&academic_year={season}&division={ncaa_level}" +
1200            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1201            "&commit=Submit"
1202        )
1203
1204    response = _get_webpage(url=url)
1205    soup = BeautifulSoup(response.text, features="lxml")
1206
1207    game_boxes = soup.find_all("div", {"class": "table-responsive"})
1208
1209    for box in game_boxes:
1210        game_id = None
1211        game_alt_text = None
1212        game_num = 1
1213        # t_box = box.find("table")
1214        table_box = box.find("table")
1215        table_rows = table_box.find_all("tr")
1216
1217        # Date/attendance
1218        game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text
1219        game_date_str = game_date_str.replace("\n", "")
1220        game_date_str = game_date_str.strip()
1221        game_date_str = game_date_str.replace("TBA ", "TBA")
1222        game_date_str = game_date_str.replace("TBD ", "TBD")
1223        game_date_str = game_date_str.replace("PM ", "PM")
1224        game_date_str = game_date_str.replace("AM ", "AM")
1225        game_date_str = game_date_str.strip()
1226        attendance_str = table_rows[0].find(
1227            "div",
1228            {"class": "col p-0 text-right"}
1229        ).text
1230
1231        attendance_str = attendance_str.replace("Attend:", "")
1232        attendance_str = attendance_str.replace(",", "")
1233        attendance_str = attendance_str.replace("\n", "")
1234        if (
1235            "st" in attendance_str.lower() or
1236            "nd" in attendance_str.lower() or
1237            "rd" in attendance_str.lower() or
1238            "th" in attendance_str.lower()
1239        ):
1240            # This is not an attendance,
1241            # this is whatever quarter/half/inning this game is in.
1242            attendance_num = None
1243        elif "final" in attendance_str.lower():
1244            attendance_num = None
1245        elif len(attendance_str) > 0:
1246            attendance_num = int(attendance_str)
1247        else:
1248            attendance_num = None
1249
1250        if "(" in game_date_str:
1251            game_date_str = game_date_str.replace(")", "")
1252            game_date_str, game_num = game_date_str.split("(")
1253            game_num = int(game_num)
1254
1255        if "TBA" in game_date_str:
1256            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
1257        elif "tba" in game_date_str:
1258            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
1259        elif "TBD" in game_date_str:
1260            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
1261        elif "tbd" in game_date_str:
1262            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
1263        elif (
1264            "tbd" not in game_date_str.lower() and
1265            ":" not in game_date_str.lower()
1266        ):
1267            game_date_str = game_date_str.replace(" ", "")
1268            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
1269        else:
1270            game_datetime = datetime.strptime(
1271                game_date_str,
1272                '%m/%d/%Y %I:%M %p'
1273            )
1274        game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
1275
1276        game_alt_text = table_rows[1].find_all("td")[0].text
1277        if game_alt_text is not None and len(game_alt_text) > 0:
1278            game_alt_text = game_alt_text.replace("\n", "")
1279            game_alt_text = game_alt_text.strip()
1280
1281        if len(game_alt_text) == 0:
1282            game_alt_text = None
1283
1284        urls_arr = box.find_all("a")
1285
1286        for u in urls_arr:
1287            url_temp = u.get("href")
1288            if "contests" in url_temp:
1289                game_id = url_temp
1290                del url_temp
1291
1292        if game_id is None:
1293            for r in range(0, len(table_rows)):
1294                temp = table_rows[r]
1295                temp_id = temp.get("id")
1296
1297                if temp_id is not None and len(temp_id) > 0:
1298                    game_id = temp_id
1299
1300        del urls_arr
1301
1302        game_id = game_id.replace("/contests", "")
1303        game_id = game_id.replace("/box_score", "")
1304        game_id = game_id.replace("/livestream_scoreboards", "")
1305        game_id = game_id.replace("/", "")
1306        game_id = game_id.replace("contest_", "")
1307        game_id = int(game_id)
1308
1309        table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"})
1310        away_team_row = table_rows[0]
1311        home_team_row = table_rows[1]
1312
1313        # Away team
1314        td_arr = away_team_row.find_all("td")
1315
1316        try:
1317            away_team_name = td_arr[0].find("img").get("alt")
1318        except Exception:
1319            away_team_name = td_arr[1].text
1320        away_team_name = away_team_name.replace("\n", "")
1321        away_team_name = away_team_name.strip()
1322
1323        try:
1324            away_team_id = td_arr[1].find("a").get("href")
1325            away_team_id = away_team_id.replace("/teams/", "")
1326            away_team_id = int(away_team_id)
1327        except AttributeError:
1328            away_team_id = None
1329            logging.info("No team ID found for the away team")
1330        except Exception as e:
1331            raise e
1332
1333        away_points_scored = td_arr[-1].text
1334        away_points_scored = away_points_scored.replace("\n", "")
1335        away_points_scored = away_points_scored.replace("\xa0", "")
1336        if len(away_points_scored) > 0:
1337            away_points_scored = int(away_points_scored)
1338        else:
1339            away_points_scored = 0
1340
1341        del td_arr
1342
1343        # Home team
1344        td_arr = home_team_row.find_all("td")
1345
1346        try:
1347            home_team_name = td_arr[0].find("img").get("alt")
1348        except Exception:
1349            home_team_name = td_arr[1].text
1350        home_team_name = home_team_name.replace("\n", "")
1351        home_team_name = home_team_name.strip()
1352
1353        try:
1354            home_team_id = td_arr[1].find("a").get("href")
1355            home_team_id = home_team_id.replace("/teams/", "")
1356            home_team_id = int(home_team_id)
1357        except AttributeError:
1358            home_team_id = None
1359            logging.info("No team ID found for the home team")
1360        except Exception as e:
1361            raise e
1362
1363        home_points_scored = td_arr[-1].text
1364        home_points_scored = home_points_scored.replace("\n", "")
1365        home_points_scored = home_points_scored.replace("\xa0", "")
1366        if len(home_points_scored) > 0:
1367            home_points_scored = int(home_points_scored)
1368        else:
1369            home_points_scored = 0
1370
1371        temp_df = pd.DataFrame(
1372            {
1373                "season": season,
1374                "sport_id": sport_id,
1375                "game_date": game_datetime.strftime("%Y-%m-%d"),
1376                "game_datetime": game_datetime.isoformat(),
1377                "game_id": game_id,
1378                "formatted_level": formatted_level,
1379                "ncaa_level": ncaa_level,
1380                "game_alt_text": game_alt_text,
1381                "away_team_id": away_team_id,
1382                "away_team_name": away_team_name,
1383                "home_team_id": home_team_id,
1384                "home_team_name": home_team_name,
1385                "home_points_scored": home_points_scored,
1386                "away_points_scored": away_points_scored,
1387                "attendance": attendance_num
1388            },
1389            index=[0]
1390        )
1391        schedule_df_arr.append(temp_df)
1392
1393        del temp_df
1394
1395    if len(schedule_df_arr) >= 1:
1396        schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1397    else:
1398        logging.warning(
1399            "Could not find any game(s) for "
1400            + f"{game_datetime.year:00d}-{game_datetime.month:00d}"
1401            + f"-{game_datetime.day:00d}. "
1402            + "If you believe this is an error, "
1403            + "please raise an issue at "
1404            + "\n https://github.com/armstjc/ncaa_stats_py/issues \n"
1405        )
1406    return schedule_df

Given a date and NCAA level, this function retrieves every basketball game for that date.

Parameters

game_date (str | date | datetime, mandatory): Required argument. Specifies the date you want a basketball schedule from. For best results, pass a string formatted as "YYYY-MM-DD".

level (int | str, optional): Optional argument. Specifies the level/division you want a NCAA basketball schedule from. This can either be an integer (1-3) or a string ("I"-"III"). Defaults to "I".

get_wbb_data (bool, optional): Optional argument. If you want women's basketball data instead of men's basketball data, set this to True.

Usage

from ncaa_stats_py.basketball import get_basketball_day_schedule


# Get all DI games that will be played on April 22nd, 2025.
print("Get all DI games that will be played on April 22nd, 2025.")
df = get_basketball_day_schedule("2025-04-22", level=1)
print(df)

# Get all division II games that were played on February 14th, 2025.
print("Get all division II games that were played on February 14th, 2025.")
df = get_basketball_day_schedule("2025-02-14", level="II")
print(df)

# Get all DI games that were played on December 10th, 2024.
print("Get all games that were played on December 10th, 2024.")
df = get_basketball_day_schedule("2024-12-10", level="I")
print(df)

# Get all DI games (if any) that were played on December 12th, 2024.
print("Get all DI games (if any) that were played on December 12th, 2024.")
df = get_basketball_day_schedule("2024-12-12")
print(df)

# Get all DII games played on January 14th, 2024.
print("Get all DII games played on January 14th, 2024.")
df = get_basketball_day_schedule("2024-01-14", level="II")
print(df)

# Get all division III games played on December 16th, 2023.
print("Get all division III games played on December 16th, 2023.")
df = get_basketball_day_schedule("2023-12-16", level="III")
print(df)

Returns

A pandas DataFrame object with all basketball games played on that day, for that NCAA division/level.

def get_full_basketball_schedule( season: int, level: str | int = 'I', get_wbb_data: bool = False) -> pandas.core.frame.DataFrame:
1409def get_full_basketball_schedule(
1410    season: int,
1411    level: str | int = "I",
1412    get_wbb_data: bool = False
1413) -> pd.DataFrame:
1414    """
1415    Retrieves a full basketball schedule,
1416    from an NCAA level (`"I"`, `"II"`, `"III"`).
1417    The way this is done is by going through every team in a division,
1418    and parsing the schedules of every team in a division.
1419
1420    This function will take time when first run (30-60 minutes)!
1421    You have been warned.
1422
1423    Parameters
1424    ----------
1425    `season` (int, mandatory):
1426        Specifies the season you want a schedule from.
1427
1428    `level` (int | str, mandatory):
1429        Specifies the team you want a schedule from.
1430
1431    `get_wbb_data` (bool, optional):
1432        Optional argument.
1433        If you want women's basketball data instead of men's basketball data,
1434        set this to `True`.
1435
1436    Usage
1437    ----------
1438    ```python
1439
1440    from ncaa_stats_py.basketball import get_full_basketball_schedule
1441
1442    # Get the entire 2024 schedule for the 2024 D1 basketball season.
1443    print("Get the entire 2024 schedule for the 2024 D1 basketball season.")
1444    df = get_full_basketball_schedule(season=2024, level="I")
1445    print(df)
1446
1447    # You can also input `level` as an integer.
1448    # In addition, this and other functions cache data,
1449    # so this should load very quickly
1450    # compared to the first run of this function.
1451    print("You can also input `level` as an integer.")
1452    print(
1453        "In addition, this and other functions cache data, "
1454        + "so this should load very quickly "
1455        + "compared to the first run of this function."
1456    )
1457    df = get_full_basketball_schedule(season=2024, level=1)
1458    print(df)
1459
1460    ```
1461
1462    Returns
1463    ----------
1464    A pandas `DataFrame` object with an NCAA basketball
1465    schedule for a specific season and level.
1466    """
1467
1468    sport_id = ""
1469    load_from_cache = True
1470    home_dir = expanduser("~")
1471    home_dir = _format_folder_str(home_dir)
1472    schedule_df = pd.DataFrame()
1473    schedule_df_arr = []
1474    temp_df = pd.DataFrame()
1475    formatted_level = ""
1476    ncaa_level = 0
1477
1478    if get_wbb_data is True:
1479        sport_id = "WBB"
1480    else:
1481        sport_id = "MBB"
1482
1483    if isinstance(level, int) and level == 1:
1484        formatted_level = "I"
1485        ncaa_level = 1
1486    elif isinstance(level, int) and level == 2:
1487        formatted_level = "II"
1488        ncaa_level = 2
1489    elif isinstance(level, int) and level == 3:
1490        formatted_level = "III"
1491        ncaa_level = 3
1492    elif isinstance(level, str) and (
1493        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1494    ):
1495        ncaa_level = 1
1496        formatted_level = level.upper()
1497    elif isinstance(level, str) and (
1498        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1499    ):
1500        ncaa_level = 2
1501        formatted_level = level.upper()
1502    elif isinstance(level, str) and (
1503        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1504    ):
1505        ncaa_level = 3
1506        formatted_level = level.upper()
1507
1508    del level
1509
1510    if exists(f"{home_dir}/.ncaa_stats_py/"):
1511        pass
1512    else:
1513        mkdir(f"{home_dir}/.ncaa_stats_py/")
1514
1515    if exists(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"):
1516        pass
1517    else:
1518        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/")
1519
1520    if exists(
1521        f"{home_dir}/.ncaa_stats_py/" +
1522        f"basketball_{sport_id}/full_schedule/"
1523    ):
1524        pass
1525    else:
1526        mkdir(
1527            f"{home_dir}/.ncaa_stats_py/" +
1528            f"basketball_{sport_id}/full_schedule/"
1529        )
1530
1531    if exists(
1532        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/full_schedule/"
1533        + f"{season}_{formatted_level}_full_schedule.csv"
1534    ):
1535        teams_df = pd.read_csv(
1536            f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/full_schedule/"
1537            + f"{season}_{formatted_level}_full_schedule.csv"
1538        )
1539        file_mod_datetime = datetime.fromtimestamp(
1540            getmtime(
1541                f"{home_dir}/.ncaa_stats_py/" +
1542                f"basketball_{sport_id}/full_schedule/"
1543                + f"{season}_{formatted_level}_full_schedule.csv"
1544            )
1545        )
1546    else:
1547        file_mod_datetime = datetime.today()
1548        load_from_cache = False
1549
1550    now = datetime.today()
1551
1552    age = now - file_mod_datetime
1553
1554    if (
1555        age.days > 1 and
1556        season >= now.year
1557    ):
1558        load_from_cache = False
1559
1560    if load_from_cache is True:
1561        return teams_df
1562
1563    teams_df = load_basketball_teams()
1564    teams_df = teams_df[
1565        (teams_df["season"] == season) &
1566        (teams_df["ncaa_division"] == ncaa_level)
1567    ]
1568    team_ids_arr = teams_df["team_id"].to_numpy()
1569
1570    for team_id in tqdm(team_ids_arr):
1571        temp_df = get_basketball_team_schedule(team_id=team_id)
1572        schedule_df_arr.append(temp_df)
1573
1574    schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1575    schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first")
1576    schedule_df.to_csv(
1577        f"{home_dir}/.ncaa_stats_py/"
1578        + f"basketball_{sport_id}/full_schedule/"
1579        + f"{season}_{formatted_level}_full_schedule.csv",
1580        index=False,
1581    )
1582    return schedule_df

Retrieves a full basketball schedule, from an NCAA level ("I", "II", "III"). The way this is done is by going through every team in a division, and parsing the schedules of every team in a division.

This function will take time when first run (30-60 minutes)! You have been warned.

Parameters

season (int, mandatory): Specifies the season you want a schedule from.

level (int | str, optional): Specifies the NCAA level/division you want a schedule from. This can either be an integer (1-3) or a string ("I"-"III"). Defaults to "I".

get_wbb_data (bool, optional): Optional argument. If you want women's basketball data instead of men's basketball data, set this to True.

Usage

from ncaa_stats_py.basketball import get_full_basketball_schedule

# Get the entire 2024 schedule for the 2024 D1 basketball season.
print("Get the entire 2024 schedule for the 2024 D1 basketball season.")
df = get_full_basketball_schedule(season=2024, level="I")
print(df)

# You can also input `level` as an integer.
# In addition, this and other functions cache data,
# so this should load very quickly
# compared to the first run of this function.
print("You can also input `level` as an integer.")
print(
    "In addition, this and other functions cache data, "
    + "so this should load very quickly "
    + "compared to the first run of this function."
)
df = get_full_basketball_schedule(season=2024, level=1)
print(df)

Returns

A pandas DataFrame object with an NCAA basketball schedule for a specific season and level.

def get_basketball_team_roster(team_id: int) -> pandas.core.frame.DataFrame:
def get_basketball_team_roster(team_id: int) -> pd.DataFrame:
    """
    Retrieves a basketball team's roster from a given team ID.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a roster from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_team_roster

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the basketball roster for the
    # 2024 Alabama St. MBB team (D1, ID: 560655).
    print(
        "Get the basketball roster for the " +
        "2024 Alabama St. MBB team (D1, ID: 560655)."
    )
    df = get_basketball_team_roster(560655)
    print(df)

    # Get the basketball roster for the
    # 2023 Roberts Wesleyan MBB team (D2, ID: 542994).
    print(
        "Get the basketball roster for the " +
        "2023 Roberts Wesleyan MBB team (D2, ID: 542994)."
    )
    df = get_basketball_team_roster(542994)
    print(df)

    # Get the basketball roster for the
    # 2022 Pacific Lutheran MBB team (D3, ID: 528255).
    print(
        "Get the basketball roster for the " +
        "2022 Pacific Lutheran MBB team (D3, ID: 528255)."
    )
    df = get_basketball_team_roster(528255)
    print(df)

    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the basketball roster for the
    # 2021 Michigan St. WBB team (D1, ID: 506069).
    print(
        "Get the basketball roster for the " +
        "2021 Michigan St. WBB team (D1, ID: 506069)."
    )
    df = get_basketball_team_roster(506069)
    print(df)

    # Get the basketball roster for the
    # 2020 Shippensburg WBB team (D2, ID: 484864).
    print(
        "Get the basketball roster for the " +
        "2020 Shippensburg WBB team (D2, ID: 484864)."
    )
    df = get_basketball_team_roster(484864)
    print(df)

    # Get the basketball roster for the
    # 2019 Maranatha Baptist team (D3, ID: 452546).
    print(
        "Get the basketball roster for the " +
        "2019 Maranatha Baptist team (D3, ID: 452546)."
    )
    df = get_basketball_team_roster(452546)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with
    an NCAA basketball team's roster for that season.

    Raises
    ----------
    `IndexError` when `team_id` is in neither the men's
    nor the women's team list.

    `ValueError` when the roster page has no player rows,
    or when the roster table has a column this function does not handle.
    """
    roster_df_arr = []
    url = f"https://stats.ncaa.org/teams/{team_id}/roster"
    home_dir = _format_folder_str(expanduser("~"))

    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "ncaa_division",
        "ncaa_division_formatted",
        "team_conference_name",
        "school_id",
        "school_name",
        "player_id",
        "player_jersey_num",
        "player_full_name",
        "player_first_name",
        "player_last_name",
        "player_class",
        "player_positions",
        "player_height_string",
        "player_weight",
        "player_hometown",
        "player_high_school",
        "player_G",
        "player_GS",
        "player_url",
    ]

    # Look for the team in the men's list first.
    # Any failure there (most commonly the ID not being an MBB team)
    # falls through to the women's list.
    try:
        team_df = load_basketball_teams()
        team_df = team_df[team_df["team_id"] == team_id]

        season = team_df["season"].iloc[0]
        ncaa_division = team_df["ncaa_division"].iloc[0]
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        team_conference_name = team_df["team_conference_name"].iloc[0]
        school_name = team_df["school_name"].iloc[0]
        school_id = int(team_df["school_id"].iloc[0])
        sport_id = "MBB"
    except Exception:
        team_df = load_basketball_teams(get_wbb_data=True)
        team_df = team_df[team_df["team_id"] == team_id]

        if team_df.empty:
            raise IndexError(
                f"Could not find team ID `{team_id}` "
                + "in the MBB or WBB team lists."
            ) from None

        season = team_df["season"].iloc[0]
        ncaa_division = team_df["ncaa_division"].iloc[0]
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        team_conference_name = team_df["team_conference_name"].iloc[0]
        school_name = team_df["school_name"].iloc[0]
        school_id = int(team_df["school_id"].iloc[0])
        sport_id = "WBB"

    del team_df

    base_dir = f"{home_dir}/.ncaa_stats_py/"
    sport_dir = f"{base_dir}basketball_{sport_id}/"
    roster_dir = f"{sport_dir}rosters/"
    cache_file = f"{roster_dir}{team_id}_roster.csv"

    # `mkdir` only creates one level, so create each folder in order.
    for folder in (base_dir, sport_dir, roster_dir):
        if not exists(folder):
            mkdir(folder)

    now = datetime.today()

    if exists(cache_file):
        age = now - datetime.fromtimestamp(getmtime(cache_file))
        # Only a cache for the current (or a future) season goes stale.
        cache_is_stale = age.days >= 14 and season >= now.year
        if not cache_is_stale:
            return pd.read_csv(cache_file)

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")
    try:
        school_name = soup.find(
            "div",
            {"class": "card"}
        ).find("img").get("alt")
    except Exception:
        # No logo on the page, use the link text minus its last word.
        school_name = soup.find("div", {"class": "card"}).find("a").text
        school_name = school_name.rsplit(" ", maxsplit=1)[0]

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )
    # For NCAA basketball, the season starts in the fall semester,
    # and ends in the following spring semester.
    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season.
    # The start year is used (instead of gluing the century digits
    # to the last two digits) so that "1999-00" becomes 2000, not 1900.
    if "-" in season_name:
        season = int(season_name.split("-")[0]) + 1
    else:
        season = int(f"{season_name[0:2]}{season_name[-2:]}")

    try:
        table = soup.find(
            "table",
            {"class": "dataTable small_font"},
        )

        table_headers = table.find("thead").find_all("th")
    except AttributeError:
        # The roster table can carry an extra `no_padding` class.
        table = soup.find(
            "table",
            {"class": "dataTable small_font no_padding"},
        )

        table_headers = table.find("thead").find_all("th")
    table_headers = [x.text for x in table_headers]

    t_rows = table.find("tbody").find_all("tr")

    for t in t_rows:
        t_cells = [x.text for x in t.find_all("td")]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        player_link = t.find("a")
        if player_link is None:
            # A row without a link has no player page and no player ID.
            temp_df["player_url"] = None
            temp_df["player_id"] = None
        else:
            player_href = player_link.get("href")
            temp_df["player_url"] = f"https://stats.ncaa.org{player_href}"
            temp_df["player_id"] = int(
                player_href.replace("/players", "").replace("/", "")
            )

        roster_df_arr.append(temp_df)

    if not roster_df_arr:
        raise ValueError(
            f"Could not find any players on the roster page at {url}"
        )

    roster_df = pd.concat(roster_df_arr, ignore_index=True)
    roster_df = roster_df.infer_objects()
    roster_df["season"] = season
    roster_df["season_name"] = season_name
    roster_df["ncaa_division"] = ncaa_division
    roster_df["ncaa_division_formatted"] = ncaa_division_formatted
    roster_df["team_conference_name"] = team_conference_name
    roster_df["school_id"] = school_id
    roster_df["school_name"] = school_name
    roster_df["sport_id"] = sport_id

    roster_df.rename(
        columns={
            "GP": "player_G",
            "GS": "player_GS",
            "#": "player_jersey_num",
            "Name": "player_full_name",
            "Class": "player_class",
            "Position": "player_positions",
            "Height": "player_height_string",
            "Hometown": "player_hometown",
            "High School": "player_high_school",
        },
        inplace=True
    )

    # `reindex` guarantees two columns even when no name contains a space,
    # in which case the split would only produce one.
    name_parts = roster_df["player_full_name"].str.split(
        " ", n=1, expand=True
    ).reindex(columns=[0, 1])
    roster_df["player_first_name"] = name_parts[0]
    roster_df["player_last_name"] = name_parts[1]
    roster_df = roster_df.infer_objects()

    # Fail loudly if the site adds a column this function does not know.
    for column_name in roster_df.columns:
        if column_name not in stat_columns:
            raise ValueError(
                f"Unhandled column name {column_name}"
            )

    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)

    roster_df.to_csv(cache_file, index=False)
    return roster_df

Retrieves a basketball team's roster from a given team ID.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want a roster from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.basketball import get_basketball_team_roster

########################################
#          Men's Basketball            #
########################################

# Get the basketball roster for the
# 2024 Alabama St. MBB team (D1, ID: 560655).
print(
    "Get the basketball roster for the " +
    "2024 Alabama St. MBB team (D1, ID: 560655)."
)
df = get_basketball_team_roster(560655)
print(df)

# Get the basketball roster for the
# 2023 Roberts Wesleyan MBB team (D2, ID: 542994).
print(
    "Get the basketball roster for the " +
    "2023 Roberts Wesleyan MBB team (D2, ID: 542994)."
)
df = get_basketball_team_roster(542994)
print(df)

# Get the basketball roster for the
# 2022 Pacific Lutheran MBB team (D3, ID: 528255).
print(
    "Get the basketball roster for the " +
    "2022 Pacific Lutheran MBB team (D3, ID: 528255)."
)
df = get_basketball_team_roster(528255)
print(df)

########################################
#          Women's Basketball          #
########################################

# Get the basketball roster for the
# 2021 Michigan St. WBB team (D1, ID: 506069).
print(
    "Get the basketball roster for the " +
    "2021 Michigan St. WBB team (D1, ID: 506069)."
)
df = get_basketball_team_roster(506069)
print(df)

# Get the basketball roster for the
# 2020 Shippensburg WBB team (D2, ID: 484864).
print(
    "Get the basketball roster for the " +
    "2020 Shippensburg WBB team (D2, ID: 484864)."
)
df = get_basketball_team_roster(484864)
print(df)

# Get the basketball roster for the
# 2019 Maranatha Baptist team (D3, ID: 452546).
print(
    "Get the basketball roster for the " +
    "2019 Maranatha Baptist team (D3, ID: 452546)."
)
df = get_basketball_team_roster(452546)
print(df)

Returns

A pandas DataFrame object with an NCAA basketball team's roster for that season.

def get_basketball_player_season_stats(team_id: int) -> pandas.core.frame.DataFrame:
def _basketball_team_metadata(team_id: int, get_wbb_data: bool) -> dict:
    """
    Looks up a single team in the cached NCAA basketball team list.

    Returns a dictionary with the season, division, conference,
    and school information stored for `team_id`.
    Raises an `IndexError` if `team_id` is not in the requested team list.
    """
    if get_wbb_data is True:
        team_df = load_basketball_teams(get_wbb_data=True)
    else:
        team_df = load_basketball_teams()

    team_df = team_df[team_df["team_id"] == team_id]

    if len(team_df) == 0:
        # Same exception type `.iloc[0]` would raise on an empty frame,
        # but with a message that tells the caller what went wrong.
        raise IndexError(
            f"Could not find team ID {team_id} in the list of " +
            "NCAA basketball teams."
        )

    return {
        "season": team_df["season"].iloc[0],
        "ncaa_division": int(team_df["ncaa_division"].iloc[0]),
        "ncaa_division_formatted": team_df[
            "ncaa_division_formatted"
        ].iloc[0],
        "team_conference_name": team_df["team_conference_name"].iloc[0],
        "school_name": team_df["school_name"].iloc[0],
        "school_id": int(team_df["school_id"].iloc[0]),
    }


def _split_sortable_player_name(sortable_name: str | None) -> tuple[str, str]:
    """
    Splits a comma separated sortable name (`"Last,First"`)
    into a `(last_name, first_name)` tuple.

    Everything before the final comma is treated as the last name,
    so `"Smith,Jr.,John"` becomes `("Smith Jr.", "John")`.
    Two empty strings are returned if the name cannot be split.
    """
    if not sortable_name:
        return "", ""

    name_parts = [x.strip() for x in sortable_name.split(",")]

    if len(name_parts) < 2:
        return "", ""

    return " ".join(name_parts[:-1]), name_parts[-1]


def _season_from_season_name(season_name: str) -> int:
    """
    Converts an academic year label into the season it ends in.

    `"2011-12"` becomes `2012`, and `"1999-00"` becomes `2000`.
    """
    return int(season_name.strip()[0:4]) + 1


def get_basketball_player_season_stats(
    team_id: int,
) -> pd.DataFrame:
    """
    Given a team ID, this function retrieves and parses
    the season stats for all of the players in a given basketball team.

    The team is looked up in the men's basketball team list first,
    and in the women's basketball team list if that lookup fails.
    Results are cached as a CSV file in `~/.ncaa_stats_py/`.
    A cached file is reused unless it is more than a day old
    and the team's season is the current year or later.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want basketball stats from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_player_season_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the season stats for the
    # 2024 Illinois MBB team (D1, ID: 560955).
    print(
        "Get the season stats for the " +
        "2024 Illinois MBB team (D1, ID: 560955)."
    )
    df = get_basketball_player_season_stats(560955)
    print(df)

    # Get the season stats for the
    # 2023 Chico St. MBB team (D2, ID: 542605).
    print(
        "Get the season stats for the " +
        "2023 Chico St. MBB team (D2, ID: 542605)."
    )
    df = get_basketball_player_season_stats(542605)
    print(df)

    # Get the season stats for the
    # 2022 Maine Maritime MBB team (D3, ID: 528070).
    print(
        "Get the season stats for the " +
        "2022 Maine Maritime MBB team (D3, ID: 528070)."
    )
    df = get_basketball_player_season_stats(528070)
    print(df)

    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the season stats for the
    # 2021 Louisville WBB team (D1, ID: 506050).
    print(
        "Get the season stats for the " +
        "2021 Louisville WBB team (D1, ID: 506050)."
    )
    df = get_basketball_player_season_stats(506050)
    print(df)

    # Get the season stats for the
    # 2020 Paine WBB team (D2, ID: 484830).
    print(
        "Get the season stats for the " +
        "2020 Paine WBB team (D2, ID: 484830)."
    )
    df = get_basketball_player_season_stats(484830)
    print(df)

    # Get the season stats for the
    # 2019 Pomona-Pitzer team (D3, ID: 452413).
    print(
        "Get the season stats for the " +
        "2019 Pomona-Pitzer team (D3, ID: 452413)."
    )
    df = get_basketball_player_season_stats(452413)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the season stats for
    all players with a given NCAA basketball team.
    """

    stats_df_arr = []

    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "team_id",
        "team_conference_name",
        "school_id",
        "school_name",
        "ncaa_division",
        "ncaa_division_formatted",
        "player_id",
        "player_jersey_number",
        "player_last_name",
        "player_first_name",
        "player_full_name",
        "player_class",
        "player_position",
        "player_height",
        "GP",
        "GS",
        "MP_str",
        "MP_minutes",
        "MP_seconds",
        "MP_total_seconds",
        "FGM",
        "FGA",
        "FG%",
        "eFG%",
        "TSA",
        "TS%",
        "2PM",
        "2PA",
        "2FG%",
        "3PM",
        "3PA",
        "3FG%",
        "FT",
        "FTA",
        "FT%",
        "PTS",
        "ORB",
        "DRB",
        "TRB",
        "Avg",
        "AST",
        "TOV",
        "TOV%",
        "STL",
        "BLK",
        "PF",
        "DBL_DBL",
        "TRP_DBL",
        "DQ",
        "TF",
    ]

    # The caller does not say if this is a MBB or a WBB team,
    # so try the MBB team list first, and fall back to the WBB team list
    # if that lookup fails for any reason.
    try:
        team_info = _basketball_team_metadata(team_id, get_wbb_data=False)
        sport_id = "MBB"
    except Exception as e:
        logging.info(
            f"Could not find team ID {team_id} in the MBB team list. " +
            f"Trying the WBB team list instead. Full exception `{e}`"
        )
        team_info = _basketball_team_metadata(team_id, get_wbb_data=True)
        sport_id = "WBB"

    season = team_info["season"]
    ncaa_division = team_info["ncaa_division"]
    ncaa_division_formatted = team_info["ncaa_division_formatted"]
    team_conference_name = team_info["team_conference_name"]
    school_name = team_info["school_name"]
    school_id = team_info["school_id"]

    del team_info

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats"

    base_dir = f"{home_dir}/.ncaa_stats_py/"
    sport_dir = f"{base_dir}basketball_{sport_id}/"
    cache_dir = f"{sport_dir}player_season_stats/"

    for folder in (base_dir, sport_dir, cache_dir):
        if not exists(folder):
            mkdir(folder)

    # The cache file name is built once, from the team list's season,
    # so the file that gets written is the same file that gets looked up.
    cache_file = (
        cache_dir +
        f"{season:00d}_{school_id:00d}_player_season_stats.csv"
    )

    if exists(cache_file):
        now = datetime.today()
        age = now - datetime.fromtimestamp(getmtime(cache_file))

        # Only a stale file for a current (or future) season
        # needs to be downloaded again.
        if not (age.days > 1 and season >= now.year):
            return pd.read_csv(cache_file)

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )
    # For NCAA basketball, the season always starts in the fall semester,
    # and ends in the spring semester.
    # Thus, if `season_name` = "2011-12", this is the "2012" basketball season,
    # because 2012 would encompass the fall and spring semesters
    # for NCAA member institutions.
    season = _season_from_season_name(season_name)

    table_data = soup.find(
        "table",
        {"id": "stat_grid", "class": "small_font dataTable table-bordered"},
    )

    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]

    t_rows = table_data.find("tbody").find_all("tr", {"class": "text"})
    for t in t_rows:
        row_cells = t.find_all("td")

        # Rows with "team" in the name cell are skipped.
        if "team" in row_cells[1].text.lower():
            continue

        p_last, p_first = _split_sortable_player_name(
            row_cells[1].get("data-order")
        )

        temp_df = pd.DataFrame(
            data=[[x.text.strip() for x in row_cells]],
            columns=table_headers,
        )

        # The player ID is the numeric part of the row's first link.
        player_id = t.find("a").get("href")
        player_id = player_id.replace("/players", "").replace("/", "")
        player_id = int(player_id)

        temp_df["player_id"] = player_id
        temp_df["player_last_name"] = p_last
        temp_df["player_first_name"] = p_first

        stats_df_arr.append(temp_df)

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("", None)

    stats_df["season"] = season
    stats_df["season_name"] = season_name
    stats_df["school_id"] = school_id
    stats_df["school_name"] = school_name
    stats_df["ncaa_division"] = ncaa_division
    stats_df["ncaa_division_formatted"] = ncaa_division_formatted
    stats_df["team_conference_name"] = team_conference_name
    stats_df["sport_id"] = sport_id
    stats_df["team_id"] = team_id

    stats_df = stats_df.infer_objects()

    # Different seasons use different headers for the same stat,
    # so every known spelling is mapped to one column name.
    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "3FG": "3PM",
            "3FGA": "3PA",
            "ORebs": "ORB",
            "DRebs": "DRB",
            "Tot Reb": "TRB",
            "TO": "TOV",
            "Dbl Dbl": "DBL_DBL",
            "Trpl Dbl": "TRP_DBL",
            "Fouls": "PF",
            "Tech Fouls": "TF",
            "Effective FG Pct.": "eFG%",
            "MP": "MP_str",
            "Min": "MP_str",
            "Off Reb": "ORB",
            "Def Reb": "DRB",
            "ST": "STL",
            "BLKS": "BLK",
        },
        inplace=True,
    )
    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "GS": "uint16",
            "FGM": "uint16",
            "FGA": "uint16",
            "3PM": "uint16",
            "3PA": "uint16",
            "FT": "uint16",
            "FTA": "uint16",
            "PTS": "uint16",
            "ORB": "uint16",
            "DRB": "uint16",
            "TRB": "uint16",
            "AST": "uint16",
            "TOV": "uint16",
            "STL": "uint16",
            "BLK": "uint16",
            "PF": "uint16",
            "DBL_DBL": "uint16",
            "TRP_DBL": "uint16",
            "school_id": "uint32",
        }
    )

    # This is a separate function call because these stats
    # *don't* exist in every season.
    if "DQ" not in stats_df.columns:
        stats_df["DQ"] = None

    if "TF" not in stats_df.columns:
        stats_df["TF"] = None

    stats_df = stats_df.astype(
        {
            "DQ": "uint16",
            "TF": "uint16",
        },
        errors="ignore"
    )

    # "MM:SS" -> minutes and seconds.
    # Values that are blank (filled with 0 above) or that have no ":"
    # are counted as 0 instead of crashing the integer conversion.
    mp_parts = (
        stats_df["MP_str"]
        .astype(str)
        .str.split(":", n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    stats_df["MP_minutes"] = (
        pd.to_numeric(mp_parts[0], errors="coerce").fillna(0).astype("uint64")
    )
    stats_df["MP_seconds"] = (
        pd.to_numeric(mp_parts[1], errors="coerce").fillna(0).astype("uint64")
    )
    stats_df["MP_total_seconds"] = (
        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
    )

    # Every percentage is recalculated from the counting stats
    # and stored as a fraction, rounded to 4 decimal places.
    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"])
    stats_df["FG%"] = stats_df["FG%"].round(4)

    # The 2-point and 3-point percentages are stored under the names
    # listed in `stat_columns` ("2FG%" and "3FG%"),
    # otherwise the final `reindex()` would throw the calculation away.
    stats_df["3FG%"] = (stats_df["3PM"] / stats_df["3PA"])
    stats_df["3FG%"] = stats_df["3FG%"].round(4)

    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"])
    stats_df["FT%"] = stats_df["FT%"].round(4)

    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
    stats_df["2FG%"] = (stats_df["2PM"] / stats_df["2PA"])
    stats_df["2FG%"] = stats_df["2FG%"].round(4)

    stats_df["eFG%"] = (
        (
            stats_df["FGM"] +
            (stats_df["3PM"] * 0.5)
        ) /
        stats_df["FGA"]
    )
    stats_df["eFG%"] = stats_df["eFG%"].round(4)

    stats_df["TSA"] = (
        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
    )
    stats_df["TS%"] = stats_df["PTS"] / (2 * stats_df["TSA"])
    stats_df["TS%"] = stats_df["TS%"].round(4)

    stats_df["TOV%"] = (
        stats_df["TOV"] /
        (
            stats_df["FGA"] +
            (stats_df["FTA"] * 0.44) +
            stats_df["TOV"]
        )
    )
    stats_df["TOV%"] = stats_df["TOV%"].round(4)

    # In many seasons, there is an ["Avg"] column
    # that would otherwise completely screw up
    # any attempts to use the final DataFrame,
    # because it would be a duplicate column
    # that pandas wouldn't complain about
    # until it's too late.
    duplicate_cols = stats_df.columns[stats_df.columns.duplicated()]
    stats_df.drop(columns=duplicate_cols, inplace=True)

    stats_df = stats_df.reindex(columns=stat_columns)
    stats_df.to_csv(
        cache_file,
        index=False,
    )

    return stats_df

Given a team ID, this function retrieves and parses the season stats for all of the players in a given basketball team.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want basketball stats from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.basketball import get_basketball_player_season_stats

########################################
#          Men's Basketball            #
########################################

# Get the season stats for the
# 2024 Illinois MBB team (D1, ID: 560955).
print(
    "Get the season stats for the " +
    "2024 Illinois MBB team (D1, ID: 560955)."
)
df = get_basketball_player_season_stats(560955)
print(df)

# Get the season stats for the
# 2023 Chico St. MBB team (D2, ID: 542605).
print(
    "Get the season stats for the " +
    "2023 Chico St. MBB team (D2, ID: 542605)."
)
df = get_basketball_player_season_stats(542605)
print(df)

# Get the season stats for the
# 2022 Maine Maritime MBB team (D3, ID: 528070).
print(
    "Get the season stats for the " +
    "2022 Maine Maritime MBB team (D3, ID: 528070)."
)
df = get_basketball_player_season_stats(528070)
print(df)

########################################
#          Women's Basketball          #
########################################

# Get the season stats for the
# 2021 Louisville WBB team (D1, ID: 506050).
print(
    "Get the season stats for the " +
    "2021 Louisville WBB team (D1, ID: 506050)."
)
df = get_basketball_player_season_stats(506050)
print(df)

# Get the season stats for the
# 2020 Paine WBB team (D2, ID: 484830).
print(
    "Get the season stats for the " +
    "2020 Paine WBB team (D2, ID: 484830)."
)
df = get_basketball_player_season_stats(484830)
print(df)

# Get the season stats for the
# 2019 Pomona-Pitzer team (D3, ID: 452413).
print(
    "Get the season stats for the " +
    "2019 Pomona-Pitzer team (D3, ID: 452413)."
)
df = get_basketball_player_season_stats(452413)
print(df)

Returns

A pandas DataFrame object with the season stats for all players with a given NCAA basketball team.

def get_basketball_player_game_stats(player_id: int, season: int) -> pandas.core.frame.DataFrame:
2382def get_basketball_player_game_stats(
2383    player_id: int,
2384    season: int
2385) -> pd.DataFrame:
2386    """
2387    Given a valid player ID and season,
2388    this function retrieves the game stats for this player at a game level.
2389
2390    Parameters
2391    ----------
2392    `player_id` (int, mandatory):
2393        Required argument.
2394        Specifies the player you want game stats from.
2395
2396    `season` (int, mandatory):
2397        Required argument.
2398        Specifies the season you want game stats from.
2399
2400    Usage
2401    ----------
2402    ```python
2403
2404    from ncaa_stats_py.basketball import (
2405        get_basketball_player_game_stats
2406    )
2407
2408    # Get the batting stats of Jacob Berry in 2022 (LSU).
2409    print(
2410        "Get the batting stats of Jacob Berry in 2022 (LSU)."
2411    )
2412    df = get_basketball_player_game_stats(player_id=7579336, season=2022)
2413    print(df)
2414
2415    # Get the batting stats of Alec Burleson in 2019 (ECU).
2416    print(
2417        "Get the batting stats of Alec Burleson in 2019 (ECU)."
2418    )
2419    df = get_basketball_player_game_stats(player_id=6015715, season=2019)
2420    print(df)
2421
2422    # Get the batting stats of Hunter Bishop in 2018 (Arizona St.).
2423    print(
2424        "Get the batting stats of Hunter Bishop in 2018 (Arizona St.)."
2425    )
2426    df = get_basketball_player_game_stats(player_id=6014052, season=2019)
2427    print(df)
2428
2429    ```
2430
2431    Returns
2432    ----------
2433    A pandas `DataFrame` object with a player's batting game logs
2434    in a given season.
2435    """
2436    sport_id = ""
2437
2438    stat_columns = [
2439        "season",
2440        "game_id",
2441        "game_num",
2442        "player_id",
2443        "date",
2444        "opponent",
2445        "Result",
2446        "team_score",
2447        "opponent_score",
2448        "MP_str",
2449        "MP_minutes",
2450        "MP_seconds",
2451        "MP_total_seconds",
2452        "GP",
2453        "GS",
2454        "FGM",
2455        "FGA",
2456        "FG%",
2457        "eFG%",
2458        "2PM",
2459        "2PA",
2460        "2P%",
2461        "3PM",
2462        "3PA",
2463        "3P%",
2464        "FT",
2465        "FTA",
2466        "FT%",
2467        "ORB",
2468        "DRB",
2469        "TRB",
2470        "AST",
2471        "TOV",
2472        "TOV%",
2473        "STL",
2474        "BLK",
2475        "PF",
2476        "DQ",
2477        "TF",
2478        "TSA",
2479        "TS%",
2480        "PTS",
2481        "DBL_DBL",
2482        "TRP_DBL",
2483    ]
2484    load_from_cache = True
2485    stats_df = pd.DataFrame()
2486    stats_df_arr = []
2487    temp_df = pd.DataFrame()
2488    home_dir = expanduser("~")
2489    home_dir = _format_folder_str(home_dir)
2490
2491    # stat_id = _get_stat_id(
2492    #     sport="basketball",
2493    #     season=season,
2494    #     stat_type="batting"
2495    # )
2496    url = f"https://stats.ncaa.org/players/{player_id}"
2497
2498    if exists(f"{home_dir}/.ncaa_stats_py/"):
2499        pass
2500    else:
2501        mkdir(f"{home_dir}/.ncaa_stats_py/")
2502
2503    if exists(f"{home_dir}/.ncaa_stats_py/basketball_MBB/"):
2504        pass
2505    else:
2506        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_MBB/")
2507
2508    if exists(f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"):
2509        pass
2510    else:
2511        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/")
2512
2513    if exists(
2514        f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"
2515        + f"{season}_{player_id}_player_game_stats.csv"
2516    ):
2517        games_df = pd.read_csv(
2518            f"{home_dir}/.ncaa_stats_py/basketball_MBB/player_game_stats/"
2519            + f"{season}_{player_id}_player_game_stats.csv"
2520        )
2521        file_mod_datetime = datetime.fromtimestamp(
2522            getmtime(
2523                f"{home_dir}/.ncaa_stats_py/basketball_MBB/"
2524                + "player_game_stats/"
2525                + f"{season}_{player_id}_player_game_stats.csv"
2526            )
2527        )
2528        games_df = games_df.infer_objects()
2529        load_from_cache = True
2530    else:
2531        file_mod_datetime = datetime.today()
2532        load_from_cache = False
2533
2534    if exists(f"{home_dir}/.ncaa_stats_py/"):
2535        pass
2536    else:
2537        mkdir(f"{home_dir}/.ncaa_stats_py/")
2538
2539    if exists(f"{home_dir}/.ncaa_stats_py/basketball_WBB/"):
2540        pass
2541    else:
2542        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_WBB/")
2543
2544    if exists(f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"):
2545        pass
2546    else:
2547        mkdir(f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/")
2548
2549    if exists(
2550        f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"
2551        + f"{season}_{player_id}_player_game_stats.csv"
2552    ):
2553        games_df = pd.read_csv(
2554            f"{home_dir}/.ncaa_stats_py/basketball_WBB/player_game_stats/"
2555            + f"{season}_{player_id}_player_game_stats.csv"
2556        )
2557        file_mod_datetime = datetime.fromtimestamp(
2558            getmtime(
2559                f"{home_dir}/.ncaa_stats_py/basketball_WBB/"
2560                + "player_game_stats/"
2561                + f"{season}_{player_id}_player_game_stats.csv"
2562            )
2563        )
2564        games_df = games_df.infer_objects()
2565        load_from_cache = True
2566    else:
2567        logging.info("Could not find a WBB player game stats file")
2568
2569    now = datetime.today()
2570
2571    age = now - file_mod_datetime
2572
2573    if (
2574        age.days > 1 and
2575        (season - 1) >= now.year
2576    ):
2577        load_from_cache = False
2578
2579    if load_from_cache is True:
2580        return games_df
2581
2582    # team_df = load_basketball_teams()
2583
2584    # team_df = team_df[team_df["team_id"] == team_id]
2585
2586    # season = team_df["season"].iloc[0]
2587    # ncaa_division = team_df["ncaa_division"].iloc[0]
2588    # ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
2589    # team_conference_name = team_df["team_conference_name"].iloc[0]
2590    # school_name = team_df["school_name"].iloc[0]
2591    # school_id = int(team_df["school_id"].iloc[0])
2592
2593    # del team_df
2594    response = _get_webpage(url=url)
2595    soup = BeautifulSoup(response.text, features="lxml")
2596
2597    table_navigation = soup.find("ul", {"class": "nav nav-tabs padding-nav"})
2598    table_nav_card = table_navigation.find_all("a")
2599
2600    for u in table_nav_card:
2601        url_str = u.get("href")
2602        if "MBB" in url_str.upper():
2603            sport_id = "MBB"
2604        elif "WBB" in url_str.upper():
2605            sport_id = "WBB"
2606
2607    if sport_id is None or len(sport_id) == 0:
2608        # This should **never** be the case IRL,
2609        # but in case something weird happened and
2610        # we can't make a determination of if this is a
2611        # MBB player or a WBB player, and we somehow haven't
2612        # crashed by this point, set the sport ID to
2613        # "MBB" by default so we don't have other weirdness.
2614        logging.error(
2615            f"Could not determine if player ID {player_id} " +
2616            "is a MBB or a WBB player. " +
2617            "Because this cannot be determined, " +
2618            "we will make the automatic assumption that this is a MBB player."
2619        )
2620        sport_id = "MBB"
2621
2622    table_data = soup.find_all(
2623        "table", {"class": "small_font dataTable table-bordered"}
2624    )[1]
2625
2626    temp_table_headers = table_data.find("thead").find("tr").find_all("th")
2627    table_headers = [x.text for x in temp_table_headers]
2628
2629    del temp_table_headers
2630
2631    temp_t_rows = table_data.find("tbody")
2632    temp_t_rows = temp_t_rows.find_all("tr")
2633
2634    for t in temp_t_rows:
2635        game_num = 1
2636        ot_periods = 0
2637        # innings = 9
2638        row_id = t.get("id")
2639        opp_team_name = ""
2640
2641        if "contest" not in row_id:
2642            continue
2643        del row_id
2644
2645        t_cells = t.find_all("td")
2646        t_cells = [x.text.strip() for x in t_cells]
2647
2648        g_date = t_cells[0]
2649
2650        if "(" in g_date:
2651            g_date, game_num = g_date.split("(")
2652            g_date = g_date.strip()
2653
2654            game_num = game_num.replace(")", "")
2655            game_num = int(game_num)
2656
2657        try:
2658            opp_team_id = t.find_all("td")[1].find("a").get("href")
2659        except AttributeError as e:
2660            logging.info(
2661                "Could not extract a team ID for this game. " +
2662                f"Full exception {e}"
2663            )
2664        except Exception as e:
2665            logging.warning(
2666                "An unhandled exception has occurred when "
2667                + "trying to get the opposition team ID for this game. "
2668                f"Full exception `{e}`."
2669            )
2670            raise e
2671
2672        try:
2673            opp_team_id = opp_team_id.replace("/teams/", "")
2674            opp_team_id = opp_team_id.replace(
2675                "javascript:toggleDefensiveStats(", ""
2676            )
2677            opp_team_id = opp_team_id.replace(");", "")
2678            opp_team_id = int(opp_team_id)
2679
2680            temp_df["opponent_team_id"] = opp_team_id
2681        except Exception:
2682            logging.info(
2683                "Couldn't find the opposition team naIDme "
2684                + "for this row. "
2685            )
2686            opp_team_id = None
2687        # print(i.find("td").text)
2688        try:
2689            opp_team_name = t.find_all("td")[1].find_all("img")[1].get("alt")
2690        except AttributeError:
2691            logging.info(
2692                "Couldn't find the opposition team name "
2693                + "for this row from an image element. "
2694                + "Attempting a backup method"
2695            )
2696            opp_team_name = t_cells[1]
2697        except IndexError:
2698            logging.info(
2699                "Couldn't find the opposition team name "
2700                + "for this row from an image element. "
2701                + "Attempting a backup method"
2702            )
2703            opp_team_name = t_cells[1]
2704        except Exception as e:
2705            logging.warning(
2706                "Unhandled exception when trying to get the "
2707                + "opposition team name from this game. "
2708                + f"Full exception `{e}`"
2709            )
2710            raise e
2711
2712        if opp_team_name == "Defensive Stats":
2713            opp_team_name = t_cells[1]
2714
2715        if "@" in opp_team_name:
2716            opp_team_name = opp_team_name.split("@")[0]
2717
2718        result_str = t_cells[2]
2719
2720        result_str = (
2721            result_str.lower().replace("w", "").replace("l", "").replace(
2722                "t", ""
2723            )
2724        )
2725
2726        if (
2727            result_str.lower() == "ppd" or
2728            result_str.lower() == "" or
2729            result_str.lower() == "canceed"
2730        ):
2731            continue
2732
2733        result_str = result_str.replace("\n", "")
2734        result_str = result_str.replace("*", "")
2735
2736        tm_score, opp_score = result_str.split("-")
2737        t_cells = [x.replace("*", "") for x in t_cells]
2738        t_cells = [x.replace("/", "") for x in t_cells]
2739        t_cells = [x.replace("\\", "") for x in t_cells]
2740
2741        temp_df = pd.DataFrame(
2742            data=[t_cells],
2743            columns=table_headers,
2744            # index=[0]
2745        )
2746
2747        tm_score = int(tm_score)
2748        if "(" in opp_score:
2749            opp_score = opp_score.replace(")", "")
2750            opp_score, ot_periods = opp_score.split("(")
2751            temp_df["ot_periods"] = ot_periods
2752
2753        if "\n" in opp_score:
2754            opp_score = opp_score.strip()
2755            # opp_score = opp_score
2756        opp_score = int(opp_score)
2757
2758        temp_df["team_score"] = tm_score
2759        temp_df["opponent_score"] = opp_score
2760
2761        del tm_score
2762        del opp_score
2763
2764        g_id = t.find_all("td")[2].find("a").get("href")
2765
2766        g_id = g_id.replace("/contests", "")
2767        g_id = g_id.replace("/box_score", "")
2768        g_id = g_id.replace("/", "")
2769
2770        g_id = int(g_id)
2771        temp_df["game_id"] = g_id
2772
2773        del g_id
2774        temp_df.rename(
2775            columns={"Opponent": "opponent", "Date": "date"},
2776            inplace=True,
2777        )
2778        game_date = datetime.strptime(g_date, "%m/%d/%Y").date()
2779
2780        temp_df["date"] = game_date
2781        temp_df["game_num"] = game_num
2782        # temp_df["game_innings"] = innings
2783
2784        if len(opp_team_name) > 0:
2785            temp_df["opponent"] = opp_team_name
2786        del opp_team_name
2787
2788        duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
2789        temp_df.drop(columns=duplicate_cols, inplace=True)
2790
2791        stats_df_arr.append(temp_df)
2792        del temp_df
2793
2794    stats_df = pd.concat(stats_df_arr, ignore_index=True)
2795    stats_df = stats_df.replace("/", "", regex=True)
2796    stats_df = stats_df.replace("", np.nan)
2797    stats_df = stats_df.infer_objects()
2798
2799    stats_df["player_id"] = player_id
2800    stats_df["season"] = season
2801    # In many seasons, there is an ["Avg"] column
2802    # that would otherwise completely screw up
2803    # any attempts to use the final DataFrame,
2804    # because it would be a duplicate column
2805    # that pandas wouldn't complain about
2806    # until it's too late.
2807
2808    duplicate_cols = stats_df.columns[stats_df.columns.duplicated()]
2809    stats_df.drop(columns=duplicate_cols, inplace=True)
2810
2811    stats_df.rename(
2812        columns={
2813            "#": "player_jersey_number",
2814            "Player": "player_full_name",
2815            "Yr": "player_class",
2816            "Pos": "player_position",
2817            "Ht": "player_height",
2818            "B/T": "player_bats_throws",
2819            "3FG": "3PM",
2820            "3FGA": "3PA",
2821            "ORebs": "ORB",
2822            "DRebs": "DRB",
2823            "Tot Reb": "TRB",
2824            "TO": "TOV",
2825            "Dbl Dbl": "DBL_DBL",
2826            "Trpl Dbl": "TRP_DBL",
2827            "Fouls": "PF",
2828            'Tech Fouls': "TF",
2829            'Effective FG Pct.': "eFG%",
2830            "MP": "MP_str",
2831            "Min": "MP_str",
2832            "Off Reb": "ORB",
2833            "Def Reb": "DRB",
2834            "ST": "STL",
2835            "3FG%": "3P%",
2836            "BLKS": "BLK"
2837        },
2838        inplace=True,
2839    )
2840
2841    # This is a separate function call because these stats
2842    # *don't* exist in every season.
2843    if "GS" not in stats_df.columns:
2844        stats_df["GS"] = None
2845
2846    if "DQ" not in stats_df.columns:
2847        stats_df["DQ"] = None
2848
2849    if "TF" not in stats_df.columns:
2850        stats_df["TF"] = None
2851
2852    if "DBL_DBL" not in stats_df.columns:
2853        stats_df["DBL_DBL"] = None
2854
2855    if "TRP_DBL" not in stats_df.columns:
2856        stats_df["TRP_DBL"] = None
2857
2858    stats_df = stats_df.astype(
2859        {
2860            "DQ": "uint16",
2861            "TF": "uint16",
2862        },
2863        errors="ignore"
2864    )
2865
2866    stats_df = stats_df.infer_objects().fillna(0)
2867    stats_df = stats_df.astype(
2868        {
2869            "GP": "uint16",
2870            "GS": "uint16",
2871            "FGM": "uint16",
2872            "FGA": "uint16",
2873            "3PM": "uint16",
2874            "3PA": "uint16",
2875            "FT": "uint16",
2876            "FTA": "uint16",
2877            "PTS": "uint16",
2878            "ORB": "uint16",
2879            "DRB": "uint16",
2880            "TRB": "uint16",
2881            "AST": "uint16",
2882            "TOV": "uint16",
2883            "STL": "uint16",
2884            "BLK": "uint16",
2885            "PF": "uint16",
2886            "DBL_DBL": "uint16",
2887            "TRP_DBL": "uint16",
2888            # "school_id": "uint32",
2889        }
2890    )
2891
2892    stats_df[["MP_minutes", "MP_seconds"]] = stats_df["MP_str"].str.split(
2893        ":", expand=True
2894    )
2895    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
2896        "MP_minutes", "MP_seconds"
2897    ]].fillna(0)
2898    stats_df[["MP_minutes", "MP_seconds"]] = stats_df[[
2899        "MP_minutes", "MP_seconds"
2900    ]].astype("uint16")
2901    stats_df["MP_total_seconds"] = (
2902        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
2903    )
2904
2905    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"])
2906    stats_df["FG%"] = stats_df["FG%"].round(4)
2907
2908    stats_df["3P%"] = (stats_df["3PM"] / stats_df["3PA"])
2909    stats_df["3P%"] = stats_df["3P%"].round(4)
2910
2911    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"])
2912    stats_df["FT%"] = stats_df["FT%"].round(4)
2913
2914    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
2915    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
2916    stats_df["2P%"] = (stats_df["2PM"] / stats_df["2PA"])
2917    stats_df["2P%"] = stats_df["2P%"].round(4)
2918
2919    stats_df["eFG%"] = (
2920        (
2921            stats_df["FGM"] +
2922            (stats_df["3PM"] * 0.5)
2923        ) /
2924        stats_df["FGA"]
2925    )
2926    stats_df["eFG%"] = stats_df["eFG%"].round(4)
2927
2928    stats_df["TSA"] = (
2929        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
2930    )
2931    stats_df["TS%"] = stats_df["PTS"] / (2 * stats_df["TSA"])
2932    stats_df["TS%"] = stats_df["TS%"].round(4)
2933
2934    stats_df["TOV%"] = (
2935        stats_df["TOV"] /
2936        (
2937            stats_df["FGA"] +
2938            (stats_df["FTA"] * 0.44) +
2939            stats_df["TOV"]
2940        )
2941    )
2942    stats_df["TOV%"] = stats_df["TOV%"].round(4)
2943    stats_df = stats_df.reindex(
2944        columns=stat_columns
2945    )
2946    # print(stats_df.columns)
2947    stats_df.to_csv(
2948        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/"
2949        + "player_game_stats/"
2950        + f"{season}_{player_id}_player_game_stats.csv",
2951        index=False,
2952    )
2953    return stats_df

Given a valid player ID and season, this function retrieves that player's stats for each game of the season.

Parameters

player_id (int, mandatory): Required argument. Specifies the player you want game stats from.

season (int, mandatory): Required argument. Specifies the season you want game stats from.

Usage

from ncaa_stats_py.basketball import (
    get_basketball_player_game_stats
)

# Get the batting stats of Jacob Berry in 2022 (LSU).
print(
    "Get the batting stats of Jacob Berry in 2022 (LSU)."
)
df = get_basketball_player_game_stats(player_id=7579336, season=2022)
print(df)

# Get the batting stats of Alec Burleson in 2019 (ECU).
print(
    "Get the batting stats of Alec Burleson in 2019 (ECU)."
)
df = get_basketball_player_game_stats(player_id=6015715, season=2019)
print(df)

# Get the batting stats of Hunter Bishop in 2018 (Arizona St.).
print(
    "Get the batting stats of Hunter Bishop in 2018 (Arizona St.)."
)
df = get_basketball_player_game_stats(player_id=6014052, season=2019)
print(df)

Returns

A pandas DataFrame object with a player's game logs in a given season.

def get_basketball_game_player_stats(game_id: int) -> pandas.core.frame.DataFrame:
def get_basketball_game_player_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all player game stats, if possible.

    NOTE: Due to an issue with [stats.ncaa.org](stats.ncaa.org),
    full player game stats may not be loaded in through this function.

    This is a known issue; however, you should still be able to get
    position data and starters information through this function.

    Results are cached as a CSV file inside `~/.ncaa_stats_py/`.
    A cached file that is less than 35 days old is returned as-is,
    without downloading the box score again.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want player game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_game_player_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_game_player_stats(5254137)
    print(df)

    # Get the game stats of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the game stats of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_game_player_stats(5254126)
    print(df)

    # Get the game stats of a St. Patrick's Day
    # game between the Duquesne Dukes and VCU Rams (D1).
    print(
        "Get the game stats of a St. Patrick's Day "
        + "game between the Duquesne Dukes and VCU Rams (D1)."
    )
    df = get_basketball_game_player_stats(5252318)
    print(df)

    # Get the game stats of a December 17th, 2023
    # game between the Barry Buccaneers and Findlay Oilers (D2).
    print(
        "Get the game stats of a December 17th, 2023 "
        + "game between the Barry Buccaneers and Findlay Oilers (D2)."
    )
    df = get_basketball_game_player_stats(3960610)
    print(df)

    # Get the game stats of a Valentine's Day
    # game between the Kalamazoo Hornets and the Trine Thunder (D2).
    print(
        "Get the game stats of a Valentine's Day "
        + "game between the Kalamazoo Hornets and the Trine Thunder (D2)."
    )
    df = get_basketball_game_player_stats(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    # NOTE(review): this is the same game ID as the men's
    # championship example above - confirm the intended ID.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's Basketball National Championship game"
    )
    df = get_basketball_game_player_stats(5254137)
    print(df)

    # Get the game stats of a March 3rd, 2024
    # game between Duke and the North Carolina Tar Heels.
    print(
        "Get the game stats of a March 3rd, 2024 "
        + "game between Duke and the North Carolina Tar Heels"
    )
    df = get_basketball_game_player_stats(3984600)
    print(df)

    # Get the game stats of a Thanksgiving Day
    # game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
    print(
        "Get the game stats of a Thanksgiving Day "
        + "game between the Sacred Heart Pioneers and "
        + "the P.R.-Mayaguez Janes (D2)."
    )
    df = get_basketball_game_player_stats(3972687)
    print(df)

    # Get the game stats of a January 21st, 2024
    # game between the Puget Sound Loggers
    # and the Whitworth Pirates (D3).
    print(
        "Get the game stats of a January 21st, 2024 "
        + "game between the Puget Sound Loggers and "
        + "the Whitworth Pirates (D3)."
    )
    df = get_basketball_game_player_stats(3979051)
    print(df)
    ```

    Returns
    ----------
    A pandas `DataFrame` object with player game stats in a given game.

    Raises
    ----------
    `ValueError`:
        If a team in the box score cannot be found in either the
        men's or the women's basketball team list.

    """
    sport_id = ""
    season = 0

    # The team lists are what map a box score's team ID back to
    # a sport (MBB/WBB) and a season further down.
    mbb_teams_df = load_basketball_teams(get_wbb_data=False)
    mbb_team_ids_arr = mbb_teams_df["team_id"].to_list()

    wbb_teams_df = load_basketball_teams(get_wbb_data=True)
    wbb_team_ids_arr = wbb_teams_df["team_id"].to_list()

    stats_df_arr = []

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    stat_columns = [
        "season",
        "game_id",
        "team_id",
        "team_name",
        "player_id",
        "player_num",
        "player_full_name",
        "player_position",
        "GP",
        "GS",
        "MP_str",
        "MP_minutes",
        "MP_seconds",
        "MP_total_seconds",
        "FGM",
        "FGA",
        "FG%",
        "3PM",
        "3PA",
        "3P%",
        "2PM",
        "2PA",
        "2P%",
        "eFG%",
        "FT",
        "FTA",
        "FT%",
        "TSA",
        "TS%",
        "ORB",
        "DRB",
        "TRB",
        "AST",
        "STL",
        "BLK",
        "TOV",
        "TOV%",
        "PF",
        "TF",
        "PTS",
        "DQ",
        "DBL_DBL",
        "TRP_DBL",
    ]

    # Counting stats that are stored as unsigned integers.
    count_stat_dtypes = {
        "GP": "uint16",
        "GS": "uint16",
        "FGM": "uint16",
        "FGA": "uint16",
        "3PM": "uint16",
        "3PA": "uint16",
        "FT": "uint16",
        "FTA": "uint16",
        "PTS": "uint16",
        "ORB": "uint16",
        "DRB": "uint16",
        "TRB": "uint16",
        "AST": "uint16",
        "TOV": "uint16",
        "STL": "uint16",
        "BLK": "uint16",
        "PF": "uint16",
        "DBL_DBL": "uint16",
        "TRP_DBL": "uint16",
    }

    url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats"

    # The sport isn't known until the page is parsed, so both cache
    # folders are prepared and checked. MBB is checked first;
    # a WBB file, when present, takes precedence.
    cached_df = None
    file_mod_datetime = datetime.today()
    base_dir = f"{home_dir}/.ncaa_stats_py/"

    for league in ("MBB", "WBB"):
        cache_dir = f"{base_dir}basketball_{league}/game_stats/player/"
        for folder in (
            base_dir,
            f"{base_dir}basketball_{league}/",
            f"{base_dir}basketball_{league}/game_stats/",
            cache_dir,
        ):
            if not exists(folder):
                mkdir(folder)

        cache_file = cache_dir + f"{game_id}_player_game_stats.csv"
        if exists(cache_file):
            cached_df = pd.read_csv(cache_file).infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
        elif league == "WBB":
            logging.info("Could not find a WBB player game stats file")

    # Only re-download when there is no cached file,
    # or the cached file is at least 35 days old.
    cache_age = datetime.today() - file_mod_datetime
    if cached_df is not None and cache_age.days < 35:
        return cached_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"})

    for box in table_boxes:
        t_header = box.find(
            "div", {"class": "card-header"}
        ).find(
            "div", {"class": "row"}
        )

        t_header_str = t_header.text
        t_header_str = t_header_str.replace("Period Stats", "")
        t_header_str = t_header_str.replace("\n", "")
        t_header_str = t_header_str.strip()

        team_id = t_header.find("a").get("href")
        team_id = team_id.replace("/teams", "")
        team_id = team_id.replace("/", "")
        team_id = int(team_id)

        table_data = box.find(
            "table",
            {"class": "display dataTable small_font"}
        )
        table_headers = [x.text for x in box.find("thead").find_all("th")]
        temp_t_rows = table_data.find("tbody").find_all("tr")

        spec_stats_df_arr = []
        for t in temp_t_rows:
            game_played = 1
            # NOTE(review): GS is always written as 0. The only starter
            # check below also assigns 0, so starters are never flagged.
            # Confirm how the page marks starters before changing this.
            game_started = 0

            t_cells = t.find_all("td")
            p_name = t_cells[1].text.replace("\n", "").strip()

            # Skip rows that repeat the team name, and the "team" row.
            if t_header_str in p_name or p_name.lower() == "team":
                continue

            if "\xa0" in p_name:
                game_started = 0

            # The ID is resolved per row. If the row has no usable
            # player link, it is recorded as 0 instead of silently
            # reusing the ID parsed from the previous row.
            try:
                player_id = t.find("a").get("href")
                player_id = player_id.replace("/players", "")
                player_id = player_id.replace("/player", "")
                player_id = player_id.replace("/", "")
                player_id = int(player_id)
            except (AttributeError, TypeError, ValueError) as e:
                logging.debug(
                    "Could not replace player IDs. " +
                    f"Full exception: `{e}`"
                )
                player_id = 0

            cell_text = [x.text.strip() for x in t_cells]

            temp_df = pd.DataFrame(
                data=[cell_text],
                columns=table_headers
            )

            # Repeated header names would produce ambiguous columns,
            # so every column whose name is repeated is dropped.
            duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
            temp_df.drop(columns=duplicate_cols, inplace=True)

            temp_df["player_id"] = player_id
            temp_df["GP"] = game_played
            temp_df["GS"] = game_started

            spec_stats_df_arr.append(temp_df)

        spec_stats_df = pd.concat(
            spec_stats_df_arr,
            ignore_index=True
        )

        if team_id in mbb_team_ids_arr:
            sport_id = "MBB"
            df = mbb_teams_df[mbb_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        elif team_id in wbb_team_ids_arr:
            sport_id = "WBB"
            df = wbb_teams_df[wbb_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        else:
            raise ValueError(
                f"Unhandled team ID {team_id}"
            )
        spec_stats_df["team_id"] = team_id
        spec_stats_df["team_name"] = t_header_str
        stats_df_arr.append(spec_stats_df)

    # `ignore_index=True` gives the freshly parsed frame the same
    # unique 0..n-1 index a reload of the cached CSV would have,
    # instead of repeating each team's row labels.
    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df["season"] = season
    stats_df.rename(
        columns={
            "#": "player_num",
            "Name": "player_full_name",
            "P": "player_position",
            "MP": "MP_str",
            "3FG": "3PM",
            "3FGA": "3PA",
            "ORebs": "ORB",
            "DRebs": "DRB",
            "TotReb": "TRB",
            "TO": "TOV",
            "TechFouls": "TF",
            "Fouls": "PF"
        },
        inplace=True,
    )

    # These stats *don't* exist in every box score.
    for optional_col in ("GS", "DQ", "TF", "DBL_DBL", "TRP_DBL"):
        if optional_col not in stats_df.columns:
            stats_df[optional_col] = None

    stats_df = stats_df.astype(
        {
            "DQ": "uint16",
            "TF": "uint16",
        },
        errors="ignore"
    )

    # A blank cell can't be cast to an integer, so blanks in the
    # counting columns become NaN first and are then filled with 0.
    count_cols = list(count_stat_dtypes)
    stats_df[count_cols] = stats_df[count_cols].replace("", np.nan)

    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(count_stat_dtypes)

    # Split "MM:SS" into minutes and seconds. Reindexing to exactly
    # two columns keeps this working when no value contains a ":".
    mp_parts = stats_df["MP_str"].str.split(":", expand=True)
    mp_parts = mp_parts.reindex(columns=[0, 1])
    mp_parts = mp_parts.replace("", np.nan).fillna(0).astype("uint16")
    stats_df["MP_minutes"] = mp_parts[0]
    stats_df["MP_seconds"] = mp_parts[1]
    stats_df["MP_total_seconds"] = (
        stats_df["MP_seconds"] + (stats_df["MP_minutes"] * 60)
    )

    # Rate stats. A zero denominator yields NaN (or inf)
    # rather than raising.
    stats_df["FG%"] = (stats_df["FGM"] / stats_df["FGA"]).round(4)
    stats_df["3P%"] = (stats_df["3PM"] / stats_df["3PA"]).round(4)
    stats_df["FT%"] = (stats_df["FT"] / stats_df["FTA"]).round(4)

    stats_df["2PM"] = (stats_df["FGM"] - stats_df["3PM"])
    stats_df["2PA"] = (stats_df["FGA"] - stats_df["3PA"])
    stats_df["2P%"] = (stats_df["2PM"] / stats_df["2PA"]).round(4)

    stats_df["eFG%"] = (
        (
            stats_df["FGM"] +
            (stats_df["3PM"] * 0.5)
        ) /
        stats_df["FGA"]
    ).round(4)

    stats_df["TSA"] = (
        stats_df["FGA"] + (stats_df["FTA"] * 0.44)
    )
    stats_df["TS%"] = (stats_df["PTS"] / (2 * stats_df["TSA"])).round(4)

    stats_df["TOV%"] = (
        stats_df["TOV"] /
        (
            stats_df["FGA"] +
            (stats_df["FTA"] * 0.44) +
            stats_df["TOV"]
        )
    ).round(4)

    # A double-double/triple-double is 10 or more in at least
    # two/three of these five categories.
    double_double_stats = ["PTS", "TRB", "AST", "BLK", "STL"]
    ten_plus_count = (stats_df[double_double_stats] >= 10).sum(axis=1)
    stats_df["DBL_DBL"] = ten_plus_count >= 2
    stats_df["TRP_DBL"] = ten_plus_count >= 3

    stats_df = stats_df.astype(
        {
            "DBL_DBL": "uint16",
            "TRP_DBL": "uint16",
        },
        errors="ignore"
    )
    stats_df = stats_df.reindex(
        columns=stat_columns
    )
    stats_df["game_id"] = game_id

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/basketball_{sport_id}/game_stats/player/"
        + f"{game_id}_player_game_stats.csv",
        index=False
    )
    return stats_df

Given a valid game ID, this function will attempt to get all player game stats, if possible.

NOTE: Due to an issue with stats.ncaa.org, full player game stats may not be loaded in through this function.

This is a known issue; however, you should still be able to get position data and starters information through this function.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want player game stats from.

Usage

from ncaa_stats_py.basketball import get_basketball_game_player_stats

########################################
#          Men's Basketball            #
########################################

# Get the game stats of the
# 2024 NCAA D1 Men's Basketball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Men's Basketball National Championship game."
)
df = get_basketball_game_player_stats(5254137)
print(df)

# Get the game stats of a March Madness game on March 29th, 2024
# between Duke and the Houston Cougars.
print(
    "Get the game stats of a March Madness game on March 29th, 2024 "
    + "between Duke and the Houston Cougars."
)
df = get_basketball_game_player_stats(5254126)
print(df)

# Get the game stats of a St. Patrick's Day
# game between the Duquesne Dukes and VCU Rams (D1).
print(
    "Get the game stats of a St. Patrick's Day "
    + "game between the Duquesne Dukes and VCU Rams (D1)."
)
df = get_basketball_game_player_stats(5252318)
print(df)

# Get the game stats of a December 17th, 2023
# game between the Barry Buccaneers and Findlay Oilers (D2).
print(
    "Get the game stats of a December 17th, 2023 "
    + "game between the Barry Buccaneers and Findlay Oilers (D2)."
)
df = get_basketball_game_player_stats(3960610)
print(df)

# Get the game stats of a Valentine's Day
# game between the Kalamazoo Hornets and the Trine Thunder (D2).
print(
    "Get the game stats of a Valentine's Day "
    + "game between the Kalamazoo Hornets and the Trine Thunder (D2)."
)
df = get_basketball_game_player_stats(3967963)
print(df)


########################################
#          Women's Basketball          #
########################################

# Get the game stats of the
# 2024 NCAA D1 Women's Basketball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Women's Basketball National Championship game"
)
df = get_basketball_game_player_stats(5254137)
print(df)

# Get the game stats of a March 3rd, 2024
# game between Duke and the North Carolina Tar Heels.
print(
    "Get the game stats of a March 3rd, 2024 "
    + "game between Duke and the North Carolina Tar Heels"
)
df = get_basketball_game_player_stats(3984600)
print(df)

# Get the game stats of a Thanksgiving Day
# game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
print(
    "Get the game stats of a Thanksgiving Day "
    + "game between the Sacred Heart Pioneers and "
    + "the P.R.-Mayaguez Janes (D2)."
)
df = get_basketball_game_player_stats(3972687)
print(df)

# Get the game stats of a January 21st, 2024
# game between the Puget Sound Loggers
# and the Whitworth Pirates (D3).
print(
    "Get the game stats of a January 21st, 2024 "
    + "game between the Puget Sound Loggers and "
    + "the Whitworth Pirates (D3)."
)
df = get_basketball_game_player_stats(3979051)
print(df)

Returns

A pandas DataFrame object with player game stats in a given game.

def get_basketball_game_team_stats(game_id: int) -> pandas.core.frame.DataFrame:
def get_basketball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID, this function builds team-level totals
    for that game by summing the player-level game stats returned by
    `get_basketball_game_player_stats()`, and then derives
    shooting and turnover rates from those totals.

    NOTE: Due to an issue with [stats.ncaa.org](stats.ncaa.org),
    full team game stats may not be loaded in through this function.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_game_team_stats

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_game_team_stats(5254137)
    print(df)

    # Get the game stats of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the game stats of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_game_team_stats(5254126)
    print(df)

    # Get the game stats of a St. Patrick's Day
    # game between the Duquesne Dukes and VCU Rams (D1).
    print(
        "Get the game stats of a St. Patrick's Day "
        + "game between the Duquesne Dukes and VCU Rams (D1)."
    )
    df = get_basketball_game_team_stats(5252318)
    print(df)

    # Get the game stats of a December 17th, 2023
    # game between the Barry Buccaneers and Findlay Oilers (D2).
    print(
        "Get the game stats of a December 17th, 2023 "
        + "game between the Barry Buccaneers and Findlay Oilers (D2)."
    )
    df = get_basketball_game_team_stats(3960610)
    print(df)

    # Get the game stats of a Valentine's Day
    # game between the Kalamazoo Hornets and the Trine Thunder (D3).
    print(
        "Get the game stats of a Valentine's Day "
        + "game between the Kalamazoo Hornets and the Trine Thunder (D3)."
    )
    df = get_basketball_game_team_stats(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's Basketball National Championship game"
    )
    df = get_basketball_game_team_stats(5254137)
    print(df)

    # Get the game stats of a March 3rd, 2024
    # game between Duke and the North Carolina Tar Heels.
    print(
        "Get the game stats of a March 3rd, 2024 "
        + "game between Duke and the North Carolina Tar Heels"
    )
    df = get_basketball_game_team_stats(3984600)
    print(df)

    # Get the game stats of a Thanksgiving Day
    # game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
    print(
        "Get the game stats of a Thanksgiving Day "
        + "game between the Sacred Heart Pioneers and "
        + "the P.R.-Mayaguez Janes (D2)."
    )
    df = get_basketball_game_team_stats(3972687)
    print(df)

    # Get the game stats of a January 21st, 2024
    # game between the Puget Sound Loggers
    # and the Whitworth Pirates (D3).
    print(
        "Get the game stats of a January 21st, 2024 "
        + "game between the Puget Sound Loggers and "
        + "the Whitworth Pirates (D3)."
    )
    df = get_basketball_game_team_stats(3979051)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.
    There is one row per `season`/`game_id`/`team_id`/`team_name`
    combination, holding the summed counting stats, a formatted
    minutes-played string (`MP_str`), and the derived
    `FG%`, `3P%`, `FT%`, `2P%`, `eFG%`, `TSA`, `TS%`, and `TOV%` columns.

    """
    group_keys = ["season", "game_id", "team_id", "team_name"]
    # Every counting stat is summed across a team's players.
    summed_columns = [
        "MP_total_seconds",
        "FGM",
        "FGA",
        "3PM",
        "3PA",
        "2PM",
        "2PA",
        "FT",
        "FTA",
        "ORB",
        "DRB",
        "TRB",
        "AST",
        "STL",
        "BLK",
        "TOV",
        "PF",
        "TF",
        "PTS",
        "DQ",
        "DBL_DBL",
        "TRP_DBL",
    ]

    player_df = get_basketball_game_player_stats(game_id=game_id)
    player_df = player_df.infer_objects()

    team_df = player_df.groupby(group_keys, as_index=False).agg(
        {column: "sum" for column in summed_columns}
    )

    team_df["MP_str"] = team_df["MP_total_seconds"].map(
        _get_minute_formatted_time_from_seconds
    )

    team_df["FG%"] = (team_df["FGM"] / team_df["FGA"]).round(4)
    team_df["3P%"] = (team_df["3PM"] / team_df["3PA"]).round(4)
    team_df["FT%"] = (team_df["FT"] / team_df["FTA"]).round(4)

    # Two-point totals are re-derived from the field goal and
    # three-point totals, replacing the summed player values.
    team_df["2PM"] = team_df["FGM"] - team_df["3PM"]
    team_df["2PA"] = team_df["FGA"] - team_df["3PA"]
    team_df["2P%"] = (team_df["2PM"] / team_df["2PA"]).round(4)

    # Effective FG% gives made three-pointers an extra half credit.
    weighted_makes = team_df["FGM"] + (team_df["3PM"] * 0.5)
    team_df["eFG%"] = (weighted_makes / team_df["FGA"]).round(4)

    # True shooting attempts: field goal attempts plus 0.44 per FTA.
    team_df["TSA"] = team_df["FGA"] + (team_df["FTA"] * 0.44)
    team_df["TS%"] = (team_df["PTS"] / (2 * team_df["TSA"])).round(4)

    # Turnovers as a share of (true shooting attempts + turnovers).
    team_df["TOV%"] = (
        team_df["TOV"] / (team_df["TSA"] + team_df["TOV"])
    ).round(4)

    return team_df

Given a valid game ID, this function will attempt to get all team game stats, if possible.

NOTE: Due to an issue with stats.ncaa.org, full team game stats may not be loaded in through this function.

This is a known issue; however, you should still be able to get position data and starters information through this function.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want team game stats from.

Usage

from ncaa_stats_py.basketball import get_basketball_game_team_stats

########################################
#          Men's Basketball            #
########################################

# Get the game stats of the
# 2024 NCAA D1 Men's Basketball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Men's Basketball National Championship game."
)
df = get_basketball_game_team_stats(5254137)
print(df)

# Get the game stats of a March Madness game on March 29th, 2024
# between Duke and the Houston Cougars.
print(
    "Get the game stats of a March Madness game on March 29th, 2024 "
    + "between Duke and the Houston Cougars."
)
df = get_basketball_game_team_stats(5254126)
print(df)

# Get the game stats of a St. Patrick's Day
# game between the Duquesne Dukes and VCU Rams (D1).
print(
    "Get the game stats of a St. Patrick's Day "
    + "game between the Duquesne Dukes and VCU Rams (D1)."
)
df = get_basketball_game_team_stats(5252318)
print(df)

# Get the game stats of a December 17th, 2023
# game between the Barry Buccaneers and Findlay Oilers (D2).
print(
    "Get the game stats of a December 17th, 2023 "
    + "game between the Barry Buccaneers and Findlay Oilers (D2)."
)
df = get_basketball_game_team_stats(3960610)
print(df)

# Get the game stats of a Valentine's Day
# game between the Kalamazoo Hornets and the Trine Thunder (D3).
print(
    "Get the game stats of a Valentine's Day "
    + "game between the Kalamazoo Hornets and the Trine Thunder (D3)."
)
df = get_basketball_game_team_stats(3967963)
print(df)


########################################
#          Women's Basketball          #
########################################

# Get the game stats of the
# 2024 NCAA D1 Women's Basketball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Women's Basketball National Championship game"
)
df = get_basketball_game_team_stats(5254137)
print(df)

# Get the game stats of a March 3rd, 2024
# game between Duke and the North Carolina Tar Heels.
print(
    "Get the game stats of a March 3rd, 2024 "
    + "game between Duke and the North Carolina Tar Heels"
)
df = get_basketball_game_team_stats(3984600)
print(df)

# Get the game stats of a Thanksgiving Day
# game between the Sacred Heart Pioneers and the P.R.-Mayaguez Janes (D2).
print(
    "Get the game stats of a Thanksgiving Day "
    + "game between the Sacred Heart Pioneers and "
    + "the P.R.-Mayaguez Janes (D2)."
)
df = get_basketball_game_team_stats(3972687)
print(df)

# Get the game stats of a January 21st, 2024
# game between the Puget Sound Loggers
# and the Whitworth Pirates (D3).
print(
    "Get the game stats of a January 21st, 2024 "
    + "game between the Puget Sound Loggers and "
    + "the Whitworth Pirates (D3)."
)
df = get_basketball_game_team_stats(3979051)
print(df)

Returns

A pandas DataFrame object with team game stats in a given game.

def get_basketball_raw_pbp(game_id: int) -> pandas.core.frame.DataFrame:
def get_basketball_raw_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get the raw play-by-play (PBP)
    data for that game.

    A cached copy of the game
    (`~/.ncaa_stats_py/basketball_{MBB|WBB}/raw_pbp/{game_id}_raw_pbp.csv`)
    is returned when one exists and is less than 35 days old.
    Otherwise, the play-by-play page is downloaded from stats.ncaa.org,
    parsed, written to the cache, and returned.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.basketball import get_basketball_raw_pbp

    ########################################
    #          Men's Basketball            #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Men's Basketball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Men's Basketball National Championship game."
    )
    df = get_basketball_raw_pbp(5254137)
    print(df)

    # Get the play-by-play data of a March Madness game on March 29th, 2024
    # between Duke and the Houston Cougars.
    print(
        "Get the play-by-play data "
        + "of a March Madness game on March 29th, 2024 "
        + "between Duke and the Houston Cougars."
    )
    df = get_basketball_raw_pbp(5254126)
    print(df)

    # Get the play-by-play data of a February 28th
    # game between the Winthrop Eagles and High Point Panthers.
    print(
        "Get the play-by-play data of a February 28th "
        + "game between the Winthrop Eagles and High Point Panthers."
    )
    df = get_basketball_raw_pbp(3969302)
    print(df)

    # Get the play-by-play data of a December 19th, 2022
    # game between the San Francisco St. Gators and
    # the Cal St. Monterey Bay Otters (D2).
    print(
        "Get the play-by-play data of a December 19th, 2022 "
        + "game between the San Francisco St. Gators and "
        + "the Cal St. Monterey Bay Otters (D2)."
    )
    df = get_basketball_raw_pbp(2341500)
    print(df)

    # Get the play-by-play data of a January 3rd, 2022
    # game between the Hamline Pipers and the St. Olaf Oles (D3).
    print(
        "Get the play-by-play data of a January 3rd, 2022 "
        + "game between the Hamline Pipers and the St. Olaf Oles (D3)."
    )
    df = get_basketball_raw_pbp(3967963)
    print(df)


    ########################################
    #          Women's Basketball          #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Women's Basketball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Women's Basketball National Championship game."
    )
    df = get_basketball_raw_pbp(5254137)
    print(df)

    # Get the play-by-play data of a March 12th, 2021
    # game between the La Salle Explorers and the Dayton Flyers.
    print(
        "Get the play-by-play data of a March 12th, 2021 "
        + "game between the La Salle Explorers and the Dayton Flyers."
    )
    df = get_basketball_raw_pbp(2055636)
    print(df)

    # Get the play-by-play data of a February 6th, 2020
    # game between Purdue Northwest and the Michigan Tech Huskies (D2).
    print(
        "Get the play-by-play data of a February 6th, 2020 "
        + "game between Purdue Northwest and "
        + "the Michigan Tech Huskies (D2)."
    )
    df = get_basketball_raw_pbp(1793405)
    print(df)

    # Get the play-by-play data of a January 5th, 2019
    # game between the Simpson Storm
    # and the Dubuque Spartans (D3).
    print(
        "Get the play-by-play data of a January 5th, 2019 "
        + "game between the Simpson Storm and "
        + "the Dubuque Spartans (D3)."
    )
    df = get_basketball_raw_pbp(1625974)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the play-by-play (PBP) data
    for a given game, one row per listed event.

    Raises
    ----------
    `ValueError`:
        If neither team in the game can be matched to a known
        MBB or WBB team ID, or if no play-by-play rows could be parsed.

    """
    load_from_cache = False
    is_overtime = False

    sport_id = ""
    season = 0
    # Pre-set so that a row with no text in either team's column,
    # seen before any team event, cannot hit an unbound local.
    event_team = None

    mbb_teams_df = load_basketball_teams(get_wbb_data=False)
    mbb_team_ids_arr = mbb_teams_df["team_id"].to_list()

    wbb_teams_df = load_basketball_teams(get_wbb_data=True)
    wbb_team_ids_arr = wbb_teams_df["team_id"].to_list()

    pbp_rows = []
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    stat_columns = [
        "season",
        "game_id",
        "sport_id",
        "game_datetime",
        "half_num",
        "event_num",
        "game_time_str",
        "game_time_seconds",
        "game_time_milliseconds",
        "event_team",
        "event_text",
        "is_overtime",
        "stadium_name",
        "attendance",
        "away_team_id",
        "away_team_name",
        "home_team_id",
        "home_team_name",
    ]

    url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play"

    cache_root = f"{home_dir}/.ncaa_stats_py/"
    cache_dirs = {
        "MBB": f"{cache_root}basketball_MBB/raw_pbp/",
        "WBB": f"{cache_root}basketball_WBB/raw_pbp/",
    }
    cache_file_name = f"{game_id}_raw_pbp.csv"

    # Parents are listed before children because `mkdir`
    # only creates a single directory level at a time.
    for needed_dir in (
        cache_root,
        f"{cache_root}basketball_MBB/",
        cache_dirs["MBB"],
        f"{cache_root}basketball_WBB/",
        cache_dirs["WBB"],
    ):
        if not exists(needed_dir):
            mkdir(needed_dir)

    # The sport is not known until the page is parsed, so both caches
    # are checked. If both hold a copy, the WBB copy is the one used.
    file_mod_datetime = datetime.today()
    for cache_dir in (cache_dirs["MBB"], cache_dirs["WBB"]):
        cache_path = f"{cache_dir}{cache_file_name}"
        if exists(cache_path):
            games_df = pd.read_csv(cache_path)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_path))
            load_from_cache = True

    if load_from_cache is False:
        logging.info("Could not find a cached raw PBP file for this game")

    # Cached files that are 35 or more days old are re-downloaded.
    age = datetime.today() - file_mod_datetime
    if age.days >= 35:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    info_table = soup.find(
        "td",
        {
            "style": "padding: 0px 30px 0px 30px",
            "class": "d-none d-md-table-cell"
        }
    ).find(
        "table",
        {"style": "border-collapse: collapse"}
    )

    info_table_rows = info_table.find_all("tr")

    # Rows 3, 4, and 5 of the info table are read as
    # the game date, the stadium, and the attendance.
    game_date_str = info_table_rows[3].find("td").text
    for time_placeholder in ("TBA", "tba", "TBD", "tbd"):
        if time_placeholder in game_date_str:
            game_datetime = datetime.strptime(
                game_date_str,
                f"%m/%d/%Y {time_placeholder}"
            )
            break
    else:
        if (
            "tbd" not in game_date_str.lower() and
            ":" not in game_date_str.lower()
        ):
            # Date only, with no tip-off time listed.
            game_date_str = game_date_str.replace(" ", "")
            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
        else:
            game_datetime = datetime.strptime(
                game_date_str,
                '%m/%d/%Y %I:%M %p'
            )
    # NOTE(review): `game_datetime` is naive at this point, so
    # `astimezone()` presumes it is in the system's local timezone
    # before converting to US/Eastern. Confirm this is intended.
    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
    game_date_str = game_datetime.isoformat()
    del game_datetime

    stadium_str = info_table_rows[4].find("td").text

    attendance_str = info_table_rows[5].find("td").text
    attendance_int = re.findall(
        r"([0-9\,]+)",
        attendance_str
    )[0]
    attendance_int = int(attendance_int.replace(",", ""))
    del attendance_str

    team_cards = soup.find_all(
        "td",
        {
            "valign": "center",
            "class": "grey_text d-none d-sm-table-cell"
        }
    )

    # The first team card is the away team, the second is the home team.
    away_link = team_cards[0].find_all("a")[0]
    home_link = team_cards[1].find_all("a")[0]

    away_team_name = away_link.text
    home_team_name = home_link.text

    # Team links look like `/teams/<team_id>`; keep only the number.
    away_team_id = away_link.get("href")
    away_team_id = away_team_id.replace("/teams", "")
    away_team_id = away_team_id.replace("/team", "")
    away_team_id = away_team_id.replace("/", "")
    away_team_id = int(away_team_id)

    home_team_id = home_link.get("href")
    home_team_id = home_team_id.replace("/teams", "")
    home_team_id = home_team_id.replace("/team", "")
    home_team_id = home_team_id.replace("/", "")
    home_team_id = int(home_team_id)

    # Identify the sport and season from the home team first.
    # The away team is only a fallback, and it is looked up
    # by its own ID (not the home team's ID).
    for candidate_team_id in (home_team_id, away_team_id):
        if candidate_team_id in mbb_team_ids_arr:
            sport_id = "MBB"
            matched_df = mbb_teams_df[
                mbb_teams_df["team_id"] == candidate_team_id
            ]
        elif candidate_team_id in wbb_team_ids_arr:
            sport_id = "WBB"
            matched_df = wbb_teams_df[
                wbb_teams_df["team_id"] == candidate_team_id
            ]
        else:
            continue
        season = matched_df["season"].iloc[0]
        del matched_df
        break
    else:
        # If we get to this, we are in a code red situation.
        # "SHUT IT DOWN" - Gordon Ramsay
        raise ValueError(
            "Could not identify if this is a " +
            "MBB or WBB game based on team IDs. "
        )

    section_cards = soup.find_all(
        "div",
        {"class": "row justify-content-md-center w-100"}
    )

    for card in section_cards:
        event_text = ""
        half_str = card.find(
            "div",
            {"class": "card-header"}
        ).text
        half_num = int(re.findall(r"([0-9]+)", half_str)[0])

        if "ot" in half_str.lower():
            # Overtime periods are numbered after the two halves.
            is_overtime = True
            half_num += 2
        table_body = card.find("table").find("tbody").find_all("tr")

        for row in table_body:
            t_cells = [x.text.strip() for x in row.find_all("td")]
            game_time_str = t_cells[0]

            # Column 1 holds away team events, column 3 home team events.
            # If both are empty, the previous event's values carry over.
            if len(t_cells[1]) > 0:
                event_team = away_team_id
                event_text = t_cells[1]
            elif len(t_cells[3]) > 0:
                event_team = home_team_id
                event_text = t_cells[3]

            # Clock strings are either `MM:SS` or `MM:SS:ms`.
            time_parts = game_time_str.split(":")
            if len(time_parts) == 3:
                temp_time_minutes, temp_time_seconds, game_time_ms = \
                    time_parts
            elif len(time_parts) == 2:
                temp_time_minutes, temp_time_seconds = time_parts
                game_time_ms = 0

            temp_time_minutes = int(temp_time_minutes)
            temp_time_seconds = int(temp_time_seconds)
            game_time_ms = int(game_time_ms)
            game_time_seconds = temp_time_seconds + (temp_time_minutes * 60)

            if half_num == 1:
                # Add the 1,200 seconds of the second half
                # that have yet to be played.
                game_time_seconds += 1200

            # The keys match `stat_columns`, so the time columns
            # survive the `reindex()` below instead of ending up empty.
            pbp_rows.append(
                {
                    "game_time_str": game_time_str,
                    "game_time_seconds": game_time_seconds,
                    "game_time_milliseconds": game_time_ms,
                    "half_num": half_num,
                    "event_team": event_team,
                    "event_text": event_text,
                    "is_overtime": is_overtime,
                }
            )

    if len(pbp_rows) == 0:
        raise ValueError(
            f"No play-by-play rows were found for game ID `{game_id}`."
        )

    pbp_df = pd.DataFrame(pbp_rows)
    pbp_df["event_num"] = pbp_df.index + 1
    pbp_df["game_datetime"] = game_date_str
    pbp_df["season"] = season
    pbp_df["game_id"] = game_id
    pbp_df["sport_id"] = sport_id
    pbp_df["stadium_name"] = stadium_str
    pbp_df["attendance"] = attendance_int
    pbp_df["away_team_id"] = away_team_id
    pbp_df["away_team_name"] = away_team_name
    pbp_df["home_team_id"] = home_team_id
    pbp_df["home_team_name"] = home_team_name

    pbp_df = pbp_df.reindex(columns=stat_columns)
    pbp_df = pbp_df.infer_objects()

    if sport_id not in cache_dirs:
        raise ValueError(
            f"Improper Sport ID: `{sport_id}`"
        )

    pbp_df.to_csv(
        f"{cache_dirs[sport_id]}{cache_file_name}",
        index=False
    )

    return pbp_df

Given a valid game ID, this function will attempt to get the raw play-by-play (PBP) data for that game.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want play-by-play data (PBP) from.

Usage

from ncaa_stats_py.basketball import get_basketball_raw_pbp

########################################
#          Men's Basketball            #
########################################

# Get the play-by-play data of the
# 2024 NCAA D1 Men's Basketball National Championship game.
print(
    "Get the play-by-play data of the "
    + "2024 NCAA D1 Men's Basketball National Championship game."
)
df = get_basketball_raw_pbp(5254137)
print(df)

# Get the play-by-play data of a March Madness game on March 29th, 2024
# between Duke and the Houston Cougars.
print(
    "Get the play-by-play data "
    + "of a March Madness game on March 29th, 2024 "
    + "between Duke and the Houston Cougars."
)
df = get_basketball_raw_pbp(5254126)
print(df)

# Get the play-by-play data of a February 28th
# game between the Winthrop Eagles and High Point Panthers.
print(
    "Get the play-by-play data of a February 28th "
    + "game between the Winthrop Eagles and High Point Panthers."
)
df = get_basketball_raw_pbp(3969302)
print(df)

# Get the play-by-play data of a December 19th, 2022
# game between the San Francisco St. Gators and
# the Cal St. Monterey Bay Otters (D2).
print(
    "Get the play-by-play data of a December 19th, 2022 "
    + "game between the San Francisco St. Gators and " +
    "the Cal St. Monterey Bay Otters (D2)."
)
df = get_basketball_raw_pbp(2341500)
print(df)

# Get the play-by-play data of a January 3rd, 2022
# game between the Hamline Pipers and the St. Olaf Oles (D3).
print(
    "Get the play-by-play data of a January 3rd, 2022 "
    + "game between the Hamline Pipers and the St. Olaf Oles (D3)."
)
df = get_basketball_raw_pbp(3967963)
print(df)


########################################
#          Women's Basketball          #
########################################

# Get the play-by-play data of the
# 2024 NCAA D1 Women's Basketball National Championship game.
print(
    "Get the play-by-play data of the "
    + "2024 NCAA D1 Women's Basketball National Championship game."
)
df = get_basketball_raw_pbp(5254137)
print(df)

# Get the play-by-play data of a March 12th, 2021
# game between the La Salle Explorers and the Dayton Flyers.
print(
    "Get the play-by-play data of a March 12th, 2021 "
    + "game between the La Salle Explorers and the Dayton Flyers."
)
df = get_basketball_raw_pbp(2055636)
print(df)

# Get the play-by-play data of a February 6th, 2020
# game between Purdue Northwest and the Michigan Tech Huskies (D2).
print(
    "Get the play-by-play data of a February 6th, 2020 "
    + "game between Purdue Northwest and "
    + "the Michigan Tech Huskies (D2)."
)
df = get_basketball_raw_pbp(1793405)
print(df)

# Get the play-by-play data of a January 5th, 2019
# game between the Simpson Storm
# and the Dubuque Spartans (D3).
print(
    "Get the play-by-play data of a January 5th, 2019 "
    + "game between the Simpson Storm and "
    + "the Dubuque Spartans (D3)."
)
df = get_basketball_raw_pbp(1625974)
print(df)

Returns

A pandas DataFrame object with the play-by-play (PBP) data for a given game.

def get_basketball_game_starters(game_id: int) -> list:
def get_basketball_game_starters(game_id: int) -> list:
    """
    Given a valid game ID, this function will attempt to
    get the starting lineup out of the raw play-by-play data
    from the game.

    NOTE #1: The layout of the list will be as follows:

    > | Index |   **Away players**   |
    > | :---: | :------------------: |
    > |   0   | Away team starter #1 |
    > |   1   | Away team starter #2 |
    > |   2   | Away team starter #3 |
    > |   3   | Away team starter #4 |
    > |   4   | Away team starter #5 |

    > | Index |   **Home players**   |
    > | :---: | :------------------: |
    > |   5   | Home team starter #1 |
    > |   6   | Home team starter #2 |
    > |   7   | Home team starter #3 |
    > |   8   | Home team starter #4 |
    > |   9   | Home team starter #5 |

    NOTE #2: Starters are listed in order of when they first sub out.
    Do not assume that starter #5 for a team is a center,
    or that starter #1 is a PG!

    NOTE #3: A "starter" here is one of the first five distinct players
    on a team with a "substitution out" event. Substitutions *in* are
    not tracked, so a bench player who enters and leaves the game
    before five starters have subbed out would be reported as a starter.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want the starting lineups from.
        This is passed as-is to `get_basketball_raw_pbp()`.

    Raises
    ----------
    `ValueError`:
        If a "substitution out" event has an empty player name,
        or if fewer than 5 starters can be found for either team.

    Returns
    ----------
    A list of starters from a specific basketball game ID.

    """
    starters_list = []
    pbp_df = get_basketball_raw_pbp(game_id=game_id)
    away_team_id = pbp_df["away_team_id"].iloc[0]
    home_team_id = pbp_df["home_team_id"].iloc[0]

    # Away team first, then home team, to match the documented layout.
    for team_id in [away_team_id, home_team_id]:
        temp_starters_list = []

        temp_df = pbp_df[pbp_df["event_team"] == team_id]

        for play_txt in temp_df["event_text"].to_list():
            if len(temp_starters_list) == 5:
                break

            # Missing play text (e.g. NaN) cannot describe a substitution,
            # and a non-string value would crash the `in` test below.
            if not isinstance(play_txt, str):
                continue
            if "substitution out" not in play_txt:
                continue

            # The player name is everything before the first comma.
            player_txt = play_txt.split(",")[0]

            # Compare the *player name* (not the full play text) against
            # the starters already found, so a player who subs out more
            # than once is only counted a single time.
            if player_txt in temp_starters_list:
                continue
            # Team-level events are not tied to an individual player.
            if player_txt.lower() == "team":
                continue
            if len(player_txt) == 0:
                raise ValueError(
                    "Player cannot be NULL."
                )

            temp_starters_list.append(player_txt)

        if len(temp_starters_list) < 5:
            raise ValueError(
                f"Could not find all 5 starters for team ID {team_id} " +
                f"in game ID {game_id}"
            )
        starters_list.extend(temp_starters_list)
    return starters_list

Given a valid game ID, this function will attempt to get the starting lineup out of the raw play-by-play data from the game.

NOTE #1: The layout of the list will be as follows:

Index Away players
0 Away team starter #1
1 Away team starter #2
2 Away team starter #3
3 Away team starter #4
4 Away team starter #5
Index Home players
5 Home team starter #1
6 Home team starter #2
7 Home team starter #3
8 Home team starter #4
9 Home team starter #5

NOTE #2: Starters are listed in order of when they first sub out. Do not assume that starter #5 for a team is a center, or that starter #1 is a PG!

Returns

A list of starters from a specific basketball game ID.

def get_basketball_game_shot_locations(game_id: int) -> pandas.core.frame.DataFrame:
def get_basketball_game_shot_locations(game_id: int) -> pd.DataFrame:
    """
    Placeholder for retrieving shot location data from a basketball game.

    This function is not implemented yet.
    Calling it always raises a `NotImplementedError`.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Currently unused, because the function raises
        before doing anything with it.

    Raises
    ----------
    `NotImplementedError`:
        Always raised, regardless of the input.

    Returns
    ----------
    Nothing at this time. The annotated return type is
    a pandas `DataFrame` object.

    """
    not_ready_msg = "It's not implemented yet."
    raise NotImplementedError(not_ready_msg)