ncaa_stats_py.volleyball

   1# Author: Joseph Armstrong (armstrongjoseph08@gmail.com)
   2# File Name: `volleyball.py`
# Purpose: Houses functions that allow one to access NCAA volleyball data
   4# Creation Date: 2024-09-20 08:15 PM EDT
   5# Update History:
   6# - 2024-09-20 08:15 PM EDT
   7# - 2025-01-04 03:00 PM EDT
   8# - 2025-01-18 02:44 PM EDT
   9# - 2025-02-01 02:40 PM EDT
  10# - 2025-02-05 08:50 PM EDT
  11
  12
  13import logging
  14import re
  15from datetime import date, datetime
  16from os import mkdir
  17from os.path import exists, expanduser, getmtime
  18
  19import numpy as np
  20import pandas as pd
  21from bs4 import BeautifulSoup
  22from dateutil import parser
  23from pytz import timezone
  24from tqdm import tqdm
  25
  26from ncaa_stats_py.helpers.volleyball import _volleyball_pbp_helper
  27from ncaa_stats_py.utls import (
  28    _format_folder_str,
  29    _get_schools,
  30    _get_webpage,
  31    _name_smother,
  32)
  33
  34
  35def get_volleyball_teams(
  36    season: int,
  37    level: str | int,
  38    get_mens_data: bool = False
  39) -> pd.DataFrame:
  40    """
  41    Retrieves a list of volleyball teams from the NCAA.
  42
  43    Parameters
  44    ----------
  45    `season` (int, mandatory):
  46        Required argument.
  47        Specifies the season you want NCAA volleyball team information from.
  48
  49    `level` (int, mandatory):
  50        Required argument.
  51        Specifies the level/division you want
  52        NCAA volleyball team information from.
  53        This can either be an integer (1-3) or a string ("I"-"III").
  54
  55    `get_mens_data` (bool, optional):
  56        Optional argument.
  57        If you want men's volleyball data instead of women's volleyball data,
  58        set this to `True`.
  59
  60    Usage
  61    ----------
  62    ```python
  63
  64    from ncaa_stats_py.volleyball import get_volleyball_teams
  65
  66    ########################################
  67    #          Men's volleyball            #
  68    ########################################
  69
  70    # Get all D1 men's volleyball teams for the 2024 season.
  71    print("Get all D1 men's volleyball teams for the 2024 season.")
  72    df = get_volleyball_teams(2024, 1)
  73    print(df)
  74
  75    # Get all D2 men's volleyball teams for the 2023 season.
  76    print("Get all D2 men's volleyball teams for the 2023 season.")
  77    df = get_volleyball_teams(2023, 2)
  78    print(df)
  79
  80    # Get all D3 men's volleyball teams for the 2022 season.
  81    print("Get all D3 men's volleyball teams for the 2022 season.")
  82    df = get_volleyball_teams(2022, 3)
  83    print(df)
  84
  85    # Get all D1 men's volleyball teams for the 2021 season.
  86    print("Get all D1 men's volleyball teams for the 2021 season.")
  87    df = get_volleyball_teams(2021, "I")
  88    print(df)
  89
  90    # Get all D2 men's volleyball teams for the 2020 season.
  91    print("Get all D2 men's volleyball teams for the 2020 season.")
  92    df = get_volleyball_teams(2020, "II")
  93    print(df)
  94
  95    # Get all D3 men's volleyball teams for the 2019 season.
  96    print("Get all D3 men's volleyball teams for the 2019 season.")
  97    df = get_volleyball_teams(2019, "III")
  98    print(df)
  99
 100    ########################################
 101    #          Women's volleyball          #
 102    ########################################
 103
 104    # Get all D1 women's volleyball teams for the 2024 season.
 105    print(
 106        "Get all D1 women's volleyball teams for the 2024 season."
 107    )
 108    df = get_volleyball_teams(2024, 1)
 109    print(df)
 110
 111    # Get all D2 women's volleyball teams for the 2023 season.
 112    print(
 113        "Get all D2 women's volleyball teams for the 2023 season."
 114    )
 115    df = get_volleyball_teams(2023, 2)
 116    print(df)
 117
 118    # Get all D3 women's volleyball teams for the 2022 season.
 119    print(
 120        "Get all D3 women's volleyball teams for the 2022 season."
 121    )
 122    df = get_volleyball_teams(2022, 3)
 123    print(df)
 124
 125    # Get all D1 women's volleyball teams for the 2021 season.
 126    print(
 127        "Get all D1 women's volleyball teams for the 2021 season."
 128    )
 129    df = get_volleyball_teams(2021, "I")
 130    print(df)
 131
 132    # Get all D2 women's volleyball teams for the 2020 season.
 133    print(
 134        "Get all D2 women's volleyball teams for the 2020 season."
 135    )
 136    df = get_volleyball_teams(2020, "II")
 137    print(df)
 138
 139    # Get all D3 women's volleyball teams for the 2019 season.
 140    print(
 141        "Get all D3 women's volleyball teams for the 2019 season."
 142    )
 143    df = get_volleyball_teams(2019, "III")
 144    print(df)
 145
 146    ```
 147
 148    Returns
 149    ----------
 150    A pandas `DataFrame` object with a list of college volleyball teams
 151    in that season and NCAA level.
 152    """
 153    # def is_comment(elem):
 154    #     return isinstance(elem, Comment)
 155    sport_id = ""
 156    # stat_sequence = 0
 157    load_from_cache = True
 158    home_dir = expanduser("~")
 159    home_dir = _format_folder_str(home_dir)
 160    teams_df = pd.DataFrame()
 161    teams_df_arr = []
 162    temp_df = pd.DataFrame()
 163    formatted_level = ""
 164    ncaa_level = 0
 165
 166    if get_mens_data is True:
 167        sport_id = "MVB"
 168        stat_sequence = 528
 169    elif get_mens_data is False:
 170        sport_id = "WVB"
 171        stat_sequence = 48
 172
 173    if isinstance(level, int) and level == 1:
 174        formatted_level = "I"
 175        ncaa_level = 1
 176    elif isinstance(level, int) and level == 2:
 177        formatted_level = "II"
 178        ncaa_level = 2
 179    elif isinstance(level, int) and level == 3:
 180        formatted_level = "III"
 181        ncaa_level = 3
 182    elif isinstance(level, str) and (
 183        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
 184    ):
 185        ncaa_level = 1
 186        formatted_level = level.upper()
 187    elif isinstance(level, str) and (
 188        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
 189    ):
 190        ncaa_level = 2
 191        formatted_level = level.upper()
 192    elif isinstance(level, str) and (
 193        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
 194    ):
 195        ncaa_level = 3
 196        formatted_level = level.upper()
 197
 198    if exists(f"{home_dir}/.ncaa_stats_py/"):
 199        pass
 200    else:
 201        mkdir(f"{home_dir}/.ncaa_stats_py/")
 202
 203    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
 204        pass
 205    else:
 206        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
 207
 208    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"):
 209        pass
 210    else:
 211        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/")
 212
 213    if exists(
 214        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
 215        + f"{season}_{formatted_level}_teams.csv"
 216    ):
 217        teams_df = pd.read_csv(
 218            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
 219            + f"{season}_{formatted_level}_teams.csv"
 220        )
 221        file_mod_datetime = datetime.fromtimestamp(
 222            getmtime(
 223                f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
 224                + f"{season}_{formatted_level}_teams.csv"
 225            )
 226        )
 227    else:
 228        file_mod_datetime = datetime.today()
 229        load_from_cache = False
 230
 231    now = datetime.today()
 232
 233    age = now - file_mod_datetime
 234
 235    if (
 236        age.days > 1 and
 237        season >= (now.year - 1) and
 238        now.month <= 7
 239    ):
 240        load_from_cache = False
 241    elif age.days >= 35:
 242        load_from_cache = False
 243
 244    if load_from_cache is True:
 245        return teams_df
 246
 247    logging.warning(
 248        f"Either we could not load {season} D{level} schools from cache, "
 249        + "or it's time to refresh the cached data."
 250    )
 251    schools_df = _get_schools()
 252
 253    # Volleyball
 254    if sport_id == "MVB":
 255        url = (
 256            "https://stats.ncaa.org/rankings/change_sport_year_div?"
 257            + f"academic_year={season}.0&division={ncaa_level}.0" +
 258            f"&sport_code={sport_id}"
 259        )
 260    elif sport_id == "WVB":
 261        url = (
 262            "https://stats.ncaa.org/rankings/change_sport_year_div?"
 263            + f"academic_year={season+1}.0&division={ncaa_level}.0" +
 264            f"&sport_code={sport_id}"
 265        )
 266
 267    response = _get_webpage(url=url)
 268
 269    soup = BeautifulSoup(response.text, features="lxml")
 270    ranking_periods = soup.find("select", {"name": "rp", "id": "rp"})
 271    ranking_periods = ranking_periods.find_all("option")
 272
 273    rp_value = 0
 274    found_value = False
 275
 276    while found_value is False:
 277        # print("check")
 278        for rp in ranking_periods:
 279            if "final" in rp.text.lower():
 280                rp_value = rp.get("value")
 281                found_value = True
 282                break
 283                # pass
 284            elif "-" in rp.text.lower():
 285                pass
 286            else:
 287                rp_value = rp.get("value")
 288                found_value = True
 289                break
 290
 291    if sport_id == "MVB":
 292        url = (
 293            "https://stats.ncaa.org/rankings/institution_trends?"
 294            + f"academic_year={season}.0&division={ncaa_level}.0&"
 295            + f"ranking_period={rp_value}&sport_code={sport_id}"
 296        )
 297    elif sport_id == "WVB":
 298        url = (
 299            "https://stats.ncaa.org/rankings/institution_trends?"
 300            + f"academic_year={season+1}.0&division={ncaa_level}.0&"
 301            + f"ranking_period={rp_value}&sport_code={sport_id}"
 302        )
 303
 304    best_method = True
 305    if (
 306        (season < 2017 and sport_id == "MVB")
 307    ):
 308        url = (
 309            "https://stats.ncaa.org/rankings/national_ranking?"
 310            + f"academic_year={season}.0&division={ncaa_level}.0&"
 311            + f"ranking_period={rp_value}&sport_code={sport_id}"
 312            + f"&stat_seq={stat_sequence}.0"
 313        )
 314        response = _get_webpage(url=url)
 315        best_method = False
 316    elif (
 317        (season < 2017 and sport_id == "WVB")
 318    ):
 319        url = (
 320            "https://stats.ncaa.org/rankings/national_ranking?"
 321            + f"academic_year={season+1}.0&division={ncaa_level}.0&"
 322            + f"ranking_period={rp_value}&sport_code={sport_id}"
 323            + f"&stat_seq={stat_sequence}.0"
 324        )
 325        response = _get_webpage(url=url)
 326        best_method = False
 327    elif sport_id == "MVB":
 328        try:
 329            response = _get_webpage(url=url)
 330        except Exception as e:
 331            logging.info(f"Found exception when loading teams `{e}`")
 332            logging.info("Attempting backup method.")
 333            url = (
 334                "https://stats.ncaa.org/rankings/national_ranking?"
 335                + f"academic_year={season}.0&division={ncaa_level}.0&"
 336                + f"ranking_period={rp_value}&sport_code={sport_id}"
 337                + f"&stat_seq={stat_sequence}.0"
 338            )
 339            response = _get_webpage(url=url)
 340            best_method = False
 341    else:
 342        try:
 343            response = _get_webpage(url=url)
 344        except Exception as e:
 345            logging.info(f"Found exception when loading teams `{e}`")
 346            logging.info("Attempting backup method.")
 347            url = (
 348                "https://stats.ncaa.org/rankings/national_ranking?"
 349                + f"academic_year={season+1}.0&division={ncaa_level}.0&"
 350                + f"ranking_period={rp_value}&sport_code={sport_id}"
 351                + f"&stat_seq={stat_sequence}.0"
 352            )
 353            response = _get_webpage(url=url)
 354            best_method = False
 355
 356    soup = BeautifulSoup(response.text, features="lxml")
 357
 358    if best_method is True:
 359        soup = soup.find(
 360            "table",
 361            {"id": "stat_grid"},
 362        )
 363        soup = soup.find("tbody")
 364        t_rows = soup.find_all("tr")
 365
 366        for t in t_rows:
 367            team_id = t.find("a")
 368            team_id = team_id.get("href")
 369            team_id = team_id.replace("/teams/", "")
 370            team_id = int(team_id)
 371            team_name = t.find_all("td")[0].text
 372            team_conference_name = t.find_all("td")[1].text
 373            # del team
 374            temp_df = pd.DataFrame(
 375                {
 376                    "season": season,
 377                    "ncaa_division": ncaa_level,
 378                    "ncaa_division_formatted": formatted_level,
 379                    "team_conference_name": team_conference_name,
 380                    "team_id": team_id,
 381                    "school_name": team_name,
 382                    "sport_id": sport_id,
 383                },
 384                index=[0],
 385            )
 386            teams_df_arr.append(temp_df)
 387            del temp_df
 388    else:
 389        soup = soup.find(
 390            "table",
 391            {"id": "rankings_table"},
 392        )
 393        soup = soup.find("tbody")
 394        t_rows = soup.find_all("tr")
 395
 396        for t in t_rows:
 397            team_id = t.find("a")
 398            team_id = team_id.get("href")
 399            team_id = team_id.replace("/teams/", "")
 400            team_id = int(team_id)
 401            team = t.find_all("td")[1].get("data-order")
 402            team_name, team_conference_name = team.split(",")
 403            del team
 404            temp_df = pd.DataFrame(
 405                {
 406                    "season": season,
 407                    "ncaa_division": ncaa_level,
 408                    "ncaa_division_formatted": formatted_level,
 409                    "team_conference_name": team_conference_name,
 410                    "team_id": team_id,
 411                    "school_name": team_name,
 412                    "sport_id": sport_id,
 413                },
 414                index=[0],
 415            )
 416            teams_df_arr.append(temp_df)
 417            del temp_df
 418
 419    teams_df = pd.concat(teams_df_arr, ignore_index=True)
 420    teams_df = pd.merge(
 421        left=teams_df,
 422        right=schools_df,
 423        on=["school_name"],
 424        how="left"
 425    )
 426    teams_df.sort_values(by=["team_id"], inplace=True)
 427
 428    teams_df.to_csv(
 429        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
 430        + f"{season}_{formatted_level}_teams.csv",
 431        index=False,
 432    )
 433
 434    return teams_df
 435
 436
 437def load_volleyball_teams(
 438    start_year: int = 2011,
 439    get_mens_data: bool = False
 440) -> pd.DataFrame:
 441    """
 442    Compiles a list of known NCAA volleyball teams in NCAA volleyball history.
 443
 444    Parameters
 445    ----------
 446    `start_year` (int, optional):
 447        Optional argument.
 448        Specifies the first season you want
 449        NCAA volleyball team information from.
 450
 451    `get_mens_data` (bool, optional):
 452        Optional argument.
 453        If you want men's volleyball data instead of women's volleyball data,
 454        set this to `True`.
 455
 456    Usage
 457    ----------
 458    ```python
 459
 460    from ncaa_stats_py.volleyball import load_volleyball_teams
 461
 462    # WARNING: Running this script "as-is" for the first time may
 463    #          take some time.
 464    #          The *N*th time you run this script will be faster.
 465
 466    # Load in every women's volleyball team
 467    # from 2011 to present day.
 468    print(
 469        "Load in every women's volleyball team " +
 470        "from 2011 to present day."
 471    )
 472    df = load_volleyball_teams(get_mens_data=True)
 473    print(df)
 474
 475    # Load in every men's volleyball team
 476    # from 2011 to present day.
 477    print(
 478        "Load in every men's volleyball team " +
 479        "from 2011 to present day."
 480    )
 481    df = load_volleyball_teams()
 482    print(df)
 483
 484    # Load in every men's volleyball team
 485    # from 2020 to present day.
 486    print(
 487        "Load in every men's volleyball team " +
 488        "from 2020 to present day."
 489    )
 490    df = load_volleyball_teams(start_year=2020)
 491    print(df)
 492
 493    ```
 494
 495    Returns
 496    ----------
 497    A pandas `DataFrame` object with a list of
 498    all known college volleyball teams.
 499
 500    """
 501    # start_year = 2008
 502
 503    # if get_mens_data is True:
 504    #     sport_id = "WVB"
 505    # else:
 506    #     sport_id = "MVB"
 507
 508    teams_df = pd.DataFrame()
 509    teams_df_arr = []
 510    temp_df = pd.DataFrame()
 511
 512    now = datetime.now()
 513    mens_ncaa_divisions = ["I", "III"]
 514    womens_ncaa_divisions = ["I", "II", "III"]
 515    if now.month > 5 and get_mens_data is False:
 516        ncaa_seasons = [x for x in range(start_year, (now.year + 2))]
 517    elif now.month < 5 and get_mens_data is True:
 518        ncaa_seasons = [x for x in range(start_year, (now.year + 1))]
 519    else:
 520        ncaa_seasons = [x for x in range(start_year, (now.year + 1))]
 521
 522    logging.info(
 523        "Loading in all NCAA volleyball teams. "
 524        + "If this is the first time you're seeing this message, "
 525        + "it may take some time (3-10 minutes) for this to load."
 526    )
 527
 528    if get_mens_data is True:
 529        for s in ncaa_seasons:
 530            logging.info(
 531                f"Loading in men's volleyball teams for the {s} season."
 532            )
 533            for d in mens_ncaa_divisions:
 534                temp_df = get_volleyball_teams(
 535                    season=s,
 536                    level=d,
 537                    get_mens_data=True
 538                )
 539                teams_df_arr.append(temp_df)
 540                del temp_df
 541    else:
 542        for s in ncaa_seasons:
 543            logging.info(
 544                f"Loading in women's volleyball teams for the {s} season."
 545            )
 546            for d in womens_ncaa_divisions:
 547                temp_df = get_volleyball_teams(
 548                    season=s,
 549                    level=d
 550                )
 551                teams_df_arr.append(temp_df)
 552                del temp_df
 553
 554    teams_df = pd.concat(teams_df_arr, ignore_index=True)
 555    teams_df = teams_df.infer_objects()
 556    return teams_df
 557
 558
 559def get_volleyball_team_schedule(team_id: int) -> pd.DataFrame:
 560    """
 561    Retrieves a team schedule, from a valid NCAA volleyball team ID.
 562
 563    Parameters
 564    ----------
 565    `team_id` (int, mandatory):
 566        Required argument.
 567        Specifies the team you want a schedule from.
 568        This is separate from a school ID, which identifies the institution.
 569        A team ID should be unique to a school, and a season.
 570
 571    Usage
 572    ----------
 573    ```python
 574
 575    from ncaa_stats_py.volleyball import get_volleyball_team_schedule
 576
 577    ########################################
 578    #          Women's volleyball          #
 579    ########################################
 580
 581    # Get the team schedule for the
 582    # 2024 Toledo WVB team (D1, ID: 585329).
 583    print(
 584        "Get the team schedule for the " +
 585        "2024 Toledo WVB team (D1, ID: 585329)."
 586    )
 587    df = get_volleyball_team_schedule(585329)
 588    print(df)
 589
 590    # Get the team schedule for the
 591    # 2023 Black Hills St. WVB team (D2, ID: 559709).
 592    print(
 593        "Get the team schedule for the " +
 594        "2023 Black Hills St. WVB team (D2, ID: 559709)."
 595    )
 596    df = get_volleyball_team_schedule(559709)
 597    print(df)
 598
 599    # Get the team schedule for the
 600    # 2022 Mount Mary WVB team (D3, ID: 539750).
 601    print(
 602        "Get the team schedule for the " +
 603        "2022 Mount Mary WVB team (D3, ID: 539750)."
 604    )
 605    df = get_volleyball_team_schedule(539750)
 606    print(df)
 607
 608    # Get the team schedule for the
 609    # 2021 TCU WVB team (D1, ID: 522750).
 610    print(
 611        "Get the team schedule for the " +
 612        "2024 TCU WVB team (D1, ID: 522750)."
 613    )
 614    df = get_volleyball_team_schedule(522750)
 615    print(df)
 616
 617    # Get the team schedule for the
 618    # 2020 Purdue Northwest WVB team (D2, ID: 504832).
 619    print(
 620        "Get the team schedule for the " +
 621        "2020 Purdue Northwest WVB team (D2, ID: 504832)."
 622    )
 623    df = get_volleyball_team_schedule(504832)
 624    print(df)
 625
 626    # Get the team schedule for the
 627    # 2019 Juniata WVB team (D3, ID: 482642).
 628    print(
 629        "Get the team schedule for the " +
 630        "2019 Juniata WVB team (D3, ID: 482642)."
 631    )
 632    df = get_volleyball_team_schedule(482642)
 633    print(df)
 634
 635    ########################################
 636    #          Men's volleyball            #
 637    ########################################
 638
 639    # Get the team schedule for the
 640    # 2024 Missouri S&T MVB team (D1, ID: 573720).
 641    print(
 642        "Get the team schedule for the " +
 643        "2024 Missouri S&T MVB team (D1, ID: 573720)."
 644    )
 645    df = get_volleyball_team_schedule(573720)
 646    print(df)
 647
 648    # Get the team schedule for the
 649    # 2023 Rockford MVB team (D3, ID: 550890).
 650    print(
 651        "Get the team schedule for the " +
 652        "2023 Rockford MVB team (D3, ID: 550890)."
 653    )
 654    df = get_volleyball_team_schedule(550890)
 655    print(df)
 656
 657    # Get the team schedule for the
 658    # 2022 McKendree MVB team (D1, ID: 529896).
 659    print(
 660        "Get the team schedule for the " +
 661        "2022 McKendreeMaritime MVB team (D1, ID: 529896)."
 662    )
 663    df = get_volleyball_team_schedule(529896)
 664    print(df)
 665
 666    # Get the team schedule for the
 667    # 2021 Concordia Chicago MVB team (D3, ID: 508505).
 668    print(
 669        "Get the team schedule for the " +
 670        "2021 Concordia Chicago MVB team (D3, ID: 508505)."
 671    )
 672    df = get_volleyball_team_schedule(508505)
 673    print(df)
 674
 675    # Get the team schedule for the
 676    # 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
 677    print(
 678        "Get the team schedule for the " +
 679        "2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
 680    )
 681    df = get_volleyball_team_schedule(487992)
 682    print(df)
 683
 684    # Get the team schedule for the
 685    # 2019 Loras MVB team (D3, ID: 453845).
 686    print(
 687        "Get the team schedule for the " +
 688        "2019 Loras MVB team (D3, ID: 453845)."
 689    )
 690    df = get_volleyball_team_schedule(453845)
 691    print(df)
 692
 693    ```
 694
 695    Returns
 696    ----------
 697    A pandas `DataFrame` object with an NCAA volleyball team's schedule.
 698
 699    """
 700
 701    sport_id = ""
 702    schools_df = _get_schools()
 703    games_df = pd.DataFrame()
 704    games_df_arr = []
 705    season = 0
 706    temp_df = pd.DataFrame()
 707    load_from_cache = True
 708
 709    home_dir = expanduser("~")
 710    home_dir = _format_folder_str(home_dir)
 711
 712    url = f"https://stats.ncaa.org/teams/{team_id}"
 713
 714    try:
 715        team_df = load_volleyball_teams()
 716        team_df = team_df[team_df["team_id"] == team_id]
 717        season = team_df["season"].iloc[0]
 718        ncaa_division = team_df["ncaa_division"].iloc[0]
 719        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 720        sport_id = "WVB"
 721    except Exception:
 722        team_df = load_volleyball_teams(get_mens_data=True)
 723        team_df = team_df[team_df["team_id"] == team_id]
 724        season = team_df["season"].iloc[0]
 725        ncaa_division = team_df["ncaa_division"].iloc[0]
 726        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 727        sport_id = "MVB"
 728    # team_conference_name = team_df["team_conference_name"].iloc[0]
 729    # school_name = team_df["school_name"].iloc[0]
 730    # school_id = int(team_df["school_id"].iloc[0])
 731
 732    del team_df
 733
 734    if exists(f"{home_dir}/.ncaa_stats_py/"):
 735        pass
 736    else:
 737        mkdir(f"{home_dir}/.ncaa_stats_py/")
 738
 739    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
 740        pass
 741    else:
 742        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
 743
 744    if exists(
 745        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 746    ):
 747        pass
 748    else:
 749        mkdir(
 750            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 751        )
 752
 753    if exists(
 754        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 755        + f"{team_id}_team_schedule.csv"
 756    ):
 757        games_df = pd.read_csv(
 758            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 759            + f"{team_id}_team_schedule.csv"
 760        )
 761        file_mod_datetime = datetime.fromtimestamp(
 762            getmtime(
 763                f"{home_dir}/.ncaa_stats_py/"
 764                + f"volleyball_{sport_id}/team_schedule/"
 765                + f"{team_id}_team_schedule.csv"
 766            )
 767        )
 768    else:
 769        file_mod_datetime = datetime.today()
 770        load_from_cache = False
 771
 772    now = datetime.today()
 773
 774    age = now - file_mod_datetime
 775    if (
 776        age.days > 1 and
 777        season >= now.year
 778    ):
 779        load_from_cache = False
 780
 781    if load_from_cache is True:
 782        return games_df
 783
 784    response = _get_webpage(url=url)
 785    soup = BeautifulSoup(response.text, features="lxml")
 786
 787    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
 788    season_name = (
 789        soup.find("select", {"id": "year_list"})
 790        .find("option", {"selected": "selected"})
 791        .text
 792    )
 793
 794    soup = soup.find_all(
 795        "div",
 796        {"class": "col p-0"},
 797    )
 798
 799    # declaring it here to prevent potential problems down the road.
 800    table_data = ""
 801    for s in soup:
 802        try:
 803            temp_name = s.find("div", {"class": "card-header"})
 804            temp_name = temp_name.text
 805        except Exception as e:
 806            logging.warning(
 807                f"Could not parse card header. Full exception `{e}`. "
 808                + "Attempting alternate method."
 809            )
 810            temp_name = s.find("tr", {"class": "heading"}).find("td").text
 811
 812        if "schedule" in temp_name.lower():
 813            table_data = s.find("table")
 814
 815    t_rows = table_data.find_all("tr", {"class": "underline_rows"})
 816
 817    if len(t_rows) == 0:
 818        t_rows = table_data.find_all("tr")
 819
 820    for g in t_rows:
 821        is_valid_row = True
 822        game_num = 1
 823        ot_periods = 0
 824        is_home_game = True
 825        is_neutral_game = False
 826
 827        cells = g.find_all("td")
 828        if len(cells) <= 1:
 829            # Because of how *well* designed
 830            # stats.ncaa.org is, if we have to use execute
 831            # the `if len(t_rows) == 0:` code,
 832            # we need to catch any cases where every element in a
 833            # table row (`<tr>`) is a table header (`<th>`),
 834            # instead of a table data cell (`<td>`)
 835            continue
 836
 837        game_date = cells[0].text
 838
 839        # If "(" is in the same cell as the date,
 840        # this means that this game is an extra innings game.
 841        # The number encased in `()` is the actual number of innings.
 842        # We need to remove that from the date,
 843        # and move it into a separate variable.
 844        if "(" in game_date:
 845            game_date = game_date.replace(")", "")
 846            game_date, game_num = game_date.split("(")
 847            game_date = game_date.strip()
 848            game_num = int(game_num.strip())
 849
 850        if ":" in game_date and ("PM" in game_date or "AM" in game_date):
 851            game_date = datetime.strptime(
 852                game_date,
 853                "%m/%d/%Y %I:%M %p"
 854            ).date()
 855        else:
 856            game_date = datetime.strptime(
 857                game_date,
 858                "%m/%d/%Y"
 859            ).date()
 860
 861        try:
 862            opp_team_id = cells[1].find("a").get("href")
 863        except IndexError:
 864            logging.info(
 865                "Skipping row because it is clearly "
 866                + "not a row that has schedule data."
 867            )
 868            is_valid_row = False
 869        except AttributeError as e:
 870            logging.info(
 871                "Could not extract a team ID for this game. " +
 872                f"Full exception {e}"
 873            )
 874            opp_team_id = "-1"
 875        except Exception as e:
 876            logging.warning(
 877                "An unhandled exception has occurred when "
 878                + "trying to get the opposition team ID for this game. "
 879                f"Full exception `{e}`."
 880            )
 881            raise e
 882        if is_valid_row is True:
 883            if opp_team_id is not None:
 884                opp_team_id = opp_team_id.replace("/teams/", "")
 885                opp_team_id = int(opp_team_id)
 886
 887                try:
 888                    opp_team_name = cells[1].find("img").get("alt")
 889                except AttributeError:
 890                    logging.info(
 891                        "Couldn't find the opposition team name "
 892                        + "for this row from an image element. "
 893                        + "Attempting a backup method"
 894                    )
 895                    opp_team_name = cells[1].text
 896                except Exception as e:
 897                    logging.info(
 898                        "Unhandled exception when trying to get the "
 899                        + "opposition team name from this game. "
 900                        + f"Full exception `{e}`"
 901                    )
 902                    raise e
 903            else:
 904                opp_team_name = cells[1].text
 905
 906            if opp_team_name[0] == "@":
 907                # The logic for determining if this game was a
 908                # neutral site game doesn't care if that info is in
 909                # `opp_team_name`.
 910                opp_team_name = opp_team_name.strip().replace("@", "")
 911            elif "@" in opp_team_name:
 912                opp_team_name = opp_team_name.strip().split("@")[0]
 913            # opp_team_show_name = cells[1].text.strip()
 914
 915            opp_text = cells[1].text
 916            opp_text = opp_text.strip()
 917            if "@" in opp_text and opp_text[0] == "@":
 918                is_home_game = False
 919            elif "@" in opp_text and opp_text[0] != "@":
 920                is_neutral_game = True
 921                is_home_game = False
 922            # This is just to cover conference and NCAA championship
 923            # tournaments.
 924            elif "championship" in opp_text.lower():
 925                is_neutral_game = True
 926                is_home_game = False
 927            elif "ncaa" in opp_text.lower():
 928                is_neutral_game = True
 929                is_home_game = False
 930
 931            del opp_text
 932
 933            score = cells[2].text.strip()
 934            if len(score) == 0:
 935                score_1 = 0
 936                score_2 = 0
 937            elif (
 938                "canceled" not in score.lower() and
 939                "ppd" not in score.lower()
 940            ):
 941                score_1, score_2 = score.split("-")
 942
 943                # `score_1` should be "W `n`", "L `n`", or "T `n`",
 944                # with `n` representing the number of runs this team
 945                # scored in this game.
 946                # Let's remove the "W", "L", or "T" from `score_1`,
 947                # and determine which team won later on in this code.
 948                if any(x in score_1 for x in ["W", "L", "T"]):
 949                    score_1 = score_1.split(" ")[1]
 950
 951                if "(" in score_2:
 952                    score_2 = score_2.replace(")", "")
 953                    score_2, ot_periods = score_2.split("(")
 954                    ot_periods = ot_periods.replace("OT", "")
 955                    ot_periods = ot_periods.replace(" ", "")
 956                    ot_periods = int(ot_periods)
 957
 958                if ot_periods is None:
 959                    ot_periods = 0
 960                score_1 = int(score_1)
 961                score_2 = int(score_2)
 962            else:
 963                score_1 = None
 964                score_2 = None
 965
 966            try:
 967                game_id = cells[2].find("a").get("href")
 968                game_id = game_id.replace("/contests", "")
 969                game_id = game_id.replace("/box_score", "")
 970                game_id = game_id.replace("/", "")
 971                game_id = int(game_id)
 972                game_url = (
 973                    f"https://stats.ncaa.org/contests/{game_id}/box_score"
 974                )
 975            except AttributeError as e:
 976                logging.info(
 977                    "Could not parse a game ID for this game. "
 978                    + f"Full exception `{e}`."
 979                )
 980                game_id = None
 981                game_url = None
 982            except Exception as e:
 983                logging.info(
 984                    "An unhandled exception occurred when trying "
 985                    + "to find a game ID for this game. "
 986                    + f"Full exception `{e}`."
 987                )
 988                raise e
 989
 990            try:
 991                attendance = cells[3].text
 992                attendance = attendance.replace(",", "")
 993                attendance = attendance.replace("\n", "")
 994                attendance = int(attendance)
 995            except IndexError as e:
 996                logging.info(
 997                    "It doesn't appear as if there is an attendance column "
 998                    + "for this team's schedule table."
 999                    f"Full exception `{e}`."
1000                )
1001                attendance = None
1002            except ValueError as e:
1003                logging.info(
1004                    "There doesn't appear as if "
1005                    + "there is a recorded attendance. "
1006                    + "for this game/row. "
1007                    f"Full exception `{e}`."
1008                )
1009                attendance = None
1010            except Exception as e:
1011                logging.info(
1012                    "An unhandled exception occurred when trying "
1013                    + "to find this game's attendance. "
1014                    + f"Full exception `{e}`."
1015                )
1016                raise e
1017
1018            if is_home_game is True:
1019                temp_df = pd.DataFrame(
1020                    {
1021                        "season": season,
1022                        "season_name": season_name,
1023                        "game_id": game_id,
1024                        "game_date": game_date,
1025                        "game_num": game_num,
1026                        "ot_periods": ot_periods,
1027                        "home_team_id": team_id,
1028                        "home_team_name": school_name,
1029                        "away_team_id": opp_team_id,
1030                        "away_team_name": opp_team_name,
1031                        "home_team_sets_won": score_1,
1032                        "away_team_sets_won": score_2,
1033                        "is_neutral_game": is_neutral_game,
1034                        "game_url": game_url,
1035                    },
1036                    index=[0],
1037                )
1038                games_df_arr.append(temp_df)
1039                del temp_df
1040            elif is_neutral_game is True:
1041                # For the sake of simplicity,
1042                # order both team ID's,
1043                # and set the lower number of the two as
1044                # the "away" team in this neutral site game,
1045                # just so there's no confusion if someone
1046                # combines a ton of these team schedule `DataFrame`s,
1047                # and wants to remove duplicates afterwards.
1048                t_ids = [opp_team_id, team_id]
1049                t_ids.sort()
1050
1051                if t_ids[0] == team_id:
1052                    # home
1053                    temp_df = pd.DataFrame(
1054                        {
1055                            "season": season,
1056                            "season_name": season_name,
1057                            "game_id": game_id,
1058                            "game_date": game_date,
1059                            "game_num": game_num,
1060                            "ot_periods": ot_periods,
1061                            "home_team_id": team_id,
1062                            "home_team_name": school_name,
1063                            "away_team_id": opp_team_id,
1064                            "away_team_name": opp_team_name,
1065                            "home_team_sets_won": score_1,
1066                            "away_team_sets_won": score_2,
1067                            "is_neutral_game": is_neutral_game,
1068                            "game_url": game_url,
1069                        },
1070                        index=[0],
1071                    )
1072
1073                else:
1074                    # away
1075                    temp_df = pd.DataFrame(
1076                        {
1077                            "season": season,
1078                            "season_name": season_name,
1079                            "game_id": game_id,
1080                            "game_date": game_date,
1081                            "game_num": game_num,
1082                            "ot_periods": ot_periods,
1083                            "home_team_id": opp_team_id,
1084                            "home_team_name": opp_team_name,
1085                            "away_team_id": team_id,
1086                            "away_team_name": school_name,
1087                            "home_team_sets_won": score_2,
1088                            "away_team_sets_won": score_1,
1089                            "is_neutral_game": is_neutral_game,
1090                            "game_url": game_url,
1091                        },
1092                        index=[0],
1093                    )
1094
1095                games_df_arr.append(temp_df)
1096                del temp_df
1097            else:
1098                temp_df = pd.DataFrame(
1099                    {
1100                        "season": season,
1101                        "season_name": season_name,
1102                        "game_id": game_id,
1103                        "game_date": game_date,
1104                        "game_num": game_num,
1105                        "ot_periods": ot_periods,
1106                        "home_team_id": opp_team_id,
1107                        "home_team_name": opp_team_name,
1108                        "away_team_id": team_id,
1109                        "away_team_name": school_name,
1110                        "home_team_sets_won": score_2,
1111                        "away_team_sets_won": score_1,
1112                        "is_neutral_game": is_neutral_game,
1113                        "game_url": game_url,
1114                    },
1115                    index=[0],
1116                )
1117
1118                games_df_arr.append(temp_df)
1119                del temp_df
1120
1121        # team_photo = team_id.find("img").get("src")
1122
1123    games_df = pd.concat(games_df_arr, ignore_index=True)
1124
1125    temp_df = schools_df.rename(
1126        columns={
1127            "school_name": "home_team_name",
1128            "school_id": "home_school_id"
1129        }
1130    )
1131    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")
1132
1133    temp_df = schools_df.rename(
1134        columns={
1135            "school_name": "away_team_name",
1136            "school_id": "away_school_id"
1137        }
1138    )
1139    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
1140    games_df["ncaa_division"] = ncaa_division
1141    games_df["ncaa_division_formatted"] = ncaa_division_formatted
1142
1143    # games_df["game_url"] = games_df["game_url"].str.replace("/box_score", "")
1144    games_df.to_csv(
1145        f"{home_dir}/.ncaa_stats_py/"
1146        + f"volleyball_{sport_id}/team_schedule/"
1147        + f"{team_id}_team_schedule.csv",
1148        index=False,
1149    )
1150
1151    return games_df
1152
1153
1154def get_volleyball_day_schedule(
1155    game_date: str | date | datetime,
1156    level: str | int = "I",
1157    get_mens_data: bool = False
1158):
1159    """
1160    Given a date and NCAA level, this function retrieves volleyball every game
1161    for that date.
1162
1163    Parameters
1164    ----------
1165    `game_date` (int, mandatory):
1166        Required argument.
1167        Specifies the date you want a volleyball schedule from.
1168        For best results, pass a string formatted as "YYYY-MM-DD".
1169
1170    `level` (int, mandatory):
1171        Required argument.
1172        Specifies the level/division you want a
1173        NCAA volleyball schedule from.
1174        This can either be an integer (1-3) or a string ("I"-"III").
1175
1176    `get_mens_data` (bool, optional):
1177        Optional argument.
1178        If you want men's volleyball data instead of women's volleyball data,
1179        set this to `True`.
1180
1181    Usage
1182    ----------
1183    ```python
1184
1185    from ncaa_stats_py.volleyball import get_volleyball_day_schedule
1186
1187    ########################################
1188    #         Women's Volleyball           #
1189    ########################################
1190
1191    # Get all DI games (if any) that were played on December 22th, 2024.
1192    print("Get all games (if any) that were played on December 22th, 2024.")
1193    df = get_volleyball_day_schedule("2024-12-22", level=1)
1194    print(df)
1195
1196    # Get all division II games that were played on November 24th, 2024.
1197    print("Get all division II games that were played on November 24th, 2024.")
1198    df = get_volleyball_day_schedule("2024-11-24", level="II")
1199    print(df)
1200
1201    # Get all DIII games that were played on October 27th, 2024.
1202    print("Get all DIII games that were played on October 27th, 2024.")
1203    df = get_volleyball_day_schedule("2024-10-27", level="III")
1204    print(df)
1205
1206    # Get all DI games (if any) that were played on September 29th, 2024.
1207    print(
1208        "Get all DI games (if any) that were played on September 29th, 2024."
1209    )
1210    df = get_volleyball_day_schedule("2024-09-29")
1211    print(df)
1212
1213    # Get all DII games played on August 30th, 2024.
1214    print("Get all DI games played on August 30th, 2024.")
1215    df = get_volleyball_day_schedule("2024-08-30")
1216    print(df)
1217
1218    # Get all division III games played on September 23rd, 2023.
1219    print("Get all division III games played on September 23rd, 2023.")
1220    df = get_volleyball_day_schedule("2023-09-23", level="III")
1221    print(df)
1222
1223    ########################################
1224    #          Men's Volleyball            #
1225    ########################################
1226
1227    # Get all DI games that will be played on April 12th, 2025.
1228    print("Get all games that will be played on April 12th, 2025.")
1229    df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True)
1230    print(df)
1231
1232    # Get all DI games that were played on January 30th, 2025.
1233    print("Get all games that were played on January 30th, 2025.")
1234    df = get_volleyball_day_schedule(
1235        "2025-01-30", level="I", get_mens_data=True
1236    )
1237    print(df)
1238
1239    # Get all division III games that were played on April 6th, 2024.
1240    print("Get all division III games that were played on April 6th, 2024.")
1241    df = get_volleyball_day_schedule(
1242        "2025-04-05", level="III", get_mens_data=True
1243    )
1244    print(df)
1245
1246    # Get all DI games (if any) that were played on March 30th, 2024.
1247    print("Get all DI games (if any) that were played on March 30th, 2024.")
1248    df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True)
1249    print(df)
1250
1251    # Get all DI games played on February 23rd, 2024.
1252    print("Get all DI games played on February 23rd, 2024.")
1253    df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True)
1254    print(df)
1255
1256    # Get all division III games played on February 11th, 2023.
1257    print("Get all division III games played on February 11th, 2023.")
1258    df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True)
1259    print(df)
1260
1261    ```
1262
1263    Returns
1264    ----------
1265    A pandas `DataFrame` object with all volleyball games played on that day,
1266    for that NCAA division/level.
1267
1268    """
1269
1270    season = 0
1271    sport_id = "WVB"
1272
1273    schedule_df = pd.DataFrame()
1274    schedule_df_arr = []
1275
1276    if isinstance(game_date, date):
1277        game_datetime = datetime.combine(
1278            game_date, datetime.min.time()
1279        )
1280    elif isinstance(game_date, datetime):
1281        game_datetime = game_date
1282    elif isinstance(game_date, str):
1283        game_datetime = parser.parse(
1284            game_date
1285        )
1286    else:
1287        unhandled_datatype = type(game_date)
1288        raise ValueError(
1289            f"Unhandled datatype for `game_date`: `{unhandled_datatype}`"
1290        )
1291
1292    if isinstance(level, int) and level == 1:
1293        formatted_level = "I"
1294        ncaa_level = 1
1295    elif isinstance(level, int) and level == 2:
1296        formatted_level = "II"
1297        ncaa_level = 2
1298    elif isinstance(level, int) and level == 3:
1299        formatted_level = "III"
1300        ncaa_level = 3
1301    elif isinstance(level, str) and (
1302        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1303    ):
1304        ncaa_level = 1
1305        formatted_level = level.upper()
1306    elif isinstance(level, str) and (
1307        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1308    ):
1309        ncaa_level = 2
1310        formatted_level = level.upper()
1311    elif isinstance(level, str) and (
1312        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1313    ):
1314        ncaa_level = 3
1315        formatted_level = level.upper()
1316
1317    del level
1318
1319    if get_mens_data is True:
1320        sport_id = "MVB"
1321    elif get_mens_data is False:
1322        sport_id = "WVB"
1323    else:
1324        raise ValueError(
1325            f"Unhandled value for `get_wbb_data`: `{get_mens_data}`"
1326        )
1327
1328    season = game_datetime.year
1329    game_month = game_datetime.month
1330    game_day = game_datetime.day
1331    game_year = game_datetime.year
1332
1333    if game_month > 7:
1334        season += 1
1335        url = (
1336            "https://stats.ncaa.org/contests/" +
1337            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1338            f"&academic_year={season}&division={ncaa_level}" +
1339            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1340            "&commit=Submit"
1341        )
1342    else:
1343        url = (
1344            "https://stats.ncaa.org/contests/" +
1345            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1346            f"&academic_year={season}&division={ncaa_level}" +
1347            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1348            "&commit=Submit"
1349        )
1350
1351    response = _get_webpage(url=url)
1352    soup = BeautifulSoup(response.text, features="lxml")
1353
1354    game_boxes = soup.find_all("div", {"class": "table-responsive"})
1355
1356    for box in game_boxes:
1357        game_id = None
1358        game_alt_text = None
1359        game_num = 1
1360        # t_box = box.find("table")
1361        table_box = box.find("table")
1362        table_rows = table_box.find_all("tr")
1363
1364        # Date/attendance
1365        game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text
1366        game_date_str = game_date_str.replace("\n", "")
1367        game_date_str = game_date_str.strip()
1368        game_date_str = game_date_str.replace("TBA ", "TBA")
1369        game_date_str = game_date_str.replace("TBD ", "TBD")
1370        game_date_str = game_date_str.replace("PM ", "PM")
1371        game_date_str = game_date_str.replace("AM ", "AM")
1372        game_date_str = game_date_str.strip()
1373        attendance_str = table_rows[0].find(
1374            "div",
1375            {"class": "col p-0 text-right"}
1376        ).text
1377
1378        attendance_str = attendance_str.replace("Attend:", "")
1379        attendance_str = attendance_str.replace(",", "")
1380        attendance_str = attendance_str.replace("\n", "")
1381        if (
1382            "st" in attendance_str.lower() or
1383            "nd" in attendance_str.lower() or
1384            "rd" in attendance_str.lower() or
1385            "th" in attendance_str.lower()
1386        ):
1387            # This is not an attendance,
1388            # this is whatever quarter/half/inning this game is in.
1389            attendance_num = None
1390        elif "final" in attendance_str.lower():
1391            attendance_num = None
1392        elif len(attendance_str) > 0:
1393            attendance_num = int(attendance_str)
1394        else:
1395            attendance_num = None
1396
1397        if "(" in game_date_str:
1398            game_date_str = game_date_str.replace(")", "")
1399            game_date_str, game_num = game_date_str.split("(")
1400            game_num = int(game_num)
1401
1402        if "TBA" in game_date_str:
1403            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
1404        elif "tba" in game_date_str:
1405            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
1406        elif "TBD" in game_date_str:
1407            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
1408        elif "tbd" in game_date_str:
1409            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
1410        elif (
1411            "tbd" not in game_date_str.lower() and
1412            ":" not in game_date_str.lower()
1413        ):
1414            game_date_str = game_date_str.replace(" ", "")
1415            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
1416        else:
1417            game_datetime = datetime.strptime(
1418                game_date_str,
1419                '%m/%d/%Y %I:%M %p'
1420            )
1421        game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
1422
1423        game_alt_text = table_rows[1].find_all("td")[0].text
1424        if game_alt_text is not None and len(game_alt_text) > 0:
1425            game_alt_text = game_alt_text.replace("\n", "")
1426            game_alt_text = game_alt_text.strip()
1427
1428        if len(game_alt_text) == 0:
1429            game_alt_text = None
1430
1431        urls_arr = box.find_all("a")
1432
1433        for u in urls_arr:
1434            url_temp = u.get("href")
1435            if "contests" in url_temp:
1436                game_id = url_temp
1437                del url_temp
1438
1439        if game_id is None:
1440            for r in range(0, len(table_rows)):
1441                temp = table_rows[r]
1442                temp_id = temp.get("id")
1443
1444                if temp_id is not None and len(temp_id) > 0:
1445                    game_id = temp_id
1446
1447        del urls_arr
1448
1449        game_id = game_id.replace("/contests", "")
1450        game_id = game_id.replace("/box_score", "")
1451        game_id = game_id.replace("/livestream_scoreboards", "")
1452        game_id = game_id.replace("/", "")
1453        game_id = game_id.replace("contest_", "")
1454        game_id = int(game_id)
1455
1456        table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"})
1457        away_team_row = table_rows[0]
1458        home_team_row = table_rows[1]
1459
1460        # Away team
1461        td_arr = away_team_row.find_all("td")
1462
1463        try:
1464            away_team_name = td_arr[0].find("img").get("alt")
1465        except Exception:
1466            away_team_name = td_arr[1].text
1467        away_team_name = away_team_name.replace("\n", "")
1468        away_team_name = away_team_name.strip()
1469
1470        try:
1471            away_team_id = td_arr[1].find("a").get("href")
1472            away_team_id = away_team_id.replace("/teams/", "")
1473            away_team_id = int(away_team_id)
1474        except AttributeError:
1475            away_team_id = None
1476            logging.info("No team ID found for the away team")
1477        except Exception as e:
1478            raise e
1479
1480        away_sets_scored = td_arr[-1].text
1481        away_sets_scored = away_sets_scored.replace("\n", "")
1482        away_sets_scored = away_sets_scored.replace("\xa0", "")
1483
1484        if "ppd" in away_sets_scored.lower():
1485            continue
1486        elif "cancel" in away_sets_scored.lower():
1487            continue
1488
1489        if len(away_sets_scored) > 0:
1490            away_sets_scored = int(away_sets_scored)
1491        else:
1492            away_sets_scored = 0
1493
1494        del td_arr
1495
1496        # Home team
1497        td_arr = home_team_row.find_all("td")
1498
1499        try:
1500            home_team_name = td_arr[0].find("img").get("alt")
1501        except Exception:
1502            home_team_name = td_arr[1].text
1503        home_team_name = home_team_name.replace("\n", "")
1504        home_team_name = home_team_name.strip()
1505
1506        try:
1507            home_team_id = td_arr[1].find("a").get("href")
1508            home_team_id = home_team_id.replace("/teams/", "")
1509            home_team_id = int(home_team_id)
1510        except AttributeError:
1511            home_team_id = None
1512            logging.info("No team ID found for the home team")
1513        except Exception as e:
1514            raise e
1515
1516        home_sets_scored = td_arr[-1].text
1517        home_sets_scored = home_sets_scored.replace("\n", "")
1518        home_sets_scored = home_sets_scored.replace("\xa0", "")
1519
1520        if "ppd" in home_sets_scored.lower():
1521            continue
1522        elif "cancel" in home_sets_scored.lower():
1523            continue
1524
1525        if len(home_sets_scored) > 0:
1526            home_sets_scored = int(home_sets_scored)
1527        else:
1528            home_sets_scored = 0
1529
1530        temp_df = pd.DataFrame(
1531            {
1532                "season": season,
1533                "sport_id": sport_id,
1534                "game_date": game_datetime.strftime("%Y-%m-%d"),
1535                "game_datetime": game_datetime.isoformat(),
1536                "game_id": game_id,
1537                "formatted_level": formatted_level,
1538                "ncaa_level": ncaa_level,
1539                "game_alt_text": game_alt_text,
1540                "away_team_id": away_team_id,
1541                "away_team_name": away_team_name,
1542                "home_team_id": home_team_id,
1543                "home_team_name": home_team_name,
1544                "home_sets_scored": home_sets_scored,
1545                "away_sets_scored": away_sets_scored,
1546                "attendance": attendance_num
1547            },
1548            index=[0]
1549        )
1550        schedule_df_arr.append(temp_df)
1551
1552        del temp_df
1553
1554    if len(schedule_df_arr) >= 1:
1555        schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1556    else:
1557        logging.warning(
1558            "Could not find any game(s) for "
1559            + f"{game_datetime.year:00d}-{game_datetime.month:00d}"
1560            + f"-{game_datetime.day:00d}. "
1561            + "If you believe this is an error, "
1562            + "please raise an issue at "
1563            + "\n https://github.com/armstjc/ncaa_stats_py/issues \n"
1564        )
1565    return schedule_df
1566
1567
1568def get_full_volleyball_schedule(
1569    season: int,
1570    level: str | int = "I",
1571    get_mens_data: bool = True
1572) -> pd.DataFrame:
1573    """
1574    Retrieves a full volleyball schedule,
1575    from an NCAA level (`"I"`, `"II"`, `"III"`).
1576    The way this is done is by going through every team in a division,
1577    and parsing the schedules of every team in a division.
1578
1579    This function will take time when first run (30-60 minutes)!
1580    You have been warned.
1581
1582    Parameters
1583    ----------
1584    `season` (int, mandatory):
1585        Specifies the season you want a schedule from.
1586
1587    `level` (int | str, mandatory):
1588        Specifies the team you want a schedule from.
1589
1590    `get_mens_data` (bool, optional):
1591        Optional argument.
1592        If you want men's volleyball data instead of women's volleyball data,
1593        set this to `True`.
1594
1595    Usage
1596    ----------
1597    ```python
1598
1599    from ncaa_stats_py.volleyball import get_full_volleyball_schedule
1600
1601    ##############################################################################
1602    # NOTE
1603    # This function will easily take an hour or more
1604    # to run for the first time in a given season and NCAA level!
1605    # You have been warned!
1606    ##############################################################################
1607
1608    # Get the entire 2024 schedule for the 2024 women's D1 volleyball season.
1609    print(
1610        "Get the entire 2024 schedule " +
1611        "for the 2024 women's D1 volleyball season."
1612    )
1613    df = get_full_volleyball_schedule(season=2024, level="I")
1614    print(df)
1615
1616    # Get the entire 2024 schedule for the 2024 men's D1 volleyball season.
1617    # print(
1618    #     "Get the entire 2024 schedule for " +
1619    #     "the 2024 men's D1 volleyball season."
1620    # )
1621    # df = get_full_volleyball_schedule(
1622    #     season=2024,
1623    #     level="I",
1624    #     get_mens_data=True
1625    # )
1626    # print(df)
1627
1628    # You can also input `level` as an integer.
1629    # In addition, this and other functions cache data,
1630    # so this should load very quickly
1631    # compared to the first run of this function.
1632    print("You can also input `level` as an integer.")
1633    print(
1634        "In addition, this and other functions cache data, "
1635        + "so this should load very quickly "
1636        + "compared to the first run of this function."
1637    )
1638    df = get_full_volleyball_schedule(season=2024, level=1)
1639    print(df)
1640
1641    ```
1642
1643    Returns
1644    ----------
1645    A pandas `DataFrame` object with an NCAA volleyball
1646    schedule for a specific season and level.
1647    """
1648
1649    sport_id = ""
1650    load_from_cache = True
1651    home_dir = expanduser("~")
1652    home_dir = _format_folder_str(home_dir)
1653    schedule_df = pd.DataFrame()
1654    schedule_df_arr = []
1655    temp_df = pd.DataFrame()
1656    formatted_level = ""
1657    ncaa_level = 0
1658
1659    if get_mens_data is True:
1660        sport_id = "MVB"
1661    else:
1662        sport_id = "WVB"
1663
1664    if isinstance(level, int) and level == 1:
1665        formatted_level = "I"
1666        ncaa_level = 1
1667    elif isinstance(level, int) and level == 2:
1668        formatted_level = "II"
1669        ncaa_level = 2
1670    elif isinstance(level, int) and level == 3:
1671        formatted_level = "III"
1672        ncaa_level = 3
1673    elif isinstance(level, str) and (
1674        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1675    ):
1676        ncaa_level = 1
1677        formatted_level = level.upper()
1678    elif isinstance(level, str) and (
1679        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1680    ):
1681        ncaa_level = 2
1682        formatted_level = level.upper()
1683    elif isinstance(level, str) and (
1684        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1685    ):
1686        ncaa_level = 3
1687        formatted_level = level.upper()
1688
1689    del level
1690
1691    if exists(f"{home_dir}/.ncaa_stats_py/"):
1692        pass
1693    else:
1694        mkdir(f"{home_dir}/.ncaa_stats_py/")
1695
1696    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
1697        pass
1698    else:
1699        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
1700
1701    if exists(
1702        f"{home_dir}/.ncaa_stats_py/" +
1703        f"volleyball_{sport_id}/full_schedule/"
1704    ):
1705        pass
1706    else:
1707        mkdir(
1708            f"{home_dir}/.ncaa_stats_py/" +
1709            f"volleyball_{sport_id}/full_schedule/"
1710        )
1711
1712    if exists(
1713        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/"
1714        + f"{season}_{formatted_level}_full_schedule.csv"
1715    ):
1716        teams_df = pd.read_csv(
1717            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/"
1718            + f"{season}_{formatted_level}_full_schedule.csv"
1719        )
1720        file_mod_datetime = datetime.fromtimestamp(
1721            getmtime(
1722                f"{home_dir}/.ncaa_stats_py/" +
1723                f"volleyball_{sport_id}/full_schedule/"
1724                + f"{season}_{formatted_level}_full_schedule.csv"
1725            )
1726        )
1727    else:
1728        file_mod_datetime = datetime.today()
1729        load_from_cache = False
1730
1731    now = datetime.today()
1732
1733    age = now - file_mod_datetime
1734
1735    if (
1736        age.days > 1 and
1737        season >= now.year
1738    ):
1739        load_from_cache = False
1740
1741    if load_from_cache is True:
1742        return teams_df
1743
1744    teams_df = load_volleyball_teams()
1745    teams_df = teams_df[
1746        (teams_df["season"] == season) &
1747        (teams_df["ncaa_division"] == ncaa_level)
1748    ]
1749    team_ids_arr = teams_df["team_id"].to_numpy()
1750
1751    for team_id in tqdm(team_ids_arr):
1752        temp_df = get_volleyball_team_schedule(team_id=team_id)
1753        schedule_df_arr.append(temp_df)
1754
1755    schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1756    schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first")
1757    schedule_df.to_csv(
1758        f"{home_dir}/.ncaa_stats_py/"
1759        + f"volleyball_{sport_id}/full_schedule/"
1760        + f"{season}_{formatted_level}_full_schedule.csv",
1761        index=False,
1762    )
1763    return schedule_df
1764
1765
def get_volleyball_team_roster(team_id: int) -> pd.DataFrame:
    """
    Retrieves a volleyball team's roster from a given team ID.

    The team is looked up in the women's volleyball team list first.
    If that lookup fails for any reason, the men's volleyball team list
    is used instead. The parsed roster is cached as a CSV file in
    `.ncaa_stats_py/volleyball_{sport_id}/rosters/` inside the user's
    home directory. A cached roster is returned as-is unless the file is
    at least 14 days old and the team's season is the current calendar
    year or later, in which case the roster is downloaded again.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a roster from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_roster

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the volleyball roster for the
    # 2024 Weber St. WVB team (D1, ID: 585347).
    df = get_volleyball_team_roster(585347)
    print(df)

    # Get the volleyball roster for the
    # 2023 Montevallo WVB team (D2, ID: 559599).
    df = get_volleyball_team_roster(559599)
    print(df)

    # Get the volleyball roster for the
    # 2022 Millsaps team (D3, ID: 539944).
    df = get_volleyball_team_roster(539944)
    print(df)

    # Get the volleyball roster for the
    # 2021 Binghamton WVB team (D1, ID: 522893).
    df = get_volleyball_team_roster(522893)
    print(df)

    # Get the volleyball roster for the
    # 2020 Holy Family WVB team (D2, ID: 504760).
    df = get_volleyball_team_roster(504760)
    print(df)

    # Get the volleyball roster for the
    # 2019 Franciscan team (D3, ID: 482939).
    df = get_volleyball_team_roster(482939)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the volleyball roster for the
    # 2024 Hawaii MVB team (D1, ID: 573674).
    df = get_volleyball_team_roster(573674)
    print(df)

    # Get the volleyball roster for the
    # 2023 Widener MVB team (D3, ID: 550860).
    df = get_volleyball_team_roster(550860)
    print(df)

    # Get the volleyball roster for the
    # 2022 Alderson Broaddus MVB team (D1, ID: 529880).
    df = get_volleyball_team_roster(529880)
    print(df)

    # Get the volleyball roster for the
    # 2021 Geneva MVB team (D3, ID: 508506).
    df = get_volleyball_team_roster(508506)
    print(df)

    # Get the volleyball roster for the
    # 2020 Urbana MVB team (D1, ID: 484975).
    df = get_volleyball_team_roster(484975)
    print(df)

    # Get the volleyball roster for the
    # 2019 Eastern Nazarene MVB team (D3, ID: 453876).
    df = get_volleyball_team_roster(453876)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with
    an NCAA volleyball team's roster for that season.

    Raises
    ----------
    `ValueError`:
        Raised when the roster table has no player rows, or when it
        contains a column this function does not know how to handle.
    """
    # Local import: only `mkdir` is imported from `os` at module level.
    from os import makedirs

    roster_df_arr = []
    url = f"https://stats.ncaa.org/teams/{team_id}/roster"
    load_from_cache = True
    home_dir = _format_folder_str(expanduser("~"))

    # Final column order of the returned/cached roster.
    # Columns that are never filled in (e.g. "player_weight")
    # are added as empty columns by the closing `reindex()`.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "ncaa_division",
        "ncaa_division_formatted",
        "team_conference_name",
        "school_id",
        "school_name",
        "player_id",
        "player_jersey_num",
        "player_full_name",
        "player_first_name",
        "player_last_name",
        "player_class",
        "player_positions",
        "player_height_string",
        "player_weight",
        "player_hometown",
        "player_high_school",
        "player_G",
        "player_GS",
        "player_url",
    ]

    def _team_details(teams: pd.DataFrame) -> tuple:
        # Pull the metadata for `team_id` out of a teams table.
        # `.iloc[0]` raises when the team is not in `teams`,
        # which is what triggers the fallback to the men's table below.
        team_row = teams[teams["team_id"] == team_id]
        return (
            team_row["season"].iloc[0],
            team_row["ncaa_division"].iloc[0],
            team_row["ncaa_division_formatted"].iloc[0],
            team_row["team_conference_name"].iloc[0],
            int(team_row["school_id"].iloc[0]),
        )

    try:
        team_details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        # Deliberately broad: any failure with the women's table
        # means the men's table gets a try before giving up.
        team_details = _team_details(
            load_volleyball_teams(get_mens_data=True)
        )
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_id,
    ) = team_details

    # `exist_ok=True` creates any missing parent folders too, and does not
    # fail if another process creates the folder at the same time.
    cache_dir = f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/rosters/"
    makedirs(cache_dir, exist_ok=True)
    cache_file = cache_dir + f"{team_id}_roster.csv"

    if exists(cache_file):
        cached_df = pd.read_csv(cache_file)
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = datetime.today()
        load_from_cache = False

    now = datetime.today()
    age = now - file_mod_datetime

    # Only rosters from the current (or a future) season are refreshed,
    # and only once the cached copy is two weeks old.
    if age.days >= 14 and season >= now.year:
        load_from_cache = False

    if load_from_cache is True:
        return cached_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # The school name comes from the logo's alt text when there is a logo,
    # otherwise from the link text with its last word removed.
    try:
        school_name = soup.find(
            "div",
            {"class": "card"}
        ).find("img").get("alt")
    except Exception:
        school_name = soup.find("div", {"class": "card"}).find("a").text
        school_name = school_name.rsplit(" ", maxsplit=1)[0]

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # The roster table is looked up under two different class strings.
    try:
        table = soup.find(
            "table",
            {"class": "dataTable small_font"},
        )
        table_headers = table.find("thead").find_all("th")
    except Exception:
        table = soup.find(
            "table",
            {"class": "dataTable small_font no_padding"},
        )
        table_headers = table.find("thead").find_all("th")
    table_headers = [x.text for x in table_headers]

    for t in table.find("tbody").find_all("tr"):
        t_cells = [x.text for x in t.find_all("td")]
        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The first link in the row is the player's page ("/players/<id>").
        player_href = t.find("a").get("href")
        temp_df["player_url"] = f"https://stats.ncaa.org{player_href}"
        temp_df["player_id"] = int(
            player_href.replace("/players", "").replace("/", "")
        )

        roster_df_arr.append(temp_df)

    if not roster_df_arr:
        # `pd.concat()` would raise a less helpful `ValueError` here.
        raise ValueError(
            f"No players were found in the roster table for team ID {team_id}."
        )

    roster_df = pd.concat(roster_df_arr, ignore_index=True)
    roster_df["season"] = season
    roster_df["season_name"] = season_name
    roster_df["ncaa_division"] = ncaa_division
    roster_df["ncaa_division_formatted"] = ncaa_division_formatted
    roster_df["team_conference_name"] = team_conference_name
    roster_df["school_id"] = school_id
    roster_df["school_name"] = school_name
    roster_df["sport_id"] = sport_id

    roster_df.rename(
        columns={
            "GP": "player_G",
            "GS": "player_GS",
            "#": "player_jersey_num",
            "Name": "player_full_name",
            "Class": "player_class",
            "Position": "player_positions",
            "Height": "player_height_string",
            "Hometown": "player_hometown",
            "High School": "player_high_school",
        },
        inplace=True
    )

    # Everything after the first space is treated as the last name.
    roster_df[["player_first_name", "player_last_name"]] = roster_df[
        "player_full_name"
    ].str.split(" ", n=1, expand=True)

    # Fail loudly when the page has a column this function does not map,
    # instead of silently dropping it in the `reindex()` below.
    for i in roster_df.columns:
        if i not in stat_columns:
            raise ValueError(
                f"Unhandled column name {i}"
            )

    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)

    roster_df.to_csv(cache_file, index=False)
    return roster_df
2121
2122
def get_volleyball_player_season_stats(
    team_id: int,
) -> pd.DataFrame:
    """
    Given a team ID, this function retrieves and parses
    the season stats for all of the players in a given volleyball team.

    The team is looked up in the women's volleyball team list first.
    If that lookup fails for any reason, the men's volleyball team list
    is used instead. Parsed stats are cached as a CSV file in
    `.ncaa_stats_py/volleyball_{sport_id}/player_season_stats/` inside
    the user's home directory. A cached file is returned as-is unless
    `age.days > 1` for that file and the team's season is the current
    calendar year or later, in which case the stats are downloaded again.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want volleyball stats from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_player_season_stats


    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the season stats for the
    # 2024 Ohio St. WVB team (D1, ID: 585398).
    df = get_volleyball_player_season_stats(585398)
    print(df)

    # Get the season stats for the
    # 2023 Emory & Henry WVB team (D2, ID: 559738).
    df = get_volleyball_player_season_stats(559738)
    print(df)

    # Get the season stats for the
    # 2022 Fredonia WVB team (D3, ID: 539881).
    df = get_volleyball_player_season_stats(539881)
    print(df)

    # Get the season stats for the
    # 2021 Oklahoma WVB team (D1, ID: 523163).
    df = get_volleyball_player_season_stats(523163)
    print(df)

    # Get the season stats for the
    # 2020 North Greenville WVB team (D2, ID: 504820).
    df = get_volleyball_player_season_stats(504820)
    print(df)

    # Get the season stats for the
    # 2019 SUNY Potsdam team (D3, ID: 482714).
    df = get_volleyball_player_season_stats(482714)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the season stats for the
    # 2024 Lees-McRae MVB team (D1, ID: 573699).
    df = get_volleyball_player_season_stats(573699)
    print(df)

    # Get the season stats for the
    # 2023 Elizabethtown MVB team (D3, ID: 550871).
    df = get_volleyball_player_season_stats(550871)
    print(df)

    # Get the season stats for the
    # 2022 Limestone MVB team (D1, ID: 529884).
    df = get_volleyball_player_season_stats(529884)
    print(df)

    # Get the season stats for the
    # 2021 Maranatha Baptist MVB team (D3, ID: 508471).
    df = get_volleyball_player_season_stats(508471)
    print(df)

    # Get the season stats for the
    # 2020 CUI MVB team (D1, ID: 484972).
    df = get_volleyball_player_season_stats(484972)
    print(df)

    # Get the season stats for the
    # 2019 SUNY New Paltz MVB team (D3, ID: 453851).
    df = get_volleyball_player_season_stats(453851)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the season stats for
    all players with a given NCAA volleyball team.

    Raises
    ----------
    `ValueError`:
        Raised when the stats table has no player rows, or when it
        contains a column this function does not know how to handle.
    """
    # Local import: only `mkdir` is imported from `os` at module level.
    from os import makedirs

    load_from_cache = True
    stats_df_arr = []

    # Final column order of the returned/cached stats table.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "team_id",
        "team_conference_name",
        "school_id",
        "school_name",
        "ncaa_division",
        "ncaa_division_formatted",
        "player_id",
        "player_jersey_number",
        "player_last_name",
        "player_first_name",
        "player_full_name",
        "player_class",
        "player_position",
        "player_height",
        "GP",
        "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    def _team_details(teams: pd.DataFrame) -> tuple:
        # Pull the metadata for `team_id` out of a teams table.
        # `.iloc[0]` raises when the team is not in `teams`,
        # which is what triggers the fallback to the men's table below.
        team_row = teams[teams["team_id"] == team_id]
        return (
            team_row["season"].iloc[0],
            int(team_row["ncaa_division"].iloc[0]),
            team_row["ncaa_division_formatted"].iloc[0],
            team_row["team_conference_name"].iloc[0],
            team_row["school_name"].iloc[0],
            int(team_row["school_id"].iloc[0]),
        )

    try:
        team_details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        # Deliberately broad: any failure with the women's table
        # means the men's table gets a try before giving up.
        team_details = _team_details(
            load_volleyball_teams(get_mens_data=True)
        )
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_name,
        school_id,
    ) = team_details

    home_dir = _format_folder_str(expanduser("~"))
    url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats"

    # `exist_ok=True` creates any missing parent folders too, and does not
    # fail if another process creates the folder at the same time.
    cache_dir = (
        f"{home_dir}/.ncaa_stats_py/" +
        f"volleyball_{sport_id}/player_season_stats/"
    )
    makedirs(cache_dir, exist_ok=True)

    # The cache lookup uses the season from the teams table.
    cache_file = (
        cache_dir + f"{season:00d}_{school_id:00d}_player_season_stats.csv"
    )

    if exists(cache_file):
        cached_df = pd.read_csv(cache_file)
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = datetime.today()
        load_from_cache = False

    now = datetime.today()
    age = now - file_mod_datetime

    # Only stats from the current (or a future) season are refreshed.
    if age.days > 1 and season >= now.year:
        load_from_cache = False

    if load_from_cache is True:
        return cached_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # Re-derive the season from the season name shown on the page:
    # men's teams use the first two and last two characters,
    # women's teams use the first four characters.
    if sport_id == "MVB":
        season = int(f"{season_name[0:2]}{season_name[-2:]}")
    elif sport_id == "WVB":
        season = int(f"{season_name[0:4]}")

    table_data = soup.find(
        "table",
        {"id": "stat_grid", "class": "small_font dataTable table-bordered"},
    )
    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]

    t_rows = table_data.find("tbody").find_all("tr", {"class": "text"})
    for t in t_rows:
        p_last = ""
        p_first = ""
        t_cells = t.find_all("td")

        # Skip rows whose second cell mentions "team"
        # (team-level rows rather than individual players).
        if "team" in t_cells[1].text.lower():
            continue

        # `data-order` is expected to hold the sortable, comma-separated
        # form of the name ("Last,First" or "Last,Suffix,First").
        # The string has to be split *before* counting its parts; checking
        # `len()` of the raw string never matched real names.
        name_parts = (t_cells[1].get("data-order") or "").split(",")
        if len(name_parts) >= 2:
            *last_parts, p_first = name_parts
            p_last = " ".join(part.strip() for part in last_parts)

        t_cells = [x.text.strip() for x in t_cells]
        # Remove commas (e.g. thousands separators) from the cell text.
        t_cells = [x.replace(",", "") for x in t_cells]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The first link in the row is the player's page ("/players/<id>").
        player_href = t.find("a").get("href")
        temp_df["player_id"] = int(
            player_href.replace("/players", "").replace("/", "")
        )
        temp_df["player_last_name"] = p_last.strip()
        temp_df["player_first_name"] = p_first.strip()

        stats_df_arr.append(temp_df)

    if not stats_df_arr:
        # `pd.concat()` would raise a less helpful `ValueError` here.
        raise ValueError(
            f"No player rows were found in the stats table for team ID {team_id}."
        )

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("", None)

    stats_df["season"] = season
    stats_df["season_name"] = season_name
    stats_df["school_id"] = school_id
    stats_df["school_name"] = school_name
    stats_df["ncaa_division"] = ncaa_division
    stats_df["ncaa_division_formatted"] = ncaa_division_formatted
    stats_df["team_conference_name"] = team_conference_name
    stats_df["sport_id"] = sport_id
    stats_df["team_id"] = team_id

    stats_df = stats_df.infer_objects()

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "Hit Pct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # Fail loudly when the page has a column this function does not map,
    # instead of silently dropping it in the `reindex()` below.
    for i in stats_df.columns:
        if i in stat_columns:
            pass
        elif "Attend" in i:
            # Attendance columns are tolerated and dropped by `reindex()`.
            # The check must look at the column name itself; testing
            # `stat_columns` here could never be true.
            pass
        else:
            raise ValueError(
                f"Unhandled column name {i}"
            )
    stats_df = stats_df.reindex(columns=stat_columns)

    # Missing values become 0 so the integer casts below cannot fail on NaN.
    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "GS": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "TRP_DBL": "uint16",
            "serve_attempts": "uint16",
            "total_blocks": "float32",
            "DBL_DBL": "uint16",
            "school_id": "uint32",
        }
    )

    stats_df["hit%"] = stats_df["hit%"].round(3)
    stats_df["points"] = stats_df["points"].round(1)

    # The file name is rebuilt because `season` was re-derived from the
    # page above. NOTE(review): this presumably matches the season used
    # for the cache lookup; if the two ever differ, the cached file would
    # never be found again. Confirm against `load_volleyball_teams()`.
    stats_df.to_csv(
        cache_dir + f"{season:00d}_{school_id:00d}_player_season_stats.csv",
        index=False,
    )

    return stats_df
2566
2567
2568def get_volleyball_player_game_stats(
2569    player_id: int
2570) -> pd.DataFrame:
2571    """
2572    Given a valid player ID and season,
2573    this function retrieves the game stats for this player at a game level.
2574
2575    Parameters
2576    ----------
2577    `player_id` (int, mandatory):
2578        Required argument.
2579        Specifies the player you want game stats from.
2580
2581    `season` (int, mandatory):
2582        Required argument.
2583        Specifies the season you want game stats from.
2584
2585    Usage
2586    ----------
2587    ```python
2588
2589    from ncaa_stats_py.volleyball import (
2590        get_volleyball_player_game_stats
2591    )
2592
2593    ########################################
2594    #          Women's volleyball          #
2595    ########################################
2596
2597    # Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
2598    print(
2599        "Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)."
2600    )
2601    df = get_volleyball_player_game_stats(player_id=8432514)
2602    print(df)
2603
2604    # Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
2605    print(
2606        "Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)."
2607    )
2608    df = get_volleyball_player_game_stats(player_id=8145555)
2609    print(df)
2610
2611    # Get the game stats of Lauren Gips in 2022 (Babson, D3).
2612    print(
2613        "Get the game stats of Lauren Gips in 2022 (Babson, D3)."
2614    )
2615    df = get_volleyball_player_game_stats(player_id=7876821)
2616    print(df)
2617
2618    # Get the game stats of Rhett Robinson in 2021 (North Texas).
2619    print(
2620        "Get the game stats of Rhett Robinson in 2021 (North Texas)."
2621    )
2622    df = get_volleyball_player_game_stats(player_id=7234089)
2623    print(df)
2624
2625    # Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
2626    print(
2627        "Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2)."
2628    )
2629    df = get_volleyball_player_game_stats(player_id=6822147)
2630    print(df)
2631
2632    # Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
2633    print(
2634        "Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3)."
2635    )
2636    df = get_volleyball_player_game_stats(player_id=6449807)
2637    print(df)
2638
2639    ########################################
2640    #          Men's volleyball            #
2641    ########################################
2642
2643    # Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
2644    print(
2645        "Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial)."
2646    )
2647    df = get_volleyball_player_game_stats(player_id=8253076)
2648    print(df)
2649
2650    # Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
2651    print(
2652        "Get the game stats of Ray Rodriguez in 2023 (Lehman, D3)."
2653    )
2654    df = get_volleyball_player_game_stats(player_id=7883459)
2655    print(df)
2656
2657    # Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
2658    print(
2659        "Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus)."
2660    )
2661    df = get_volleyball_player_game_stats(player_id=7413984)
2662    print(df)
2663
2664    # Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
2665    print(
2666        "Get the game stats of Tyler Anderson in 2021 (Alvernia, D3)."
2667    )
2668    df = get_volleyball_player_game_stats(player_id=7118023)
2669    print(df)
2670
2671    # Get the game stats of Jaylen Jasper in 2020 (Stanford).
2672    print(
2673        "Get the game stats of Jaylen Jasper in 2020 (Stanford)."
2674    )
2675    df = get_volleyball_player_game_stats(player_id=6357146)
2676    print(df)
2677
2678    # Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
2679    print(
2680        "Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3)."
2681    )
2682    df = get_volleyball_player_game_stats(player_id=5816111)
2683    print(df)
2684
2685    ```
2686
2687    Returns
2688    ----------
2689    A pandas `DataFrame` object with a player's batting game logs
2690    in a given season.
2691    """
2692    sport_id = ""
2693
2694    stat_columns = [
2695        "season",
2696        "sport_id",
2697        "game_id",
2698        "game_num",
2699        "player_id",
2700        "date",
2701        "opponent",
2702        "Result",
2703        "team_sets_won",
2704        "opponent_sets_won",
2705        "GP",
2706        # "GS",
2707        "sets_played",
2708        "MS",
2709        "kills",
2710        "errors",
2711        "total_attacks",
2712        "hit%",
2713        "assists",
2714        "aces",
2715        "serve_errors",
2716        "digs",
2717        "return_attacks",
2718        "return_errors",
2719        "solo_blocks",
2720        "assisted_blocks",
2721        "block_errors",
2722        "total_blocks",
2723        "points",
2724        "BHE",
2725        "serve_attempts",
2726        "DBL_DBL",
2727        "TRP_DBL",
2728    ]
2729
2730    load_from_cache = True
2731    stats_df = pd.DataFrame()
2732    stats_df_arr = []
2733    temp_df = pd.DataFrame()
2734    sport_id = ""
2735    home_dir = expanduser("~")
2736    home_dir = _format_folder_str(home_dir)
2737
2738    # stat_id = _get_stat_id(
2739    #     sport="volleyball",
2740    #     season=season,
2741    #     stat_type="batting"
2742    # )
2743    url = f"https://stats.ncaa.org/players/{player_id}"
2744
2745    if exists(f"{home_dir}/.ncaa_stats_py/"):
2746        pass
2747    else:
2748        mkdir(f"{home_dir}/.ncaa_stats_py/")
2749
2750    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"):
2751        pass
2752    else:
2753        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/")
2754
2755    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/player_game_stats/"):
2756        pass
2757    else:
2758        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/player_game_stats/")
2759
2760    if exists(
2761        f"{home_dir}/.ncaa_stats_py/volleyball_MVB/player_game_stats/"
2762        + f"{player_id}_player_game_stats.csv"
2763    ):
2764        games_df = pd.read_csv(
2765            f"{home_dir}/.ncaa_stats_py/volleyball_MVB/player_game_stats/"
2766            + f"{player_id}_player_game_stats.csv"
2767        )
2768        file_mod_datetime = datetime.fromtimestamp(
2769            getmtime(
2770                f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"
2771                + "player_game_stats/"
2772                + f"{player_id}_player_game_stats.csv"
2773            )
2774        )
2775        games_df = games_df.infer_objects()
2776        load_from_cache = True
2777    else:
2778        file_mod_datetime = datetime.today()
2779        load_from_cache = False
2780
2781    if exists(f"{home_dir}/.ncaa_stats_py/"):
2782        pass
2783    else:
2784        mkdir(f"{home_dir}/.ncaa_stats_py/")
2785
2786    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"):
2787        pass
2788    else:
2789        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/")
2790
2791    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/player_game_stats/"):
2792        pass
2793    else:
2794        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/player_game_stats/")
2795
2796    if exists(
2797        f"{home_dir}/.ncaa_stats_py/volleyball_WVB/player_game_stats/"
2798        + f"{player_id}_player_game_stats.csv"
2799    ):
2800        games_df = pd.read_csv(
2801            f"{home_dir}/.ncaa_stats_py/volleyball_WVB/player_game_stats/"
2802            + f"{player_id}_player_game_stats.csv"
2803        )
2804        file_mod_datetime = datetime.fromtimestamp(
2805            getmtime(
2806                f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"
2807                + "player_game_stats/"
2808                + f"{player_id}_player_game_stats.csv"
2809            )
2810        )
2811        games_df = games_df.infer_objects()
2812        load_from_cache = True
2813    else:
2814        logging.info("Could not find a WVB player game stats file")
2815
2816    now = datetime.today()
2817
2818    age = now - file_mod_datetime
2819
2820    if (
2821        age.days >= 1
2822    ):
2823        load_from_cache = False
2824
2825    if load_from_cache is True:
2826        return games_df
2827
2828    # team_df = load_volleyball_teams()
2829
2830    # team_df = team_df[team_df["team_id"] == team_id]
2831
2832    # season = team_df["season"].iloc[0]
2833    # ncaa_division = team_df["ncaa_division"].iloc[0]
2834    # ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
2835    # team_conference_name = team_df["team_conference_name"].iloc[0]
2836    # school_name = team_df["school_name"].iloc[0]
2837    # school_id = int(team_df["school_id"].iloc[0])
2838
2839    # del team_df
2840    response = _get_webpage(url=url)
2841    soup = BeautifulSoup(response.text, features="lxml")
2842
2843    table_navigation = soup.find("ul", {"class": "nav nav-tabs padding-nav"})
2844    table_nav_card = table_navigation.find_all("a")
2845
2846    for u in table_nav_card:
2847        url_str = u.get("href")
2848        if "MVB" in url_str.upper():
2849            sport_id = "MVB"
2850        elif "WVB" in url_str.upper():
2851            sport_id = "WVB"
2852
2853    if sport_id is None or len(sport_id) == 0:
2854        # This should **never** be the case IRL,
2855        # but in case something weird happened and
2856        # we can't make a determination of if this is a
2857        # MVB player or a WVB player, and we somehow haven't
2858        # crashed by this point, set the sport ID to
2859        # "MVB" by default so we don't have other weirdness.
2860        logging.error(
2861            f"Could not determine if player ID {player_id} " +
2862            "is a MVB or a WVB player. " +
2863            "Because this cannot be determined, " +
2864            "we will make the automatic assumption that this is a MVB player."
2865        )
2866        sport_id = "MVB"
2867
2868    table_data = soup.find_all(
2869        "table", {"class": "small_font dataTable table-bordered"}
2870    )[1]
2871
2872    temp_table_headers = table_data.find("thead").find("tr").find_all("th")
2873    table_headers = [x.text for x in temp_table_headers]
2874
2875    del temp_table_headers
2876
2877    temp_t_rows = table_data.find("tbody")
2878    temp_t_rows = temp_t_rows.find_all("tr")
2879    season_name = (
2880        soup.find("select", {"id": "year_list"})
2881        .find("option", {"selected": "selected"})
2882        .text
2883    )
2884
2885    if sport_id == "MVB":
2886        season = f"{season_name[0:2]}{season_name[-2:]}"
2887        season = int(season)
2888    elif sport_id == "WVB":
2889        season = f"{season_name[0:4]}"
2890        season = int(season)
2891
2892    for t in temp_t_rows:
2893        game_num = 1
2894        ot_periods = 0
2895        # innings = 9
2896        row_id = t.get("id")
2897        opp_team_name = ""
2898
2899        if "contest" not in row_id:
2900            continue
2901        del row_id
2902
2903        t_cells = t.find_all("td")
2904        t_cells = [x.text.strip() for x in t_cells]
2905
2906        g_date = t_cells[0]
2907
2908        if "(" in g_date:
2909            g_date, game_num = g_date.split("(")
2910            g_date = g_date.strip()
2911
2912            game_num = game_num.replace(")", "")
2913            game_num = int(game_num)
2914
2915        try:
2916            opp_team_id = t.find_all("td")[1].find("a").get("href")
2917        except AttributeError as e:
2918            logging.info(
2919                "Could not extract a team ID for this game. " +
2920                f"Full exception {e}"
2921            )
2922        except Exception as e:
2923            logging.warning(
2924                "An unhandled exception has occurred when "
2925                + "trying to get the opposition team ID for this game. "
2926                f"Full exception `{e}`."
2927            )
2928            raise e
2929
2930        try:
2931            opp_team_id = opp_team_id.replace("/teams/", "")
2932            opp_team_id = opp_team_id.replace(
2933                "javascript:toggleDefensiveStats(", ""
2934            )
2935            opp_team_id = opp_team_id.replace(");", "")
2936            opp_team_id = int(opp_team_id)
2937
2938            temp_df["opponent_team_id"] = opp_team_id
2939        except Exception:
2940            logging.info(
2941                "Couldn't find the opposition team naIDme "
2942                + "for this row. "
2943            )
2944            opp_team_id = None
2945        # print(i.find("td").text)
2946        try:
2947            opp_team_name = t.find_all("td")[1].find_all("img")[1].get("alt")
2948        except AttributeError:
2949            logging.info(
2950                "Couldn't find the opposition team name "
2951                + "for this row from an image element. "
2952                + "Attempting a backup method"
2953            )
2954            opp_team_name = t_cells[1]
2955        except IndexError:
2956            logging.info(
2957                "Couldn't find the opposition team name "
2958                + "for this row from an image element. "
2959                + "Attempting a backup method"
2960            )
2961            opp_team_name = t_cells[1]
2962        except Exception as e:
2963            logging.warning(
2964                "Unhandled exception when trying to get the "
2965                + "opposition team name from this game. "
2966                + f"Full exception `{e}`"
2967            )
2968            raise e
2969
2970        if opp_team_name == "Defensive Stats":
2971            opp_team_name = t_cells[1]
2972
2973        if "@" in opp_team_name:
2974            opp_team_name = opp_team_name.split("@")[0]
2975
2976        result_str = t_cells[2]
2977
2978        result_str = (
2979            result_str.lower().replace("w", "").replace("l", "").replace(
2980                "t", ""
2981            )
2982        )
2983
2984        if (
2985            result_str.lower() == "ppd" or
2986            result_str.lower() == "" or
2987            result_str.lower() == "canceed"
2988        ):
2989            continue
2990
2991        result_str = result_str.replace("\n", "")
2992        result_str = result_str.replace("*", "")
2993
2994        tm_score, opp_score = result_str.split("-")
2995        t_cells = [x.replace("*", "") for x in t_cells]
2996        t_cells = [x.replace("/", "") for x in t_cells]
2997        t_cells = [x.replace("\\", "") for x in t_cells]
2998
2999        temp_df = pd.DataFrame(
3000            data=[t_cells],
3001            columns=table_headers,
3002            # index=[0]
3003        )
3004
3005        tm_score = int(tm_score)
3006        if "(" in opp_score:
3007            opp_score = opp_score.replace(")", "")
3008            opp_score, ot_periods = opp_score.split("(")
3009            temp_df["ot_periods"] = ot_periods
3010
3011        if "\n" in opp_score:
3012            opp_score = opp_score.strip()
3013            # opp_score = opp_score
3014        opp_score = int(opp_score)
3015
3016        temp_df["team_sets_won"] = tm_score
3017        temp_df["opponent_sets_won"] = opp_score
3018
3019        del tm_score
3020        del opp_score
3021
3022        try:
3023            g_id = t.find_all("td")[2].find("a").get("href")
3024
3025            g_id = g_id.replace("/contests", "")
3026            g_id = g_id.replace("/box_score", "")
3027            g_id = g_id.replace("/", "")
3028
3029            g_id = int(g_id)
3030            temp_df["game_id"] = g_id
3031            del g_id
3032        except AttributeError:
3033            logging.warning(
3034                f"Could not find a game ID for a {g_date} game " +
3035                f"against {opp_team_name}."
3036            )
3037            temp_df["game_id"] = None
3038        except Exception as e:
3039            raise e
3040
3041        temp_df.rename(
3042            columns={"Opponent": "opponent", "Date": "date"},
3043            inplace=True,
3044        )
3045        game_date = datetime.strptime(g_date, "%m/%d/%Y").date()
3046
3047        temp_df["date"] = game_date
3048        temp_df["game_num"] = game_num
3049        # temp_df["game_innings"] = innings
3050
3051        if len(opp_team_name) > 0:
3052            temp_df["opponent"] = opp_team_name
3053        del opp_team_name
3054
3055        duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
3056        temp_df.drop(columns=duplicate_cols, inplace=True)
3057
3058        stats_df_arr.append(temp_df)
3059        del temp_df
3060
3061    stats_df = pd.concat(stats_df_arr, ignore_index=True)
3062    stats_df = stats_df.replace("/", "", regex=True)
3063    stats_df = stats_df.replace("", np.nan)
3064    stats_df = stats_df.infer_objects()
3065
3066    stats_df["player_id"] = player_id
3067    stats_df["sport_id"] = sport_id
3068    stats_df["season"] = season
3069
3070    stats_df.rename(
3071        columns={
3072            "#": "player_jersey_number",
3073            "Player": "player_full_name",
3074            "Yr": "player_class",
3075            "Pos": "player_position",
3076            "Ht": "player_height",
3077            "S": "sets_played",
3078            "Kills": "kills",
3079            "Errors": "errors",
3080            "Total Attacks": "total_attacks",
3081            "TotalAttacks": "total_attacks",
3082            "Hit Pct": "hit%",
3083            "HitPct": "hit%",
3084            "Assists": "assists",
3085            "Aces": "aces",
3086            "SErr": "serve_errors",
3087            "Digs": "digs",
3088            "RetAtt": "return_attacks",
3089            "RErr": "return_errors",
3090            "Block Solos": "solo_blocks",
3091            "BlockSolos": "solo_blocks",
3092            "Block Assists": "assisted_blocks",
3093            "BlockAssists": "assisted_blocks",
3094            "BErr": "block_errors",
3095            "PTS": "points",
3096            "Trpl Dbl": "TRP_DBL",
3097            "Dbl Dbl": "DBL_DBL",
3098            "TB": "total_blocks",
3099            "SrvAtt": "serve_attempts",
3100        },
3101        inplace=True,
3102    )
3103    # These columns are added separately because these stats
3104    # *don't* exist in every season.
3105
3106    if "serve_attempts" not in stats_df.columns:
3107        stats_df["serve_attempts"] = None
3108
3109    if "return_attacks" not in stats_df.columns:
3110        stats_df["return_attacks"] = None
3111
3112    stats_df = stats_df.infer_objects().fillna(0)
3113    stats_df = stats_df.astype(
3114        {
3115            "GP": "uint16",
3116            "sets_played": "uint16",
3117            # "MS": "uint16",
3118            "kills": "uint16",
3119            "errors": "uint16",
3120            "total_attacks": "uint16",
3121            "hit%": "float32",
3122            "assists": "uint16",
3123            "aces": "uint16",
3124            "serve_errors": "uint16",
3125            "digs": "uint16",
3126            "return_attacks": "uint16",
3127            "return_errors": "uint16",
3128            "solo_blocks": "uint16",
3129            "assisted_blocks": "uint16",
3130            "block_errors": "uint16",
3131            # "total_blocks": "uint16",
3132            "points": "float32",
3133            "BHE": "uint16",
3134            "serve_attempts": "uint16",
3135            # "DBL_DBL": "uint8",
3136            # "TRP_DBL": "uint8",
3137        }
3138    )
3139
3140    stats_df.loc[
3141        (stats_df["solo_blocks"] > 0) | (stats_df["assisted_blocks"] > 0),
3142        "total_blocks"
3143    ] = (
3144        stats_df["solo_blocks"] +
3145        (stats_df["assisted_blocks"] / 2)
3146    )
3147    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")
3148
3149    # Columns used to calculate double doubles and triple doubles.
3150    # Credits:
3151    # https://en.wikipedia.org/wiki/Double_(volleyball)
3152    # https://stackoverflow.com/a/54381918
3153    double_stats_arr = [
3154        "aces",
3155        "kills",
3156        "total_blocks",
3157        "digs",
3158        "assists",
3159    ]
3160    stats_df["DBL_DBL"] = (
3161        (
3162            (stats_df[double_stats_arr] >= 10).sum(1)
3163        ) >= 2
3164    )
3165    stats_df["DBL_DBL"] = stats_df["DBL_DBL"].astype(int)
3166
3167    stats_df["TRP_DBL"] = (
3168        (
3169            (stats_df[double_stats_arr] >= 10).sum(1)
3170        ) >= 3
3171    )
3172    stats_df["TRP_DBL"] = stats_df["TRP_DBL"].astype(int)
3173
3174    for i in stats_df.columns:
3175        if i in stat_columns:
3176            pass
3177        elif "Attend" in stat_columns:
3178            pass
3179        else:
3180            raise ValueError(
3181                f"Unhandled column name {i}"
3182            )
3183    stats_df = stats_df.reindex(columns=stat_columns)
3184
3185    stats_df.to_csv(
3186        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"
3187        + "player_game_stats/"
3188        + f"{player_id}_player_game_stats.csv",
3189        index=False,
3190    )
3191    return stats_df
3192
3193
3194def get_volleyball_game_player_stats(game_id: int) -> pd.DataFrame:
3195    """
3196    Given a valid game ID,
3197    this function will attempt to get all player game stats, if possible.
3198
3199    Parameters
3200    ----------
3201    `game_id` (int, mandatory):
3202        Required argument.
3203        Specifies the game you want player game stats from.
3204
3205    Usage
3206    ----------
3207    ```python
3208
3209    from ncaa_stats_py.volleyball import get_volleyball_game_player_stats
3210
3211    ########################################
3212    #          Women's volleyball          #
3213    ########################################
3214
3215    # Get the game stats of the
3216    # 2024 NCAA D1 Women's Volleyball National Championship game.
3217    print(
3218        "Get the game stats of the "
3219        + "2024 NCAA D1 Women's volleyball National Championship game"
3220    )
3221    df = get_volleyball_game_player_stats(6080706)
3222    print(df)
3223
3224    # Get the game stats of a September 14th, 2024
3225    # game between the UNC Asheville Bulldogs and the Iona Gaels.
3226    print(
3227        "Get the game stats of a September 14th, 2024 "
3228        + "game between the UNC Asheville Bulldogs "
3229        + "and the Iona Gaels"
3230    )
3231    df = get_volleyball_game_player_stats(5670752)
3232    print(df)
3233
3234    # Get the game stats of a September 16th, 2023
3235    # game between the Saginaw Valley Cardinals
3236    # and the Lake Superior St. Lakers.
3237    print(
3238        "Get the game stats of a September 16th, 2023 "
3239        + "game between the Saginaw Valley Cardinals "
3240        + "and the Lake Superior St. Lakers."
3241    )
3242    df = get_volleyball_game_player_stats(3243563)
3243    print(df)
3244
3245    # Get the game stats of a October 15th, 2022
3246    # game between the Macalester Scots
3247    # and the St. Scholastica Saints (D3).
3248    print(
3249        "Get the game stats of a October 15th, 2022 "
3250        + "game between the Macalester Scots and "
3251        + "the St. Scholastica Saints (D3)."
3252    )
3253    df = get_volleyball_game_player_stats(2307684)
3254    print(df)
3255
3256    # Get the game stats of a October 24th, 2021
3257    # game between the Howard Bison and the UMES Hawks.
3258    print(
3259        "Get the game stats of a October 24th, 2021 "
3260        + "game between the Howard Bison and the UMES Hawks."
3261    )
3262    df = get_volleyball_game_player_stats(2113627)
3263    print(df)
3264
3265    # Get the game stats of a March 5th, 2021
3266    # game between the Notre Dame (OH) Falcons
3267    # and the Alderson Broaddus Battlers.
3268    print(
3269        "Get the game stats of a March 5th, 2021 "
3270        + "game between the Notre Dame (OH) Falcons "
3271        + "and the Alderson Broaddus Battlers."
3272    )
3273    df = get_volleyball_game_player_stats(2005442)
3274    print(df)
3275
3276    # Get the game stats of a November 14th, 2019
3277    # game between the Wittenberg Tigers
3278    # and the Muskingum Fighting Muskies (D3).
3279    print(
3280        "Get the game stats of a November 14th, 2019 "
3281        + "game between the Wittenberg Tigers and "
3282        + "the Muskingum Fighting Muskies (D3)."
3283    )
3284    df = get_volleyball_game_player_stats(1815514)
3285    print(df)
3286
3287    ########################################
3288    #          Men's volleyball            #
3289    ########################################
3290
3291    # Get the game stats of the
3292    # 2024 NCAA D1 Men's Volleyball National Championship game.
3293    print(
3294        "Get the game stats of the "
3295        + "2024 NCAA D1 Men's volleyball National Championship game"
3296    )
3297    df = get_volleyball_game_player_stats(5282845)
3298    print(df)
3299
3300    # Get the game stats of a January 14th, 2025
3301    # game between the Kean Cougars and the Arcadia Knights.
3302    print(
3303        "Get the game stats of a January 14th, 2025 "
3304        + "game between the Kean Cougars "
3305        + "and the Arcadia Knights"
3306    )
3307    df = get_volleyball_game_player_stats(6081598)
3308    print(df)
3309
3310    # Get the game stats of a January 13th, 2024
3311    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
3312    print(
3313        "Get the game stats of a January 13th, 2024 "
3314        + "game between the Purdue Fort Wayne Mastodons "
3315        + "and the NJIT Highlanders."
3316    )
3317    df = get_volleyball_game_player_stats(4473231)
3318    print(df)
3319
3320    # Get the game stats of a January 21st, 2023
3321    # game between the Baruch Bearcats and the Widener Pride.
3322    print(
3323        "Get the game stats of a January 21st, 2023 "
3324        + "game between the Baruch Bearcats and the Widener Pride."
3325    )
3326    df = get_volleyball_game_player_stats(2355323)
3327    print(df)
3328
3329    # Get the game stats of a February 24th, 2022
3330    # game between the Ball St. Cardinals and the Lindenwood Lions.
3331    print(
3332        "Get the game stats of a February 24th, 2022 "
3333        + "game between the Ball St. Cardinals and the Lindenwood Lions."
3334    )
3335    df = get_volleyball_game_player_stats(2162239)
3336    print(df)
3337
3338    # Get the game stats of a March 20th, 2021
3339    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
3340    print(
3341        "Get the game stats of a March 20th, 2021 "
3342        + "game between the SUNY New Paltz Hawks "
3343        + "and the St. John Fisher Cardinals."
3344    )
3345    df = get_volleyball_game_player_stats(2059180)
3346    print(df)
3347
3348    # Get the game stats of a March 1st, 2020
3349    # game between the USC Trojans and the CUI Golden Eagles.
3350    print(
3351        "Get the game stats of a March 1st, 2020 "
3352        + "game between the USC Trojans and the CUI Golden Eagles."
3353    )
3354    df = get_volleyball_game_player_stats(1820058)
3355    print(df)
3356
3357    # Get the game stats of an April 4th, 2019
3358    # game between the Lesley Lynx and the Pine Manor Gators (D3).
3359    print(
3360        "Get the game stats of an April 4th, 2019 "
3361        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
3362    )
3363    df = get_volleyball_game_player_stats(1723131)
3364    print(df)
3365
3366
3367    ```
3368
3369    Returns
3370    ----------
3371    A pandas `DataFrame` object with player game stats in a given game.
3372
3373    """
3374    load_from_cache = True
3375
3376    sport_id = ""
3377    season = 0
3378
3379    MVB_teams_df = load_volleyball_teams(get_mens_data=True)
3380    MVB_team_ids_arr = MVB_teams_df["team_id"].to_list()
3381
3382    WVB_teams_df = load_volleyball_teams(get_mens_data=False)
3383    WVB_team_ids_arr = WVB_teams_df["team_id"].to_list()
3384
3385    stats_df = pd.DataFrame()
3386    stats_df_arr = []
3387
3388    temp_df = pd.DataFrame()
3389    home_dir = expanduser("~")
3390    home_dir = _format_folder_str(home_dir)
3391
3392    stat_columns = [
3393        "season",
3394        "sport_id",
3395        "game_datetime",
3396        "game_id",
3397        "team_id",
3398        "team_name",
3399        "player_id",
3400        "player_num",
3401        "player_full_name",
3402        "player_position",
3403        "GP",
3404        "sets_played",
3405        "kills",
3406        "errors",
3407        "total_attacks",
3408        "hit%",
3409        "assists",
3410        "aces",
3411        "serve_errors",
3412        "digs",
3413        "return_attacks",
3414        "return_errors",
3415        "solo_blocks",
3416        "assisted_blocks",
3417        "block_errors",
3418        "total_blocks",
3419        "points",
3420        "BHE",
3421        "DBL_DBL",
3422        "TRP_DBL",
3423    ]
3424
3425    url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats"
3426
3427    if exists(f"{home_dir}/.ncaa_stats_py/"):
3428        pass
3429    else:
3430        mkdir(f"{home_dir}/.ncaa_stats_py/")
3431
3432    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"):
3433        pass
3434    else:
3435        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/")
3436
3437    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/"):
3438        pass
3439    else:
3440        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/")
3441
3442    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"):
3443        pass
3444    else:
3445        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/")
3446
3447    if exists(
3448        f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"
3449        + f"{game_id}_player_game_stats.csv"
3450    ):
3451        games_df = pd.read_csv(
3452            f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"
3453            + f"{game_id}_player_game_stats.csv"
3454        )
3455        games_df = games_df.infer_objects()
3456        file_mod_datetime = datetime.fromtimestamp(
3457            getmtime(
3458                f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"
3459                + f"{game_id}_player_game_stats.csv"
3460            )
3461        )
3462        load_from_cache = True
3463    else:
3464        file_mod_datetime = datetime.today()
3465        load_from_cache = False
3466
3467    if exists(f"{home_dir}/.ncaa_stats_py/"):
3468        pass
3469    else:
3470        mkdir(f"{home_dir}/.ncaa_stats_py/")
3471
3472    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"):
3473        pass
3474    else:
3475        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/")
3476
3477    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/"):
3478        pass
3479    else:
3480        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/")
3481
3482    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"):
3483        pass
3484    else:
3485        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/")
3486
3487    if exists(
3488        f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"
3489        + f"{game_id}_player_game_stats.csv"
3490    ):
3491        games_df = pd.read_csv(
3492            f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"
3493            + f"{game_id}_player_game_stats.csv"
3494        )
3495        games_df = games_df.infer_objects()
3496        file_mod_datetime = datetime.fromtimestamp(
3497            getmtime(
3498                f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"
3499                + f"{game_id}_player_game_stats.csv"
3500            )
3501        )
3502        load_from_cache = True
3503    else:
3504        logging.info("Could not find a WVB player game stats file")
3505
3506    now = datetime.today()
3507
3508    age = now - file_mod_datetime
3509
3510    if age.days >= 35:
3511        load_from_cache = False
3512
3513    if load_from_cache is True:
3514        return games_df
3515
3516    response = _get_webpage(url=url)
3517    soup = BeautifulSoup(response.text, features="lxml")
3518
3519    info_table = soup.find(
3520        "td",
3521        {
3522            "style": "padding: 0px 30px 0px 30px",
3523            "class": "d-none d-md-table-cell"
3524        }
3525    ).find(
3526        "table",
3527        {"style": "border-collapse: collapse"}
3528    )
3529
3530    info_table_rows = info_table.find_all("tr")
3531
3532    game_date_str = info_table_rows[3].find("td").text
3533    if "TBA" in game_date_str:
3534        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
3535    elif "tba" in game_date_str:
3536        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
3537    elif "TBD" in game_date_str:
3538        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
3539    elif "tbd" in game_date_str:
3540        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
3541    elif (
3542        "tbd" not in game_date_str.lower() and
3543        ":" not in game_date_str.lower()
3544    ):
3545        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
3546    else:
3547        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p')
3548    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
3549    game_date_str = game_datetime.isoformat()
3550    del game_datetime
3551
3552    table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"})
3553
3554    for box in table_boxes:
3555        t_header = box.find(
3556            "div", {"class": "card-header"}
3557        ).find(
3558            "div", {"class": "row"}
3559        )
3560
3561        t_header_str = t_header.text
3562        t_header_str = t_header_str.replace("Period Stats", "")
3563        t_header_str = t_header_str.replace("\n", "")
3564        t_header_str = t_header_str.strip()
3565
3566        team_id = t_header.find("a").get("href")
3567        team_id = team_id.replace("/teams", "")
3568        team_id = team_id.replace("/", "")
3569        team_id = int(team_id)
3570
3571        table_data = box.find(
3572            "table",
3573            {"class": "display dataTable small_font"}
3574        )
3575        table_headers = box.find("thead").find_all("th")
3576        table_headers = [x.text for x in table_headers]
3577
3578        temp_t_rows = table_data.find("tbody")
3579        temp_t_rows = temp_t_rows.find_all("tr")
3580
3581        spec_stats_df = pd.DataFrame()
3582        spec_stats_df_arr = []
3583        for t in temp_t_rows:
3584            # row_id = t.get("id")
3585            game_played = 1
3586            # game_started = 0
3587
3588            try:
3589                player_id = t.find("a").get("href")
3590                player_id = player_id.replace("/players", "")
3591                player_id = player_id.replace("/player", "")
3592                player_id = player_id.replace("/", "")
3593            except Exception as e:
3594                logging.debug(
3595                    "Could not replace player IDs. " +
3596                    f"Full exception: `{e}`"
3597                )
3598
3599            t_cells = t.find_all("td")
3600            p_name = t_cells[1].text.replace("\n", "")
3601            p_name = p_name.strip()
3602
3603            if t_header_str in p_name:
3604                continue
3605            elif p_name.lower() == "team":
3606                continue
3607            # if "\xa0" in p_name:
3608            #     game_started = 0
3609
3610            t_cells = [x.text.strip() for x in t_cells]
3611            player_id = int(player_id)
3612
3613            temp_df = pd.DataFrame(
3614                data=[t_cells],
3615                columns=table_headers
3616            )
3617
3618            duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
3619            temp_df.drop(columns=duplicate_cols, inplace=True)
3620
3621            temp_df["player_id"] = player_id
3622            temp_df["GP"] = game_played
3623            # temp_df["GS"] = game_started
3624
3625            spec_stats_df_arr.append(temp_df)
3626            del temp_df
3627
3628        spec_stats_df = pd.concat(
3629            spec_stats_df_arr,
3630            ignore_index=True
3631        )
3632
3633        if team_id in MVB_team_ids_arr:
3634            sport_id = "MVB"
3635            df = MVB_teams_df[MVB_teams_df["team_id"] == team_id]
3636            season = df["season"].iloc[0]
3637        elif team_id in WVB_team_ids_arr:
3638            sport_id = "WVB"
3639            df = WVB_teams_df[WVB_teams_df["team_id"] == team_id]
3640            season = df["season"].iloc[0]
3641        else:
3642            raise ValueError(
3643                f"Unhandled team ID {team_id}"
3644            )
3645
3646        spec_stats_df["team_id"] = team_id
3647        spec_stats_df["team_name"] = t_header_str
3648        stats_df_arr.append(spec_stats_df)
3649        del spec_stats_df
3650
3651    stats_df = pd.concat(stats_df_arr)
3652    stats_df["season"] = season
3653    stats_df.rename(
3654        columns={
3655            "#": "player_num",
3656            "Name": "player_full_name",
3657            "P": "player_position",
3658            "Ht": "player_height",
3659            "S": "sets_played",
3660            "Kills": "kills",
3661            "Errors": "errors",
3662            "Total Attacks": "total_attacks",
3663            "TotalAttacks": "total_attacks",
3664            "Hit Pct": "hit%",
3665            "HitPct": "hit%",
3666            "Assists": "assists",
3667            "Aces": "aces",
3668            "SErr": "serve_errors",
3669            "Digs": "digs",
3670            "RetAtt": "return_attacks",
3671            "RErr": "return_errors",
3672            "Block Solos": "solo_blocks",
3673            "BlockSolos": "solo_blocks",
3674            "Block Assists": "assisted_blocks",
3675            "BlockAssists": "assisted_blocks",
3676            "BErr": "block_errors",
3677            "PTS": "points",
3678            "Trpl Dbl": "TRP_DBL",
3679            "Dbl Dbl": "DBL_DBL",
3680            "TB": "total_blocks",
3681            "SrvAtt": "serve_attempts",
3682        },
3683        inplace=True,
3684    )
3685
3686    if "return_attacks" not in stats_df.columns:
3687        stats_df["return_attacks"] = None
3688
3689    if "serve_attempts" not in stats_df.columns:
3690        stats_df["serve_attempts"] = None
3691
3692    stats_df = stats_df.infer_objects().fillna(0)
3693    stats_df = stats_df.astype(
3694        {
3695            "GP": "uint16",
3696            "sets_played": "uint16",
3697            # "MS": "uint16",
3698            "kills": "uint16",
3699            "errors": "uint16",
3700            "total_attacks": "uint16",
3701            "hit%": "float32",
3702            "assists": "uint16",
3703            "aces": "uint16",
3704            "serve_errors": "uint16",
3705            "digs": "uint16",
3706            "return_attacks": "uint16",
3707            "return_errors": "uint16",
3708            "solo_blocks": "uint16",
3709            "assisted_blocks": "uint16",
3710            "block_errors": "uint16",
3711            # "total_blocks": "uint16",
3712            "points": "float32",
3713            "BHE": "uint16",
3714            "serve_attempts": "uint16",
3715            # "DBL_DBL": "uint8",
3716            # "TRP_DBL": "uint8",
3717        }
3718    )
3719    # print(stats_df.columns)
3720    stats_df["game_datetime"] = game_date_str
3721    stats_df["sport_id"] = sport_id
3722
3723    stats_df["game_id"] = game_id
3724
3725    stats_df["total_blocks"] = (
3726        stats_df["solo_blocks"] +
3727        (stats_df["assisted_blocks"] / 2)
3728    )
3729    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")
3730
3731    # Columns used to calculate double doubles and triple doubles.
3732    # Credits:
3733    # https://en.wikipedia.org/wiki/Double_(volleyball)
3734    # https://stackoverflow.com/a/54381918
3735    double_stats_arr = [
3736        "aces",
3737        "kills",
3738        "total_blocks",
3739        "digs",
3740        "assists",
3741    ]
3742    stats_df["DBL_DBL"] = ((stats_df[double_stats_arr] >= 10).sum(1)) >= 2
3743    stats_df["DBL_DBL"] = stats_df["DBL_DBL"].astype(int)
3744
3745    stats_df["TRP_DBL"] = ((stats_df[double_stats_arr] >= 10).sum(1)) >= 3
3746    stats_df["TRP_DBL"] = stats_df["TRP_DBL"].astype(int)
3747
3748    for i in stats_df.columns:
3749        if i in stat_columns:
3750            pass
3751        elif "Attend" in stat_columns:
3752            pass
3753        else:
3754            raise ValueError(
3755                f"Unhandled column name {i}"
3756            )
3757
3758    stats_df = stats_df.reindex(
3759        columns=stat_columns
3760    )
3761
3762    # print(stats_df.columns)
3763    stats_df.to_csv(
3764        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/game_stats/player/"
3765        + f"{game_id}_player_game_stats.csv",
3766        index=False
3767    )
3768    return stats_df
3769
3770
def get_volleyball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all team game stats, if possible.

    Team stats are not parsed separately. They are built by calling
    `get_volleyball_game_player_stats()` for the same game ID,
    and summing the player-level counting stats for each team.
    The team hitting percentage (`hit%`) is then recalculated from
    the summed kills, attack errors, and total attacks.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_game_team_stats

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(6080706)
    print(df)

    # Get the game stats of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the game stats of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_game_team_stats(5670752)
    print(df)

    # Get the game stats of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakes.
    print(
        "Get the game stats of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakes."
    )
    df = get_volleyball_game_team_stats(3243563)
    print(df)

    # Get the game stats of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the game stats of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_game_team_stats(2307684)
    print(df)

    # Get the game stats of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the game stats of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_game_team_stats(2113627)
    print(df)

    # Get the game stats of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the game stats of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_game_team_stats(2005442)
    print(df)

    # Get the game stats of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the game stats of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_game_team_stats(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(5282845)
    print(df)

    # Get the game stats of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the game stats of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_game_team_stats(6081598)
    print(df)

    # Get the game stats of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the game stats of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_game_team_stats(4473231)
    print(df)

    # Get the game stats of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the game stats of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_game_team_stats(2355323)
    print(df)

    # Get the game stats of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the game stats of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_game_team_stats(2162239)
    print(df)

    # Get the game stats of a March 20th, 2021
    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
    print(
        "Get the game stats of a March 20th, 2021 "
        + "game between the SUNY New Paltz Hawks "
        + "and the St. John Fisher Cardinals."
    )
    df = get_volleyball_game_team_stats(2059180)
    print(df)

    # Get the game stats of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the game stats of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_game_team_stats(1820058)
    print(df)

    # Get the game stats of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the game stats of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_game_team_stats(1723131)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.
    There is one row per
    (`season`, `sport_id`, `game_datetime`, `game_id`,
    `team_id`, `team_name`) combination found in the player stats.
    Each row holds those six columns, the per-team sums of
    `sets_played`, `kills`, `errors`, `total_attacks`, `assists`,
    `aces`, `serve_errors`, `digs`, `return_attacks`, `return_errors`,
    `solo_blocks`, `assisted_blocks`, `block_errors`, `total_blocks`,
    `points`, `BHE`, `DBL_DBL`, and `TRP_DBL`,
    and a recalculated `hit%` column
    (`(kills - errors) / total_attacks`, as a `float64`).
    `hit%` is `NaN`/`inf` for a team with zero total attacks.

    """
    # Team stats are derived entirely from the player-level game stats.
    df = get_volleyball_game_player_stats(game_id=game_id)
    df = df.infer_objects()

    # Sum every counting stat for each team in this game.
    # "hit%" is intentionally not aggregated, because a rate stat
    # cannot be summed; it is recalculated from the summed totals below.
    stats_df = df.groupby(
        [
            "season",
            "sport_id",
            "game_datetime",
            "game_id",
            "team_id",
            "team_name"
        ],
        as_index=False,
    ).agg(
        {
            "sets_played": "sum",
            "kills": "sum",
            "errors": "sum",
            "total_attacks": "sum",
            "assists": "sum",
            "aces": "sum",
            "serve_errors": "sum",
            "digs": "sum",
            "return_attacks": "sum",
            "return_errors": "sum",
            "solo_blocks": "sum",
            "assisted_blocks": "sum",
            "block_errors": "sum",
            "total_blocks": "sum",
            "points": "sum",
            "BHE": "sum",
            "DBL_DBL": "sum",
            "TRP_DBL": "sum",
        }
    )

    # The player-level stats can carry unsigned integer dtypes
    # (the player game stats code in this file casts these columns
    # to `uint16`), and summing unsigned columns yields unsigned totals.
    # Subtracting unsigned integers wraps around instead of going negative,
    # so a team with more attack errors than kills would get a huge,
    # bogus hitting percentage. Doing the math in float64 avoids that.
    kills = stats_df["kills"].astype("float64")
    attack_errors = stats_df["errors"].astype("float64")
    total_attacks = stats_df["total_attacks"].astype("float64")

    stats_df["hit%"] = (kills - attack_errors) / total_attacks
    return stats_df
3990
3991
3992def get_volleyball_raw_pbp(game_id: int) -> pd.DataFrame:
3993    """
3994    Given a valid game ID,
3995    this function will attempt to get the raw play-by-play (PBP)
3996    data for that game.
3997
3998    Parameters
3999    ----------
4000    `game_id` (int, mandatory):
4001        Required argument.
4002        Specifies the game you want play-by-play data (PBP) from.
4003
4004    Usage
4005    ----------
4006    ```python
4007
4008    from ncaa_stats_py.volleyball import get_volleyball_raw_pbp
4009
4010    ########################################
4011    #          Women's volleyball          #
4012    ########################################
4013
4014    # Get the play-by-play data of the
4015    # 2024 NCAA D1 Women's Volleyball National Championship game.
4016    print(
4017        "Get the play-by-play data of the "
4018        + "2024 NCAA D1 Women's volleyball National Championship game"
4019    )
4020    df = get_volleyball_raw_pbp(6080706)
4021    print(df)
4022
4023    # Get the play-by-play data of a September 14th, 2024
4024    # game between the UNC Asheville Bulldogs and the Iona Gaels.
4025    print(
4026        "Get the play-by-play data of a September 14th, 2024 "
4027        + "game between the UNC Asheville Bulldogs "
4028        + "and the Iona Gaels"
4029    )
4030    df = get_volleyball_raw_pbp(5670752)
4031    print(df)
4032
4033    # Get the play-by-play data of a September 16th, 2023
4034    # game between the Saginaw Valley Cardinals
4035    # and the Lake Superior St. Lakes.
4036    print(
4037        "Get the play-by-play data of a September 16th, 2023 "
4038        + "game between the Saginaw Valley Cardinals "
4039        + "and the Lake Superior St. Lakes."
4040    )
4041    df = get_volleyball_raw_pbp(3243563)
4042    print(df)
4043
4044    # Get the play-by-play data of a October 15th, 2022
4045    # game between the Macalester Scots
4046    # and the St. Scholastica Saints (D3).
4047    print(
4048        "Get the play-by-play data of a October 15th, 2022 "
4049        + "game between the Macalester Scots and "
4050        + "the St. Scholastica Saints (D3)."
4051    )
4052    df = get_volleyball_raw_pbp(2307684)
4053    print(df)
4054
4055    # Get the play-by-play data of a October 24th, 2021
4056    # game between the Howard Bison and the UMES Hawks.
4057    print(
4058        "Get the play-by-play data of a October 24th, 2021 "
4059        + "game between the Howard Bison and the UMES Hawks."
4060    )
4061    df = get_volleyball_raw_pbp(2113627)
4062    print(df)
4063
4064    # Get the play-by-play data of a March 5th, 2021
4065    # game between the Notre Dame (OH) Falcons
4066    # and the Alderson Broaddus Battlers.
4067    print(
4068        "Get the play-by-play data of a March 5th, 2021 "
4069        + "game between the Notre Dame (OH) Falcons "
4070        + "and the Alderson Broaddus Battlers."
4071    )
4072    df = get_volleyball_raw_pbp(2005442)
4073    print(df)
4074
4075    # Get the play-by-play data of a November 14th, 2019
4076    # game between the Wittenberg Tigers
4077    # and the Muskingum Fighting Muskies (D3).
4078    print(
4079        "Get the play-by-play data of a November 14th, 2019 "
4080        + "game between the Wittenberg Tigers and "
4081        + "the Muskingum Fighting Muskies (D3)."
4082    )
4083    df = get_volleyball_raw_pbp(1815514)
4084    print(df)
4085
4086    ########################################
4087    #          Men's volleyball            #
4088    ########################################
4089
4090    # Get the play-by-play data of the
4091    # 2024 NCAA D1 Men's Volleyball National Championship game.
4092    print(
4093        "Get the play-by-play data of the "
4094        + "2024 NCAA D1 Men's volleyball National Championship game"
4095    )
4096    df = get_volleyball_raw_pbp(5282845)
4097    print(df)
4098
4099    # Get the play-by-play data of a January 14th, 2025
4100    # game between the Kean Cougars and the Arcadia Knights.
4101    print(
4102        "Get the play-by-play data of a January 14th, 2025 "
4103        + "game between the UNC Asheville Bulldogs "
4104        + "and the Iona Gaels"
4105    )
4106    df = get_volleyball_raw_pbp(6081598)
4107    print(df)
4108
4109    # Get the play-by-play data of a January 13th, 2024
4110    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
4111    print(
4112        "Get the play-by-play data of a September 14th, 2024 "
4113        + "game between the Purdue Fort Wayne Mastodons "
4114        + "and the NJIT Highlanders."
4115    )
4116    df = get_volleyball_raw_pbp(4473231)
4117    print(df)
4118
4119    # Get the play-by-play data of a January 21st, 2023
4120    # game between the Baruch Bearcats and the Widener Pride.
4121    print(
4122        "Get the play-by-play data of a January 21st, 2023 "
4123        + "game between the Baruch Bearcats and the Widener Pride."
4124    )
4125    df = get_volleyball_raw_pbp(2355323)
4126    print(df)
4127
4128    # Get the play-by-play data of a February 24th, 2022
4129    # game between the Ball St. Cardinals and the Lindenwood Lions.
4130    print(
4131        "Get the play-by-play data of a February 24th, 2022 "
4132        + "game between the Ball St. Cardinals and the Lindenwood Lions."
4133    )
4134    df = get_volleyball_raw_pbp(2162239)
4135    print(df)
4136
4137    # Get the play-by-play data of a March 7th, 2021
4138    # game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets.
4139    print(
4140        "Get the play-by-play data of a March 7th, 2021 "
4141        + "game between the Adrian Bulldogs "
4142        + "and the Baldwin Wallace Yellow Jackets."
4143    )
4144    df = get_volleyball_raw_pbp(1998844)
4145    print(df)
4146
4147    # Get the play-by-play data of a March 1th, 2020
4148    # game between the USC Trojans and the CUI Golden Eagles.
4149    print(
4150        "Get the play-by-play data of a March 1th, 2020 "
4151        + "game between the USC Trojans and the CUI Golden Eagles."
4152    )
4153    df = get_volleyball_raw_pbp(1820058)
4154    print(df)
4155
4156    # Get the play-by-play data of an April 4th, 2019
4157    # game between the Lesly Lynx and the Pine Manor Gators (D3).
4158    print(
4159        "Get the play-by-play data of an April 4th, 2019 "
4160        + "game between the Lesly Lynx and the Pine Manor Gators (D3)."
4161    )
4162    df = get_volleyball_raw_pbp(1723131)
4163    print(df)
4164
4165    ```
4166
4167    Returns
4168    ----------
4169    A pandas `DataFrame` object with a play-by-play (PBP) data in a given game.
4170
4171    """
4172    load_from_cache = True
4173    # is_overtime = False
4174
4175    sport_id = ""
4176    season = 0
4177    away_score = 0
4178    home_score = 0
4179
4180    home_sets_won = 0
4181    away_sets_won = 0
4182
4183    home_set_1_score = 0
4184    away_set_1_score = 0
4185
4186    home_set_2_score = 0
4187    away_set_2_score = 0
4188
4189    home_set_3_score = 0
4190    away_set_3_score = 0
4191
4192    home_set_4_score = 0
4193    away_set_4_score = 0
4194
4195    home_set_5_score = 0
4196    away_set_5_score = 0
4197
4198    home_cumulative_score = 0
4199    away_cumulative_score = 0
4200
4201    MVB_teams_df = load_volleyball_teams(get_mens_data=True)
4202    MVB_team_ids_arr = MVB_teams_df["team_id"].to_list()
4203
4204    WVB_teams_df = load_volleyball_teams(get_mens_data=False)
4205    WVB_team_ids_arr = WVB_teams_df["team_id"].to_list()
4206
4207    pbp_df = pd.DataFrame()
4208    pbp_df_arr = []
4209    temp_df = pd.DataFrame()
4210
4211    temp_df = pd.DataFrame()
4212    home_dir = expanduser("~")
4213    home_dir = _format_folder_str(home_dir)
4214
4215    stat_columns = [
4216        "season",
4217        "game_id",
4218        "sport_id",
4219        "game_datetime",
4220        "set_num",
4221        "event_num",
4222        "event_team",
4223        "event_text",
4224        "is_scoring_play",
4225        "home_set_score",
4226        "away_set_score",
4227        "is_extra_points",
4228        "home_cumulative_score",
4229        "away_cumulative_score",
4230        "home_sets_won",
4231        "away_sets_won",
4232        "stadium_name",
4233        "attendance",
4234        "away_team_id",
4235        "away_team_name",
4236        "home_team_id",
4237        "home_team_name",
4238        "home_set_1_score",
4239        "away_set_1_score",
4240        "home_set_2_score",
4241        "away_set_2_score",
4242        "home_set_3_score",
4243        "away_set_3_score",
4244        "home_set_4_score",
4245        "away_set_4_score",
4246        "home_set_5_score",
4247        "away_set_5_score",
4248    ]
4249
4250    url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play"
4251
4252    if exists(f"{home_dir}/.ncaa_stats_py/"):
4253        pass
4254    else:
4255        mkdir(f"{home_dir}/.ncaa_stats_py/")
4256
4257    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"):
4258        pass
4259    else:
4260        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/")
4261
4262    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"):
4263        pass
4264    else:
4265        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/")
4266
4267    if exists(
4268        f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"
4269        + f"{game_id}_raw_pbp.csv"
4270    ):
4271        games_df = pd.read_csv(
4272            f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"
4273            + f"{game_id}_raw_pbp.csv"
4274        )
4275        games_df = games_df.infer_objects()
4276        file_mod_datetime = datetime.fromtimestamp(
4277            getmtime(
4278                f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"
4279                + f"{game_id}_raw_pbp.csv"
4280            )
4281        )
4282        load_from_cache = True
4283    else:
4284        file_mod_datetime = datetime.today()
4285        load_from_cache = False
4286
4287    if exists(f"{home_dir}/.ncaa_stats_py/"):
4288        pass
4289    else:
4290        mkdir(f"{home_dir}/.ncaa_stats_py/")
4291
4292    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"):
4293        pass
4294    else:
4295        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/")
4296
4297    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"):
4298        pass
4299    else:
4300        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/")
4301
4302    if exists(
4303        f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"
4304        + f"{game_id}_raw_pbp.csv"
4305    ):
4306        games_df = pd.read_csv(
4307            f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"
4308            + f"{game_id}_raw_pbp.csv"
4309        )
4310        games_df = games_df.infer_objects()
4311        file_mod_datetime = datetime.fromtimestamp(
4312            getmtime(
4313                f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"
4314                + f"{game_id}_raw_pbp.csv"
4315            )
4316        )
4317        load_from_cache = True
4318    else:
4319        logging.info("Could not find a WVB player game stats file")
4320
4321    now = datetime.today()
4322
4323    age = now - file_mod_datetime
4324
4325    if age.days >= 35:
4326        load_from_cache = False
4327
4328    if load_from_cache is True:
4329        return games_df
4330
4331    response = _get_webpage(url=url)
4332    soup = BeautifulSoup(response.text, features="lxml")
4333
4334    info_table = soup.find(
4335        "td",
4336        {
4337            "style": "padding: 0px 30px 0px 30px",
4338            "class": "d-none d-md-table-cell"
4339        }
4340    ).find(
4341        "table",
4342        {"style": "border-collapse: collapse"}
4343    )
4344
4345    info_table_rows = info_table.find_all("tr")
4346
4347    game_date_str = info_table_rows[3].find("td").text
4348    if "TBA" in game_date_str:
4349        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
4350    elif "tba" in game_date_str:
4351        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
4352    elif "TBD" in game_date_str:
4353        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
4354    elif "tbd" in game_date_str:
4355        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
4356    elif (
4357        "tbd" not in game_date_str.lower() and
4358        ":" not in game_date_str.lower()
4359    ):
4360        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
4361    else:
4362        game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p')
4363    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
4364    game_date_str = game_datetime.isoformat()
4365    # del game_datetime
4366
4367    stadium_str = info_table_rows[4].find("td").text
4368
4369    attendance_str = info_table_rows[5].find("td").text
4370    attendance_int = re.findall(
4371        r"([0-9\,]+)",
4372        attendance_str
4373    )[0]
4374    attendance_int = attendance_int.replace(",", "")
4375    attendance_int = int(attendance_int)
4376
4377    del attendance_str
4378    team_cards = soup.find_all(
4379        "td",
4380        {
4381            "valign": "center",
4382            "class": "grey_text d-none d-sm-table-cell"
4383        }
4384    )
4385
4386    away_url = team_cards[0].find_all("a")
4387    away_url = away_url[0]
4388    home_url = team_cards[1].find_all("a")
4389    home_url = home_url[0]
4390
4391    away_team_name = away_url.text
4392    home_team_name = home_url.text
4393
4394    away_team_id = away_url.get("href")
4395    home_team_id = home_url.get("href")
4396
4397    away_team_id = away_team_id.replace("/teams", "")
4398    away_team_id = away_team_id.replace("/team", "")
4399    away_team_id = away_team_id.replace("/", "")
4400    away_team_id = int(away_team_id)
4401
4402    home_team_id = home_team_id.replace("/teams", "")
4403    home_team_id = home_team_id.replace("/team", "")
4404    home_team_id = home_team_id.replace("/", "")
4405    home_team_id = int(home_team_id)
4406
4407    if home_team_id in MVB_team_ids_arr:
4408        sport_id = "MVB"
4409        temp_df = MVB_teams_df[MVB_teams_df["team_id"] == home_team_id]
4410        season = temp_df["season"].iloc[0]
4411        del temp_df
4412    elif home_team_id in WVB_team_ids_arr:
4413        sport_id = "WVB"
4414        temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id]
4415        season = temp_df["season"].iloc[0]
4416        del temp_df
4417    # This should never be the case,
4418    # but if something goes very horribly wrong,
4419    # double check the away team ID to
4420    # the MVB and WVB team ID list.
4421    elif away_team_id in MVB_team_ids_arr:
4422        sport_id = "MVB"
4423        temp_df = MVB_teams_df[MVB_teams_df["team_id"] == away_team_id]
4424        season = temp_df["season"].iloc[0]
4425        del temp_df
4426    elif away_team_id in WVB_team_ids_arr:
4427        sport_id = "WVB"
4428        temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id]
4429        season = temp_df["season"].iloc[0]
4430        del temp_df
4431    # If we get to this, we are in a code red situation.
4432    # "SHUT IT DOWN" - Gordon Ramsay
4433    else:
4434        raise ValueError(
4435            "Could not identify if this is a " +
4436            "MVB or WVB game based on team IDs. "
4437        )
4438
4439    section_cards = soup.find_all(
4440        "div",
4441        {"class": "row justify-content-md-center w-100"}
4442    )
4443
4444    if len(section_cards) == 0:
4445        logging.warning(
4446            f"Could not find any plays for game ID `{game_id}`. " +
4447            "Returning empty DataFrame."
4448        )
4449        df = pd.DataFrame(columns=stat_columns)
4450        return df
4451
4452    # play_id = 0
4453    for card in section_cards:
4454        is_extra_points = False
4455        event_text = ""
4456
4457        set_num_str = card.find(
4458            "div",
4459            {"class": "card-header"}
4460        ).text
4461        set_num = re.findall(
4462            r"([0-9]+)",
4463            set_num_str
4464        )
4465
4466        set_num = int(set_num[0])
4467
4468        table_body = card.find("table").find("tbody").find_all("tr")
4469
4470        # pbp rows
4471        for row in table_body:
4472            is_scoring_play = True
4473            t_cells = row.find_all("td")
4474            t_cells = [x.text.strip() for x in t_cells]
4475            game_time_str = t_cells[0]
4476
4477            if len(t_cells[0]) > 0:
4478                event_team = away_team_id
4479                event_text = t_cells[0]
4480            elif len(t_cells[2]) > 0:
4481                event_team = home_team_id
4482                event_text = t_cells[2]
4483
4484            if "+" in event_text:
4485                temp = event_text.split("\n")
4486                if len(temp) >= 2:
4487                    event_text = temp[1]
4488                else:
4489                    raise Exception(
4490                        "Unhandled situation " +
4491                        f"when parsing a scoring play: `{temp}`"
4492                    )
4493                # print()
4494            else:
4495                event_text = event_text.replace("\n", "")
4496
4497            event_text = event_text.replace("  ", " ")
4498            event_text = event_text.strip()
4499
4500            if len(t_cells) == 3:
4501                try:
4502                    away_score, home_score = t_cells[1].split("-")
4503
4504                    away_score = int(away_score)
4505                    home_score = int(home_score)
4506                    is_scoring_play = True
4507                except ValueError:
4508                    logging.info(
4509                        "Could not extract a score " +
4510                        f"from the following play `{event_text}`"
4511                    )
4512                    is_scoring_play = False
4513                except Exception as e:
4514                    logging.warning(
4515                        f"An unhandled exception has occurred: `{e}`"
4516                    )
4517                    raise e
4518                    # scoring_play = False
4519            elif len(t_cells) > 3:
4520                raise SyntaxError(
4521                    f"Unhandled PBP row format in game ID `{game_id}`"
4522                )
4523
4524            if set_num <= 4 and home_score == 24 and away_score == 24:
4525                is_extra_points = True
4526            elif set_num == 5 and home_score == 14 and away_score == 14:
4527                is_extra_points = True
4528
4529            temp_home_cumulative_score = home_cumulative_score + home_score
4530            temp_away_cumulative_score = away_cumulative_score + away_score
4531
4532            temp_df = pd.DataFrame(
4533                {
4534                    # "season": season,
4535                    # "game_id": game_id,
4536                    # "sport_id": sport_id,
4537                    # "away_team_id": away_team_id,
4538                    # "away_team_name": away_team_name,
4539                    # "home_team_id": home_team_id,
4540                    # "home_team_name": home_team_name,
4541                    "game_time_str": game_time_str,
4542                    "set_num": set_num,
4543                    "away_set_score": away_score,
4544                    "home_set_score": home_score,
4545                    "event_team": event_team,
4546                    "event_text": event_text,
4547                    "is_scoring_play": is_scoring_play,
4548                    "is_extra_points": is_extra_points,
4549                    "home_cumulative_score": temp_home_cumulative_score,
4550                    "away_cumulative_score": temp_away_cumulative_score,
4551                    "home_sets_won": home_sets_won,
4552                    "away_sets_won": away_sets_won,
4553                },
4554                index=[0],
4555            )
4556            pbp_df_arr.append(temp_df)
4557
4558        if set_num == 1:
4559            home_set_1_score = home_score
4560            away_set_1_score = away_score
4561            home_cumulative_score = home_set_1_score
4562            away_cumulative_score = away_set_1_score
4563        elif set_num == 2:
4564            home_set_2_score = home_score
4565            away_set_2_score = away_score
4566            home_cumulative_score += home_set_2_score
4567            away_cumulative_score += away_set_2_score
4568        elif set_num == 3:
4569            home_set_3_score = home_score
4570            away_set_3_score = away_score
4571            home_cumulative_score += home_set_3_score
4572            away_cumulative_score += away_set_3_score
4573        elif set_num == 4:
4574            home_set_4_score = home_score
4575            away_set_4_score = away_score
4576            home_cumulative_score += home_set_4_score
4577            away_cumulative_score += away_set_4_score
4578        elif set_num == 5:
4579            home_set_5_score = home_score
4580            away_set_5_score = away_score
4581            home_cumulative_score += home_set_4_score
4582            away_cumulative_score += away_set_4_score
4583
4584        if temp_away_cumulative_score > home_cumulative_score:
4585            away_sets_won += 1
4586        elif temp_away_cumulative_score < home_cumulative_score:
4587            home_sets_won += 1
4588
4589        # End of set play
4590        temp_df = pd.DataFrame(
4591            {
4592                # "season": season,
4593                # "game_id": game_id,
4594                # "sport_id": sport_id,
4595                # "away_team_id": away_team_id,
4596                # "away_team_name": away_team_name,
4597                # "home_team_id": home_team_id,
4598                # "home_team_name": home_team_name,
4599                "game_time_str": game_time_str,
4600                "set_num": set_num,
4601                "away_set_score": away_score,
4602                "home_set_score": home_score,
4603                "event_team": event_team,
4604                "event_text": f"END SET {set_num}",
4605                "is_scoring_play": is_scoring_play,
4606                "is_extra_points": is_extra_points,
4607                "home_cumulative_score": temp_home_cumulative_score,
4608                "away_cumulative_score": temp_away_cumulative_score,
4609                "home_sets_won": home_sets_won,
4610                "away_sets_won": away_sets_won,
4611            },
4612            index=[0],
4613        )
4614        pbp_df_arr.append(temp_df)
4615
4616    # End of game play
4617    temp_df = pd.DataFrame(
4618        {
4619            # "season": season,
4620            # "game_id": game_id,
4621            # "sport_id": sport_id,
4622            # "away_team_id": away_team_id,
4623            # "away_team_name": away_team_name,
4624            # "home_team_id": home_team_id,
4625            # "home_team_name": home_team_name,
4626            "game_time_str": game_time_str,
4627            "set_num": set_num,
4628            "away_set_score": away_score,
4629            "home_set_score": home_score,
4630            "event_team": event_team,
4631            "event_text": "END MATCH",
4632            "is_scoring_play": is_scoring_play,
4633            "is_extra_points": is_extra_points,
4634            "home_cumulative_score": temp_home_cumulative_score,
4635            "away_cumulative_score": temp_away_cumulative_score,
4636            "home_sets_won": home_sets_won,
4637            "away_sets_won": away_sets_won,
4638        },
4639        index=[0],
4640    )
4641    pbp_df_arr.append(temp_df)
4642    pbp_df = pd.concat(pbp_df_arr, ignore_index=True)
4643    pbp_df["event_num"] = pbp_df.index + 1
4644    pbp_df["game_datetime"] = game_date_str
4645    pbp_df["season"] = season
4646    pbp_df["game_id"] = game_id
4647    pbp_df["sport_id"] = sport_id
4648    pbp_df["stadium_name"] = stadium_str
4649    pbp_df["attendance"] = attendance_int
4650    pbp_df["away_team_id"] = away_team_id
4651    pbp_df["away_team_name"] = away_team_name
4652    pbp_df["home_team_id"] = home_team_id
4653    pbp_df["home_team_name"] = home_team_name
4654
4655    pbp_df["home_set_1_score"] = home_set_1_score
4656    pbp_df["away_set_1_score"] = away_set_1_score
4657
4658    pbp_df["home_set_2_score"] = home_set_2_score
4659    pbp_df["away_set_2_score"] = away_set_2_score
4660
4661    pbp_df["home_set_3_score"] = home_set_3_score
4662    pbp_df["away_set_3_score"] = away_set_3_score
4663
4664    pbp_df["home_set_4_score"] = home_set_4_score
4665    pbp_df["away_set_4_score"] = away_set_4_score
4666
4667    pbp_df["home_set_5_score"] = home_set_5_score
4668    pbp_df["away_set_5_score"] = away_set_5_score
4669
4670    # print(pbp_df.columns)
4671    pbp_df = pbp_df.reindex(columns=stat_columns)
4672    pbp_df = pbp_df.infer_objects()
4673
4674    if sport_id == "MVB":
4675        pbp_df.to_csv(
4676            f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"
4677            + f"{game_id}_raw_pbp.csv",
4678            index=False
4679        )
4680    elif sport_id == "WVB":
4681        pbp_df.to_csv(
4682            f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"
4683            + f"{game_id}_raw_pbp.csv",
4684            index=False
4685        )
4686    else:
4687        raise ValueError(
4688            f"Improper Sport ID: `{sport_id}`"
4689        )
4690
4691    return pbp_df
4692
4693
def get_parsed_volleyball_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to parse play-by-play (PBP)
    data for that game.

    The raw PBP is fetched with `get_volleyball_raw_pbp()`,
    parsed by `_volleyball_pbp_helper()`, and every player-name column
    is matched against both team rosters to fill in the matching
    player-ID column. The result is cached as a CSV file in
    `~/.ncaa_stats_py/volleyball_{sport_id}/parsed_pbp/`.
    A cached file is reused unless its age, in whole days, is
    greater than one.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_parsed_volleyball_pbp

    # NOTE: `game_id` below is a placeholder.
    #       Replace it with a real NCAA volleyball game ID.
    game_id = 0
    df = get_parsed_volleyball_pbp(game_id)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a play-by-play (PBP) data in a given game.

    """
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)
    cache_root = f"{home_dir}/.ncaa_stats_py/"

    games_df = pd.DataFrame()
    # Defaults used when no cached file exists for either league.
    file_mod_datetime = datetime.today()
    load_from_cache = False

    if not exists(cache_root):
        mkdir(cache_root)

    # The sport is not known until the raw PBP is loaded, so both the
    # men's and women's caches are checked. When both hold a file for
    # this game, the women's (checked last) one is used.
    for league in ("MVB", "WVB"):
        league_dir = f"{cache_root}volleyball_{league}/"
        parsed_dir = f"{league_dir}parsed_pbp/"

        for folder in (league_dir, parsed_dir):
            if not exists(folder):
                mkdir(folder)

        cache_file = f"{parsed_dir}{game_id}_parsed_pbp.csv"
        if exists(cache_file):
            games_df = pd.read_csv(cache_file)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        else:
            logging.info(f"Could not find a {league} parsed PBP file")

    age = datetime.today() - file_mod_datetime

    if age.days > 1:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    raw_df = get_volleyball_raw_pbp(game_id=game_id)

    sport_id = raw_df["sport_id"].iloc[0]
    home_team_id = raw_df["home_team_id"].iloc[0]
    away_team_id = raw_df["away_team_id"].iloc[0]

    pbp_df = _volleyball_pbp_helper(raw_df=raw_df)

    # Rosters are keyed by lowercase name so PBP names can be matched
    # case-insensitively.
    home_roster_df = get_volleyball_team_roster(team_id=home_team_id)
    home_roster_df["Name"] = home_roster_df["Name"].str.lower()

    away_roster_df = get_volleyball_team_roster(team_id=away_team_id)
    away_roster_df["Name"] = away_roster_df["Name"].str.lower()

    home_players_arr = dict(
        zip(
            home_roster_df["Name"], home_roster_df["player_id"]
        )
    )
    away_players_arr = dict(
        zip(
            away_roster_df["Name"], away_roster_df["player_id"]
        )
    )
    # On a name collision, the away roster entry wins.
    players_arr = home_players_arr | away_players_arr

    # Each `*_name` column has a sibling `*_id` column to populate.
    player_cols = [
        "substitution_player_1",
        "substitution_player_2",
        "substitution_player_3",
        "substitution_player_4",
        "serve_player",
        "reception_player",
        "set_player",
        "set_error_player",
        "attack_player",
        "dig_player",
        "kill_player",
        "block_player_1",
        "block_player_2",
        "ball_handling_error_player",
        "dig_error_player",
    ]
    name_cols = [f"{col}_name" for col in player_cols]
    id_cols = [f"{col}_id" for col in player_cols]

    for name_column, id_column in zip(name_cols, id_cols):
        # `regex=False` is required here: with regex matching, "." would
        # match (and delete) every character in the name.
        pbp_df[name_column] = pbp_df[name_column].str.replace(
            "3a", "", regex=False
        )
        pbp_df[name_column] = pbp_df[name_column].str.replace(
            ".", "", regex=False
        )
        pbp_df[id_column] = pbp_df[name_column].str.lower()

        # Only rows that actually hold a name are normalized,
        # so `_name_smother()` is never handed a null value.
        has_name = pbp_df[id_column].notnull()
        pbp_df.loc[has_name, id_column] = pbp_df.loc[
            has_name, id_column
        ].map(_name_smother)
        # Names missing from both rosters end up as null IDs.
        pbp_df[id_column] = pbp_df[id_column].map(players_arr)

    pbp_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/parsed_pbp/"
        + f"{game_id}_parsed_pbp.csv",
        index=False
    )
    return pbp_df
def get_volleyball_teams( season: int, level: str | int, get_mens_data: bool = False) -> pandas.core.frame.DataFrame:
 36def get_volleyball_teams(
 37    season: int,
 38    level: str | int,
 39    get_mens_data: bool = False
 40) -> pd.DataFrame:
 41    """
 42    Retrieves a list of volleyball teams from the NCAA.
 43
 44    Parameters
 45    ----------
 46    `season` (int, mandatory):
 47        Required argument.
 48        Specifies the season you want NCAA volleyball team information from.
 49
 50    `level` (int, mandatory):
 51        Required argument.
 52        Specifies the level/division you want
 53        NCAA volleyball team information from.
 54        This can either be an integer (1-3) or a string ("I"-"III").
 55
 56    `get_mens_data` (bool, optional):
 57        Optional argument.
 58        If you want men's volleyball data instead of women's volleyball data,
 59        set this to `True`.
 60
 61    Usage
 62    ----------
 63    ```python
 64
 65    from ncaa_stats_py.volleyball import get_volleyball_teams
 66
 67    ########################################
 68    #          Men's volleyball            #
 69    ########################################
 70
 71    # Get all D1 men's volleyball teams for the 2024 season.
 72    print("Get all D1 men's volleyball teams for the 2024 season.")
 73    df = get_volleyball_teams(2024, 1)
 74    print(df)
 75
 76    # Get all D2 men's volleyball teams for the 2023 season.
 77    print("Get all D2 men's volleyball teams for the 2023 season.")
 78    df = get_volleyball_teams(2023, 2)
 79    print(df)
 80
 81    # Get all D3 men's volleyball teams for the 2022 season.
 82    print("Get all D3 men's volleyball teams for the 2022 season.")
 83    df = get_volleyball_teams(2022, 3)
 84    print(df)
 85
 86    # Get all D1 men's volleyball teams for the 2021 season.
 87    print("Get all D1 men's volleyball teams for the 2021 season.")
 88    df = get_volleyball_teams(2021, "I")
 89    print(df)
 90
 91    # Get all D2 men's volleyball teams for the 2020 season.
 92    print("Get all D2 men's volleyball teams for the 2020 season.")
 93    df = get_volleyball_teams(2020, "II")
 94    print(df)
 95
 96    # Get all D3 men's volleyball teams for the 2019 season.
 97    print("Get all D3 men's volleyball teams for the 2019 season.")
 98    df = get_volleyball_teams(2019, "III")
 99    print(df)
100
101    ########################################
102    #          Women's volleyball          #
103    ########################################
104
105    # Get all D1 women's volleyball teams for the 2024 season.
106    print(
107        "Get all D1 women's volleyball teams for the 2024 season."
108    )
109    df = get_volleyball_teams(2024, 1)
110    print(df)
111
112    # Get all D2 women's volleyball teams for the 2023 season.
113    print(
114        "Get all D2 women's volleyball teams for the 2023 season."
115    )
116    df = get_volleyball_teams(2023, 2)
117    print(df)
118
119    # Get all D3 women's volleyball teams for the 2022 season.
120    print(
121        "Get all D3 women's volleyball teams for the 2022 season."
122    )
123    df = get_volleyball_teams(2022, 3)
124    print(df)
125
126    # Get all D1 women's volleyball teams for the 2021 season.
127    print(
128        "Get all D1 women's volleyball teams for the 2021 season."
129    )
130    df = get_volleyball_teams(2021, "I")
131    print(df)
132
133    # Get all D2 women's volleyball teams for the 2020 season.
134    print(
135        "Get all D2 women's volleyball teams for the 2020 season."
136    )
137    df = get_volleyball_teams(2020, "II")
138    print(df)
139
140    # Get all D3 women's volleyball teams for the 2019 season.
141    print(
142        "Get all D3 women's volleyball teams for the 2019 season."
143    )
144    df = get_volleyball_teams(2019, "III")
145    print(df)
146
147    ```
148
149    Returns
150    ----------
151    A pandas `DataFrame` object with a list of college volleyball teams
152    in that season and NCAA level.
153    """
154    # def is_comment(elem):
155    #     return isinstance(elem, Comment)
156    sport_id = ""
157    # stat_sequence = 0
158    load_from_cache = True
159    home_dir = expanduser("~")
160    home_dir = _format_folder_str(home_dir)
161    teams_df = pd.DataFrame()
162    teams_df_arr = []
163    temp_df = pd.DataFrame()
164    formatted_level = ""
165    ncaa_level = 0
166
167    if get_mens_data is True:
168        sport_id = "MVB"
169        stat_sequence = 528
170    elif get_mens_data is False:
171        sport_id = "WVB"
172        stat_sequence = 48
173
174    if isinstance(level, int) and level == 1:
175        formatted_level = "I"
176        ncaa_level = 1
177    elif isinstance(level, int) and level == 2:
178        formatted_level = "II"
179        ncaa_level = 2
180    elif isinstance(level, int) and level == 3:
181        formatted_level = "III"
182        ncaa_level = 3
183    elif isinstance(level, str) and (
184        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
185    ):
186        ncaa_level = 1
187        formatted_level = level.upper()
188    elif isinstance(level, str) and (
189        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
190    ):
191        ncaa_level = 2
192        formatted_level = level.upper()
193    elif isinstance(level, str) and (
194        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
195    ):
196        ncaa_level = 3
197        formatted_level = level.upper()
198
199    if exists(f"{home_dir}/.ncaa_stats_py/"):
200        pass
201    else:
202        mkdir(f"{home_dir}/.ncaa_stats_py/")
203
204    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
205        pass
206    else:
207        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
208
209    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"):
210        pass
211    else:
212        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/")
213
214    if exists(
215        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
216        + f"{season}_{formatted_level}_teams.csv"
217    ):
218        teams_df = pd.read_csv(
219            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
220            + f"{season}_{formatted_level}_teams.csv"
221        )
222        file_mod_datetime = datetime.fromtimestamp(
223            getmtime(
224                f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
225                + f"{season}_{formatted_level}_teams.csv"
226            )
227        )
228    else:
229        file_mod_datetime = datetime.today()
230        load_from_cache = False
231
232    now = datetime.today()
233
234    age = now - file_mod_datetime
235
236    if (
237        age.days > 1 and
238        season >= (now.year - 1) and
239        now.month <= 7
240    ):
241        load_from_cache = False
242    elif age.days >= 35:
243        load_from_cache = False
244
245    if load_from_cache is True:
246        return teams_df
247
248    logging.warning(
249        f"Either we could not load {season} D{level} schools from cache, "
250        + "or it's time to refresh the cached data."
251    )
252    schools_df = _get_schools()
253
254    # Volleyball
255    if sport_id == "MVB":
256        url = (
257            "https://stats.ncaa.org/rankings/change_sport_year_div?"
258            + f"academic_year={season}.0&division={ncaa_level}.0" +
259            f"&sport_code={sport_id}"
260        )
261    elif sport_id == "WVB":
262        url = (
263            "https://stats.ncaa.org/rankings/change_sport_year_div?"
264            + f"academic_year={season+1}.0&division={ncaa_level}.0" +
265            f"&sport_code={sport_id}"
266        )
267
268    response = _get_webpage(url=url)
269
270    soup = BeautifulSoup(response.text, features="lxml")
271    ranking_periods = soup.find("select", {"name": "rp", "id": "rp"})
272    ranking_periods = ranking_periods.find_all("option")
273
274    rp_value = 0
275    found_value = False
276
277    while found_value is False:
278        # print("check")
279        for rp in ranking_periods:
280            if "final" in rp.text.lower():
281                rp_value = rp.get("value")
282                found_value = True
283                break
284                # pass
285            elif "-" in rp.text.lower():
286                pass
287            else:
288                rp_value = rp.get("value")
289                found_value = True
290                break
291
292    if sport_id == "MVB":
293        url = (
294            "https://stats.ncaa.org/rankings/institution_trends?"
295            + f"academic_year={season}.0&division={ncaa_level}.0&"
296            + f"ranking_period={rp_value}&sport_code={sport_id}"
297        )
298    elif sport_id == "WVB":
299        url = (
300            "https://stats.ncaa.org/rankings/institution_trends?"
301            + f"academic_year={season+1}.0&division={ncaa_level}.0&"
302            + f"ranking_period={rp_value}&sport_code={sport_id}"
303        )
304
305    best_method = True
306    if (
307        (season < 2017 and sport_id == "MVB")
308    ):
309        url = (
310            "https://stats.ncaa.org/rankings/national_ranking?"
311            + f"academic_year={season}.0&division={ncaa_level}.0&"
312            + f"ranking_period={rp_value}&sport_code={sport_id}"
313            + f"&stat_seq={stat_sequence}.0"
314        )
315        response = _get_webpage(url=url)
316        best_method = False
317    elif (
318        (season < 2017 and sport_id == "WVB")
319    ):
320        url = (
321            "https://stats.ncaa.org/rankings/national_ranking?"
322            + f"academic_year={season+1}.0&division={ncaa_level}.0&"
323            + f"ranking_period={rp_value}&sport_code={sport_id}"
324            + f"&stat_seq={stat_sequence}.0"
325        )
326        response = _get_webpage(url=url)
327        best_method = False
328    elif sport_id == "MVB":
329        try:
330            response = _get_webpage(url=url)
331        except Exception as e:
332            logging.info(f"Found exception when loading teams `{e}`")
333            logging.info("Attempting backup method.")
334            url = (
335                "https://stats.ncaa.org/rankings/national_ranking?"
336                + f"academic_year={season}.0&division={ncaa_level}.0&"
337                + f"ranking_period={rp_value}&sport_code={sport_id}"
338                + f"&stat_seq={stat_sequence}.0"
339            )
340            response = _get_webpage(url=url)
341            best_method = False
342    else:
343        try:
344            response = _get_webpage(url=url)
345        except Exception as e:
346            logging.info(f"Found exception when loading teams `{e}`")
347            logging.info("Attempting backup method.")
348            url = (
349                "https://stats.ncaa.org/rankings/national_ranking?"
350                + f"academic_year={season+1}.0&division={ncaa_level}.0&"
351                + f"ranking_period={rp_value}&sport_code={sport_id}"
352                + f"&stat_seq={stat_sequence}.0"
353            )
354            response = _get_webpage(url=url)
355            best_method = False
356
357    soup = BeautifulSoup(response.text, features="lxml")
358
359    if best_method is True:
360        soup = soup.find(
361            "table",
362            {"id": "stat_grid"},
363        )
364        soup = soup.find("tbody")
365        t_rows = soup.find_all("tr")
366
367        for t in t_rows:
368            team_id = t.find("a")
369            team_id = team_id.get("href")
370            team_id = team_id.replace("/teams/", "")
371            team_id = int(team_id)
372            team_name = t.find_all("td")[0].text
373            team_conference_name = t.find_all("td")[1].text
374            # del team
375            temp_df = pd.DataFrame(
376                {
377                    "season": season,
378                    "ncaa_division": ncaa_level,
379                    "ncaa_division_formatted": formatted_level,
380                    "team_conference_name": team_conference_name,
381                    "team_id": team_id,
382                    "school_name": team_name,
383                    "sport_id": sport_id,
384                },
385                index=[0],
386            )
387            teams_df_arr.append(temp_df)
388            del temp_df
389    else:
390        soup = soup.find(
391            "table",
392            {"id": "rankings_table"},
393        )
394        soup = soup.find("tbody")
395        t_rows = soup.find_all("tr")
396
397        for t in t_rows:
398            team_id = t.find("a")
399            team_id = team_id.get("href")
400            team_id = team_id.replace("/teams/", "")
401            team_id = int(team_id)
402            team = t.find_all("td")[1].get("data-order")
403            team_name, team_conference_name = team.split(",")
404            del team
405            temp_df = pd.DataFrame(
406                {
407                    "season": season,
408                    "ncaa_division": ncaa_level,
409                    "ncaa_division_formatted": formatted_level,
410                    "team_conference_name": team_conference_name,
411                    "team_id": team_id,
412                    "school_name": team_name,
413                    "sport_id": sport_id,
414                },
415                index=[0],
416            )
417            teams_df_arr.append(temp_df)
418            del temp_df
419
420    teams_df = pd.concat(teams_df_arr, ignore_index=True)
421    teams_df = pd.merge(
422        left=teams_df,
423        right=schools_df,
424        on=["school_name"],
425        how="left"
426    )
427    teams_df.sort_values(by=["team_id"], inplace=True)
428
429    teams_df.to_csv(
430        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"
431        + f"{season}_{formatted_level}_teams.csv",
432        index=False,
433    )
434
435    return teams_df

Retrieves a list of volleyball teams from the NCAA.

Parameters

season (int, mandatory): Required argument. Specifies the season you want NCAA volleyball team information from.

level (int | str, mandatory): Required argument. Specifies the level/division you want NCAA volleyball team information from. This can either be an integer (1-3) or a string ("I"-"III").

get_mens_data (bool, optional): Optional argument. If you want men's volleyball data instead of women's volleyball data, set this to True.

Usage

from ncaa_stats_py.volleyball import get_volleyball_teams

########################################
#          Men's volleyball            #
########################################

# Get all D1 men's volleyball teams for the 2024 season.
print("Get all D1 men's volleyball teams for the 2024 season.")
df = get_volleyball_teams(2024, 1, get_mens_data=True)
print(df)

# Get all D2 men's volleyball teams for the 2023 season.
print("Get all D2 men's volleyball teams for the 2023 season.")
df = get_volleyball_teams(2023, 2, get_mens_data=True)
print(df)

# Get all D3 men's volleyball teams for the 2022 season.
print("Get all D3 men's volleyball teams for the 2022 season.")
df = get_volleyball_teams(2022, 3, get_mens_data=True)
print(df)

# Get all D1 men's volleyball teams for the 2021 season.
print("Get all D1 men's volleyball teams for the 2021 season.")
df = get_volleyball_teams(2021, "I", get_mens_data=True)
print(df)

# Get all D2 men's volleyball teams for the 2020 season.
print("Get all D2 men's volleyball teams for the 2020 season.")
df = get_volleyball_teams(2020, "II", get_mens_data=True)
print(df)

# Get all D3 men's volleyball teams for the 2019 season.
print("Get all D3 men's volleyball teams for the 2019 season.")
df = get_volleyball_teams(2019, "III", get_mens_data=True)
print(df)

########################################
#          Women's volleyball          #
########################################

# Get all D1 women's volleyball teams for the 2024 season.
print(
    "Get all D1 women's volleyball teams for the 2024 season."
)
df = get_volleyball_teams(2024, 1)
print(df)

# Get all D2 women's volleyball teams for the 2023 season.
print(
    "Get all D2 women's volleyball teams for the 2023 season."
)
df = get_volleyball_teams(2023, 2)
print(df)

# Get all D3 women's volleyball teams for the 2022 season.
print(
    "Get all D3 women's volleyball teams for the 2022 season."
)
df = get_volleyball_teams(2022, 3)
print(df)

# Get all D1 women's volleyball teams for the 2021 season.
print(
    "Get all D1 women's volleyball teams for the 2021 season."
)
df = get_volleyball_teams(2021, "I")
print(df)

# Get all D2 women's volleyball teams for the 2020 season.
print(
    "Get all D2 women's volleyball teams for the 2020 season."
)
df = get_volleyball_teams(2020, "II")
print(df)

# Get all D3 women's volleyball teams for the 2019 season.
print(
    "Get all D3 women's volleyball teams for the 2019 season."
)
df = get_volleyball_teams(2019, "III")
print(df)

Returns

A pandas DataFrame object with a list of college volleyball teams in that season and NCAA level.

def load_volleyball_teams( start_year: int = 2011, get_mens_data: bool = False) -> pandas.core.frame.DataFrame:
def load_volleyball_teams(
    start_year: int = 2011,
    get_mens_data: bool = False
) -> pd.DataFrame:
    """
    Compiles a list of known NCAA volleyball teams in NCAA volleyball history.

    Parameters
    ----------
    `start_year` (int, optional):
        Optional argument.
        Specifies the first season you want
        NCAA volleyball team information from.

    `get_mens_data` (bool, optional):
        Optional argument.
        If you want men's volleyball data instead of women's volleyball data,
        set this to `True`.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import load_volleyball_teams

    # WARNING: Running this script "as-is" for the first time may
    #          take some time.
    #          The *N*th time you run this script will be faster.

    # Load in every women's volleyball team
    # from 2011 to present day.
    print(
        "Load in every women's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams()
    print(df)

    # Load in every men's volleyball team
    # from 2011 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams(get_mens_data=True)
    print(df)

    # Load in every men's volleyball team
    # from 2020 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2020 to present day."
    )
    df = load_volleyball_teams(start_year=2020, get_mens_data=True)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a list of
    all known college volleyball teams.
    If `start_year` is later than the last season this function would
    request, an empty `DataFrame` is returned.

    """
    # Only an explicit `True` selects the men's data set; this mirrors the
    # identity checks (`is True` / `is False`) this function has always used.
    load_mens_teams = get_mens_data is True

    now = datetime.now()

    # After May, the women's list also requests the season labelled with
    # next calendar year. Every other case stops at the current year.
    if now.month > 5 and get_mens_data is False:
        last_season = now.year + 1
    else:
        last_season = now.year
    ncaa_seasons = range(start_year, last_season + 1)

    if load_mens_teams:
        # Men's teams are only requested for these two levels.
        ncaa_divisions = ["I", "III"]
        label = "men's"
    else:
        ncaa_divisions = ["I", "II", "III"]
        label = "women's"

    logging.info(
        "Loading in all NCAA volleyball teams. "
        + "If this is the first time you're seeing this message, "
        + "it may take some time (3-10 minutes) for this to load."
    )

    teams_df_arr = []
    for s in ncaa_seasons:
        logging.info(
            f"Loading in {label} volleyball teams for the {s} season."
        )
        for d in ncaa_divisions:
            teams_df_arr.append(
                get_volleyball_teams(
                    season=s,
                    level=d,
                    get_mens_data=load_mens_teams
                )
            )

    if not teams_df_arr:
        # `pd.concat()` raises a `ValueError` when given an empty list,
        # so hand back an empty frame when no season was requested.
        return pd.DataFrame()

    teams_df = pd.concat(teams_df_arr, ignore_index=True)
    teams_df = teams_df.infer_objects()
    return teams_df

Compiles a list of known NCAA volleyball teams in NCAA volleyball history.

Parameters

start_year (int, optional): Optional argument. Specifies the first season you want NCAA volleyball team information from.

get_mens_data (bool, optional): Optional argument. If you want men's volleyball data instead of women's volleyball data, set this to True.

Usage

from ncaa_stats_py.volleyball import load_volleyball_teams

# WARNING: Running this script "as-is" for the first time may
#          take some time.
#          The *N*th time you run this script will be faster.

# Load in every women's volleyball team
# from 2011 to present day.
print(
    "Load in every women's volleyball team " +
    "from 2011 to present day."
)
df = load_volleyball_teams()
print(df)

# Load in every men's volleyball team
# from 2011 to present day.
print(
    "Load in every men's volleyball team " +
    "from 2011 to present day."
)
df = load_volleyball_teams(get_mens_data=True)
print(df)

# Load in every men's volleyball team
# from 2020 to present day.
print(
    "Load in every men's volleyball team " +
    "from 2020 to present day."
)
df = load_volleyball_teams(start_year=2020, get_mens_data=True)
print(df)

Returns

A pandas DataFrame object with a list of all known college volleyball teams.

def get_volleyball_team_schedule(team_id: int) -> pandas.core.frame.DataFrame:
 560def get_volleyball_team_schedule(team_id: int) -> pd.DataFrame:
 561    """
 562    Retrieves a team schedule, from a valid NCAA volleyball team ID.
 563
 564    Parameters
 565    ----------
 566    `team_id` (int, mandatory):
 567        Required argument.
 568        Specifies the team you want a schedule from.
 569        This is separate from a school ID, which identifies the institution.
 570        A team ID should be unique to a school, and a season.
 571
 572    Usage
 573    ----------
 574    ```python
 575
 576    from ncaa_stats_py.volleyball import get_volleyball_team_schedule
 577
 578    ########################################
 579    #          Women's volleyball          #
 580    ########################################
 581
 582    # Get the team schedule for the
 583    # 2024 Toledo WVB team (D1, ID: 585329).
 584    print(
 585        "Get the team schedule for the " +
 586        "2024 Toledo WVB team (D1, ID: 585329)."
 587    )
 588    df = get_volleyball_team_schedule(585329)
 589    print(df)
 590
 591    # Get the team schedule for the
 592    # 2023 Black Hills St. WVB team (D2, ID: 559709).
 593    print(
 594        "Get the team schedule for the " +
 595        "2023 Black Hills St. WVB team (D2, ID: 559709)."
 596    )
 597    df = get_volleyball_team_schedule(559709)
 598    print(df)
 599
 600    # Get the team schedule for the
 601    # 2022 Mount Mary WVB team (D3, ID: 539750).
 602    print(
 603        "Get the team schedule for the " +
 604        "2022 Mount Mary WVB team (D3, ID: 539750)."
 605    )
 606    df = get_volleyball_team_schedule(539750)
 607    print(df)
 608
 609    # Get the team schedule for the
 610    # 2021 TCU WVB team (D1, ID: 522750).
 611    print(
 612        "Get the team schedule for the " +
 613        "2024 TCU WVB team (D1, ID: 522750)."
 614    )
 615    df = get_volleyball_team_schedule(522750)
 616    print(df)
 617
 618    # Get the team schedule for the
 619    # 2020 Purdue Northwest WVB team (D2, ID: 504832).
 620    print(
 621        "Get the team schedule for the " +
 622        "2020 Purdue Northwest WVB team (D2, ID: 504832)."
 623    )
 624    df = get_volleyball_team_schedule(504832)
 625    print(df)
 626
 627    # Get the team schedule for the
 628    # 2019 Juniata WVB team (D3, ID: 482642).
 629    print(
 630        "Get the team schedule for the " +
 631        "2019 Juniata WVB team (D3, ID: 482642)."
 632    )
 633    df = get_volleyball_team_schedule(482642)
 634    print(df)
 635
 636    ########################################
 637    #          Men's volleyball            #
 638    ########################################
 639
 640    # Get the team schedule for the
 641    # 2024 Missouri S&T MVB team (D1, ID: 573720).
 642    print(
 643        "Get the team schedule for the " +
 644        "2024 Missouri S&T MVB team (D1, ID: 573720)."
 645    )
 646    df = get_volleyball_team_schedule(573720)
 647    print(df)
 648
 649    # Get the team schedule for the
 650    # 2023 Rockford MVB team (D3, ID: 550890).
 651    print(
 652        "Get the team schedule for the " +
 653        "2023 Rockford MVB team (D3, ID: 550890)."
 654    )
 655    df = get_volleyball_team_schedule(550890)
 656    print(df)
 657
 658    # Get the team schedule for the
 659    # 2022 McKendree MVB team (D1, ID: 529896).
 660    print(
 661        "Get the team schedule for the " +
 662        "2022 McKendreeMaritime MVB team (D1, ID: 529896)."
 663    )
 664    df = get_volleyball_team_schedule(529896)
 665    print(df)
 666
 667    # Get the team schedule for the
 668    # 2021 Concordia Chicago MVB team (D3, ID: 508505).
 669    print(
 670        "Get the team schedule for the " +
 671        "2021 Concordia Chicago MVB team (D3, ID: 508505)."
 672    )
 673    df = get_volleyball_team_schedule(508505)
 674    print(df)
 675
 676    # Get the team schedule for the
 677    # 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
 678    print(
 679        "Get the team schedule for the " +
 680        "2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
 681    )
 682    df = get_volleyball_team_schedule(487992)
 683    print(df)
 684
 685    # Get the team schedule for the
 686    # 2019 Loras MVB team (D3, ID: 453845).
 687    print(
 688        "Get the team schedule for the " +
 689        "2019 Loras MVB team (D3, ID: 453845)."
 690    )
 691    df = get_volleyball_team_schedule(453845)
 692    print(df)
 693
 694    ```
 695
 696    Returns
 697    ----------
 698    A pandas `DataFrame` object with an NCAA volleyball team's schedule.
 699
 700    """
 701
 702    sport_id = ""
 703    schools_df = _get_schools()
 704    games_df = pd.DataFrame()
 705    games_df_arr = []
 706    season = 0
 707    temp_df = pd.DataFrame()
 708    load_from_cache = True
 709
 710    home_dir = expanduser("~")
 711    home_dir = _format_folder_str(home_dir)
 712
 713    url = f"https://stats.ncaa.org/teams/{team_id}"
 714
 715    try:
 716        team_df = load_volleyball_teams()
 717        team_df = team_df[team_df["team_id"] == team_id]
 718        season = team_df["season"].iloc[0]
 719        ncaa_division = team_df["ncaa_division"].iloc[0]
 720        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 721        sport_id = "WVB"
 722    except Exception:
 723        team_df = load_volleyball_teams(get_mens_data=True)
 724        team_df = team_df[team_df["team_id"] == team_id]
 725        season = team_df["season"].iloc[0]
 726        ncaa_division = team_df["ncaa_division"].iloc[0]
 727        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
 728        sport_id = "MVB"
 729    # team_conference_name = team_df["team_conference_name"].iloc[0]
 730    # school_name = team_df["school_name"].iloc[0]
 731    # school_id = int(team_df["school_id"].iloc[0])
 732
 733    del team_df
 734
 735    if exists(f"{home_dir}/.ncaa_stats_py/"):
 736        pass
 737    else:
 738        mkdir(f"{home_dir}/.ncaa_stats_py/")
 739
 740    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
 741        pass
 742    else:
 743        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
 744
 745    if exists(
 746        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 747    ):
 748        pass
 749    else:
 750        mkdir(
 751            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 752        )
 753
 754    if exists(
 755        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 756        + f"{team_id}_team_schedule.csv"
 757    ):
 758        games_df = pd.read_csv(
 759            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
 760            + f"{team_id}_team_schedule.csv"
 761        )
 762        file_mod_datetime = datetime.fromtimestamp(
 763            getmtime(
 764                f"{home_dir}/.ncaa_stats_py/"
 765                + f"volleyball_{sport_id}/team_schedule/"
 766                + f"{team_id}_team_schedule.csv"
 767            )
 768        )
 769    else:
 770        file_mod_datetime = datetime.today()
 771        load_from_cache = False
 772
 773    now = datetime.today()
 774
 775    age = now - file_mod_datetime
 776    if (
 777        age.days > 1 and
 778        season >= now.year
 779    ):
 780        load_from_cache = False
 781
 782    if load_from_cache is True:
 783        return games_df
 784
 785    response = _get_webpage(url=url)
 786    soup = BeautifulSoup(response.text, features="lxml")
 787
 788    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
 789    season_name = (
 790        soup.find("select", {"id": "year_list"})
 791        .find("option", {"selected": "selected"})
 792        .text
 793    )
 794
 795    soup = soup.find_all(
 796        "div",
 797        {"class": "col p-0"},
 798    )
 799
 800    # declaring it here to prevent potential problems down the road.
 801    table_data = ""
 802    for s in soup:
 803        try:
 804            temp_name = s.find("div", {"class": "card-header"})
 805            temp_name = temp_name.text
 806        except Exception as e:
 807            logging.warning(
 808                f"Could not parse card header. Full exception `{e}`. "
 809                + "Attempting alternate method."
 810            )
 811            temp_name = s.find("tr", {"class": "heading"}).find("td").text
 812
 813        if "schedule" in temp_name.lower():
 814            table_data = s.find("table")
 815
 816    t_rows = table_data.find_all("tr", {"class": "underline_rows"})
 817
 818    if len(t_rows) == 0:
 819        t_rows = table_data.find_all("tr")
 820
 821    for g in t_rows:
 822        is_valid_row = True
 823        game_num = 1
 824        ot_periods = 0
 825        is_home_game = True
 826        is_neutral_game = False
 827
 828        cells = g.find_all("td")
 829        if len(cells) <= 1:
 830            # Because of how *well* designed
 831            # stats.ncaa.org is, if we have to use execute
 832            # the `if len(t_rows) == 0:` code,
 833            # we need to catch any cases where every element in a
 834            # table row (`<tr>`) is a table header (`<th>`),
 835            # instead of a table data cell (`<td>`)
 836            continue
 837
 838        game_date = cells[0].text
 839
 840        # If "(" is in the same cell as the date,
 841        # this means that this game is an extra innings game.
 842        # The number encased in `()` is the actual number of innings.
 843        # We need to remove that from the date,
 844        # and move it into a separate variable.
 845        if "(" in game_date:
 846            game_date = game_date.replace(")", "")
 847            game_date, game_num = game_date.split("(")
 848            game_date = game_date.strip()
 849            game_num = int(game_num.strip())
 850
 851        if ":" in game_date and ("PM" in game_date or "AM" in game_date):
 852            game_date = datetime.strptime(
 853                game_date,
 854                "%m/%d/%Y %I:%M %p"
 855            ).date()
 856        else:
 857            game_date = datetime.strptime(
 858                game_date,
 859                "%m/%d/%Y"
 860            ).date()
 861
 862        try:
 863            opp_team_id = cells[1].find("a").get("href")
 864        except IndexError:
 865            logging.info(
 866                "Skipping row because it is clearly "
 867                + "not a row that has schedule data."
 868            )
 869            is_valid_row = False
 870        except AttributeError as e:
 871            logging.info(
 872                "Could not extract a team ID for this game. " +
 873                f"Full exception {e}"
 874            )
 875            opp_team_id = "-1"
 876        except Exception as e:
 877            logging.warning(
 878                "An unhandled exception has occurred when "
 879                + "trying to get the opposition team ID for this game. "
 880                f"Full exception `{e}`."
 881            )
 882            raise e
 883        if is_valid_row is True:
 884            if opp_team_id is not None:
 885                opp_team_id = opp_team_id.replace("/teams/", "")
 886                opp_team_id = int(opp_team_id)
 887
 888                try:
 889                    opp_team_name = cells[1].find("img").get("alt")
 890                except AttributeError:
 891                    logging.info(
 892                        "Couldn't find the opposition team name "
 893                        + "for this row from an image element. "
 894                        + "Attempting a backup method"
 895                    )
 896                    opp_team_name = cells[1].text
 897                except Exception as e:
 898                    logging.info(
 899                        "Unhandled exception when trying to get the "
 900                        + "opposition team name from this game. "
 901                        + f"Full exception `{e}`"
 902                    )
 903                    raise e
 904            else:
 905                opp_team_name = cells[1].text
 906
 907            if opp_team_name[0] == "@":
 908                # The logic for determining if this game was a
 909                # neutral site game doesn't care if that info is in
 910                # `opp_team_name`.
 911                opp_team_name = opp_team_name.strip().replace("@", "")
 912            elif "@" in opp_team_name:
 913                opp_team_name = opp_team_name.strip().split("@")[0]
 914            # opp_team_show_name = cells[1].text.strip()
 915
 916            opp_text = cells[1].text
 917            opp_text = opp_text.strip()
 918            if "@" in opp_text and opp_text[0] == "@":
 919                is_home_game = False
 920            elif "@" in opp_text and opp_text[0] != "@":
 921                is_neutral_game = True
 922                is_home_game = False
 923            # This is just to cover conference and NCAA championship
 924            # tournaments.
 925            elif "championship" in opp_text.lower():
 926                is_neutral_game = True
 927                is_home_game = False
 928            elif "ncaa" in opp_text.lower():
 929                is_neutral_game = True
 930                is_home_game = False
 931
 932            del opp_text
 933
 934            score = cells[2].text.strip()
 935            if len(score) == 0:
 936                score_1 = 0
 937                score_2 = 0
 938            elif (
 939                "canceled" not in score.lower() and
 940                "ppd" not in score.lower()
 941            ):
 942                score_1, score_2 = score.split("-")
 943
 944                # `score_1` should be "W `n`", "L `n`", or "T `n`",
 945                # with `n` representing the number of runs this team
 946                # scored in this game.
 947                # Let's remove the "W", "L", or "T" from `score_1`,
 948                # and determine which team won later on in this code.
 949                if any(x in score_1 for x in ["W", "L", "T"]):
 950                    score_1 = score_1.split(" ")[1]
 951
 952                if "(" in score_2:
 953                    score_2 = score_2.replace(")", "")
 954                    score_2, ot_periods = score_2.split("(")
 955                    ot_periods = ot_periods.replace("OT", "")
 956                    ot_periods = ot_periods.replace(" ", "")
 957                    ot_periods = int(ot_periods)
 958
 959                if ot_periods is None:
 960                    ot_periods = 0
 961                score_1 = int(score_1)
 962                score_2 = int(score_2)
 963            else:
 964                score_1 = None
 965                score_2 = None
 966
 967            try:
 968                game_id = cells[2].find("a").get("href")
 969                game_id = game_id.replace("/contests", "")
 970                game_id = game_id.replace("/box_score", "")
 971                game_id = game_id.replace("/", "")
 972                game_id = int(game_id)
 973                game_url = (
 974                    f"https://stats.ncaa.org/contests/{game_id}/box_score"
 975                )
 976            except AttributeError as e:
 977                logging.info(
 978                    "Could not parse a game ID for this game. "
 979                    + f"Full exception `{e}`."
 980                )
 981                game_id = None
 982                game_url = None
 983            except Exception as e:
 984                logging.info(
 985                    "An unhandled exception occurred when trying "
 986                    + "to find a game ID for this game. "
 987                    + f"Full exception `{e}`."
 988                )
 989                raise e
 990
 991            try:
 992                attendance = cells[3].text
 993                attendance = attendance.replace(",", "")
 994                attendance = attendance.replace("\n", "")
 995                attendance = int(attendance)
 996            except IndexError as e:
 997                logging.info(
 998                    "It doesn't appear as if there is an attendance column "
 999                    + "for this team's schedule table."
1000                    f"Full exception `{e}`."
1001                )
1002                attendance = None
1003            except ValueError as e:
1004                logging.info(
1005                    "There doesn't appear as if "
1006                    + "there is a recorded attendance. "
1007                    + "for this game/row. "
1008                    f"Full exception `{e}`."
1009                )
1010                attendance = None
1011            except Exception as e:
1012                logging.info(
1013                    "An unhandled exception occurred when trying "
1014                    + "to find this game's attendance. "
1015                    + f"Full exception `{e}`."
1016                )
1017                raise e
1018
1019            if is_home_game is True:
1020                temp_df = pd.DataFrame(
1021                    {
1022                        "season": season,
1023                        "season_name": season_name,
1024                        "game_id": game_id,
1025                        "game_date": game_date,
1026                        "game_num": game_num,
1027                        "ot_periods": ot_periods,
1028                        "home_team_id": team_id,
1029                        "home_team_name": school_name,
1030                        "away_team_id": opp_team_id,
1031                        "away_team_name": opp_team_name,
1032                        "home_team_sets_won": score_1,
1033                        "away_team_sets_won": score_2,
1034                        "is_neutral_game": is_neutral_game,
1035                        "game_url": game_url,
1036                    },
1037                    index=[0],
1038                )
1039                games_df_arr.append(temp_df)
1040                del temp_df
1041            elif is_neutral_game is True:
1042                # For the sake of simplicity,
1043                # order both team ID's,
1044                # and set the lower number of the two as
1045                # the "away" team in this neutral site game,
1046                # just so there's no confusion if someone
1047                # combines a ton of these team schedule `DataFrame`s,
1048                # and wants to remove duplicates afterwards.
1049                t_ids = [opp_team_id, team_id]
1050                t_ids.sort()
1051
1052                if t_ids[0] == team_id:
1053                    # home
1054                    temp_df = pd.DataFrame(
1055                        {
1056                            "season": season,
1057                            "season_name": season_name,
1058                            "game_id": game_id,
1059                            "game_date": game_date,
1060                            "game_num": game_num,
1061                            "ot_periods": ot_periods,
1062                            "home_team_id": team_id,
1063                            "home_team_name": school_name,
1064                            "away_team_id": opp_team_id,
1065                            "away_team_name": opp_team_name,
1066                            "home_team_sets_won": score_1,
1067                            "away_team_sets_won": score_2,
1068                            "is_neutral_game": is_neutral_game,
1069                            "game_url": game_url,
1070                        },
1071                        index=[0],
1072                    )
1073
1074                else:
1075                    # away
1076                    temp_df = pd.DataFrame(
1077                        {
1078                            "season": season,
1079                            "season_name": season_name,
1080                            "game_id": game_id,
1081                            "game_date": game_date,
1082                            "game_num": game_num,
1083                            "ot_periods": ot_periods,
1084                            "home_team_id": opp_team_id,
1085                            "home_team_name": opp_team_name,
1086                            "away_team_id": team_id,
1087                            "away_team_name": school_name,
1088                            "home_team_sets_won": score_2,
1089                            "away_team_sets_won": score_1,
1090                            "is_neutral_game": is_neutral_game,
1091                            "game_url": game_url,
1092                        },
1093                        index=[0],
1094                    )
1095
1096                games_df_arr.append(temp_df)
1097                del temp_df
1098            else:
1099                temp_df = pd.DataFrame(
1100                    {
1101                        "season": season,
1102                        "season_name": season_name,
1103                        "game_id": game_id,
1104                        "game_date": game_date,
1105                        "game_num": game_num,
1106                        "ot_periods": ot_periods,
1107                        "home_team_id": opp_team_id,
1108                        "home_team_name": opp_team_name,
1109                        "away_team_id": team_id,
1110                        "away_team_name": school_name,
1111                        "home_team_sets_won": score_2,
1112                        "away_team_sets_won": score_1,
1113                        "is_neutral_game": is_neutral_game,
1114                        "game_url": game_url,
1115                    },
1116                    index=[0],
1117                )
1118
1119                games_df_arr.append(temp_df)
1120                del temp_df
1121
1122        # team_photo = team_id.find("img").get("src")
1123
1124    games_df = pd.concat(games_df_arr, ignore_index=True)
1125
1126    temp_df = schools_df.rename(
1127        columns={
1128            "school_name": "home_team_name",
1129            "school_id": "home_school_id"
1130        }
1131    )
1132    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")
1133
1134    temp_df = schools_df.rename(
1135        columns={
1136            "school_name": "away_team_name",
1137            "school_id": "away_school_id"
1138        }
1139    )
1140    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
1141    games_df["ncaa_division"] = ncaa_division
1142    games_df["ncaa_division_formatted"] = ncaa_division_formatted
1143
1144    # games_df["game_url"] = games_df["game_url"].str.replace("/box_score", "")
1145    games_df.to_csv(
1146        f"{home_dir}/.ncaa_stats_py/"
1147        + f"volleyball_{sport_id}/team_schedule/"
1148        + f"{team_id}_team_schedule.csv",
1149        index=False,
1150    )
1151
1152    return games_df

Retrieves a team schedule, from a valid NCAA volleyball team ID.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want a schedule from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.volleyball import get_volleyball_team_schedule

########################################
#          Women's volleyball          #
########################################

# Get the team schedule for the
# 2024 Toledo WVB team (D1, ID: 585329).
print(
    "Get the team schedule for the " +
    "2024 Toledo WVB team (D1, ID: 585329)."
)
df = get_volleyball_team_schedule(585329)
print(df)

# Get the team schedule for the
# 2023 Black Hills St. WVB team (D2, ID: 559709).
print(
    "Get the team schedule for the " +
    "2023 Black Hills St. WVB team (D2, ID: 559709)."
)
df = get_volleyball_team_schedule(559709)
print(df)

# Get the team schedule for the
# 2022 Mount Mary WVB team (D3, ID: 539750).
print(
    "Get the team schedule for the " +
    "2022 Mount Mary WVB team (D3, ID: 539750)."
)
df = get_volleyball_team_schedule(539750)
print(df)

# Get the team schedule for the
# 2021 TCU WVB team (D1, ID: 522750).
print(
    "Get the team schedule for the " +
    "2021 TCU WVB team (D1, ID: 522750)."
)
df = get_volleyball_team_schedule(522750)
print(df)

# Get the team schedule for the
# 2020 Purdue Northwest WVB team (D2, ID: 504832).
print(
    "Get the team schedule for the " +
    "2020 Purdue Northwest WVB team (D2, ID: 504832)."
)
df = get_volleyball_team_schedule(504832)
print(df)

# Get the team schedule for the
# 2019 Juniata WVB team (D3, ID: 482642).
print(
    "Get the team schedule for the " +
    "2019 Juniata WVB team (D3, ID: 482642)."
)
df = get_volleyball_team_schedule(482642)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the team schedule for the
# 2024 Missouri S&T MVB team (D1, ID: 573720).
print(
    "Get the team schedule for the " +
    "2024 Missouri S&T MVB team (D1, ID: 573720)."
)
df = get_volleyball_team_schedule(573720)
print(df)

# Get the team schedule for the
# 2023 Rockford MVB team (D3, ID: 550890).
print(
    "Get the team schedule for the " +
    "2023 Rockford MVB team (D3, ID: 550890)."
)
df = get_volleyball_team_schedule(550890)
print(df)

# Get the team schedule for the
# 2022 McKendree MVB team (D1, ID: 529896).
print(
    "Get the team schedule for the " +
    "2022 McKendree MVB team (D1, ID: 529896)."
)
df = get_volleyball_team_schedule(529896)
print(df)

# Get the team schedule for the
# 2021 Concordia Chicago MVB team (D3, ID: 508505).
print(
    "Get the team schedule for the " +
    "2021 Concordia Chicago MVB team (D3, ID: 508505)."
)
df = get_volleyball_team_schedule(508505)
print(df)

# Get the team schedule for the
# 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
print(
    "Get the team schedule for the " +
    "2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
)
df = get_volleyball_team_schedule(487992)
print(df)

# Get the team schedule for the
# 2019 Loras MVB team (D3, ID: 453845).
print(
    "Get the team schedule for the " +
    "2019 Loras MVB team (D3, ID: 453845)."
)
df = get_volleyball_team_schedule(453845)
print(df)

Returns

A pandas DataFrame object with an NCAA volleyball team's schedule.

def get_volleyball_day_schedule( game_date: str | datetime.date | datetime.datetime, level: str | int = 'I', get_mens_data: bool = False):
1155def get_volleyball_day_schedule(
1156    game_date: str | date | datetime,
1157    level: str | int = "I",
1158    get_mens_data: bool = False
1159):
1160    """
1161    Given a date and NCAA level, this function retrieves volleyball every game
1162    for that date.
1163
1164    Parameters
1165    ----------
1166    `game_date` (int, mandatory):
1167        Required argument.
1168        Specifies the date you want a volleyball schedule from.
1169        For best results, pass a string formatted as "YYYY-MM-DD".
1170
1171    `level` (int, mandatory):
1172        Required argument.
1173        Specifies the level/division you want a
1174        NCAA volleyball schedule from.
1175        This can either be an integer (1-3) or a string ("I"-"III").
1176
1177    `get_mens_data` (bool, optional):
1178        Optional argument.
1179        If you want men's volleyball data instead of women's volleyball data,
1180        set this to `True`.
1181
1182    Usage
1183    ----------
1184    ```python
1185
1186    from ncaa_stats_py.volleyball import get_volleyball_day_schedule
1187
1188    ########################################
1189    #         Women's Volleyball           #
1190    ########################################
1191
1192    # Get all DI games (if any) that were played on December 22th, 2024.
1193    print("Get all games (if any) that were played on December 22th, 2024.")
1194    df = get_volleyball_day_schedule("2024-12-22", level=1)
1195    print(df)
1196
1197    # Get all division II games that were played on November 24th, 2024.
1198    print("Get all division II games that were played on November 24th, 2024.")
1199    df = get_volleyball_day_schedule("2024-11-24", level="II")
1200    print(df)
1201
1202    # Get all DIII games that were played on October 27th, 2024.
1203    print("Get all DIII games that were played on October 27th, 2024.")
1204    df = get_volleyball_day_schedule("2024-10-27", level="III")
1205    print(df)
1206
1207    # Get all DI games (if any) that were played on September 29th, 2024.
1208    print(
1209        "Get all DI games (if any) that were played on September 29th, 2024."
1210    )
1211    df = get_volleyball_day_schedule("2024-09-29")
1212    print(df)
1213
1214    # Get all DII games played on August 30th, 2024.
1215    print("Get all DI games played on August 30th, 2024.")
1216    df = get_volleyball_day_schedule("2024-08-30")
1217    print(df)
1218
1219    # Get all division III games played on September 23rd, 2023.
1220    print("Get all division III games played on September 23rd, 2023.")
1221    df = get_volleyball_day_schedule("2023-09-23", level="III")
1222    print(df)
1223
1224    ########################################
1225    #          Men's Volleyball            #
1226    ########################################
1227
1228    # Get all DI games that will be played on April 12th, 2025.
1229    print("Get all games that will be played on April 12th, 2025.")
1230    df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True)
1231    print(df)
1232
1233    # Get all DI games that were played on January 30th, 2025.
1234    print("Get all games that were played on January 30th, 2025.")
1235    df = get_volleyball_day_schedule(
1236        "2025-01-30", level="I", get_mens_data=True
1237    )
1238    print(df)
1239
1240    # Get all division III games that were played on April 6th, 2024.
1241    print("Get all division III games that were played on April 6th, 2024.")
1242    df = get_volleyball_day_schedule(
1243        "2025-04-05", level="III", get_mens_data=True
1244    )
1245    print(df)
1246
1247    # Get all DI games (if any) that were played on March 30th, 2024.
1248    print("Get all DI games (if any) that were played on March 30th, 2024.")
1249    df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True)
1250    print(df)
1251
1252    # Get all DI games played on February 23rd, 2024.
1253    print("Get all DI games played on February 23rd, 2024.")
1254    df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True)
1255    print(df)
1256
1257    # Get all division III games played on February 11th, 2023.
1258    print("Get all division III games played on February 11th, 2023.")
1259    df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True)
1260    print(df)
1261
1262    ```
1263
1264    Returns
1265    ----------
1266    A pandas `DataFrame` object with all volleyball games played on that day,
1267    for that NCAA division/level.
1268
1269    """
1270
1271    season = 0
1272    sport_id = "WVB"
1273
1274    schedule_df = pd.DataFrame()
1275    schedule_df_arr = []
1276
1277    if isinstance(game_date, date):
1278        game_datetime = datetime.combine(
1279            game_date, datetime.min.time()
1280        )
1281    elif isinstance(game_date, datetime):
1282        game_datetime = game_date
1283    elif isinstance(game_date, str):
1284        game_datetime = parser.parse(
1285            game_date
1286        )
1287    else:
1288        unhandled_datatype = type(game_date)
1289        raise ValueError(
1290            f"Unhandled datatype for `game_date`: `{unhandled_datatype}`"
1291        )
1292
1293    if isinstance(level, int) and level == 1:
1294        formatted_level = "I"
1295        ncaa_level = 1
1296    elif isinstance(level, int) and level == 2:
1297        formatted_level = "II"
1298        ncaa_level = 2
1299    elif isinstance(level, int) and level == 3:
1300        formatted_level = "III"
1301        ncaa_level = 3
1302    elif isinstance(level, str) and (
1303        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1304    ):
1305        ncaa_level = 1
1306        formatted_level = level.upper()
1307    elif isinstance(level, str) and (
1308        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1309    ):
1310        ncaa_level = 2
1311        formatted_level = level.upper()
1312    elif isinstance(level, str) and (
1313        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1314    ):
1315        ncaa_level = 3
1316        formatted_level = level.upper()
1317
1318    del level
1319
1320    if get_mens_data is True:
1321        sport_id = "MVB"
1322    elif get_mens_data is False:
1323        sport_id = "WVB"
1324    else:
1325        raise ValueError(
1326            f"Unhandled value for `get_wbb_data`: `{get_mens_data}`"
1327        )
1328
1329    season = game_datetime.year
1330    game_month = game_datetime.month
1331    game_day = game_datetime.day
1332    game_year = game_datetime.year
1333
1334    if game_month > 7:
1335        season += 1
1336        url = (
1337            "https://stats.ncaa.org/contests/" +
1338            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1339            f"&academic_year={season}&division={ncaa_level}" +
1340            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1341            "&commit=Submit"
1342        )
1343    else:
1344        url = (
1345            "https://stats.ncaa.org/contests/" +
1346            f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" +
1347            f"&academic_year={season}&division={ncaa_level}" +
1348            f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" +
1349            "&commit=Submit"
1350        )
1351
1352    response = _get_webpage(url=url)
1353    soup = BeautifulSoup(response.text, features="lxml")
1354
1355    game_boxes = soup.find_all("div", {"class": "table-responsive"})
1356
1357    for box in game_boxes:
1358        game_id = None
1359        game_alt_text = None
1360        game_num = 1
1361        # t_box = box.find("table")
1362        table_box = box.find("table")
1363        table_rows = table_box.find_all("tr")
1364
1365        # Date/attendance
1366        game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text
1367        game_date_str = game_date_str.replace("\n", "")
1368        game_date_str = game_date_str.strip()
1369        game_date_str = game_date_str.replace("TBA ", "TBA")
1370        game_date_str = game_date_str.replace("TBD ", "TBD")
1371        game_date_str = game_date_str.replace("PM ", "PM")
1372        game_date_str = game_date_str.replace("AM ", "AM")
1373        game_date_str = game_date_str.strip()
1374        attendance_str = table_rows[0].find(
1375            "div",
1376            {"class": "col p-0 text-right"}
1377        ).text
1378
1379        attendance_str = attendance_str.replace("Attend:", "")
1380        attendance_str = attendance_str.replace(",", "")
1381        attendance_str = attendance_str.replace("\n", "")
1382        if (
1383            "st" in attendance_str.lower() or
1384            "nd" in attendance_str.lower() or
1385            "rd" in attendance_str.lower() or
1386            "th" in attendance_str.lower()
1387        ):
1388            # This is not an attendance,
1389            # this is whatever quarter/half/inning this game is in.
1390            attendance_num = None
1391        elif "final" in attendance_str.lower():
1392            attendance_num = None
1393        elif len(attendance_str) > 0:
1394            attendance_num = int(attendance_str)
1395        else:
1396            attendance_num = None
1397
1398        if "(" in game_date_str:
1399            game_date_str = game_date_str.replace(")", "")
1400            game_date_str, game_num = game_date_str.split("(")
1401            game_num = int(game_num)
1402
1403        if "TBA" in game_date_str:
1404            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA')
1405        elif "tba" in game_date_str:
1406            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba')
1407        elif "TBD" in game_date_str:
1408            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD')
1409        elif "tbd" in game_date_str:
1410            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd')
1411        elif (
1412            "tbd" not in game_date_str.lower() and
1413            ":" not in game_date_str.lower()
1414        ):
1415            game_date_str = game_date_str.replace(" ", "")
1416            game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y')
1417        else:
1418            game_datetime = datetime.strptime(
1419                game_date_str,
1420                '%m/%d/%Y %I:%M %p'
1421            )
1422        game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
1423
1424        game_alt_text = table_rows[1].find_all("td")[0].text
1425        if game_alt_text is not None and len(game_alt_text) > 0:
1426            game_alt_text = game_alt_text.replace("\n", "")
1427            game_alt_text = game_alt_text.strip()
1428
1429        if len(game_alt_text) == 0:
1430            game_alt_text = None
1431
1432        urls_arr = box.find_all("a")
1433
1434        for u in urls_arr:
1435            url_temp = u.get("href")
1436            if "contests" in url_temp:
1437                game_id = url_temp
1438                del url_temp
1439
1440        if game_id is None:
1441            for r in range(0, len(table_rows)):
1442                temp = table_rows[r]
1443                temp_id = temp.get("id")
1444
1445                if temp_id is not None and len(temp_id) > 0:
1446                    game_id = temp_id
1447
1448        del urls_arr
1449
1450        game_id = game_id.replace("/contests", "")
1451        game_id = game_id.replace("/box_score", "")
1452        game_id = game_id.replace("/livestream_scoreboards", "")
1453        game_id = game_id.replace("/", "")
1454        game_id = game_id.replace("contest_", "")
1455        game_id = int(game_id)
1456
1457        table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"})
1458        away_team_row = table_rows[0]
1459        home_team_row = table_rows[1]
1460
1461        # Away team
1462        td_arr = away_team_row.find_all("td")
1463
1464        try:
1465            away_team_name = td_arr[0].find("img").get("alt")
1466        except Exception:
1467            away_team_name = td_arr[1].text
1468        away_team_name = away_team_name.replace("\n", "")
1469        away_team_name = away_team_name.strip()
1470
1471        try:
1472            away_team_id = td_arr[1].find("a").get("href")
1473            away_team_id = away_team_id.replace("/teams/", "")
1474            away_team_id = int(away_team_id)
1475        except AttributeError:
1476            away_team_id = None
1477            logging.info("No team ID found for the away team")
1478        except Exception as e:
1479            raise e
1480
1481        away_sets_scored = td_arr[-1].text
1482        away_sets_scored = away_sets_scored.replace("\n", "")
1483        away_sets_scored = away_sets_scored.replace("\xa0", "")
1484
1485        if "ppd" in away_sets_scored.lower():
1486            continue
1487        elif "cancel" in away_sets_scored.lower():
1488            continue
1489
1490        if len(away_sets_scored) > 0:
1491            away_sets_scored = int(away_sets_scored)
1492        else:
1493            away_sets_scored = 0
1494
1495        del td_arr
1496
1497        # Home team
1498        td_arr = home_team_row.find_all("td")
1499
1500        try:
1501            home_team_name = td_arr[0].find("img").get("alt")
1502        except Exception:
1503            home_team_name = td_arr[1].text
1504        home_team_name = home_team_name.replace("\n", "")
1505        home_team_name = home_team_name.strip()
1506
1507        try:
1508            home_team_id = td_arr[1].find("a").get("href")
1509            home_team_id = home_team_id.replace("/teams/", "")
1510            home_team_id = int(home_team_id)
1511        except AttributeError:
1512            home_team_id = None
1513            logging.info("No team ID found for the home team")
1514        except Exception as e:
1515            raise e
1516
1517        home_sets_scored = td_arr[-1].text
1518        home_sets_scored = home_sets_scored.replace("\n", "")
1519        home_sets_scored = home_sets_scored.replace("\xa0", "")
1520
1521        if "ppd" in home_sets_scored.lower():
1522            continue
1523        elif "cancel" in home_sets_scored.lower():
1524            continue
1525
1526        if len(home_sets_scored) > 0:
1527            home_sets_scored = int(home_sets_scored)
1528        else:
1529            home_sets_scored = 0
1530
1531        temp_df = pd.DataFrame(
1532            {
1533                "season": season,
1534                "sport_id": sport_id,
1535                "game_date": game_datetime.strftime("%Y-%m-%d"),
1536                "game_datetime": game_datetime.isoformat(),
1537                "game_id": game_id,
1538                "formatted_level": formatted_level,
1539                "ncaa_level": ncaa_level,
1540                "game_alt_text": game_alt_text,
1541                "away_team_id": away_team_id,
1542                "away_team_name": away_team_name,
1543                "home_team_id": home_team_id,
1544                "home_team_name": home_team_name,
1545                "home_sets_scored": home_sets_scored,
1546                "away_sets_scored": away_sets_scored,
1547                "attendance": attendance_num
1548            },
1549            index=[0]
1550        )
1551        schedule_df_arr.append(temp_df)
1552
1553        del temp_df
1554
1555    if len(schedule_df_arr) >= 1:
1556        schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1557    else:
1558        logging.warning(
1559            "Could not find any game(s) for "
1560            + f"{game_datetime.year:00d}-{game_datetime.month:00d}"
1561            + f"-{game_datetime.day:00d}. "
1562            + "If you believe this is an error, "
1563            + "please raise an issue at "
1564            + "\n https://github.com/armstjc/ncaa_stats_py/issues \n"
1565        )
1566    return schedule_df

Given a date and NCAA level, this function retrieves every volleyball game for that date.

Parameters

game_date (str | date | datetime, mandatory): Required argument. Specifies the date you want a volleyball schedule from. For best results, pass a string formatted as "YYYY-MM-DD".

level (int | str, optional): Optional argument. Specifies the level/division you want a NCAA volleyball schedule from. This can either be an integer (1-3) or a string ("I"-"III"). Defaults to "I".

get_mens_data (bool, optional): Optional argument. If you want men's volleyball data instead of women's volleyball data, set this to True.

Usage

from ncaa_stats_py.volleyball import get_volleyball_day_schedule

########################################
#         Women's Volleyball           #
########################################

# Get all DI games (if any) that were played on December 22nd, 2024.
print("Get all games (if any) that were played on December 22nd, 2024.")
df = get_volleyball_day_schedule("2024-12-22", level=1)
print(df)

# Get all division II games that were played on November 24th, 2024.
print("Get all division II games that were played on November 24th, 2024.")
df = get_volleyball_day_schedule("2024-11-24", level="II")
print(df)

# Get all DIII games that were played on October 27th, 2024.
print("Get all DIII games that were played on October 27th, 2024.")
df = get_volleyball_day_schedule("2024-10-27", level="III")
print(df)

# Get all DI games (if any) that were played on September 29th, 2024.
print(
    "Get all DI games (if any) that were played on September 29th, 2024."
)
df = get_volleyball_day_schedule("2024-09-29")
print(df)

# Get all DI games played on August 30th, 2024.
print("Get all DI games played on August 30th, 2024.")
df = get_volleyball_day_schedule("2024-08-30")
print(df)

# Get all division III games played on September 23rd, 2023.
print("Get all division III games played on September 23rd, 2023.")
df = get_volleyball_day_schedule("2023-09-23", level="III")
print(df)

########################################
#          Men's Volleyball            #
########################################

# Get all DI games that will be played on April 12th, 2025.
print("Get all games that will be played on April 12th, 2025.")
df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True)
print(df)

# Get all DI games that were played on January 30th, 2025.
print("Get all games that were played on January 30th, 2025.")
df = get_volleyball_day_schedule(
    "2025-01-30", level="I", get_mens_data=True
)
print(df)

# Get all division III games that were played on April 5th, 2025.
print("Get all division III games that were played on April 5th, 2025.")
df = get_volleyball_day_schedule(
    "2025-04-05", level="III", get_mens_data=True
)
print(df)

# Get all DI games (if any) that were played on March 30th, 2024.
print("Get all DI games (if any) that were played on March 30th, 2024.")
df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True)
print(df)

# Get all DI games played on February 23rd, 2024.
print("Get all DI games played on February 23rd, 2024.")
df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True)
print(df)

# Get all division III games played on February 11th, 2024.
print("Get all division III games played on February 11th, 2024.")
df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True)
print(df)

Returns

A pandas DataFrame object with all volleyball games played on that day, for that NCAA division/level.

def get_full_volleyball_schedule( season: int, level: str | int = 'I', get_mens_data: bool = True) -> pandas.core.frame.DataFrame:
1569def get_full_volleyball_schedule(
1570    season: int,
1571    level: str | int = "I",
1572    get_mens_data: bool = True
1573) -> pd.DataFrame:
1574    """
1575    Retrieves a full volleyball schedule,
1576    from an NCAA level (`"I"`, `"II"`, `"III"`).
1577    The way this is done is by going through every team in a division,
1578    and parsing the schedules of every team in a division.
1579
1580    This function will take time when first run (30-60 minutes)!
1581    You have been warned.
1582
1583    Parameters
1584    ----------
1585    `season` (int, mandatory):
1586        Specifies the season you want a schedule from.
1587
1588    `level` (int | str, mandatory):
1589        Specifies the team you want a schedule from.
1590
1591    `get_mens_data` (bool, optional):
1592        Optional argument.
1593        If you want men's volleyball data instead of women's volleyball data,
1594        set this to `True`.
1595
1596    Usage
1597    ----------
1598    ```python
1599
1600    from ncaa_stats_py.volleyball import get_full_volleyball_schedule
1601
1602    ##############################################################################
1603    # NOTE
1604    # This function will easily take an hour or more
1605    # to run for the first time in a given season and NCAA level!
1606    # You have been warned!
1607    ##############################################################################
1608
1609    # Get the entire 2024 schedule for the 2024 women's D1 volleyball season.
1610    print(
1611        "Get the entire 2024 schedule " +
1612        "for the 2024 women's D1 volleyball season."
1613    )
1614    df = get_full_volleyball_schedule(season=2024, level="I")
1615    print(df)
1616
1617    # Get the entire 2024 schedule for the 2024 men's D1 volleyball season.
1618    # print(
1619    #     "Get the entire 2024 schedule for " +
1620    #     "the 2024 men's D1 volleyball season."
1621    # )
1622    # df = get_full_volleyball_schedule(
1623    #     season=2024,
1624    #     level="I",
1625    #     get_mens_data=True
1626    # )
1627    # print(df)
1628
1629    # You can also input `level` as an integer.
1630    # In addition, this and other functions cache data,
1631    # so this should load very quickly
1632    # compared to the first run of this function.
1633    print("You can also input `level` as an integer.")
1634    print(
1635        "In addition, this and other functions cache data, "
1636        + "so this should load very quickly "
1637        + "compared to the first run of this function."
1638    )
1639    df = get_full_volleyball_schedule(season=2024, level=1)
1640    print(df)
1641
1642    ```
1643
1644    Returns
1645    ----------
1646    A pandas `DataFrame` object with an NCAA volleyball
1647    schedule for a specific season and level.
1648    """
1649
1650    sport_id = ""
1651    load_from_cache = True
1652    home_dir = expanduser("~")
1653    home_dir = _format_folder_str(home_dir)
1654    schedule_df = pd.DataFrame()
1655    schedule_df_arr = []
1656    temp_df = pd.DataFrame()
1657    formatted_level = ""
1658    ncaa_level = 0
1659
1660    if get_mens_data is True:
1661        sport_id = "MVB"
1662    else:
1663        sport_id = "WVB"
1664
1665    if isinstance(level, int) and level == 1:
1666        formatted_level = "I"
1667        ncaa_level = 1
1668    elif isinstance(level, int) and level == 2:
1669        formatted_level = "II"
1670        ncaa_level = 2
1671    elif isinstance(level, int) and level == 3:
1672        formatted_level = "III"
1673        ncaa_level = 3
1674    elif isinstance(level, str) and (
1675        level.lower() == "i" or level.lower() == "d1" or level.lower() == "1"
1676    ):
1677        ncaa_level = 1
1678        formatted_level = level.upper()
1679    elif isinstance(level, str) and (
1680        level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2"
1681    ):
1682        ncaa_level = 2
1683        formatted_level = level.upper()
1684    elif isinstance(level, str) and (
1685        level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3"
1686    ):
1687        ncaa_level = 3
1688        formatted_level = level.upper()
1689
1690    del level
1691
1692    if exists(f"{home_dir}/.ncaa_stats_py/"):
1693        pass
1694    else:
1695        mkdir(f"{home_dir}/.ncaa_stats_py/")
1696
1697    if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"):
1698        pass
1699    else:
1700        mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/")
1701
1702    if exists(
1703        f"{home_dir}/.ncaa_stats_py/" +
1704        f"volleyball_{sport_id}/full_schedule/"
1705    ):
1706        pass
1707    else:
1708        mkdir(
1709            f"{home_dir}/.ncaa_stats_py/" +
1710            f"volleyball_{sport_id}/full_schedule/"
1711        )
1712
1713    if exists(
1714        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/"
1715        + f"{season}_{formatted_level}_full_schedule.csv"
1716    ):
1717        teams_df = pd.read_csv(
1718            f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/"
1719            + f"{season}_{formatted_level}_full_schedule.csv"
1720        )
1721        file_mod_datetime = datetime.fromtimestamp(
1722            getmtime(
1723                f"{home_dir}/.ncaa_stats_py/" +
1724                f"volleyball_{sport_id}/full_schedule/"
1725                + f"{season}_{formatted_level}_full_schedule.csv"
1726            )
1727        )
1728    else:
1729        file_mod_datetime = datetime.today()
1730        load_from_cache = False
1731
1732    now = datetime.today()
1733
1734    age = now - file_mod_datetime
1735
1736    if (
1737        age.days > 1 and
1738        season >= now.year
1739    ):
1740        load_from_cache = False
1741
1742    if load_from_cache is True:
1743        return teams_df
1744
1745    teams_df = load_volleyball_teams()
1746    teams_df = teams_df[
1747        (teams_df["season"] == season) &
1748        (teams_df["ncaa_division"] == ncaa_level)
1749    ]
1750    team_ids_arr = teams_df["team_id"].to_numpy()
1751
1752    for team_id in tqdm(team_ids_arr):
1753        temp_df = get_volleyball_team_schedule(team_id=team_id)
1754        schedule_df_arr.append(temp_df)
1755
1756    schedule_df = pd.concat(schedule_df_arr, ignore_index=True)
1757    schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first")
1758    schedule_df.to_csv(
1759        f"{home_dir}/.ncaa_stats_py/"
1760        + f"volleyball_{sport_id}/full_schedule/"
1761        + f"{season}_{formatted_level}_full_schedule.csv",
1762        index=False,
1763    )
1764    return schedule_df

Retrieves a full volleyball schedule, from an NCAA level ("I", "II", "III"). The way this is done is by going through every team in a division, and parsing the schedules of every team in a division.

This function will take time when first run (30-60 minutes)! You have been warned.

Parameters

season (int, mandatory): Specifies the season you want a schedule from.

level (int | str, optional): Specifies the NCAA level/division you want a schedule from. This can either be an integer (1-3) or a string ("I"-"III"). Defaults to "I".

get_mens_data (bool, optional): Optional argument. If you want men's volleyball data instead of women's volleyball data, set this to True.

Usage

from ncaa_stats_py.volleyball import get_full_volleyball_schedule

##############################################################################
# NOTE
# This function will easily take an hour or more
# to run for the first time in a given season and NCAA level!
# You have been warned!
##############################################################################

# Get the entire 2024 schedule for the 2024 women's D1 volleyball season.
print(
    "Get the entire 2024 schedule " +
    "for the 2024 women's D1 volleyball season."
)
df = get_full_volleyball_schedule(season=2024, level="I")
print(df)

# Get the entire 2024 schedule for the 2024 men's D1 volleyball season.
# print(
#     "Get the entire 2024 schedule for " +
#     "the 2024 men's D1 volleyball season."
# )
# df = get_full_volleyball_schedule(
#     season=2024,
#     level="I",
#     get_mens_data=True
# )
# print(df)

# You can also input `level` as an integer.
# In addition, this and other functions cache data,
# so this should load very quickly
# compared to the first run of this function.
print("You can also input `level` as an integer.")
print(
    "In addition, this and other functions cache data, "
    + "so this should load very quickly "
    + "compared to the first run of this function."
)
df = get_full_volleyball_schedule(season=2024, level=1)
print(df)

Returns

A pandas DataFrame object with an NCAA volleyball schedule for a specific season and level.

def get_volleyball_team_roster(team_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_team_roster(team_id: int) -> pd.DataFrame:
    """
    Retrieves a volleyball team's roster from a given team ID.

    The team is first looked up in the women's volleyball team list.
    If that lookup fails for any reason, the men's volleyball team list
    is used instead, and the roster is tagged with the matching
    `sport_id` ("WVB" or "MVB").

    Rosters are cached as CSV files in the
    `.ncaa_stats_py/volleyball_{sport_id}/rosters/` folder
    of the user's home directory.
    A cached roster is reused unless it is 14 or more days old
    and the team's season is the current calendar year or later.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a roster from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_roster

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the volleyball roster for the
    # 2024 Weber St. WVB team (D1, ID: 585347).
    print(
        "Get the volleyball roster for the " +
        "2024 Weber St. WVB team (D1, ID: 585347)."
    )
    df = get_volleyball_team_roster(585347)
    print(df)

    # Get the volleyball roster for the
    # 2023 Montevallo WVB team (D2, ID: 559599).
    print(
        "Get the volleyball roster for the " +
        "2023 Montevallo WVB team (D2, ID: 559599)."
    )
    df = get_volleyball_team_roster(559599)
    print(df)

    # Get the volleyball roster for the
    # 2022 Millsaps team (D3, ID: 539944).
    print(
        "Get the volleyball roster for the " +
        "2022 Millsaps team (D3, ID: 539944)."
    )
    df = get_volleyball_team_roster(539944)
    print(df)

    # Get the volleyball roster for the
    # 2021 Binghamton WVB team (D1, ID: 522893).
    print(
        "Get the volleyball roster for the " +
        "2021 Binghamton WVB team (D1, ID: 522893)."
    )
    df = get_volleyball_team_roster(522893)
    print(df)

    # Get the volleyball roster for the
    # 2020 Holy Family WVB team (D2, ID: 504760).
    print(
        "Get the volleyball roster for the " +
        "2020 Holy Family WVB team (D2, ID: 504760)."
    )
    df = get_volleyball_team_roster(504760)
    print(df)

    # Get the volleyball roster for the
    # 2019 Franciscan team (D3, ID: 482939).
    print(
        "Get the volleyball roster for the " +
        "2019 Franciscan team (D3, ID: 482939)."
    )
    df = get_volleyball_team_roster(482939)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the volleyball roster for the
    # 2024 Hawaii MVB team (D1, ID: 573674).
    print(
        "Get the volleyball roster for the " +
        "2024 Hawaii MVB team (D1, ID: 573674)."
    )
    df = get_volleyball_team_roster(573674)
    print(df)

    # Get the volleyball roster for the
    # 2023 Widener MVB team (D3, ID: 550860).
    print(
        "Get the volleyball roster for the " +
        "2023 Widener MVB team (D3, ID: 550860)."
    )
    df = get_volleyball_team_roster(550860)
    print(df)

    # Get the volleyball roster for the
    # 2022 Alderson Broaddus MVB team (D1, ID: 529880).
    print(
        "Get the volleyball roster for the " +
        "2022 Alderson Broaddus MVB team (D1, ID: 529880)."
    )
    df = get_volleyball_team_roster(529880)
    print(df)

    # Get the volleyball roster for the
    # 2021 Geneva MVB team (D3, ID: 508506).
    print(
        "Get the volleyball roster for the " +
        "2021 Geneva MVB team (D3, ID: 508506)."
    )
    df = get_volleyball_team_roster(508506)
    print(df)

    # Get the volleyball roster for the
    # 2020 Urbana MVB team (D1, ID: 484975).
    print(
        "Get the volleyball roster for the " +
        "2020 Urbana MVB team (D1, ID: 484975)."
    )
    df = get_volleyball_team_roster(484975)
    print(df)

    # Get the volleyball roster for the
    # 2019 Eastern Nazarene MVB team (D3, ID: 453876).
    print(
        "Get the volleyball roster for the " +
        "2019 Eastern Nazarene MVB team (D3, ID: 453876)."
    )
    df = get_volleyball_team_roster(453876)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with
    an NCAA volleyball team's roster for that season.

    Raises
    ----------
    `ValueError`:
        If the roster table contains no player rows,
        or if it contains a column this function does not know about.
    """
    roster_df_arr = []
    url = f"https://stats.ncaa.org/teams/{team_id}/roster"
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    # Final column order of the returned/cached roster.
    # Columns the page does not provide are added as empty columns
    # by the `reindex()` call at the end of this function.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "ncaa_division",
        "ncaa_division_formatted",
        "team_conference_name",
        "school_id",
        "school_name",
        "player_id",
        "player_jersey_num",
        "player_full_name",
        "player_first_name",
        "player_last_name",
        "player_class",
        "player_positions",
        "player_height_string",
        "player_weight",
        "player_hometown",
        "player_high_school",
        "player_G",
        "player_GS",
        "player_url",
    ]

    def _team_details(teams_df: pd.DataFrame) -> tuple:
        # Pulls the metadata for `team_id` out of a team list.
        # `.iloc[0]` raises if `team_id` is not in `teams_df`,
        # which is what triggers the fallback to the men's team list.
        team_row = teams_df[teams_df["team_id"] == team_id]
        return (
            team_row["season"].iloc[0],
            team_row["ncaa_division"].iloc[0],
            team_row["ncaa_division_formatted"].iloc[0],
            team_row["team_conference_name"].iloc[0],
            team_row["school_name"].iloc[0],
            int(team_row["school_id"].iloc[0]),
        )

    try:
        team_details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        # Any failure with the women's list (including the team not
        # being in it) means we try the men's list instead.
        # A failure here is allowed to propagate to the caller.
        team_details = _team_details(
            load_volleyball_teams(get_mens_data=True)
        )
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_name,
        school_id,
    ) = team_details

    # Cache folders are created one level at a time,
    # because `mkdir` does not create missing parent folders.
    base_dir = f"{home_dir}/.ncaa_stats_py/"
    sport_dir = f"{base_dir}volleyball_{sport_id}/"
    roster_dir = f"{sport_dir}rosters/"
    cache_path = f"{roster_dir}{team_id}_roster.csv"

    for folder in (base_dir, sport_dir, roster_dir):
        if not exists(folder):
            mkdir(folder)

    now = datetime.today()

    if exists(cache_path):
        age = now - datetime.fromtimestamp(getmtime(cache_path))
        # Only rosters from the current (or a future) season can change,
        # so those are the only ones refreshed once the cache is stale.
        cache_is_stale = age.days >= 14 and season >= now.year
        if not cache_is_stale:
            return pd.read_csv(cache_path)

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # The school name on the page replaces the one from the team list.
    try:
        school_name = soup.find(
            "div",
            {"class": "card"}
        ).find("img").get("alt")
    except Exception:
        # No logo image on the card: use the card's link text instead,
        # dropping its last space-separated word.
        school_name = soup.find("div", {"class": "card"}).find("a").text
        school_name = school_name.rsplit(" ", maxsplit=1)[0]

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # The roster table is found under one of two class strings.
    try:
        table = soup.find(
            "table",
            {"class": "dataTable small_font"},
        )

        table_headers = table.find("thead").find_all("th")
    except Exception:
        table = soup.find(
            "table",
            {"class": "dataTable small_font no_padding"},
        )

        table_headers = table.find("thead").find_all("th")
    table_headers = [x.text for x in table_headers]

    for t in table.find("tbody").find_all("tr"):
        t_cells = [x.text for x in t.find_all("td")]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The first link in the row is the player's relative URL
        # ("/players/<id>"); the numeric part is the player ID.
        player_href = t.find("a").get("href")
        temp_df["player_url"] = f"https://stats.ncaa.org{player_href}"
        temp_df["player_id"] = int(
            player_href.replace("/players", "").replace("/", "")
        )

        roster_df_arr.append(temp_df)

    if not roster_df_arr:
        # `pd.concat()` would raise a much less helpful `ValueError` here.
        raise ValueError(
            f"No players were found in the roster table for "
            f"team ID {team_id} ({url})."
        )

    roster_df = pd.concat(roster_df_arr, ignore_index=True)
    roster_df["season"] = season
    roster_df["season_name"] = season_name
    roster_df["ncaa_division"] = ncaa_division
    roster_df["ncaa_division_formatted"] = ncaa_division_formatted
    roster_df["team_conference_name"] = team_conference_name
    roster_df["school_id"] = school_id
    roster_df["school_name"] = school_name
    roster_df["sport_id"] = sport_id

    roster_df.rename(
        columns={
            "GP": "player_G",
            "GS": "player_GS",
            "#": "player_jersey_num",
            "Name": "player_full_name",
            "Class": "player_class",
            "Position": "player_positions",
            "Height": "player_height_string",
            "Bats": "player_batting_hand",
            "Throws": "player_throwing_hand",
            "Hometown": "player_hometown",
            "High School": "player_high_school",
        },
        inplace=True
    )

    # Everything before the first space is treated as the first name,
    # and everything after it as the last name.
    roster_df[["player_first_name", "player_last_name"]] = roster_df[
        "player_full_name"
    ].str.split(" ", n=1, expand=True)

    # Fail loudly if the page has a column this function does not handle,
    # instead of letting `reindex()` silently drop it.
    for column in roster_df.columns:
        if column not in stat_columns:
            raise ValueError(
                f"Unhandled column name {column}"
            )

    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)

    roster_df.to_csv(cache_path, index=False)
    return roster_df

Retrieves a volleyball team's roster from a given team ID.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want a roster from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.volleyball import get_volleyball_team_roster

########################################
#          Women's volleyball          #
########################################

# Get the volleyball roster for the
# 2024 Weber St. WVB team (D1, ID: 585347).
print(
    "Get the volleyball roster for the " +
    "2024 Weber St. WVB team (D1, ID: 585347)."
)
df = get_volleyball_team_roster(585347)
print(df)

# Get the volleyball roster for the
# 2023 Montevallo WVB team (D2, ID: 559599).
print(
    "Get the volleyball roster for the " +
    "2023 Montevallo WVB team (D2, ID: 559599)."
)
df = get_volleyball_team_roster(559599)
print(df)

# Get the volleyball roster for the
# 2022 Millsaps team (D3, ID: 539944).
print(
    "Get the volleyball roster for the " +
    "2022 Millsaps team (D3, ID: 539944)."
)
df = get_volleyball_team_roster(539944)
print(df)

# Get the volleyball roster for the
# 2021 Binghamton WVB team (D1, ID: 522893).
print(
    "Get the volleyball roster for the " +
    "2021 Binghamton WVB team (D1, ID: 522893)."
)
df = get_volleyball_team_roster(522893)
print(df)

# Get the volleyball roster for the
# 2020 Holy Family WVB team (D2, ID: 504760).
print(
    "Get the volleyball roster for the " +
    "2020 Holy Family WVB team (D2, ID: 504760)."
)
df = get_volleyball_team_roster(504760)
print(df)

# Get the volleyball roster for the
# 2019 Franciscan team (D3, ID: 482939).
print(
    "Get the volleyball roster for the " +
    "2019 Franciscan team (D3, ID: 482939)."
)
df = get_volleyball_team_roster(482939)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the volleyball roster for the
# 2024 Hawaii MVB team (D1, ID: 573674).
print(
    "Get the volleyball roster for the " +
    "2024 Hawaii MVB team (D1, ID: 573674)."
)
df = get_volleyball_team_roster(573674)
print(df)

# Get the volleyball roster for the
# 2023 Widener MVB team (D3, ID: 550860).
print(
    "Get the volleyball roster for the " +
    "2023 Widener MVB team (D3, ID: 550860)."
)
df = get_volleyball_team_roster(550860)
print(df)

# Get the volleyball roster for the
# 2022 Alderson Broaddus MVB team (D1, ID: 529880).
print(
    "Get the volleyball roster for the " +
    "2022 Alderson Broaddus MVB team (D1, ID: 529880)."
)
df = get_volleyball_team_roster(529880)
print(df)

# Get the volleyball roster for the
# 2021 Geneva MVB team (D3, ID: 508506).
print(
    "Get the volleyball roster for the " +
    "2021 Geneva MVB team (D3, ID: 508506)."
)
df = get_volleyball_team_roster(508506)
print(df)

# Get the volleyball roster for the
# 2020 Urbana MVB team (D1, ID: 484975).
print(
    "Get the volleyball roster for the " +
    "2020 Urbana MVB team (D1, ID: 484975)."
)
df = get_volleyball_team_roster(484975)
print(df)

# Get the volleyball roster for the
# 2019 Eastern Nazarene MVB team (D3, ID: 453876).
print(
    "Get the volleyball roster for the " +
    "2019 Eastern Nazarene MVB team (D3, ID: 453876)."
)
df = get_volleyball_team_roster(453876)
print(df)

Returns

A pandas DataFrame object with an NCAA volleyball team's roster for that season.

def get_volleyball_player_season_stats(team_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_player_season_stats(
    team_id: int,
) -> pd.DataFrame:
    """
    Given a team ID, this function retrieves and parses
    the season stats for all of the players in a given volleyball team.

    The team is first looked up in the women's volleyball team list.
    If that lookup fails for any reason, the men's volleyball team list
    is used instead, and the stats are tagged with the matching
    `sport_id` ("WVB" or "MVB").

    Stats are cached as CSV files in the
    `.ncaa_stats_py/volleyball_{sport_id}/player_season_stats/` folder
    of the user's home directory.
    A cached file is reused unless it is more than one day old
    and the team's season is the current calendar year or later.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want volleyball stats from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_player_season_stats


    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the season stats for the
    # 2024 Ohio St. team (D1, ID: 585398).
    print(
        "Get the season stats for the " +
        "2024 Ohio St. WVB team (D1, ID: 585398)."
    )
    df = get_volleyball_player_season_stats(585398)
    print(df)

    # Get the season stats for the
    # 2023 Emory & Henry WVB team (D2, ID: 559738).
    print(
        "Get the season stats for the " +
        "2023 Emory & Henry WVB team (D2, ID: 559738)."
    )
    df = get_volleyball_player_season_stats(559738)
    print(df)

    # Get the season stats for the
    # 2022 Fredonia WVB team (D3, ID: 539881).
    print(
        "Get the season stats for the " +
        "2022 Fredonia WVB team (D3, ID: 539881)."
    )
    df = get_volleyball_player_season_stats(539881)
    print(df)

    # Get the season stats for the
    # 2021 Oklahoma WVB team (D1, ID: 523163).
    print(
        "Get the season stats for the " +
        "2021 Oklahoma WVB team (D1, ID: 523163)."
    )
    df = get_volleyball_player_season_stats(523163)
    print(df)

    # Get the season stats for the
    # 2020 North Greenville WVB team (D2, ID: 504820).
    print(
        "Get the season stats for the " +
        "2020 North Greenville WVB team (D2, ID: 504820)."
    )
    df = get_volleyball_player_season_stats(504820)
    print(df)

    # Get the season stats for the
    # 2019 SUNY Potsdam team (D3, ID: 482714).
    print(
        "Get the season stats for the " +
        "2019 SUNY Potsdam team (D3, ID: 482714)."
    )
    df = get_volleyball_player_season_stats(482714)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the season stats for the
    # 2024 Lees-McRae MVB team (D1, ID: 573699).
    print(
        "Get the season stats for the " +
        "2024 Lees-McRae MVB team (D1, ID: 573699)."
    )
    df = get_volleyball_player_season_stats(573699)
    print(df)

    # Get the season stats for the
    # 2023 Elizabethtown MVB team (D3, ID: 550871).
    print(
        "Get the season stats for the " +
        "2023 Elizabethtown MVB team (D3, ID: 550871)."
    )
    df = get_volleyball_player_season_stats(550871)
    print(df)

    # Get the season stats for the
    # 2022 Limestone MVB team (D1, ID: 529884).
    print(
        "Get the season stats for the " +
        "2022 Limestone MVB team (D1, ID: 529884)."
    )
    df = get_volleyball_player_season_stats(529884)
    print(df)

    # Get the season stats for the
    # 2021 Maranatha Baptist MVB team (D3, ID: 508471).
    print(
        "Get the season stats for the " +
        "2021 Maranatha Baptist MVB team (D3, ID: 508471)."
    )
    df = get_volleyball_player_season_stats(508471)
    print(df)

    # Get the season stats for the
    # 2020 CUI MVB team (D1, ID: 484972).
    print(
        "Get the season stats for the " +
        "2020 CUI MVB team (D1, ID: 484972)."
    )
    df = get_volleyball_player_season_stats(484972)
    print(df)

    # Get the season stats for the
    # 2019 SUNY New Paltz MVB team (D3, ID: 453851).
    print(
        "Get the season stats for the " +
        "2019 SUNY New Paltz MVB team (D3, ID: 453851)."
    )
    df = get_volleyball_player_season_stats(453851)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the season stats for
    all players with a given NCAA volleyball team.

    Raises
    ----------
    `ValueError`:
        If the stats table contains no player rows,
        or if it contains a column this function does not know about.
    """

    stats_df_arr = []

    # Final column order of the returned/cached stats.
    # Columns the page does not provide are added as empty columns
    # by `reindex()`, and then filled with 0.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "team_id",
        "team_conference_name",
        "school_id",
        "school_name",
        "ncaa_division",
        "ncaa_division_formatted",
        "player_id",
        "player_jersey_number",
        "player_last_name",
        "player_first_name",
        "player_full_name",
        "player_class",
        "player_position",
        "player_height",
        "GP",
        "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    def _team_details(teams_df: pd.DataFrame) -> tuple:
        # Pulls the metadata for `team_id` out of a team list.
        # `.iloc[0]` raises if `team_id` is not in `teams_df`,
        # which is what triggers the fallback to the men's team list.
        team_row = teams_df[teams_df["team_id"] == team_id]
        return (
            team_row["season"].iloc[0],
            int(team_row["ncaa_division"].iloc[0]),
            team_row["ncaa_division_formatted"].iloc[0],
            team_row["team_conference_name"].iloc[0],
            team_row["school_name"].iloc[0],
            int(team_row["school_id"].iloc[0]),
        )

    try:
        team_details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        # Any failure with the women's list (including the team not
        # being in it) means we try the men's list instead.
        # A failure here is allowed to propagate to the caller.
        team_details = _team_details(
            load_volleyball_teams(get_mens_data=True)
        )
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_name,
        school_id,
    ) = team_details

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats"

    # Cache folders are created one level at a time,
    # because `mkdir` does not create missing parent folders.
    base_dir = f"{home_dir}/.ncaa_stats_py/"
    sport_dir = f"{base_dir}volleyball_{sport_id}/"
    stats_dir = f"{sport_dir}player_season_stats/"

    for folder in (base_dir, sport_dir, stats_dir):
        if not exists(folder):
            mkdir(folder)

    def _cache_file(season_value) -> str:
        # Cache files are keyed by season and school ID.
        return (
            f"{stats_dir}"
            + f"{season_value:00d}_{school_id:00d}_player_season_stats.csv"
        )

    cached_file = _cache_file(season)
    now = datetime.today()

    if exists(cached_file):
        age = now - datetime.fromtimestamp(getmtime(cached_file))
        # Only stats from the current (or a future) season can change,
        # so those are the only ones refreshed once the cache is stale.
        cache_is_stale = age.days > 1 and season >= now.year
        if not cache_is_stale:
            return pd.read_csv(cached_file)

    response = _get_webpage(url=url)

    soup = BeautifulSoup(response.text, features="lxml")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # Re-derive the season from the season name shown on the page:
    # men's seasons combine the first two and last two characters
    # (presumably "2023-24" -> 2024), women's seasons use the
    # first four characters.
    # NOTE(review): the cache lookup above uses the season from the
    # team list, while the file written below uses this re-derived
    # season. If the two ever disagree, the written file will not be
    # found by the lookup - confirm they always match.
    if sport_id == "MVB":
        season = f"{season_name[0:2]}{season_name[-2:]}"
        season = int(season)
    elif sport_id == "WVB":
        season = f"{season_name[0:4]}"
        season = int(season)

    table_data = soup.find(
        "table",
        {"id": "stat_grid", "class": "small_font dataTable table-bordered"},
    )

    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]

    t_rows = table_data.find("tbody").find_all("tr", {"class": "text"})
    for t in t_rows:
        p_last = ""
        p_first = ""
        t_cells = t.find_all("td")

        # Skip rows whose name cell mentions "team" (non-player rows).
        if "team" in t_cells[1].text.lower():
            continue

        # The name cell's `data-order` attribute holds the player's name
        # as comma-separated parts. Count the parts, not the characters:
        # counting characters meant names were never parsed.
        name_parts = (t_cells[1].get("data-order") or "").split(",")
        if len(name_parts) == 2:
            p_last, p_first = name_parts
        elif len(name_parts) == 3:
            # Three parts: the middle part is appended to the last name.
            p_last, temp_name, p_first = name_parts
            p_last = f"{p_last.strip()} {temp_name.strip()}"

        t_cells = [x.text.strip() for x in t_cells]
        # Drop commas so values such as "1,234" can be cast to numbers.
        t_cells = [x.replace(",", "") for x in t_cells]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The first link in the row is the player's relative URL
        # ("/players/<id>"); the numeric part is the player ID.
        player_href = t.find("a").get("href")
        temp_df["player_id"] = int(
            player_href.replace("/players", "").replace("/", "")
        )
        temp_df["player_last_name"] = p_last.strip()
        temp_df["player_first_name"] = p_first.strip()

        stats_df_arr.append(temp_df)

    if not stats_df_arr:
        # `pd.concat()` would raise a much less helpful `ValueError` here.
        raise ValueError(
            f"No player stats were found in the stats table for "
            f"team ID {team_id} ({url})."
        )

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("", None)

    stats_df["season"] = season
    stats_df["season_name"] = season_name
    stats_df["school_id"] = school_id
    stats_df["school_name"] = school_name
    stats_df["ncaa_division"] = ncaa_division
    stats_df["ncaa_division_formatted"] = ncaa_division_formatted
    stats_df["team_conference_name"] = team_conference_name
    stats_df["sport_id"] = sport_id
    stats_df["team_id"] = team_id

    stats_df = stats_df.infer_objects()

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "Hit Pct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # Fail loudly if the page has a column this function does not handle,
    # instead of letting `reindex()` silently drop it.
    # Columns with "Attend" in their name are tolerated (and dropped).
    for column in stats_df.columns:
        if column in stat_columns or "Attend" in column:
            continue
        raise ValueError(
            f"Unhandled column name {column}"
        )
    stats_df = stats_df.reindex(columns=stat_columns)

    # Missing values become 0 so the integer casts below cannot fail on NaN.
    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "GS": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "TRP_DBL": "uint16",
            "serve_attempts": "uint16",
            "total_blocks": "float32",
            "DBL_DBL": "uint16",
            "school_id": "uint32",
        }
    )

    stats_df["hit%"] = stats_df["hit%"].round(3)
    stats_df["points"] = stats_df["points"].round(1)

    stats_df.to_csv(
        _cache_file(season),
        index=False,
    )

    return stats_df

Given a team ID, this function retrieves and parses the season stats for all of the players in a given volleyball team.

Parameters

team_id (int, mandatory): Required argument. Specifies the team you want volleyball stats from. This is separate from a school ID, which identifies the institution. A team ID should be unique to a school, and a season.

Usage

from ncaa_stats_py.volleyball import get_volleyball_player_season_stats


########################################
#          Women's volleyball          #
########################################

# Get the season stats for the
# 2024 Ohio St. team (D1, ID: 585398).
print(
    "Get the season stats for the " +
    "2024 Ohio St. WVB team (D1, ID: 585398)."
)
df = get_volleyball_player_season_stats(585398)
print(df)

# Get the season stats for the
# 2023 Emory & Henry WVB team (D2, ID: 559738).
print(
    "Get the season stats for the " +
    "2023 Emory & Henry WVB team (D2, ID: 559738)."
)
df = get_volleyball_player_season_stats(559738)
print(df)

# Get the season stats for the
# 2022 Fredonia WVB team (D3, ID: 539881).
print(
    "Get the season stats for the " +
    "2022 Fredonia WVB team (D3, ID: 539881)."
)
df = get_volleyball_player_season_stats(539881)
print(df)

# Get the season stats for the
# 2021 Oklahoma WVB team (D1, ID: 523163).
print(
    "Get the season stats for the " +
    "2021 Oklahoma WVB team (D1, ID: 523163)."
)
df = get_volleyball_player_season_stats(523163)
print(df)

# Get the season stats for the
# 2020 North Greenville WVB team (D2, ID: 504820).
print(
    "Get the season stats for the " +
    "2020 North Greenville WVB team (D2, ID: 504820)."
)
df = get_volleyball_player_season_stats(504820)
print(df)

# Get the season stats for the
# 2019 SUNY Potsdam team (D3, ID: 482714).
print(
    "Get the season stats for the " +
    "2019 SUNY Potsdam team (D3, ID: 482714)."
)
df = get_volleyball_player_season_stats(482714)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the season stats for the
# 2024 Lees-McRae MVB team (D1, ID: 573699).
print(
    "Get the season stats for the " +
    "2024 Lees-McRae MVB team (D1, ID: 573699)."
)
df = get_volleyball_player_season_stats(573699)
print(df)

# Get the season stats for the
# 2023 Elizabethtown MVB team (D3, ID: 550871).
print(
    "Get the season stats for the " +
    "2023 Elizabethtown MVB team (D3, ID: 550871)."
)
df = get_volleyball_player_season_stats(550871)
print(df)

# Get the season stats for the
# 2022 Limestone MVB team (D1, ID: 529884).
print(
    "Get the season stats for the " +
    "2022 Limestone MVB team (D1, ID: 529884)."
)
df = get_volleyball_player_season_stats(529884)
print(df)

# Get the season stats for the
# 2021 Maranatha Baptist MVB team (D3, ID: 508471).
print(
    "Get the season stats for the " +
    "2021 Maranatha Baptist MVB team (D3, ID: 508471)."
)
df = get_volleyball_player_season_stats(508471)
print(df)

# Get the season stats for the
# 2020 CUI MVB team (D1, ID: 484972).
print(
    "Get the season stats for the " +
    "2020 CUI MVB team (D1, ID: 484972)."
)
df = get_volleyball_player_season_stats(484972)
print(df)

# Get the season stats for the
# 2019 SUNY New Paltz MVB team (D3, ID: 453851).
print(
    "Get the season stats for the " +
    "2019 SUNY New Paltz MVB team (D3, ID: 453851)."
)
df = get_volleyball_player_season_stats(453851)
print(df)

Returns

A pandas DataFrame object with the season stats for all players with a given NCAA volleyball team.

def get_volleyball_player_game_stats(player_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_player_game_stats(
    player_id: int
) -> pd.DataFrame:
    """
    Given a valid player ID,
    this function retrieves the game stats for this player at a game level.

    Parameters
    ----------
    `player_id` (int, mandatory):
        Required argument.
        Specifies the player you want game stats from.
        The season and the sport (men's or women's volleyball)
        are read from the player's page on stats.ncaa.org,
        so no season argument is needed.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import (
        get_volleyball_player_game_stats
    )

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
    print(
        "Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)."
    )
    df = get_volleyball_player_game_stats(player_id=8432514)
    print(df)

    # Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
    print(
        "Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)."
    )
    df = get_volleyball_player_game_stats(player_id=8145555)
    print(df)

    # Get the game stats of Lauren Gips in 2022 (Babson, D3).
    print(
        "Get the game stats of Lauren Gips in 2022 (Babson, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7876821)
    print(df)

    # Get the game stats of Rhett Robinson in 2021 (North Texas).
    print(
        "Get the game stats of Rhett Robinson in 2021 (North Texas)."
    )
    df = get_volleyball_player_game_stats(player_id=7234089)
    print(df)

    # Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
    print(
        "Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2)."
    )
    df = get_volleyball_player_game_stats(player_id=6822147)
    print(df)

    # Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
    print(
        "Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=6449807)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
    print(
        "Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial)."
    )
    df = get_volleyball_player_game_stats(player_id=8253076)
    print(df)

    # Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
    print(
        "Get the game stats of Ray Rodriguez in 2023 (Lehman, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7883459)
    print(df)

    # Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
    print(
        "Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus)."
    )
    df = get_volleyball_player_game_stats(player_id=7413984)
    print(df)

    # Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
    print(
        "Get the game stats of Tyler Anderson in 2021 (Alvernia, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7118023)
    print(df)

    # Get the game stats of Jaylen Jasper in 2020 (Stanford).
    print(
        "Get the game stats of Jaylen Jasper in 2020 (Stanford)."
    )
    df = get_volleyball_player_game_stats(player_id=6357146)
    print(df)

    # Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
    print(
        "Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=5816111)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a player's game logs
    in a given season.
    If no played games could be parsed from the player's page,
    an empty `DataFrame` with the expected columns is returned
    and nothing is written to the cache.

    Raises
    ----------
    `ValueError`:
        If the parsed table contains a column that is neither
        an expected stat column nor an attendance column.
    """
    # Final column order of the returned/cached DataFrame.
    stat_columns = [
        "season",
        "sport_id",
        "game_id",
        "game_num",
        "player_id",
        "date",
        "opponent",
        "Result",
        "team_sets_won",
        "opponent_sets_won",
        "GP",
        # "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    load_from_cache = False
    stats_df_arr = []
    sport_id = ""
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/players/{player_id}"

    if not exists(f"{home_dir}/.ncaa_stats_py/"):
        mkdir(f"{home_dir}/.ncaa_stats_py/")

    # The sport is not known until the player's page is downloaded,
    # so both the MVB and the WVB cache directories are checked.
    # If a cached file exists in both, the WVB one (checked last) wins.
    games_df = pd.DataFrame()
    file_mod_datetime = datetime.today()
    for cache_sport_id in ("MVB", "WVB"):
        sport_dir = f"{home_dir}/.ncaa_stats_py/volleyball_{cache_sport_id}/"
        cache_dir = sport_dir + "player_game_stats/"
        cache_file = cache_dir + f"{player_id}_player_game_stats.csv"

        if not exists(sport_dir):
            mkdir(sport_dir)

        if not exists(cache_dir):
            mkdir(cache_dir)

        if exists(cache_file):
            games_df = pd.read_csv(cache_file)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        else:
            logging.info(
                f"Could not find a {cache_sport_id} player game stats file"
            )

    # A cached file that is a day old (or older) is considered stale.
    age = datetime.today() - file_mod_datetime
    if age.days >= 1:
        load_from_cache = False

    if load_from_cache:
        return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    table_navigation = soup.find("ul", {"class": "nav nav-tabs padding-nav"})
    table_nav_card = table_navigation.find_all("a")

    # The navigation links contain the sport code of this player.
    for u in table_nav_card:
        # An <a> tag without an href yields None; treat it as no match.
        url_str = u.get("href") or ""
        if "MVB" in url_str.upper():
            sport_id = "MVB"
        elif "WVB" in url_str.upper():
            sport_id = "WVB"

    if len(sport_id) == 0:
        # This should **never** be the case IRL,
        # but in case something weird happened and
        # we can't make a determination of if this is a
        # MVB player or a WVB player, and we somehow haven't
        # crashed by this point, set the sport ID to
        # "MVB" by default so we don't have other weirdness.
        logging.error(
            f"Could not determine if player ID {player_id} " +
            "is a MVB or a WVB player. " +
            "Because this cannot be determined, " +
            "we will make the automatic assumption that this is a MVB player."
        )
        sport_id = "MVB"

    # The second matching table on the page holds the game-by-game stats.
    table_data = soup.find_all(
        "table", {"class": "small_font dataTable table-bordered"}
    )[1]

    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]

    temp_t_rows = table_data.find("tbody").find_all("tr")
    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    if sport_id == "MVB":
        # First two and last two characters of the season label
        # are combined into a four-digit year.
        season = int(f"{season_name[0:2]}{season_name[-2:]}")
    else:
        # WVB: the first four characters of the season label are the year.
        season = int(f"{season_name[0:4]}")

    for t in temp_t_rows:
        game_num = 1
        row_id = t.get("id")

        # Only rows whose ID contains "contest" are parsed;
        # rows without an ID attribute are skipped instead of crashing.
        if row_id is None or "contest" not in row_id:
            continue

        t_tds = t.find_all("td")
        t_cells = [x.text.strip() for x in t_tds]

        g_date = t_cells[0]

        # A "(N)" suffix after the date is stored as the game number.
        if "(" in g_date:
            g_date, game_num = g_date.split("(")
            g_date = g_date.strip()
            game_num = int(game_num.replace(")", ""))

        try:
            opp_team_name = t_tds[1].find_all("img")[1].get("alt")
        except (AttributeError, IndexError):
            logging.info(
                "Couldn't find the opposition team name "
                + "for this row from an image element. "
                + "Attempting a backup method"
            )
            opp_team_name = t_cells[1]
        except Exception as e:
            logging.warning(
                "Unhandled exception when trying to get the "
                + "opposition team name from this game. "
                + f"Full exception `{e}`"
            )
            raise

        # Fall back to the cell text when the image has no usable alt text.
        if opp_team_name is None or opp_team_name == "Defensive Stats":
            opp_team_name = t_cells[1]

        if "@" in opp_team_name:
            opp_team_name = opp_team_name.split("@")[0]

        # Strip the W/L/T marker so only the "sets-sets" score remains.
        result_str = (
            t_cells[2].lower().replace("w", "").replace("l", "").replace(
                "t", ""
            )
        )

        # Skip games without a score.
        # "canceed" is what is left of "canceled" after the "l" is stripped.
        if result_str in ("ppd", "", "canceed"):
            continue

        result_str = result_str.replace("\n", "")
        result_str = result_str.replace("*", "")

        tm_score, opp_score = result_str.split("-")
        t_cells = [
            x.replace("*", "").replace("/", "").replace("\\", "")
            for x in t_cells
        ]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        tm_score = int(tm_score)
        if "(" in opp_score:
            # Drop a parenthesized suffix from the score.
            # It is not an output column, and storing it would
            # trip the column validation at the end of this function.
            opp_score = opp_score.replace(")", "").split("(")[0]

        opp_score = int(opp_score.strip())

        temp_df["team_sets_won"] = tm_score
        temp_df["opponent_sets_won"] = opp_score

        try:
            g_id = t_tds[2].find("a").get("href")

            g_id = g_id.replace("/contests", "")
            g_id = g_id.replace("/box_score", "")
            g_id = g_id.replace("/", "")

            temp_df["game_id"] = int(g_id)
        except AttributeError:
            logging.warning(
                f"Could not find a game ID for a {g_date} game " +
                f"against {opp_team_name}."
            )
            temp_df["game_id"] = None

        temp_df.rename(
            columns={"Opponent": "opponent", "Date": "date"},
            inplace=True,
        )

        temp_df["date"] = datetime.strptime(g_date, "%m/%d/%Y").date()
        temp_df["game_num"] = game_num

        if len(opp_team_name) > 0:
            temp_df["opponent"] = opp_team_name

        duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
        temp_df.drop(columns=duplicate_cols, inplace=True)

        stats_df_arr.append(temp_df)

    if len(stats_df_arr) == 0:
        # `pd.concat()` raises on an empty list, so return early.
        logging.warning(
            f"Could not find any played games for player ID {player_id}."
        )
        return pd.DataFrame(columns=stat_columns)

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("/", "", regex=True)
    stats_df = stats_df.replace("", np.nan)
    stats_df = stats_df.infer_objects()

    stats_df["player_id"] = player_id
    stats_df["sport_id"] = sport_id
    stats_df["season"] = season

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "TotalAttacks": "total_attacks",
            "Hit Pct": "hit%",
            "HitPct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "BlockSolos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BlockAssists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # These stats *don't* exist in every season,
    # so make sure the columns are present before casting.
    if "serve_attempts" not in stats_df.columns:
        stats_df["serve_attempts"] = None

    if "return_attacks" not in stats_df.columns:
        stats_df["return_attacks"] = None

    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "serve_attempts": "uint16",
        }
    )

    # An assisted block counts as half a block.
    stats_df.loc[
        (stats_df["solo_blocks"] > 0) | (stats_df["assisted_blocks"] > 0),
        "total_blocks"
    ] = (
        stats_df["solo_blocks"] +
        (stats_df["assisted_blocks"] / 2)
    )
    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")

    # Columns used to calculate double doubles and triple doubles.
    # Credits:
    # https://en.wikipedia.org/wiki/Double_(volleyball)
    # https://stackoverflow.com/a/54381918
    double_stats_arr = [
        "aces",
        "kills",
        "total_blocks",
        "digs",
        "assists",
    ]
    # Number of categories, per game, with a value of 10 or more.
    double_digit_count = (stats_df[double_stats_arr] >= 10).sum(1)
    stats_df["DBL_DBL"] = (double_digit_count >= 2).astype(int)
    stats_df["TRP_DBL"] = (double_digit_count >= 3).astype(int)

    # Fail loudly on unknown columns so new stats are not silently dropped.
    # Attendance columns are tolerated (they are dropped by the reindex).
    for i in stats_df.columns:
        if i in stat_columns or "Attend" in str(i):
            continue
        raise ValueError(
            f"Unhandled column name {i}"
        )
    stats_df = stats_df.reindex(columns=stat_columns)

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"
        + "player_game_stats/"
        + f"{player_id}_player_game_stats.csv",
        index=False,
    )
    return stats_df

Given a valid player ID, this function retrieves the game stats for this player at a game level.

Parameters

player_id (int, mandatory): Required argument. Specifies the player you want game stats from.

Note: the function signature takes no season argument; the season is read from the player's page on stats.ncaa.org.

Usage

from ncaa_stats_py.volleyball import (
    get_volleyball_player_game_stats
)

########################################
#          Women's volleyball          #
########################################

# Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
print(
    "Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)."
)
df = get_volleyball_player_game_stats(player_id=8432514)
print(df)

# Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
print(
    "Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)."
)
df = get_volleyball_player_game_stats(player_id=8145555)
print(df)

# Get the game stats of Lauren Gips in 2022 (Babson, D3).
print(
    "Get the game stats of Lauren Gips in 2022 (Babson, D3)."
)
df = get_volleyball_player_game_stats(player_id=7876821)
print(df)

# Get the game stats of Rhett Robinson in 2021 (North Texas).
print(
    "Get the game stats of Rhett Robinson in 2021 (North Texas)."
)
df = get_volleyball_player_game_stats(player_id=7234089)
print(df)

# Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
print(
    "Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2)."
)
df = get_volleyball_player_game_stats(player_id=6822147)
print(df)

# Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
print(
    "Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3)."
)
df = get_volleyball_player_game_stats(player_id=6449807)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
print(
    "Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial)."
)
df = get_volleyball_player_game_stats(player_id=8253076)
print(df)

# Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
print(
    "Get the game stats of Ray Rodriguez in 2023 (Lehman, D3)."
)
df = get_volleyball_player_game_stats(player_id=7883459)
print(df)

# Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
print(
    "Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus)."
)
df = get_volleyball_player_game_stats(player_id=7413984)
print(df)

# Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
print(
    "Get the game stats of Tyler Anderson in 2021 (Alvernia, D3)."
)
df = get_volleyball_player_game_stats(player_id=7118023)
print(df)

# Get the game stats of Jaylen Jasper in 2020 (Stanford).
print(
    "Get the game stats of Jaylen Jasper in 2020 (Stanford)."
)
df = get_volleyball_player_game_stats(player_id=6357146)
print(df)

# Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
print(
    "Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3)."
)
df = get_volleyball_player_game_stats(player_id=5816111)
print(df)

Returns

A pandas DataFrame object with a player's game logs in a given season.

def get_volleyball_game_player_stats(game_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_game_player_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all player game stats, if possible.

    The parsed result is cached as a CSV file underneath
    `~/.ncaa_stats_py/volleyball_{MVB|WVB}/game_stats/player/`.
    A cached file that is less than 35 days old is returned as-is,
    without downloading the box score again.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want player game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_game_player_stats

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_game_player_stats(6080706)
    print(df)

    # Get the game stats of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the game stats of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_game_player_stats(5670752)
    print(df)

    # Get the game stats of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakes.
    print(
        "Get the game stats of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakes."
    )
    df = get_volleyball_game_player_stats(3243563)
    print(df)

    # Get the game stats of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the game stats of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_game_player_stats(2307684)
    print(df)

    # Get the game stats of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the game stats of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_game_player_stats(2113627)
    print(df)

    # Get the game stats of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the game stats of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_game_player_stats(2005442)
    print(df)

    # Get the game stats of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the game stats of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_game_player_stats(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_game_player_stats(5282845)
    print(df)

    # Get the game stats of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the game stats of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_game_player_stats(6081598)
    print(df)

    # Get the game stats of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the game stats of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_game_player_stats(4473231)
    print(df)

    # Get the game stats of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the game stats of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_game_player_stats(2355323)
    print(df)

    # Get the game stats of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the game stats of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_game_player_stats(2162239)
    print(df)

    # Get the game stats of a March 20th, 2021
    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
    print(
        "Get the game stats of a March 20th, 2021 "
        + "game between the SUNY New Paltz Hawks "
        + "and the St. John Fisher Cardinals."
    )
    df = get_volleyball_game_player_stats(2059180)
    print(df)

    # Get the game stats of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the game stats of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_game_player_stats(1820058)
    print(df)

    # Get the game stats of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the game stats of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_game_player_stats(1723131)
    print(df)


    ```

    Returns
    ----------
    A pandas `DataFrame` object with player game stats in a given game.

    Raises
    ----------
    `ValueError`:
        Raised when a team in the box score is not found in either the
        men's or women's team list, when no player rows could be parsed,
        or when the box score contains a column this function
        does not know how to handle.

    """
    # Local import: this module only imports `mkdir` from `os`.
    from os import makedirs

    sport_id = ""
    season = 0

    MVB_teams_df = load_volleyball_teams(get_mens_data=True)
    MVB_team_ids_arr = MVB_teams_df["team_id"].to_list()

    WVB_teams_df = load_volleyball_teams(get_mens_data=False)
    WVB_team_ids_arr = WVB_teams_df["team_id"].to_list()

    stats_df_arr = []

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    stat_columns = [
        "season",
        "sport_id",
        "game_datetime",
        "game_id",
        "team_id",
        "team_name",
        "player_id",
        "player_num",
        "player_full_name",
        "player_position",
        "GP",
        "sets_played",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "DBL_DBL",
        "TRP_DBL",
        # `serve_attempts` is always present in the frame (it is created
        # below when the box score lacks it), so it must be listed here.
        # Otherwise the unknown-column check rejects every game.
        # Appended last so the existing columns keep their positions.
        "serve_attempts",
    ]

    url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats"

    # The sport is not known before the page is parsed, so both cache
    # folders are prepared and checked. If both hold a file for this game,
    # the women's (WVB) file is the one that gets used.
    games_df = None
    file_mod_datetime = datetime.today()
    load_from_cache = False

    for cached_sport in ("MVB", "WVB"):
        cache_dir = (
            f"{home_dir}/.ncaa_stats_py/volleyball_{cached_sport}"
            + "/game_stats/player/"
        )
        makedirs(cache_dir, exist_ok=True)
        cache_file = cache_dir + f"{game_id}_player_game_stats.csv"

        if exists(cache_file):
            games_df = pd.read_csv(cache_file)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        else:
            logging.info(
                f"Could not find a {cached_sport} player game stats file"
            )

    # Cached files that are 35 days old (or older) are refreshed.
    age = datetime.today() - file_mod_datetime
    if age.days >= 35:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    info_table = soup.find(
        "td",
        {
            "style": "padding: 0px 30px 0px 30px",
            "class": "d-none d-md-table-cell"
        }
    ).find(
        "table",
        {"style": "border-collapse: collapse"}
    )

    info_table_rows = info_table.find_all("tr")

    # The fourth row of the info table holds the game date,
    # optionally followed by a start time or a TBA/TBD placeholder.
    game_date_str = info_table_rows[3].find("td").text
    for placeholder in ("TBA", "tba", "TBD", "tbd"):
        if placeholder in game_date_str:
            date_format = f"%m/%d/%Y {placeholder}"
            break
    else:
        if ":" not in game_date_str:
            date_format = "%m/%d/%Y"
        else:
            date_format = "%m/%d/%Y %I:%M %p"

    game_datetime = datetime.strptime(game_date_str, date_format)
    # NOTE(review): `game_datetime` is naive here, so `astimezone()` treats
    # it as system-local time before converting it to US/Eastern.
    # Confirm this matches how the NCAA publishes start times.
    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
    game_date_str = game_datetime.isoformat()

    table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"})

    for box in table_boxes:
        t_header = box.find(
            "div", {"class": "card-header"}
        ).find(
            "div", {"class": "row"}
        )

        t_header_str = t_header.text
        t_header_str = t_header_str.replace("Period Stats", "")
        t_header_str = t_header_str.replace("\n", "")
        t_header_str = t_header_str.strip()

        team_id = t_header.find("a").get("href")
        team_id = team_id.replace("/teams", "")
        team_id = team_id.replace("/", "")
        team_id = int(team_id)

        table_data = box.find(
            "table",
            {"class": "display dataTable small_font"}
        )
        table_headers = box.find("thead").find_all("th")
        table_headers = [x.text for x in table_headers]

        temp_t_rows = table_data.find("tbody")
        temp_t_rows = temp_t_rows.find_all("tr")

        spec_stats_df_arr = []
        for t in temp_t_rows:
            game_played = 1

            t_cells = t.find_all("td")
            p_name = t_cells[1].text.replace("\n", "")
            p_name = p_name.strip()

            # Skip the rows that hold team-level totals.
            if t_header_str in p_name or p_name.lower() == "team":
                continue

            # Resolve the player ID from this row's own link.
            # A row without a link is skipped rather than being
            # attributed to whichever player was parsed before it.
            player_link = t.find("a")
            player_href = None
            if player_link is not None:
                player_href = player_link.get("href")

            if not player_href:
                logging.warning(
                    "Could not find a player ID for "
                    + f"`{p_name}` in game ID {game_id}. Skipping this row."
                )
                continue

            player_id = player_href.replace("/players", "")
            player_id = player_id.replace("/player", "")
            player_id = player_id.replace("/", "")
            player_id = int(player_id)

            t_cells = [x.text.strip() for x in t_cells]

            temp_df = pd.DataFrame(
                data=[t_cells],
                columns=table_headers
            )

            # Headers that appear more than once are ambiguous,
            # so every column sharing such a header is dropped.
            duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
            temp_df.drop(columns=duplicate_cols, inplace=True)

            temp_df["player_id"] = player_id
            temp_df["GP"] = game_played

            spec_stats_df_arr.append(temp_df)

        if team_id in MVB_team_ids_arr:
            sport_id = "MVB"
            df = MVB_teams_df[MVB_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        elif team_id in WVB_team_ids_arr:
            sport_id = "WVB"
            df = WVB_teams_df[WVB_teams_df["team_id"] == team_id]
            season = df["season"].iloc[0]
        else:
            raise ValueError(
                f"Unhandled team ID {team_id}"
            )

        if not spec_stats_df_arr:
            logging.warning(
                f"No player rows found for team ID {team_id} "
                + f"in game ID {game_id}."
            )
            continue

        spec_stats_df = pd.concat(
            spec_stats_df_arr,
            ignore_index=True
        )
        spec_stats_df["team_id"] = team_id
        spec_stats_df["team_name"] = t_header_str
        stats_df_arr.append(spec_stats_df)

    if not stats_df_arr:
        raise ValueError(
            f"No player stats could be parsed for game ID {game_id}"
        )

    stats_df = pd.concat(stats_df_arr)
    stats_df["season"] = season
    stats_df.rename(
        columns={
            "#": "player_num",
            "Name": "player_full_name",
            "P": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "TotalAttacks": "total_attacks",
            "Hit Pct": "hit%",
            "HitPct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "BlockSolos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BlockAssists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # Not every box score reports these two stats.
    if "return_attacks" not in stats_df.columns:
        stats_df["return_attacks"] = None

    if "serve_attempts" not in stats_df.columns:
        stats_df["serve_attempts"] = None

    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "serve_attempts": "uint16",
        }
    )
    stats_df["game_datetime"] = game_date_str
    stats_df["sport_id"] = sport_id

    stats_df["game_id"] = game_id

    # Total blocks are recomputed here,
    # with an assisted block counting as half a block.
    stats_df["total_blocks"] = (
        stats_df["solo_blocks"] +
        (stats_df["assisted_blocks"] / 2)
    )
    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")

    # Columns used to calculate double doubles and triple doubles.
    # Credits:
    # https://en.wikipedia.org/wiki/Double_(volleyball)
    # https://stackoverflow.com/a/54381918
    double_stats_arr = [
        "aces",
        "kills",
        "total_blocks",
        "digs",
        "assists",
    ]
    double_digit_count = (stats_df[double_stats_arr] >= 10).sum(axis=1)
    stats_df["DBL_DBL"] = (double_digit_count >= 2).astype(int)
    stats_df["TRP_DBL"] = (double_digit_count >= 3).astype(int)

    # Fail loudly on columns this function does not know about,
    # except for attendance columns, which are dropped by the reindex below.
    for i in stats_df.columns:
        if i in stat_columns or "Attend" in str(i):
            continue
        raise ValueError(
            f"Unhandled column name {i}"
        )

    stats_df = stats_df.reindex(
        columns=stat_columns
    )

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/game_stats/player/"
        + f"{game_id}_player_game_stats.csv",
        index=False
    )
    return stats_df

Given a valid game ID, this function will attempt to get all player game stats, if possible.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want player game stats from.

Usage

from ncaa_stats_py.volleyball import get_volleyball_game_player_stats

########################################
#          Women's volleyball          #
########################################

# Get the game stats of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_game_player_stats(6080706)
print(df)

# Get the game stats of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
    "Get the game stats of a September 14th, 2024 "
    + "game between the UNC Asheville Bulldogs "
    + "and the Iona Gaels"
)
df = get_volleyball_game_player_stats(5670752)
print(df)

# Get the game stats of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakes.
print(
    "Get the game stats of a September 16th, 2023 "
    + "game between the Saginaw Valley Cardinals "
    + "and the Lake Superior St. Lakes."
)
df = get_volleyball_game_player_stats(3243563)
print(df)

# Get the game stats of a October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
    "Get the game stats of a October 15th, 2022 "
    + "game between the Macalester Scots and "
    + "the St. Scholastica Saints (D3)."
)
df = get_volleyball_game_player_stats(2307684)
print(df)

# Get the game stats of a October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
    "Get the game stats of a October 24th, 2021 "
    + "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_game_player_stats(2113627)
print(df)

# Get the game stats of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
    "Get the game stats of a March 5th, 2021 "
    + "game between the Notre Dame (OH) Falcons "
    + "and the Alderson Broaddus Battlers."
)
df = get_volleyball_game_player_stats(2005442)
print(df)

# Get the game stats of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
    "Get the game stats of a November 14th, 2019 "
    + "game between the Wittenberg Tigers and "
    + "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_game_player_stats(1815514)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the game stats of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_game_player_stats(5282845)
print(df)

# Get the game stats of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
    "Get the game stats of a January 14th, 2025 "
    + "game between the Kean Cougars "
    + "and the Arcadia Knights."
)
df = get_volleyball_game_player_stats(6081598)
print(df)

# Get the game stats of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
    "Get the game stats of a January 13th, 2024 "
    + "game between the Purdue Fort Wayne Mastodons "
    + "and the NJIT Highlanders."
)
df = get_volleyball_game_player_stats(4473231)
print(df)

# Get the game stats of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
    "Get the game stats of a January 21st, 2023 "
    + "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_game_player_stats(2355323)
print(df)

# Get the game stats of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
    "Get the game stats of a February 24th, 2022 "
    + "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_game_player_stats(2162239)
print(df)

# Get the game stats of a March 20th, 2021
# game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
print(
    "Get the game stats of a March 20th, 2021 "
    + "game between the SUNY New Paltz Hawks "
    + "and the St. John Fisher Cardinals."
)
df = get_volleyball_game_player_stats(2059180)
print(df)

# Get the game stats of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
    "Get the game stats of a March 1st, 2020 "
    + "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_game_player_stats(1820058)
print(df)

# Get the game stats of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
    "Get the game stats of an April 4th, 2019 "
    + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_game_player_stats(1723131)
print(df)

Returns

A pandas DataFrame object with player game stats in a given game.

def get_volleyball_game_team_stats(game_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all team game stats, if possible.

    Team stats are built by summing the player game stats returned by
    `get_volleyball_game_player_stats()` for each team in the game.
    The team hitting percentage (`hit%`) is then recalculated
    from the summed kills, errors, and total attacks.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_game_team_stats

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(6080706)
    print(df)

    # Get the game stats of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the game stats of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_game_team_stats(5670752)
    print(df)

    # Get the game stats of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakes.
    print(
        "Get the game stats of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakes."
    )
    df = get_volleyball_game_team_stats(3243563)
    print(df)

    # Get the game stats of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the game stats of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_game_team_stats(2307684)
    print(df)

    # Get the game stats of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the game stats of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_game_team_stats(2113627)
    print(df)

    # Get the game stats of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the game stats of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_game_team_stats(2005442)
    print(df)

    # Get the game stats of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the game stats of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_game_team_stats(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(5282845)
    print(df)

    # Get the game stats of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the game stats of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_game_team_stats(6081598)
    print(df)

    # Get the game stats of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the game stats of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_game_team_stats(4473231)
    print(df)

    # Get the game stats of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the game stats of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_game_team_stats(2355323)
    print(df)

    # Get the game stats of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the game stats of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_game_team_stats(2162239)
    print(df)

    # Get the game stats of a March 20th, 2021
    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
    print(
        "Get the game stats of a March 20th, 2021 "
        + "game between the SUNY New Paltz Hawks "
        + "and the St. John Fisher Cardinals."
    )
    df = get_volleyball_game_team_stats(2059180)
    print(df)

    # Get the game stats of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the game stats of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_game_team_stats(1820058)
    print(df)

    # Get the game stats of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the game stats of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_game_team_stats(1723131)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.

    """
    # Columns that identify a single team within a single game.
    group_keys = [
        "season",
        "sport_id",
        "game_datetime",
        "game_id",
        "team_id",
        "team_name"
    ]
    # Counting stats that are added up across a team's players.
    # `hit%` is a rate, so it is recalculated below instead of being summed.
    summed_columns = [
        "sets_played",
        "kills",
        "errors",
        "total_attacks",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "DBL_DBL",
        "TRP_DBL",
    ]

    player_stats_df = get_volleyball_game_player_stats(game_id=game_id)
    player_stats_df = player_stats_df.infer_objects()

    grouped_players = player_stats_df.groupby(group_keys, as_index=False)
    team_stats_df = grouped_players.agg(
        {column: "sum" for column in summed_columns}
    )

    attack_margin = team_stats_df["kills"] - team_stats_df["errors"]
    team_stats_df["hit%"] = attack_margin / team_stats_df["total_attacks"]
    return team_stats_df

Given a valid game ID, this function will attempt to get all team game stats, if possible.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want team game stats from.

Usage

from ncaa_stats_py.volleyball import get_volleyball_game_team_stats

########################################
#          Women's volleyball          #
########################################

# Get the game stats of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_game_team_stats(6080706)
print(df)

# Get the game stats of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
    "Get the game stats of a September 14th, 2024 "
    + "game between the UNC Asheville Bulldogs "
    + "and the Iona Gaels"
)
df = get_volleyball_game_team_stats(5670752)
print(df)

# Get the game stats of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakes.
print(
    "Get the game stats of a September 16th, 2023 "
    + "game between the Saginaw Valley Cardinals "
    + "and the Lake Superior St. Lakes."
)
df = get_volleyball_game_team_stats(3243563)
print(df)

# Get the game stats of an October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
    "Get the game stats of an October 15th, 2022 "
    + "game between the Macalester Scots and "
    + "the St. Scholastica Saints (D3)."
)
df = get_volleyball_game_team_stats(2307684)
print(df)

# Get the game stats of an October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
    "Get the game stats of an October 24th, 2021 "
    + "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_game_team_stats(2113627)
print(df)

# Get the game stats of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
    "Get the game stats of a March 5th, 2021 "
    + "game between the Notre Dame (OH) Falcons "
    + "and the Alderson Broaddus Battlers."
)
df = get_volleyball_game_team_stats(2005442)
print(df)

# Get the game stats of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
    "Get the game stats of a November 14th, 2019 "
    + "game between the Wittenberg Tigers and "
    + "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_game_team_stats(1815514)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the game stats of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
    "Get the game stats of the "
    + "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_game_team_stats(5282845)
print(df)

# Get the game stats of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
    "Get the game stats of a January 14th, 2025 "
    + "game between the Kean Cougars "
    + "and the Arcadia Knights."
)
df = get_volleyball_game_team_stats(6081598)
print(df)

# Get the game stats of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
    "Get the game stats of a January 13th, 2024 "
    + "game between the Purdue Fort Wayne Mastodons "
    + "and the NJIT Highlanders."
)
df = get_volleyball_game_team_stats(4473231)
print(df)

# Get the game stats of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
    "Get the game stats of a January 21st, 2023 "
    + "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_game_team_stats(2355323)
print(df)

# Get the game stats of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
    "Get the game stats of a February 24th, 2022 "
    + "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_game_team_stats(2162239)
print(df)

# Get the game stats of a March 20th, 2021
# game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
print(
    "Get the game stats of a March 20th, 2021 "
    + "game between the SUNY New Paltz Hawks "
    + "and the St. John Fisher Cardinals."
)
df = get_volleyball_game_team_stats(2059180)
print(df)

# Get the game stats of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
    "Get the game stats of a March 1st, 2020 "
    + "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_game_team_stats(1820058)
print(df)

# Get the game stats of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
    "Get the game stats of an April 4th, 2019 "
    + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_game_team_stats(1723131)
print(df)

Returns

A pandas DataFrame object with team game stats in a given game.

def get_volleyball_raw_pbp(game_id: int) -> pandas.core.frame.DataFrame:
def get_volleyball_raw_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get the raw play-by-play (PBP)
    data for that game.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_raw_pbp

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_raw_pbp(6080706)
    print(df)

    # Get the play-by-play data of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the play-by-play data of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_raw_pbp(5670752)
    print(df)

    # Get the play-by-play data of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakes.
    print(
        "Get the play-by-play data of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakes."
    )
    df = get_volleyball_raw_pbp(3243563)
    print(df)

    # Get the play-by-play data of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the play-by-play data of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_raw_pbp(2307684)
    print(df)

    # Get the play-by-play data of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the play-by-play data of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_raw_pbp(2113627)
    print(df)

    # Get the play-by-play data of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the play-by-play data of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_raw_pbp(2005442)
    print(df)

    # Get the play-by-play data of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the play-by-play data of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_raw_pbp(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the play-by-play data of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the play-by-play data of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_raw_pbp(5282845)
    print(df)

    # Get the play-by-play data of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the play-by-play data of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_raw_pbp(6081598)
    print(df)

    # Get the play-by-play data of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the play-by-play data of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_raw_pbp(4473231)
    print(df)

    # Get the play-by-play data of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the play-by-play data of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_raw_pbp(2355323)
    print(df)

    # Get the play-by-play data of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the play-by-play data of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_raw_pbp(2162239)
    print(df)

    # Get the play-by-play data of a March 7th, 2021
    # game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets.
    print(
        "Get the play-by-play data of a March 7th, 2021 "
        + "game between the Adrian Bulldogs "
        + "and the Baldwin Wallace Yellow Jackets."
    )
    df = get_volleyball_raw_pbp(1998844)
    print(df)

    # Get the play-by-play data of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the play-by-play data of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_raw_pbp(1820058)
    print(df)

    # Get the play-by-play data of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the play-by-play data of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_raw_pbp(1723131)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the play-by-play (PBP) data
    in a given game.

    Raises
    ----------
    `ValueError`:
        If neither team in the game can be matched to a known
        men's (MVB) or women's (WVB) volleyball team ID.

    `SyntaxError`:
        If a play-by-play row has more than three cells.

    `Exception`:
        If a play containing a `+` marker cannot be split into
        the marker and the play text.

    """
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    # Final column order of the returned/cached DataFrame.
    # Anything not listed here is dropped by the `reindex()` call below.
    stat_columns = [
        "season",
        "game_id",
        "sport_id",
        "game_datetime",
        "set_num",
        "event_num",
        "event_team",
        "event_text",
        "is_scoring_play",
        "home_set_score",
        "away_set_score",
        "is_extra_points",
        "home_cumulative_score",
        "away_cumulative_score",
        "home_sets_won",
        "away_sets_won",
        "stadium_name",
        "attendance",
        "away_team_id",
        "away_team_name",
        "home_team_id",
        "home_team_name",
        "home_set_1_score",
        "away_set_1_score",
        "home_set_2_score",
        "away_set_2_score",
        "home_set_3_score",
        "away_set_3_score",
        "home_set_4_score",
        "away_set_4_score",
        "home_set_5_score",
        "away_set_5_score",
    ]

    url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play"

    MVB_teams_df = load_volleyball_teams(get_mens_data=True)
    WVB_teams_df = load_volleyball_teams(get_mens_data=False)

    # ------------------------------------------------------------------
    # Cache handling.
    # The sport is not known until the page is parsed, so both the MVB
    # and the WVB cache folders are checked for this game ID.
    # ------------------------------------------------------------------
    cache_root = f"{home_dir}/.ncaa_stats_py/"
    if not exists(cache_root):
        mkdir(cache_root)

    games_df = pd.DataFrame()
    file_mod_datetime = datetime.today()
    load_from_cache = False

    for cached_sport in ("MVB", "WVB"):
        sport_dir = f"{cache_root}volleyball_{cached_sport}/"
        pbp_dir = f"{sport_dir}raw_pbp/"
        for folder in (sport_dir, pbp_dir):
            if not exists(folder):
                mkdir(folder)

        cache_file = f"{pbp_dir}{game_id}_raw_pbp.csv"
        if exists(cache_file):
            games_df = pd.read_csv(cache_file).infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
            load_from_cache = True
        else:
            logging.info(
                f"Could not find a cached {cached_sport} raw PBP file "
                + f"for game ID `{game_id}`."
            )

    # Cached files that are 35 or more days old are re-downloaded.
    if (datetime.today() - file_mod_datetime).days >= 35:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    # ------------------------------------------------------------------
    # Download and parse the game's metadata.
    # ------------------------------------------------------------------
    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    info_table = soup.find(
        "td",
        {
            "style": "padding: 0px 30px 0px 30px",
            "class": "d-none d-md-table-cell"
        }
    ).find(
        "table",
        {"style": "border-collapse: collapse"}
    )
    info_table_rows = info_table.find_all("tr")

    game_date_str = info_table_rows[3].find("td").text
    # The start time may be replaced by a "TBA"/"TBD" placeholder,
    # or be missing entirely.
    for placeholder in ("TBA", "tba", "TBD", "tbd"):
        if placeholder in game_date_str:
            date_format = f"%m/%d/%Y {placeholder}"
            break
    else:
        if ":" not in game_date_str:
            date_format = "%m/%d/%Y"
        else:
            date_format = "%m/%d/%Y %I:%M %p"
    game_datetime = datetime.strptime(game_date_str, date_format)
    # NOTE(review): `astimezone()` on a naive datetime treats the value as
    # being in this machine's local timezone before converting to
    # US/Eastern - confirm this is the intended interpretation.
    game_datetime = game_datetime.astimezone(timezone("US/Eastern"))
    game_date_str = game_datetime.isoformat()

    stadium_str = info_table_rows[4].find("td").text

    attendance_str = info_table_rows[5].find("td").text
    attendance_int = re.findall(r"([0-9\,]+)", attendance_str)[0]
    attendance_int = int(attendance_int.replace(",", ""))

    team_cards = soup.find_all(
        "td",
        {
            "valign": "center",
            "class": "grey_text d-none d-sm-table-cell"
        }
    )

    # The first team card is the away team, the second is the home team.
    away_url = team_cards[0].find_all("a")[0]
    home_url = team_cards[1].find_all("a")[0]

    away_team_name = away_url.text
    home_team_name = home_url.text

    parsed_team_ids = []
    for team_url in (away_url, home_url):
        team_href = team_url.get("href")
        for fragment in ("/teams", "/team", "/"):
            team_href = team_href.replace(fragment, "")
        parsed_team_ids.append(int(team_href))
    away_team_id, home_team_id = parsed_team_ids

    # Work out the sport and season from the team IDs.
    # The home team is checked first; the away team is only a fallback,
    # and it is looked up with its own ID.
    sport_candidates = (
        (home_team_id, "MVB", MVB_teams_df),
        (home_team_id, "WVB", WVB_teams_df),
        (away_team_id, "MVB", MVB_teams_df),
        (away_team_id, "WVB", WVB_teams_df),
    )
    for candidate_id, candidate_sport, teams_df in sport_candidates:
        team_rows = teams_df[teams_df["team_id"] == candidate_id]
        if not team_rows.empty:
            sport_id = candidate_sport
            season = team_rows["season"].iloc[0]
            break
    else:
        # If we get to this, we are in a code red situation.
        raise ValueError(
            "Could not identify if this is a " +
            "MVB or WVB game based on team IDs. "
        )

    section_cards = soup.find_all(
        "div",
        {"class": "row justify-content-md-center w-100"}
    )

    if len(section_cards) == 0:
        logging.warning(
            f"Could not find any plays for game ID `{game_id}`. " +
            "Returning empty DataFrame."
        )
        df = pd.DataFrame(columns=stat_columns)
        return df

    # ------------------------------------------------------------------
    # Parse the plays, one section card per set.
    # ------------------------------------------------------------------
    pbp_rows = []

    away_score = 0
    home_score = 0
    home_sets_won = 0
    away_sets_won = 0
    # Points scored in all *completed* sets.
    home_cumulative_score = 0
    away_cumulative_score = 0
    # Final score of each set, keyed by set number.
    home_set_scores = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
    away_set_scores = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}

    # Defaults so that a set table without any rows cannot leave these
    # names unbound when the "END SET"/"END MATCH" rows are built.
    event_team = None
    is_scoring_play = False

    for card in section_cards:
        is_extra_points = False
        event_text = ""
        # Every set starts at 0-0. Without this reset, rows that come
        # before the first scored play of a set would still carry the
        # previous set's final score.
        away_score = 0
        home_score = 0

        set_num_str = card.find(
            "div",
            {"class": "card-header"}
        ).text
        set_num = int(re.findall(r"([0-9]+)", set_num_str)[0])

        table_body = card.find("table").find("tbody").find_all("tr")

        for row in table_body:
            is_scoring_play = True
            t_cells = [x.text.strip() for x in row.find_all("td")]

            # Left cell = away team event, right cell = home team event.
            if len(t_cells[0]) > 0:
                event_team = away_team_id
                event_text = t_cells[0]
            elif len(t_cells[2]) > 0:
                event_team = home_team_id
                event_text = t_cells[2]

            if "+" in event_text:
                # The "+" marker and the play text are on separate lines;
                # keep the play text only.
                temp = event_text.split("\n")
                if len(temp) >= 2:
                    event_text = temp[1]
                else:
                    raise Exception(
                        "Unhandled situation " +
                        f"when parsing a scoring play: `{temp}`"
                    )
            else:
                event_text = event_text.replace("\n", "")

            event_text = event_text.replace("  ", " ")
            event_text = event_text.strip()

            if len(t_cells) == 3:
                try:
                    # Parse into temporaries first, so that a malformed
                    # score cannot leave a string in the running score.
                    parsed_away, parsed_home = (
                        int(x) for x in t_cells[1].split("-")
                    )
                    away_score = parsed_away
                    home_score = parsed_home
                    is_scoring_play = True
                except ValueError:
                    logging.info(
                        "Could not extract a score " +
                        f"from the following play `{event_text}`"
                    )
                    is_scoring_play = False
                except Exception as e:
                    logging.warning(
                        f"An unhandled exception has occurred: `{e}`"
                    )
                    raise
            elif len(t_cells) > 3:
                raise SyntaxError(
                    f"Unhandled PBP row format in game ID `{game_id}`"
                )

            # Sets 1-4 are flagged once the score reaches 24-24,
            # set 5 once it reaches 14-14. The flag stays set for the
            # rest of the set.
            if set_num <= 4 and home_score == 24 and away_score == 24:
                is_extra_points = True
            elif set_num == 5 and home_score == 14 and away_score == 14:
                is_extra_points = True

            pbp_rows.append(
                {
                    "set_num": set_num,
                    "away_set_score": away_score,
                    "home_set_score": home_score,
                    "event_team": event_team,
                    "event_text": event_text,
                    "is_scoring_play": is_scoring_play,
                    "is_extra_points": is_extra_points,
                    "home_cumulative_score": (
                        home_cumulative_score + home_score
                    ),
                    "away_cumulative_score": (
                        away_cumulative_score + away_score
                    ),
                    "home_sets_won": home_sets_won,
                    "away_sets_won": away_sets_won,
                }
            )

        # The set is over: record its final score and fold it into the
        # match totals (each set adds its *own* score).
        if set_num in home_set_scores:
            home_set_scores[set_num] = home_score
            away_set_scores[set_num] = away_score
        home_cumulative_score += home_score
        away_cumulative_score += away_score

        # The winner of a set is decided by that set's score,
        # not by the cumulative point totals of the match.
        if away_score > home_score:
            away_sets_won += 1
        elif away_score < home_score:
            home_sets_won += 1

        # End of set play
        pbp_rows.append(
            {
                "set_num": set_num,
                "away_set_score": away_score,
                "home_set_score": home_score,
                "event_team": event_team,
                "event_text": f"END SET {set_num}",
                "is_scoring_play": is_scoring_play,
                "is_extra_points": is_extra_points,
                "home_cumulative_score": home_cumulative_score,
                "away_cumulative_score": away_cumulative_score,
                "home_sets_won": home_sets_won,
                "away_sets_won": away_sets_won,
            }
        )

    # End of game play
    pbp_rows.append(
        {
            "set_num": set_num,
            "away_set_score": away_score,
            "home_set_score": home_score,
            "event_team": event_team,
            "event_text": "END MATCH",
            "is_scoring_play": is_scoring_play,
            "is_extra_points": is_extra_points,
            "home_cumulative_score": home_cumulative_score,
            "away_cumulative_score": away_cumulative_score,
            "home_sets_won": home_sets_won,
            "away_sets_won": away_sets_won,
        }
    )

    # ------------------------------------------------------------------
    # Assemble the final DataFrame and cache it.
    # ------------------------------------------------------------------
    pbp_df = pd.DataFrame(pbp_rows)
    pbp_df["event_num"] = pbp_df.index + 1
    pbp_df["game_datetime"] = game_date_str
    pbp_df["season"] = season
    pbp_df["game_id"] = game_id
    pbp_df["sport_id"] = sport_id
    pbp_df["stadium_name"] = stadium_str
    pbp_df["attendance"] = attendance_int
    pbp_df["away_team_id"] = away_team_id
    pbp_df["away_team_name"] = away_team_name
    pbp_df["home_team_id"] = home_team_id
    pbp_df["home_team_name"] = home_team_name

    for set_index in range(1, 6):
        pbp_df[f"home_set_{set_index}_score"] = home_set_scores[set_index]
        pbp_df[f"away_set_{set_index}_score"] = away_set_scores[set_index]

    pbp_df = pbp_df.reindex(columns=stat_columns)
    pbp_df = pbp_df.infer_objects()

    if sport_id not in ("MVB", "WVB"):
        raise ValueError(
            f"Improper Sport ID: `{sport_id}`"
        )

    pbp_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/raw_pbp/"
        + f"{game_id}_raw_pbp.csv",
        index=False
    )

    return pbp_df

Given a valid game ID, this function will attempt to get the raw play-by-play (PBP) data for that game.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want play-by-play data (PBP) from.

Usage

from ncaa_stats_py.volleyball import get_volleyball_raw_pbp

########################################
#          Women's volleyball          #
########################################

# Get the play-by-play data of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
    "Get the play-by-play data of the "
    + "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_raw_pbp(6080706)
print(df)

# Get the play-by-play data of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
    "Get the play-by-play data of a September 14th, 2024 "
    + "game between the UNC Asheville Bulldogs "
    + "and the Iona Gaels"
)
df = get_volleyball_raw_pbp(5670752)
print(df)

# Get the play-by-play data of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakers.
print(
    "Get the play-by-play data of a September 16th, 2023 "
    + "game between the Saginaw Valley Cardinals "
    + "and the Lake Superior St. Lakers."
)
df = get_volleyball_raw_pbp(3243563)
print(df)

# Get the play-by-play data of an October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
    "Get the play-by-play data of an October 15th, 2022 "
    + "game between the Macalester Scots and "
    + "the St. Scholastica Saints (D3)."
)
df = get_volleyball_raw_pbp(2307684)
print(df)

# Get the play-by-play data of an October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
    "Get the play-by-play data of an October 24th, 2021 "
    + "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_raw_pbp(2113627)
print(df)

# Get the play-by-play data of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
    "Get the play-by-play data of a March 5th, 2021 "
    + "game between the Notre Dame (OH) Falcons "
    + "and the Alderson Broaddus Battlers."
)
df = get_volleyball_raw_pbp(2005442)
print(df)

# Get the play-by-play data of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
    "Get the play-by-play data of a November 14th, 2019 "
    + "game between the Wittenberg Tigers and "
    + "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_raw_pbp(1815514)
print(df)

########################################
#          Men's volleyball            #
########################################

# Get the play-by-play data of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
    "Get the play-by-play data of the "
    + "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_raw_pbp(5282845)
print(df)

# Get the play-by-play data of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
    "Get the play-by-play data of a January 14th, 2025 "
    + "game between the Kean Cougars "
    + "and the Arcadia Knights."
)
df = get_volleyball_raw_pbp(6081598)
print(df)

# Get the play-by-play data of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
    "Get the play-by-play data of a January 13th, 2024 "
    + "game between the Purdue Fort Wayne Mastodons "
    + "and the NJIT Highlanders."
)
df = get_volleyball_raw_pbp(4473231)
print(df)

# Get the play-by-play data of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
    "Get the play-by-play data of a January 21st, 2023 "
    + "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_raw_pbp(2355323)
print(df)

# Get the play-by-play data of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
    "Get the play-by-play data of a February 24th, 2022 "
    + "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_raw_pbp(2162239)
print(df)

# Get the play-by-play data of a March 7th, 2021
# game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets.
print(
    "Get the play-by-play data of a March 7th, 2021 "
    + "game between the Adrian Bulldogs "
    + "and the Baldwin Wallace Yellow Jackets."
)
df = get_volleyball_raw_pbp(1998844)
print(df)

# Get the play-by-play data of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
    "Get the play-by-play data of a March 1st, 2020 "
    + "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_raw_pbp(1820058)
print(df)

# Get the play-by-play data of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
    "Get the play-by-play data of an April 4th, 2019 "
    + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_raw_pbp(1723131)
print(df)

Returns

A pandas DataFrame object with the play-by-play (PBP) data for a given game.

def get_parsed_volleyball_pbp(game_id: int) -> pandas.core.frame.DataFrame:
def get_parsed_volleyball_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to parse play-by-play (PBP)
    data for that game.

    The raw play-by-play data is retrieved with `get_volleyball_raw_pbp()`,
    parsed, and then every player name column is matched against the
    rosters of both teams to fill in the corresponding player ID column.
    Names that cannot be matched to a roster entry get a null player ID.

    Parsed results are cached as a CSV file in the `.ncaa_stats_py`
    folder of the user's home directory
    (`volleyball_MVB/parsed_pbp/` or `volleyball_WVB/parsed_pbp/`).
    A cached file is returned as-is until it is at least two days old,
    at which point the data is parsed again.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_parsed_volleyball_pbp

    # Get the parsed play-by-play data of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    df = get_parsed_volleyball_pbp(6080706)
    print(df)

    # Get the parsed play-by-play data of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    df = get_parsed_volleyball_pbp(5282845)
    print(df)
    ```

    Returns
    ----------
    A pandas `DataFrame` object with the parsed play-by-play (PBP) data
    for a given game.

    """
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)
    cache_root = f"{home_dir}/.ncaa_stats_py/"

    if not exists(cache_root):
        mkdir(cache_root)

    games_df = pd.DataFrame()
    file_mod_datetime = datetime.today()
    load_from_cache = False

    # The sport is not known until the raw PBP has been downloaded,
    # so both the men's (MVB) and women's (WVB) caches are checked.
    # WVB is checked last, so it wins if both files happen to exist.
    for cached_sport_id in ("MVB", "WVB"):
        sport_dir = f"{cache_root}volleyball_{cached_sport_id}/"
        parsed_dir = f"{sport_dir}parsed_pbp/"

        for folder in (sport_dir, parsed_dir):
            if not exists(folder):
                mkdir(folder)

        cache_path = f"{parsed_dir}{game_id}_parsed_pbp.csv"

        if exists(cache_path):
            games_df = pd.read_csv(cache_path)
            games_df = games_df.infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_path))
            load_from_cache = True
        else:
            logging.info(
                "Could not find a cached %s parsed play-by-play file "
                + "for game ID `%s`.",
                cached_sport_id,
                game_id,
            )

    # Anything with `age.days > 1` is considered stale and is re-parsed.
    age = datetime.today() - file_mod_datetime

    if age.days > 1:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    raw_df = get_volleyball_raw_pbp(game_id=game_id)

    sport_id = raw_df["sport_id"].iloc[0]
    home_team_id = raw_df["home_team_id"].iloc[0]
    away_team_id = raw_df["away_team_id"].iloc[0]

    pbp_df = _volleyball_pbp_helper(raw_df=raw_df)

    # Rosters are lower-cased so names can be matched case-insensitively.
    home_roster_df = get_volleyball_team_roster(team_id=home_team_id)
    home_roster_df["Name"] = home_roster_df["Name"].str.lower()

    away_roster_df = get_volleyball_team_roster(team_id=away_team_id)
    away_roster_df["Name"] = away_roster_df["Name"].str.lower()

    home_players_arr = dict(
        zip(home_roster_df["Name"], home_roster_df["player_id"])
    )
    away_players_arr = dict(
        zip(away_roster_df["Name"], away_roster_df["player_id"])
    )
    # On a name collision, the away team's player ID takes precedence.
    players_arr = home_players_arr | away_players_arr

    # Each `*_name` column has a matching `*_id` column to fill in.
    player_columns = [
        "substitution_player_1",
        "substitution_player_2",
        "substitution_player_3",
        "substitution_player_4",
        "serve_player",
        "reception_player",
        "set_player",
        "set_error_player",
        "attack_player",
        "dig_player",
        "kill_player",
        "block_player_1",
        "block_player_2",
        "ball_handling_error_player",
        "dig_error_player",
    ]
    name_cols = [f"{column}_name" for column in player_columns]
    id_cols = [f"{column}_id" for column in player_columns]

    for name_column, id_column in zip(name_cols, id_cols):
        # `regex=False` keeps both replacements literal on every pandas
        # version; as a regex, "." would match any character.
        pbp_df[name_column] = pbp_df[name_column].str.replace(
            "3a", "", regex=False
        )
        pbp_df[name_column] = pbp_df[name_column].str.replace(
            ".", "", regex=False
        )
        pbp_df[id_column] = pbp_df[name_column].str.lower()

        # Only real names are normalized; null entries are left untouched.
        has_name = pbp_df[id_column].notnull()
        pbp_df.loc[has_name, id_column] = pbp_df.loc[
            has_name, id_column
        ].map(_name_smother)

        # Names missing from both rosters end up as null IDs.
        pbp_df[id_column] = pbp_df[id_column].map(players_arr)

    pbp_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/parsed_pbp/"
        + f"{game_id}_parsed_pbp.csv",
        index=False
    )
    return pbp_df

Given a valid game ID, this function will attempt to parse play-by-play (PBP) data for that game.

Parameters

game_id (int, mandatory): Required argument. Specifies the game you want play-by-play data (PBP) from.

Usage


Returns

A pandas DataFrame object with the parsed play-by-play (PBP) data for a given game.