ncaa_stats_py.volleyball
1# Author: Joseph Armstrong (armstrongjoseph08@gmail.com) 2# File Name: `volleyball.py` 3# Purpose: Houses functions that allows one to access NCAA volleyball data 4# Creation Date: 2024-09-20 08:15 PM EDT 5# Update History: 6# - 2024-09-20 08:15 PM EDT 7# - 2025-01-04 03:00 PM EDT 8# - 2025-01-18 02:44 PM EDT 9# - 2025-02-01 02:40 PM EDT 10# - 2025-02-05 08:50 PM EDT 11 12 13import logging 14import re 15from datetime import date, datetime 16from os import mkdir 17from os.path import exists, expanduser, getmtime 18 19import numpy as np 20import pandas as pd 21from bs4 import BeautifulSoup 22from dateutil import parser 23from pytz import timezone 24from tqdm import tqdm 25 26from ncaa_stats_py.helpers.volleyball import _volleyball_pbp_helper 27from ncaa_stats_py.utls import ( 28 _format_folder_str, 29 _get_schools, 30 _get_webpage, 31 _name_smother, 32) 33 34 35def get_volleyball_teams( 36 season: int, 37 level: str | int, 38 get_mens_data: bool = False 39) -> pd.DataFrame: 40 """ 41 Retrieves a list of volleyball teams from the NCAA. 42 43 Parameters 44 ---------- 45 `season` (int, mandatory): 46 Required argument. 47 Specifies the season you want NCAA volleyball team information from. 48 49 `level` (int, mandatory): 50 Required argument. 51 Specifies the level/division you want 52 NCAA volleyball team information from. 53 This can either be an integer (1-3) or a string ("I"-"III"). 54 55 `get_mens_data` (bool, optional): 56 Optional argument. 57 If you want men's volleyball data instead of women's volleyball data, 58 set this to `True`. 59 60 Usage 61 ---------- 62 ```python 63 64 from ncaa_stats_py.volleyball import get_volleyball_teams 65 66 ######################################## 67 # Men's volleyball # 68 ######################################## 69 70 # Get all D1 men's volleyball teams for the 2024 season. 
71 print("Get all D1 men's volleyball teams for the 2024 season.") 72 df = get_volleyball_teams(2024, 1) 73 print(df) 74 75 # Get all D2 men's volleyball teams for the 2023 season. 76 print("Get all D2 men's volleyball teams for the 2023 season.") 77 df = get_volleyball_teams(2023, 2) 78 print(df) 79 80 # Get all D3 men's volleyball teams for the 2022 season. 81 print("Get all D3 men's volleyball teams for the 2022 season.") 82 df = get_volleyball_teams(2022, 3) 83 print(df) 84 85 # Get all D1 men's volleyball teams for the 2021 season. 86 print("Get all D1 men's volleyball teams for the 2021 season.") 87 df = get_volleyball_teams(2021, "I") 88 print(df) 89 90 # Get all D2 men's volleyball teams for the 2020 season. 91 print("Get all D2 men's volleyball teams for the 2020 season.") 92 df = get_volleyball_teams(2020, "II") 93 print(df) 94 95 # Get all D3 men's volleyball teams for the 2019 season. 96 print("Get all D3 men's volleyball teams for the 2019 season.") 97 df = get_volleyball_teams(2019, "III") 98 print(df) 99 100 ######################################## 101 # Women's volleyball # 102 ######################################## 103 104 # Get all D1 women's volleyball teams for the 2024 season. 105 print( 106 "Get all D1 women's volleyball teams for the 2024 season." 107 ) 108 df = get_volleyball_teams(2024, 1) 109 print(df) 110 111 # Get all D2 women's volleyball teams for the 2023 season. 112 print( 113 "Get all D2 women's volleyball teams for the 2023 season." 114 ) 115 df = get_volleyball_teams(2023, 2) 116 print(df) 117 118 # Get all D3 women's volleyball teams for the 2022 season. 119 print( 120 "Get all D3 women's volleyball teams for the 2022 season." 121 ) 122 df = get_volleyball_teams(2022, 3) 123 print(df) 124 125 # Get all D1 women's volleyball teams for the 2021 season. 126 print( 127 "Get all D1 women's volleyball teams for the 2021 season." 
128 ) 129 df = get_volleyball_teams(2021, "I") 130 print(df) 131 132 # Get all D2 women's volleyball teams for the 2020 season. 133 print( 134 "Get all D2 women's volleyball teams for the 2020 season." 135 ) 136 df = get_volleyball_teams(2020, "II") 137 print(df) 138 139 # Get all D3 women's volleyball teams for the 2019 season. 140 print( 141 "Get all D3 women's volleyball teams for the 2019 season." 142 ) 143 df = get_volleyball_teams(2019, "III") 144 print(df) 145 146 ``` 147 148 Returns 149 ---------- 150 A pandas `DataFrame` object with a list of college volleyball teams 151 in that season and NCAA level. 152 """ 153 # def is_comment(elem): 154 # return isinstance(elem, Comment) 155 sport_id = "" 156 # stat_sequence = 0 157 load_from_cache = True 158 home_dir = expanduser("~") 159 home_dir = _format_folder_str(home_dir) 160 teams_df = pd.DataFrame() 161 teams_df_arr = [] 162 temp_df = pd.DataFrame() 163 formatted_level = "" 164 ncaa_level = 0 165 166 if get_mens_data is True: 167 sport_id = "MVB" 168 stat_sequence = 528 169 elif get_mens_data is False: 170 sport_id = "WVB" 171 stat_sequence = 48 172 173 if isinstance(level, int) and level == 1: 174 formatted_level = "I" 175 ncaa_level = 1 176 elif isinstance(level, int) and level == 2: 177 formatted_level = "II" 178 ncaa_level = 2 179 elif isinstance(level, int) and level == 3: 180 formatted_level = "III" 181 ncaa_level = 3 182 elif isinstance(level, str) and ( 183 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 184 ): 185 ncaa_level = 1 186 formatted_level = level.upper() 187 elif isinstance(level, str) and ( 188 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 189 ): 190 ncaa_level = 2 191 formatted_level = level.upper() 192 elif isinstance(level, str) and ( 193 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 194 ): 195 ncaa_level = 3 196 formatted_level = level.upper() 197 198 if exists(f"{home_dir}/.ncaa_stats_py/"): 199 pass 200 else: 201 
mkdir(f"{home_dir}/.ncaa_stats_py/") 202 203 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"): 204 pass 205 else: 206 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/") 207 208 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"): 209 pass 210 else: 211 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/") 212 213 if exists( 214 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 215 + f"{season}_{formatted_level}_teams.csv" 216 ): 217 teams_df = pd.read_csv( 218 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 219 + f"{season}_{formatted_level}_teams.csv" 220 ) 221 file_mod_datetime = datetime.fromtimestamp( 222 getmtime( 223 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 224 + f"{season}_{formatted_level}_teams.csv" 225 ) 226 ) 227 else: 228 file_mod_datetime = datetime.today() 229 load_from_cache = False 230 231 now = datetime.today() 232 233 age = now - file_mod_datetime 234 235 if ( 236 age.days > 1 and 237 season >= (now.year - 1) and 238 now.month <= 7 239 ): 240 load_from_cache = False 241 elif age.days >= 35: 242 load_from_cache = False 243 244 if load_from_cache is True: 245 return teams_df 246 247 logging.warning( 248 f"Either we could not load {season} D{level} schools from cache, " 249 + "or it's time to refresh the cached data." 250 ) 251 schools_df = _get_schools() 252 253 # Volleyball 254 if sport_id == "MVB": 255 url = ( 256 "https://stats.ncaa.org/rankings/change_sport_year_div?" 257 + f"academic_year={season}.0&division={ncaa_level}.0" + 258 f"&sport_code={sport_id}" 259 ) 260 elif sport_id == "WVB": 261 url = ( 262 "https://stats.ncaa.org/rankings/change_sport_year_div?" 
263 + f"academic_year={season+1}.0&division={ncaa_level}.0" + 264 f"&sport_code={sport_id}" 265 ) 266 267 response = _get_webpage(url=url) 268 269 soup = BeautifulSoup(response.text, features="lxml") 270 ranking_periods = soup.find("select", {"name": "rp", "id": "rp"}) 271 ranking_periods = ranking_periods.find_all("option") 272 273 rp_value = 0 274 found_value = False 275 276 while found_value is False: 277 # print("check") 278 for rp in ranking_periods: 279 if "final" in rp.text.lower(): 280 rp_value = rp.get("value") 281 found_value = True 282 break 283 # pass 284 elif "-" in rp.text.lower(): 285 pass 286 else: 287 rp_value = rp.get("value") 288 found_value = True 289 break 290 291 if sport_id == "MVB": 292 url = ( 293 "https://stats.ncaa.org/rankings/institution_trends?" 294 + f"academic_year={season}.0&division={ncaa_level}.0&" 295 + f"ranking_period={rp_value}&sport_code={sport_id}" 296 ) 297 elif sport_id == "WVB": 298 url = ( 299 "https://stats.ncaa.org/rankings/institution_trends?" 300 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 301 + f"ranking_period={rp_value}&sport_code={sport_id}" 302 ) 303 304 best_method = True 305 if ( 306 (season < 2017 and sport_id == "MVB") 307 ): 308 url = ( 309 "https://stats.ncaa.org/rankings/national_ranking?" 310 + f"academic_year={season}.0&division={ncaa_level}.0&" 311 + f"ranking_period={rp_value}&sport_code={sport_id}" 312 + f"&stat_seq={stat_sequence}.0" 313 ) 314 response = _get_webpage(url=url) 315 best_method = False 316 elif ( 317 (season < 2017 and sport_id == "WVB") 318 ): 319 url = ( 320 "https://stats.ncaa.org/rankings/national_ranking?" 
321 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 322 + f"ranking_period={rp_value}&sport_code={sport_id}" 323 + f"&stat_seq={stat_sequence}.0" 324 ) 325 response = _get_webpage(url=url) 326 best_method = False 327 elif sport_id == "MVB": 328 try: 329 response = _get_webpage(url=url) 330 except Exception as e: 331 logging.info(f"Found exception when loading teams `{e}`") 332 logging.info("Attempting backup method.") 333 url = ( 334 "https://stats.ncaa.org/rankings/national_ranking?" 335 + f"academic_year={season}.0&division={ncaa_level}.0&" 336 + f"ranking_period={rp_value}&sport_code={sport_id}" 337 + f"&stat_seq={stat_sequence}.0" 338 ) 339 response = _get_webpage(url=url) 340 best_method = False 341 else: 342 try: 343 response = _get_webpage(url=url) 344 except Exception as e: 345 logging.info(f"Found exception when loading teams `{e}`") 346 logging.info("Attempting backup method.") 347 url = ( 348 "https://stats.ncaa.org/rankings/national_ranking?" 349 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 350 + f"ranking_period={rp_value}&sport_code={sport_id}" 351 + f"&stat_seq={stat_sequence}.0" 352 ) 353 response = _get_webpage(url=url) 354 best_method = False 355 356 soup = BeautifulSoup(response.text, features="lxml") 357 358 if best_method is True: 359 soup = soup.find( 360 "table", 361 {"id": "stat_grid"}, 362 ) 363 soup = soup.find("tbody") 364 t_rows = soup.find_all("tr") 365 366 for t in t_rows: 367 team_id = t.find("a") 368 team_id = team_id.get("href") 369 team_id = team_id.replace("/teams/", "") 370 team_id = int(team_id) 371 team_name = t.find_all("td")[0].text 372 team_conference_name = t.find_all("td")[1].text 373 # del team 374 temp_df = pd.DataFrame( 375 { 376 "season": season, 377 "ncaa_division": ncaa_level, 378 "ncaa_division_formatted": formatted_level, 379 "team_conference_name": team_conference_name, 380 "team_id": team_id, 381 "school_name": team_name, 382 "sport_id": sport_id, 383 }, 384 index=[0], 385 ) 386 
def load_volleyball_teams(
    start_year: int = 2011,
    get_mens_data: bool = False
) -> pd.DataFrame:
    """
    Compiles a list of known NCAA volleyball teams in NCAA volleyball history.

    Parameters
    ----------
    `start_year` (int, optional):
        Optional argument.
        Specifies the first season you want
        NCAA volleyball team information from.

    `get_mens_data` (bool, optional):
        Optional argument.
        If you want men's volleyball data instead of women's volleyball data,
        set this to `True`.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import load_volleyball_teams

    # WARNING: Running this script "as-is" for the first time may
    #          take some time.
    #          The *N*th time you run this script will be faster.

    # Load in every women's volleyball team
    # from 2011 to present day.
    print(
        "Load in every women's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams()
    print(df)

    # Load in every men's volleyball team
    # from 2011 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams(get_mens_data=True)
    print(df)

    # Load in every men's volleyball team
    # from 2020 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2020 to present day."
    )
    df = load_volleyball_teams(start_year=2020, get_mens_data=True)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a list of
    all known college volleyball teams.
    An empty `DataFrame` is returned when `start_year` is later than
    the last season this function would load.

    """
    now = datetime.now()

    if get_mens_data:
        sport_label = "men's"
        # Only divisions I and III are requested for men's volleyball.
        ncaa_divisions = ["I", "III"]
        last_season = now.year
    else:
        sport_label = "women's"
        ncaa_divisions = ["I", "II", "III"]
        # From June onwards, the women's list also includes next year.
        last_season = now.year + 1 if now.month > 5 else now.year

    ncaa_seasons = range(start_year, last_season + 1)

    logging.info(
        "Loading in all NCAA volleyball teams. "
        + "If this is the first time you're seeing this message, "
        + "it may take some time (3-10 minutes) for this to load."
    )

    teams_df_arr = []
    for s in ncaa_seasons:
        logging.info(
            f"Loading in {sport_label} volleyball teams for the {s} season."
        )
        for d in ncaa_divisions:
            teams_df_arr.append(
                get_volleyball_teams(
                    season=s,
                    level=d,
                    get_mens_data=bool(get_mens_data)
                )
            )

    if not teams_df_arr:
        # `pd.concat()` raises on an empty list, so hand back an empty
        # `DataFrame` instead of crashing on an empty season range.
        logging.warning(
            f"No {sport_label} volleyball seasons exist between "
            + f"{start_year} and {last_season}."
        )
        return pd.DataFrame()

    teams_df = pd.concat(teams_df_arr, ignore_index=True)
    teams_df = teams_df.infer_objects()
    return teams_df
def get_volleyball_team_schedule(team_id: int) -> pd.DataFrame:
    """
    Retrieves a team schedule, from a valid NCAA volleyball team ID.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a schedule from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_schedule

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the team schedule for the
    # 2024 Toledo WVB team (D1, ID: 585329).
    print(
        "Get the team schedule for the " +
        "2024 Toledo WVB team (D1, ID: 585329)."
    )
    df = get_volleyball_team_schedule(585329)
    print(df)

    # Get the team schedule for the
    # 2023 Black Hills St. WVB team (D2, ID: 559709).
    print(
        "Get the team schedule for the " +
        "2023 Black Hills St. WVB team (D2, ID: 559709)."
    )
    df = get_volleyball_team_schedule(559709)
    print(df)

    # Get the team schedule for the
    # 2022 Mount Mary WVB team (D3, ID: 539750).
    print(
        "Get the team schedule for the " +
        "2022 Mount Mary WVB team (D3, ID: 539750)."
    )
    df = get_volleyball_team_schedule(539750)
    print(df)

    # Get the team schedule for the
    # 2021 TCU WVB team (D1, ID: 522750).
    print(
        "Get the team schedule for the " +
        "2021 TCU WVB team (D1, ID: 522750)."
    )
    df = get_volleyball_team_schedule(522750)
    print(df)

    # Get the team schedule for the
    # 2020 Purdue Northwest WVB team (D2, ID: 504832).
    print(
        "Get the team schedule for the " +
        "2020 Purdue Northwest WVB team (D2, ID: 504832)."
    )
    df = get_volleyball_team_schedule(504832)
    print(df)

    # Get the team schedule for the
    # 2019 Juniata WVB team (D3, ID: 482642).
    print(
        "Get the team schedule for the " +
        "2019 Juniata WVB team (D3, ID: 482642)."
    )
    df = get_volleyball_team_schedule(482642)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the team schedule for the
    # 2024 Missouri S&T MVB team (D1, ID: 573720).
    print(
        "Get the team schedule for the " +
        "2024 Missouri S&T MVB team (D1, ID: 573720)."
    )
    df = get_volleyball_team_schedule(573720)
    print(df)

    # Get the team schedule for the
    # 2023 Rockford MVB team (D3, ID: 550890).
    print(
        "Get the team schedule for the " +
        "2023 Rockford MVB team (D3, ID: 550890)."
    )
    df = get_volleyball_team_schedule(550890)
    print(df)

    # Get the team schedule for the
    # 2022 McKendree MVB team (D1, ID: 529896).
    print(
        "Get the team schedule for the " +
        "2022 McKendree MVB team (D1, ID: 529896)."
    )
    df = get_volleyball_team_schedule(529896)
    print(df)

    # Get the team schedule for the
    # 2021 Concordia Chicago MVB team (D3, ID: 508505).
    print(
        "Get the team schedule for the " +
        "2021 Concordia Chicago MVB team (D3, ID: 508505)."
    )
    df = get_volleyball_team_schedule(508505)
    print(df)

    # Get the team schedule for the
    # 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
    print(
        "Get the team schedule for the " +
        "2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
    )
    df = get_volleyball_team_schedule(487992)
    print(df)

    # Get the team schedule for the
    # 2019 Loras MVB team (D3, ID: 453845).
    print(
        "Get the team schedule for the " +
        "2019 Loras MVB team (D3, ID: 453845)."
    )
    df = get_volleyball_team_schedule(453845)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with an NCAA volleyball team's schedule.

    Raises
    ----------
    `IndexError`:
        If `team_id` is not found among the known women's
        or men's volleyball teams.

    `ValueError`:
        If the team page has no schedule table,
        or if no games could be parsed from it.

    """
    schools_df = _get_schools()
    games_df_arr = []

    home_dir = _format_folder_str(expanduser("~"))

    url = f"https://stats.ncaa.org/teams/{team_id}"

    def _find_team(mens: bool):
        # Return (season, division, formatted division) for `team_id`.
        # `.iloc[0]` raises `IndexError` when the team is not in the list.
        teams = load_volleyball_teams(get_mens_data=mens)
        teams = teams[teams["team_id"] == team_id]
        return (
            teams["season"].iloc[0],
            teams["ncaa_division"].iloc[0],
            teams["ncaa_division_formatted"].iloc[0],
        )

    # Look for the team in the women's list first. If that fails for any
    # reason, fall back to the men's list.
    try:
        season, ncaa_division, ncaa_division_formatted = _find_team(False)
        sport_id = "WVB"
    except Exception:
        try:
            season, ncaa_division, ncaa_division_formatted = _find_team(True)
        except IndexError as e:
            raise IndexError(
                f"Team ID `{team_id}` was not found among the known "
                + "women's or men's volleyball teams."
            ) from e
        sport_id = "MVB"

    schedule_dir = (
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
    )

    # Create the cache folders one level at a time (parent first).
    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/",
        schedule_dir,
    ):
        if not exists(folder):
            mkdir(folder)

    cache_file = schedule_dir + f"{team_id}_team_schedule.csv"

    now = datetime.today()
    load_from_cache = exists(cache_file)
    if load_from_cache:
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = now

    age = now - file_mod_datetime

    # Only schedules from the current year onwards are refreshed,
    # and only once the cached copy is more than a day old.
    if (
        age.days > 1 and
        season >= now.year
    ):
        load_from_cache = False

    if load_from_cache:
        return pd.read_csv(cache_file)

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # Find the card whose header mentions "schedule".
    # If several cards match, the last one wins.
    table_data = None
    for card in soup.find_all("div", {"class": "col p-0"}):
        try:
            card_title = card.find("div", {"class": "card-header"}).text
        except AttributeError as e:
            logging.warning(
                f"Could not parse card header. Full exception `{e}`. "
                + "Attempting alternate method."
            )
            card_title = card.find("tr", {"class": "heading"}).find("td").text

        if "schedule" in card_title.lower():
            table_data = card.find("table")

    if table_data is None:
        raise ValueError(
            f"Could not find a schedule table for team ID `{team_id}`."
        )

    t_rows = table_data.find_all("tr", {"class": "underline_rows"})

    if len(t_rows) == 0:
        t_rows = table_data.find_all("tr")

    for g in t_rows:
        game_num = 1
        ot_periods = 0
        is_home_game = True
        is_neutral_game = False

        cells = g.find_all("td")
        if len(cells) <= 1:
            # When we fall back to every `<tr>` in the table, some rows
            # only hold table headers (`<th>`) instead of
            # table data cells (`<td>`). Those rows are not games.
            continue

        game_date = cells[0].text

        # If "(" is in the same cell as the date, the number inside
        # the parentheses is split off and stored as `game_num`.
        # NOTE(review): presumably this is the n-th match the team
        # played on that date - confirm.
        if "(" in game_date:
            game_date = game_date.replace(")", "")
            game_date, game_num = game_date.split("(")
            game_date = game_date.strip()
            game_num = int(game_num.strip())

        if ":" in game_date and ("PM" in game_date or "AM" in game_date):
            game_date = datetime.strptime(
                game_date,
                "%m/%d/%Y %I:%M %p"
            ).date()
        else:
            game_date = datetime.strptime(
                game_date,
                "%m/%d/%Y"
            ).date()

        # An opponent without a link gets the placeholder ID `-1`.
        try:
            opp_team_id = cells[1].find("a").get("href")
        except AttributeError as e:
            logging.info(
                "Could not extract a team ID for this game. " +
                f"Full exception {e}"
            )
            opp_team_id = "-1"

        opp_team_name = None
        if opp_team_id is not None:
            opp_team_id = int(opp_team_id.replace("/teams/", ""))

            try:
                opp_team_name = cells[1].find("img").get("alt")
            except AttributeError:
                logging.info(
                    "Couldn't find the opposition team name "
                    + "for this row from an image element. "
                    + "Attempting a backup method"
                )

        if not opp_team_name:
            # No logo, or a logo without alt text: use the cell text.
            opp_team_name = cells[1].text

        # Strip before looking at the first character. The cell text can
        # carry leading whitespace, which used to hide a leading "@" and
        # turn the opponent name into an empty string.
        opp_team_name = opp_team_name.strip()
        if opp_team_name.startswith("@"):
            # Away game, "@ <opponent>".
            opp_team_name = opp_team_name.replace("@", "").strip()
        elif "@" in opp_team_name:
            # "<opponent> @ <site>": keep only the opponent.
            opp_team_name = opp_team_name.split("@")[0].strip()

        opp_text = cells[1].text.strip()
        opp_text_lower = opp_text.lower()
        if opp_text.startswith("@"):
            is_home_game = False
        elif (
            "@" in opp_text
            # This is just to cover conference and NCAA championship
            # tournaments.
            or "championship" in opp_text_lower
            or "ncaa" in opp_text_lower
        ):
            is_neutral_game = True
            is_home_game = False

        score = cells[2].text.strip()
        if len(score) == 0:
            score_1 = 0
            score_2 = 0
        elif "canceled" in score.lower() or "ppd" in score.lower():
            score_1 = None
            score_2 = None
        else:
            try:
                score_1, score_2 = score.split("-")

                # `score_1` should be "W `n`", "L `n`", or "T `n`",
                # with `n` representing the number of sets this team
                # won in this game.
                # Drop the "W", "L", or "T"; the winner can be worked
                # out from the two numbers.
                if any(x in score_1 for x in ["W", "L", "T"]):
                    score_1 = score_1.split(" ")[1]

                # A trailing "(n)" or "(n OT)" is stored in `ot_periods`.
                if "(" in score_2:
                    score_2, ot_text = score_2.replace(")", "").split("(")
                    ot_periods = int(
                        ot_text.replace("OT", "").replace(" ", "")
                    )

                score_1 = int(score_1)
                score_2 = int(score_2)
            except (ValueError, IndexError) as e:
                logging.warning(
                    f"Could not parse the score `{score}` for this game. "
                    + f"Full exception `{e}`."
                )
                score_1 = None
                score_2 = None
                ot_periods = 0

        try:
            game_id = cells[2].find("a").get("href")
            game_id = game_id.replace("/contests", "")
            game_id = game_id.replace("/box_score", "")
            game_id = game_id.replace("/", "")
            game_id = int(game_id)
            game_url = (
                f"https://stats.ncaa.org/contests/{game_id}/box_score"
            )
        except AttributeError as e:
            logging.info(
                "Could not parse a game ID for this game. "
                + f"Full exception `{e}`."
            )
            game_id = None
            game_url = None

        # NOTE(review): the previous version also parsed an attendance
        # figure from `cells[3]`, but never stored it in the output.
        # That dead code was removed; add an "attendance" column here
        # if it is wanted in the schedule.

        if is_home_game:
            team_is_home = True
        elif is_neutral_game:
            # For neutral site games, the team with the lower team ID is
            # always listed as the "home" team, so the same game looks
            # identical in both teams' schedules and duplicates can be
            # removed after combining schedule `DataFrame`s.
            team_is_home = (
                opp_team_id is not None and team_id <= opp_team_id
            )
        else:
            team_is_home = False

        if team_is_home:
            home = (team_id, school_name, score_1)
            away = (opp_team_id, opp_team_name, score_2)
        else:
            home = (opp_team_id, opp_team_name, score_2)
            away = (team_id, school_name, score_1)

        games_df_arr.append(
            pd.DataFrame(
                {
                    "season": season,
                    "season_name": season_name,
                    "game_id": game_id,
                    "game_date": game_date,
                    "game_num": game_num,
                    "ot_periods": ot_periods,
                    "home_team_id": home[0],
                    "home_team_name": home[1],
                    "away_team_id": away[0],
                    "away_team_name": away[1],
                    "home_team_sets_won": home[2],
                    "away_team_sets_won": away[2],
                    "is_neutral_game": is_neutral_game,
                    "game_url": game_url,
                },
                index=[0],
            )
        )

    if not games_df_arr:
        raise ValueError(
            f"No games could be parsed for team ID `{team_id}`."
        )

    games_df = pd.concat(games_df_arr, ignore_index=True)

    # Attach the school IDs of the home team, and then of the away team.
    temp_df = schools_df.rename(
        columns={
            "school_name": "home_team_name",
            "school_id": "home_school_id"
        }
    )
    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")

    temp_df = schools_df.rename(
        columns={
            "school_name": "away_team_name",
            "school_id": "away_school_id"
        }
    )
    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
    games_df["ncaa_division"] = ncaa_division
    games_df["ncaa_division_formatted"] = ncaa_division_formatted

    games_df.to_csv(cache_file, index=False)

    return games_df
"home_team_name", 1128 "school_id": "home_school_id" 1129 } 1130 ) 1131 games_df = games_df.merge(right=temp_df, on="home_team_name", how="left") 1132 1133 temp_df = schools_df.rename( 1134 columns={ 1135 "school_name": "away_team_name", 1136 "school_id": "away_school_id" 1137 } 1138 ) 1139 games_df = games_df.merge(right=temp_df, on="away_team_name", how="left") 1140 games_df["ncaa_division"] = ncaa_division 1141 games_df["ncaa_division_formatted"] = ncaa_division_formatted 1142 1143 # games_df["game_url"] = games_df["game_url"].str.replace("/box_score", "") 1144 games_df.to_csv( 1145 f"{home_dir}/.ncaa_stats_py/" 1146 + f"volleyball_{sport_id}/team_schedule/" 1147 + f"{team_id}_team_schedule.csv", 1148 index=False, 1149 ) 1150 1151 return games_df 1152 1153 1154def get_volleyball_day_schedule( 1155 game_date: str | date | datetime, 1156 level: str | int = "I", 1157 get_mens_data: bool = False 1158): 1159 """ 1160 Given a date and NCAA level, this function retrieves volleyball every game 1161 for that date. 1162 1163 Parameters 1164 ---------- 1165 `game_date` (int, mandatory): 1166 Required argument. 1167 Specifies the date you want a volleyball schedule from. 1168 For best results, pass a string formatted as "YYYY-MM-DD". 1169 1170 `level` (int, mandatory): 1171 Required argument. 1172 Specifies the level/division you want a 1173 NCAA volleyball schedule from. 1174 This can either be an integer (1-3) or a string ("I"-"III"). 1175 1176 `get_mens_data` (bool, optional): 1177 Optional argument. 1178 If you want men's volleyball data instead of women's volleyball data, 1179 set this to `True`. 1180 1181 Usage 1182 ---------- 1183 ```python 1184 1185 from ncaa_stats_py.volleyball import get_volleyball_day_schedule 1186 1187 ######################################## 1188 # Women's Volleyball # 1189 ######################################## 1190 1191 # Get all DI games (if any) that were played on December 22th, 2024. 
1192 print("Get all games (if any) that were played on December 22th, 2024.") 1193 df = get_volleyball_day_schedule("2024-12-22", level=1) 1194 print(df) 1195 1196 # Get all division II games that were played on November 24th, 2024. 1197 print("Get all division II games that were played on November 24th, 2024.") 1198 df = get_volleyball_day_schedule("2024-11-24", level="II") 1199 print(df) 1200 1201 # Get all DIII games that were played on October 27th, 2024. 1202 print("Get all DIII games that were played on October 27th, 2024.") 1203 df = get_volleyball_day_schedule("2024-10-27", level="III") 1204 print(df) 1205 1206 # Get all DI games (if any) that were played on September 29th, 2024. 1207 print( 1208 "Get all DI games (if any) that were played on September 29th, 2024." 1209 ) 1210 df = get_volleyball_day_schedule("2024-09-29") 1211 print(df) 1212 1213 # Get all DII games played on August 30th, 2024. 1214 print("Get all DI games played on August 30th, 2024.") 1215 df = get_volleyball_day_schedule("2024-08-30") 1216 print(df) 1217 1218 # Get all division III games played on September 23rd, 2023. 1219 print("Get all division III games played on September 23rd, 2023.") 1220 df = get_volleyball_day_schedule("2023-09-23", level="III") 1221 print(df) 1222 1223 ######################################## 1224 # Men's Volleyball # 1225 ######################################## 1226 1227 # Get all DI games that will be played on April 12th, 2025. 1228 print("Get all games that will be played on April 12th, 2025.") 1229 df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True) 1230 print(df) 1231 1232 # Get all DI games that were played on January 30th, 2025. 1233 print("Get all games that were played on January 30th, 2025.") 1234 df = get_volleyball_day_schedule( 1235 "2025-01-30", level="I", get_mens_data=True 1236 ) 1237 print(df) 1238 1239 # Get all division III games that were played on April 6th, 2024. 
1240 print("Get all division III games that were played on April 6th, 2024.") 1241 df = get_volleyball_day_schedule( 1242 "2025-04-05", level="III", get_mens_data=True 1243 ) 1244 print(df) 1245 1246 # Get all DI games (if any) that were played on March 30th, 2024. 1247 print("Get all DI games (if any) that were played on March 30th, 2024.") 1248 df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True) 1249 print(df) 1250 1251 # Get all DI games played on February 23rd, 2024. 1252 print("Get all DI games played on February 23rd, 2024.") 1253 df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True) 1254 print(df) 1255 1256 # Get all division III games played on February 11th, 2023. 1257 print("Get all division III games played on February 11th, 2023.") 1258 df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True) 1259 print(df) 1260 1261 ``` 1262 1263 Returns 1264 ---------- 1265 A pandas `DataFrame` object with all volleyball games played on that day, 1266 for that NCAA division/level. 
1267 1268 """ 1269 1270 season = 0 1271 sport_id = "WVB" 1272 1273 schedule_df = pd.DataFrame() 1274 schedule_df_arr = [] 1275 1276 if isinstance(game_date, date): 1277 game_datetime = datetime.combine( 1278 game_date, datetime.min.time() 1279 ) 1280 elif isinstance(game_date, datetime): 1281 game_datetime = game_date 1282 elif isinstance(game_date, str): 1283 game_datetime = parser.parse( 1284 game_date 1285 ) 1286 else: 1287 unhandled_datatype = type(game_date) 1288 raise ValueError( 1289 f"Unhandled datatype for `game_date`: `{unhandled_datatype}`" 1290 ) 1291 1292 if isinstance(level, int) and level == 1: 1293 formatted_level = "I" 1294 ncaa_level = 1 1295 elif isinstance(level, int) and level == 2: 1296 formatted_level = "II" 1297 ncaa_level = 2 1298 elif isinstance(level, int) and level == 3: 1299 formatted_level = "III" 1300 ncaa_level = 3 1301 elif isinstance(level, str) and ( 1302 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 1303 ): 1304 ncaa_level = 1 1305 formatted_level = level.upper() 1306 elif isinstance(level, str) and ( 1307 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 1308 ): 1309 ncaa_level = 2 1310 formatted_level = level.upper() 1311 elif isinstance(level, str) and ( 1312 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 1313 ): 1314 ncaa_level = 3 1315 formatted_level = level.upper() 1316 1317 del level 1318 1319 if get_mens_data is True: 1320 sport_id = "MVB" 1321 elif get_mens_data is False: 1322 sport_id = "WVB" 1323 else: 1324 raise ValueError( 1325 f"Unhandled value for `get_wbb_data`: `{get_mens_data}`" 1326 ) 1327 1328 season = game_datetime.year 1329 game_month = game_datetime.month 1330 game_day = game_datetime.day 1331 game_year = game_datetime.year 1332 1333 if game_month > 7: 1334 season += 1 1335 url = ( 1336 "https://stats.ncaa.org/contests/" + 1337 f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" + 1338 
f"&academic_year={season}&division={ncaa_level}" + 1339 f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" + 1340 "&commit=Submit" 1341 ) 1342 else: 1343 url = ( 1344 "https://stats.ncaa.org/contests/" + 1345 f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" + 1346 f"&academic_year={season}&division={ncaa_level}" + 1347 f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" + 1348 "&commit=Submit" 1349 ) 1350 1351 response = _get_webpage(url=url) 1352 soup = BeautifulSoup(response.text, features="lxml") 1353 1354 game_boxes = soup.find_all("div", {"class": "table-responsive"}) 1355 1356 for box in game_boxes: 1357 game_id = None 1358 game_alt_text = None 1359 game_num = 1 1360 # t_box = box.find("table") 1361 table_box = box.find("table") 1362 table_rows = table_box.find_all("tr") 1363 1364 # Date/attendance 1365 game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text 1366 game_date_str = game_date_str.replace("\n", "") 1367 game_date_str = game_date_str.strip() 1368 game_date_str = game_date_str.replace("TBA ", "TBA") 1369 game_date_str = game_date_str.replace("TBD ", "TBD") 1370 game_date_str = game_date_str.replace("PM ", "PM") 1371 game_date_str = game_date_str.replace("AM ", "AM") 1372 game_date_str = game_date_str.strip() 1373 attendance_str = table_rows[0].find( 1374 "div", 1375 {"class": "col p-0 text-right"} 1376 ).text 1377 1378 attendance_str = attendance_str.replace("Attend:", "") 1379 attendance_str = attendance_str.replace(",", "") 1380 attendance_str = attendance_str.replace("\n", "") 1381 if ( 1382 "st" in attendance_str.lower() or 1383 "nd" in attendance_str.lower() or 1384 "rd" in attendance_str.lower() or 1385 "th" in attendance_str.lower() 1386 ): 1387 # This is not an attendance, 1388 # this is whatever quarter/half/inning this game is in. 
1389 attendance_num = None 1390 elif "final" in attendance_str.lower(): 1391 attendance_num = None 1392 elif len(attendance_str) > 0: 1393 attendance_num = int(attendance_str) 1394 else: 1395 attendance_num = None 1396 1397 if "(" in game_date_str: 1398 game_date_str = game_date_str.replace(")", "") 1399 game_date_str, game_num = game_date_str.split("(") 1400 game_num = int(game_num) 1401 1402 if "TBA" in game_date_str: 1403 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 1404 elif "tba" in game_date_str: 1405 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 1406 elif "TBD" in game_date_str: 1407 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 1408 elif "tbd" in game_date_str: 1409 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 1410 elif ( 1411 "tbd" not in game_date_str.lower() and 1412 ":" not in game_date_str.lower() 1413 ): 1414 game_date_str = game_date_str.replace(" ", "") 1415 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 1416 else: 1417 game_datetime = datetime.strptime( 1418 game_date_str, 1419 '%m/%d/%Y %I:%M %p' 1420 ) 1421 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 1422 1423 game_alt_text = table_rows[1].find_all("td")[0].text 1424 if game_alt_text is not None and len(game_alt_text) > 0: 1425 game_alt_text = game_alt_text.replace("\n", "") 1426 game_alt_text = game_alt_text.strip() 1427 1428 if len(game_alt_text) == 0: 1429 game_alt_text = None 1430 1431 urls_arr = box.find_all("a") 1432 1433 for u in urls_arr: 1434 url_temp = u.get("href") 1435 if "contests" in url_temp: 1436 game_id = url_temp 1437 del url_temp 1438 1439 if game_id is None: 1440 for r in range(0, len(table_rows)): 1441 temp = table_rows[r] 1442 temp_id = temp.get("id") 1443 1444 if temp_id is not None and len(temp_id) > 0: 1445 game_id = temp_id 1446 1447 del urls_arr 1448 1449 game_id = game_id.replace("/contests", "") 1450 game_id = game_id.replace("/box_score", "") 1451 game_id 
= game_id.replace("/livestream_scoreboards", "") 1452 game_id = game_id.replace("/", "") 1453 game_id = game_id.replace("contest_", "") 1454 game_id = int(game_id) 1455 1456 table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"}) 1457 away_team_row = table_rows[0] 1458 home_team_row = table_rows[1] 1459 1460 # Away team 1461 td_arr = away_team_row.find_all("td") 1462 1463 try: 1464 away_team_name = td_arr[0].find("img").get("alt") 1465 except Exception: 1466 away_team_name = td_arr[1].text 1467 away_team_name = away_team_name.replace("\n", "") 1468 away_team_name = away_team_name.strip() 1469 1470 try: 1471 away_team_id = td_arr[1].find("a").get("href") 1472 away_team_id = away_team_id.replace("/teams/", "") 1473 away_team_id = int(away_team_id) 1474 except AttributeError: 1475 away_team_id = None 1476 logging.info("No team ID found for the away team") 1477 except Exception as e: 1478 raise e 1479 1480 away_sets_scored = td_arr[-1].text 1481 away_sets_scored = away_sets_scored.replace("\n", "") 1482 away_sets_scored = away_sets_scored.replace("\xa0", "") 1483 1484 if "ppd" in away_sets_scored.lower(): 1485 continue 1486 elif "cancel" in away_sets_scored.lower(): 1487 continue 1488 1489 if len(away_sets_scored) > 0: 1490 away_sets_scored = int(away_sets_scored) 1491 else: 1492 away_sets_scored = 0 1493 1494 del td_arr 1495 1496 # Home team 1497 td_arr = home_team_row.find_all("td") 1498 1499 try: 1500 home_team_name = td_arr[0].find("img").get("alt") 1501 except Exception: 1502 home_team_name = td_arr[1].text 1503 home_team_name = home_team_name.replace("\n", "") 1504 home_team_name = home_team_name.strip() 1505 1506 try: 1507 home_team_id = td_arr[1].find("a").get("href") 1508 home_team_id = home_team_id.replace("/teams/", "") 1509 home_team_id = int(home_team_id) 1510 except AttributeError: 1511 home_team_id = None 1512 logging.info("No team ID found for the home team") 1513 except Exception as e: 1514 raise e 1515 1516 home_sets_scored = 
td_arr[-1].text 1517 home_sets_scored = home_sets_scored.replace("\n", "") 1518 home_sets_scored = home_sets_scored.replace("\xa0", "") 1519 1520 if "ppd" in home_sets_scored.lower(): 1521 continue 1522 elif "cancel" in home_sets_scored.lower(): 1523 continue 1524 1525 if len(home_sets_scored) > 0: 1526 home_sets_scored = int(home_sets_scored) 1527 else: 1528 home_sets_scored = 0 1529 1530 temp_df = pd.DataFrame( 1531 { 1532 "season": season, 1533 "sport_id": sport_id, 1534 "game_date": game_datetime.strftime("%Y-%m-%d"), 1535 "game_datetime": game_datetime.isoformat(), 1536 "game_id": game_id, 1537 "formatted_level": formatted_level, 1538 "ncaa_level": ncaa_level, 1539 "game_alt_text": game_alt_text, 1540 "away_team_id": away_team_id, 1541 "away_team_name": away_team_name, 1542 "home_team_id": home_team_id, 1543 "home_team_name": home_team_name, 1544 "home_sets_scored": home_sets_scored, 1545 "away_sets_scored": away_sets_scored, 1546 "attendance": attendance_num 1547 }, 1548 index=[0] 1549 ) 1550 schedule_df_arr.append(temp_df) 1551 1552 del temp_df 1553 1554 if len(schedule_df_arr) >= 1: 1555 schedule_df = pd.concat(schedule_df_arr, ignore_index=True) 1556 else: 1557 logging.warning( 1558 "Could not find any game(s) for " 1559 + f"{game_datetime.year:00d}-{game_datetime.month:00d}" 1560 + f"-{game_datetime.day:00d}. " 1561 + "If you believe this is an error, " 1562 + "please raise an issue at " 1563 + "\n https://github.com/armstjc/ncaa_stats_py/issues \n" 1564 ) 1565 return schedule_df 1566 1567 1568def get_full_volleyball_schedule( 1569 season: int, 1570 level: str | int = "I", 1571 get_mens_data: bool = True 1572) -> pd.DataFrame: 1573 """ 1574 Retrieves a full volleyball schedule, 1575 from an NCAA level (`"I"`, `"II"`, `"III"`). 1576 The way this is done is by going through every team in a division, 1577 and parsing the schedules of every team in a division. 1578 1579 This function will take time when first run (30-60 minutes)! 1580 You have been warned. 
1581 1582 Parameters 1583 ---------- 1584 `season` (int, mandatory): 1585 Specifies the season you want a schedule from. 1586 1587 `level` (int | str, mandatory): 1588 Specifies the team you want a schedule from. 1589 1590 `get_mens_data` (bool, optional): 1591 Optional argument. 1592 If you want men's volleyball data instead of women's volleyball data, 1593 set this to `True`. 1594 1595 Usage 1596 ---------- 1597 ```python 1598 1599 from ncaa_stats_py.volleyball import get_full_volleyball_schedule 1600 1601 ############################################################################## 1602 # NOTE 1603 # This function will easily take an hour or more 1604 # to run for the first time in a given season and NCAA level! 1605 # You have been warned! 1606 ############################################################################## 1607 1608 # Get the entire 2024 schedule for the 2024 women's D1 volleyball season. 1609 print( 1610 "Get the entire 2024 schedule " + 1611 "for the 2024 women's D1 volleyball season." 1612 ) 1613 df = get_full_volleyball_schedule(season=2024, level="I") 1614 print(df) 1615 1616 # Get the entire 2024 schedule for the 2024 men's D1 volleyball season. 1617 # print( 1618 # "Get the entire 2024 schedule for " + 1619 # "the 2024 men's D1 volleyball season." 1620 # ) 1621 # df = get_full_volleyball_schedule( 1622 # season=2024, 1623 # level="I", 1624 # get_mens_data=True 1625 # ) 1626 # print(df) 1627 1628 # You can also input `level` as an integer. 1629 # In addition, this and other functions cache data, 1630 # so this should load very quickly 1631 # compared to the first run of this function. 1632 print("You can also input `level` as an integer.") 1633 print( 1634 "In addition, this and other functions cache data, " 1635 + "so this should load very quickly " 1636 + "compared to the first run of this function." 
1637 ) 1638 df = get_full_volleyball_schedule(season=2024, level=1) 1639 print(df) 1640 1641 ``` 1642 1643 Returns 1644 ---------- 1645 A pandas `DataFrame` object with an NCAA volleyball 1646 schedule for a specific season and level. 1647 """ 1648 1649 sport_id = "" 1650 load_from_cache = True 1651 home_dir = expanduser("~") 1652 home_dir = _format_folder_str(home_dir) 1653 schedule_df = pd.DataFrame() 1654 schedule_df_arr = [] 1655 temp_df = pd.DataFrame() 1656 formatted_level = "" 1657 ncaa_level = 0 1658 1659 if get_mens_data is True: 1660 sport_id = "MVB" 1661 else: 1662 sport_id = "WVB" 1663 1664 if isinstance(level, int) and level == 1: 1665 formatted_level = "I" 1666 ncaa_level = 1 1667 elif isinstance(level, int) and level == 2: 1668 formatted_level = "II" 1669 ncaa_level = 2 1670 elif isinstance(level, int) and level == 3: 1671 formatted_level = "III" 1672 ncaa_level = 3 1673 elif isinstance(level, str) and ( 1674 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 1675 ): 1676 ncaa_level = 1 1677 formatted_level = level.upper() 1678 elif isinstance(level, str) and ( 1679 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 1680 ): 1681 ncaa_level = 2 1682 formatted_level = level.upper() 1683 elif isinstance(level, str) and ( 1684 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 1685 ): 1686 ncaa_level = 3 1687 formatted_level = level.upper() 1688 1689 del level 1690 1691 if exists(f"{home_dir}/.ncaa_stats_py/"): 1692 pass 1693 else: 1694 mkdir(f"{home_dir}/.ncaa_stats_py/") 1695 1696 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"): 1697 pass 1698 else: 1699 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/") 1700 1701 if exists( 1702 f"{home_dir}/.ncaa_stats_py/" + 1703 f"volleyball_{sport_id}/full_schedule/" 1704 ): 1705 pass 1706 else: 1707 mkdir( 1708 f"{home_dir}/.ncaa_stats_py/" + 1709 f"volleyball_{sport_id}/full_schedule/" 1710 ) 1711 1712 if exists( 1713 
f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/" 1714 + f"{season}_{formatted_level}_full_schedule.csv" 1715 ): 1716 teams_df = pd.read_csv( 1717 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/" 1718 + f"{season}_{formatted_level}_full_schedule.csv" 1719 ) 1720 file_mod_datetime = datetime.fromtimestamp( 1721 getmtime( 1722 f"{home_dir}/.ncaa_stats_py/" + 1723 f"volleyball_{sport_id}/full_schedule/" 1724 + f"{season}_{formatted_level}_full_schedule.csv" 1725 ) 1726 ) 1727 else: 1728 file_mod_datetime = datetime.today() 1729 load_from_cache = False 1730 1731 now = datetime.today() 1732 1733 age = now - file_mod_datetime 1734 1735 if ( 1736 age.days > 1 and 1737 season >= now.year 1738 ): 1739 load_from_cache = False 1740 1741 if load_from_cache is True: 1742 return teams_df 1743 1744 teams_df = load_volleyball_teams() 1745 teams_df = teams_df[ 1746 (teams_df["season"] == season) & 1747 (teams_df["ncaa_division"] == ncaa_level) 1748 ] 1749 team_ids_arr = teams_df["team_id"].to_numpy() 1750 1751 for team_id in tqdm(team_ids_arr): 1752 temp_df = get_volleyball_team_schedule(team_id=team_id) 1753 schedule_df_arr.append(temp_df) 1754 1755 schedule_df = pd.concat(schedule_df_arr, ignore_index=True) 1756 schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first") 1757 schedule_df.to_csv( 1758 f"{home_dir}/.ncaa_stats_py/" 1759 + f"volleyball_{sport_id}/full_schedule/" 1760 + f"{season}_{formatted_level}_full_schedule.csv", 1761 index=False, 1762 ) 1763 return schedule_df 1764 1765 1766def get_volleyball_team_roster(team_id: int) -> pd.DataFrame: 1767 """ 1768 Retrieves a volleyball team's roster from a given team ID. 1769 1770 Parameters 1771 ---------- 1772 `team_id` (int, mandatory): 1773 Required argument. 1774 Specifies the team you want a roster from. 1775 This is separate from a school ID, which identifies the institution. 1776 A team ID should be unique to a school, and a season. 
def get_volleyball_team_roster(team_id: int) -> pd.DataFrame:
    """
    Retrieves a volleyball team's roster from a given team ID.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a roster from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_roster

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the volleyball roster for the
    # 2024 Weber St. WVB team (D1, ID: 585347).
    print(
        "Get the volleyball roster for the " +
        "2024 Weber St. WVB team (D1, ID: 585347)."
    )
    df = get_volleyball_team_roster(585347)
    print(df)

    # Get the volleyball roster for the
    # 2023 Montevallo WVB team (D2, ID: 559599).
    print(
        "Get the volleyball roster for the " +
        "2023 Montevallo WVB team (D2, ID: 559599)."
    )
    df = get_volleyball_team_roster(559599)
    print(df)

    # Get the volleyball roster for the
    # 2022 Millsaps team (D3, ID: 539944).
    print(
        "Get the volleyball roster for the " +
        "2022 Millsaps team (D3, ID: 539944)."
    )
    df = get_volleyball_team_roster(539944)
    print(df)

    # Get the volleyball roster for the
    # 2021 Binghamton WVB team (D1, ID: 522893).
    print(
        "Get the volleyball roster for the " +
        "2021 Binghamton WVB team (D1, ID: 522893)."
    )
    df = get_volleyball_team_roster(522893)
    print(df)

    # Get the volleyball roster for the
    # 2020 Holy Family WVB team (D2, ID: 504760).
    print(
        "Get the volleyball roster for the " +
        "2020 Holy Family WVB team (D2, ID: 504760)."
    )
    df = get_volleyball_team_roster(504760)
    print(df)

    # Get the volleyball roster for the
    # 2019 Franciscan team (D3, ID: 482939).
    print(
        "Get the volleyball roster for the " +
        "2019 Franciscan team (D3, ID: 482939)."
    )
    df = get_volleyball_team_roster(482939)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the volleyball roster for the
    # 2024 Hawaii MVB team (D1, ID: 573674).
    print(
        "Get the volleyball roster for the " +
        "2024 Hawaii MVB team (D1, ID: 573674)."
    )
    df = get_volleyball_team_roster(573674)
    print(df)

    # Get the volleyball roster for the
    # 2023 Widener MVB team (D3, ID: 550860).
    print(
        "Get the volleyball roster for the " +
        "2023 Widener MVB team (D3, ID: 550860)."
    )
    df = get_volleyball_team_roster(550860)
    print(df)

    # Get the volleyball roster for the
    # 2022 Alderson Broaddus MVB team (D1, ID: 529880).
    print(
        "Get the volleyball roster for the " +
        "2022 Alderson Broaddus MVB team (D1, ID: 529880)."
    )
    df = get_volleyball_team_roster(529880)
    print(df)

    # Get the volleyball roster for the
    # 2021 Geneva MVB team (D3, ID: 508506).
    print(
        "Get the volleyball roster for the " +
        "2021 Geneva MVB team (D3, ID: 508506)."
    )
    df = get_volleyball_team_roster(508506)
    print(df)

    # Get the volleyball roster for the
    # 2020 Urbana MVB team (D1, ID: 484975).
    print(
        "Get the volleyball roster for the " +
        "2020 Urbana MVB team (D1, ID: 484975)."
    )
    df = get_volleyball_team_roster(484975)
    print(df)

    # Get the volleyball roster for the
    # 2019 Eastern Nazarene MVB team (D3, ID: 453876).
    print(
        "Get the volleyball roster for the " +
        "2019 Eastern Nazarene MVB team (D3, ID: 453876)."
    )
    df = get_volleyball_team_roster(453876)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with
    an NCAA volleyball team's roster for that season.

    Raises
    ----------
    `IndexError`:
        If `team_id` is not found in either the women's
        or the men's volleyball team list.

    `ValueError`:
        If the roster page has a column this function does not know about.
    """
    roster_df_arr = []
    url = f"https://stats.ncaa.org/teams/{team_id}/roster"
    load_from_cache = True
    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "ncaa_division",
        "ncaa_division_formatted",
        "team_conference_name",
        "school_id",
        "school_name",
        "player_id",
        "player_jersey_num",
        "player_full_name",
        "player_first_name",
        "player_last_name",
        "player_class",
        "player_positions",
        "player_height_string",
        "player_weight",
        "player_hometown",
        "player_high_school",
        "player_G",
        "player_GS",
        "player_url",
    ]

    # Look the team up in the women's list first.
    # Any failure there is deliberately treated as "not a women's team",
    # and the men's list is tried instead.
    sport_id = "WVB"
    team_df = pd.DataFrame()
    try:
        team_df = load_volleyball_teams()
        team_df = team_df[team_df["team_id"] == team_id]
    except Exception:
        team_df = pd.DataFrame()

    if len(team_df) == 0:
        sport_id = "MVB"
        team_df = load_volleyball_teams(get_mens_data=True)
        team_df = team_df[team_df["team_id"] == team_id]

    if len(team_df) == 0:
        raise IndexError(
            "Could not find a volleyball team " +
            f"with a team ID of `{team_id}`."
        )

    season = team_df["season"].iloc[0]
    ncaa_division = team_df["ncaa_division"].iloc[0]
    ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
    team_conference_name = team_df["team_conference_name"].iloc[0]
    school_name = team_df["school_name"].iloc[0]
    school_id = int(team_df["school_id"].iloc[0])

    del team_df

    cache_dir = f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/rosters/"
    cache_file = cache_dir + f"{team_id}_roster.csv"

    # Create the cache folders one level at a time, if they are missing.
    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/",
        cache_dir,
    ):
        if not exists(folder):
            mkdir(folder)

    if exists(cache_file):
        cached_df = pd.read_csv(cache_file)
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = datetime.today()
        load_from_cache = False

    now = datetime.today()
    age = now - file_mod_datetime

    # Only a current (or future) season can go stale;
    # its cache is refreshed once it is two weeks old.
    if (
        age.days >= 14 and
        season >= now.year
    ):
        load_from_cache = False

    if load_from_cache is True:
        return cached_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # The school name on the page replaces the one from the team list.
    card = soup.find("div", {"class": "card"})
    try:
        school_name = card.find("img").get("alt")
    except AttributeError:
        # No logo: use the link text, minus its last space-separated part.
        school_name = card.find("a").text
        school_name = school_name.rsplit(" ", maxsplit=1)[0]

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # The roster table is published under one of two class strings.
    try:
        table = soup.find(
            "table",
            {"class": "dataTable small_font"},
        )
        table_headers = table.find("thead").find_all("th")
    except AttributeError:
        table = soup.find(
            "table",
            {"class": "dataTable small_font no_padding"},
        )
        table_headers = table.find("thead").find_all("th")
    table_headers = [x.text for x in table_headers]

    t_rows = table.find("tbody").find_all("tr")

    for t in t_rows:
        t_cells = [x.text for x in t.find_all("td")]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The first link in the row points at the player's page.
        player_id = t.find("a").get("href")
        temp_df["player_url"] = f"https://stats.ncaa.org{player_id}"

        player_id = player_id.replace("/players", "").replace("/", "")
        temp_df["player_id"] = int(player_id)

        roster_df_arr.append(temp_df)

    roster_df = pd.concat(roster_df_arr, ignore_index=True)
    roster_df = roster_df.infer_objects()
    roster_df["season"] = season
    roster_df["season_name"] = season_name
    roster_df["ncaa_division"] = ncaa_division
    roster_df["ncaa_division_formatted"] = ncaa_division_formatted
    roster_df["team_conference_name"] = team_conference_name
    roster_df["school_id"] = school_id
    roster_df["school_name"] = school_name
    roster_df["sport_id"] = sport_id

    roster_df.rename(
        columns={
            "GP": "player_G",
            "GS": "player_GS",
            "#": "player_jersey_num",
            "Name": "player_full_name",
            "Class": "player_class",
            "Position": "player_positions",
            "Height": "player_height_string",
            # NOTE(review): "Bats"/"Throws" look like leftovers from
            # another sport's roster parser; neither target name is in
            # `stat_columns`. Confirm before removing.
            "Bats": "player_batting_hand",
            "Throws": "player_throwing_hand",
            "Hometown": "player_hometown",
            "High School": "player_high_school",
        },
        inplace=True
    )

    # Everything before the first space is the first name.
    roster_df[["player_first_name", "player_last_name"]] = roster_df[
        "player_full_name"
    ].str.split(" ", n=1, expand=True)
    roster_df = roster_df.infer_objects()

    # Fail loudly if the page has a column this function does not handle.
    for i in roster_df.columns:
        if i not in stat_columns:
            raise ValueError(
                f"Unhandled column name {i}"
            )

    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)

    roster_df.to_csv(cache_file, index=False)
    return roster_df
2181 ) 2182 df = get_volleyball_player_season_stats(523163) 2183 print(df) 2184 2185 # Get the season stats for the 2186 # 2020 North Greenville WVB team (D2, ID: 504820). 2187 print( 2188 "Get the season stats for the " + 2189 "2020 North Greenville WVB team (D2, ID: 504820)." 2190 ) 2191 df = get_volleyball_player_season_stats(504820) 2192 print(df) 2193 2194 # Get the season stats for the 2195 # 2019 SUNY Potsdam team (D3, ID: 482714). 2196 print( 2197 "Get the season stats for the " + 2198 "2019 SUNY Potsdam team (D3, ID: 482714)." 2199 ) 2200 df = get_volleyball_player_season_stats(482714) 2201 print(df) 2202 2203 ######################################## 2204 # Men's volleyball # 2205 ######################################## 2206 2207 # Get the season stats for the 2208 # 2024 Lees-McRae MVB team (D1, ID: 573699). 2209 print( 2210 "Get the season stats for the " + 2211 "2024 Lees-McRae MVB team (D1, ID: 573699)." 2212 ) 2213 df = get_volleyball_player_season_stats(573699) 2214 print(df) 2215 2216 # Get the season stats for the 2217 # 2023 Elizabethtown MVB team (D3, ID: 550871). 2218 print( 2219 "Get the season stats for the " + 2220 "2023 Elizabethtown MVB team (D3, ID: 550871)." 2221 ) 2222 df = get_volleyball_player_season_stats(550871) 2223 print(df) 2224 2225 # Get the season stats for the 2226 # 2022 Limestone MVB team (D1, ID: 529884). 2227 print( 2228 "Get the season stats for the " + 2229 "2022 Limestone MVB team (D1, ID: 529884)." 2230 ) 2231 df = get_volleyball_player_season_stats(529884) 2232 print(df) 2233 2234 # Get the season stats for the 2235 # 2021 Maranatha Baptist MVB team (D3, ID: 508471). 2236 print( 2237 "Get the season stats for the " + 2238 "2021 Maranatha Baptist MVB team (D3, ID: 508471)." 2239 ) 2240 df = get_volleyball_player_season_stats(508471) 2241 print(df) 2242 2243 # Get the season stats for the 2244 # 2020 CUI MVB team (D1, ID: 484972). 
2245 print( 2246 "Get the season stats for the " + 2247 "2020 CUI MVB team (D1, ID: 484972)." 2248 ) 2249 df = get_volleyball_player_season_stats(484972) 2250 print(df) 2251 2252 # Get the season stats for the 2253 # 2019 SUNY New Paltz MVB team (D3, ID: 453851). 2254 print( 2255 "Get the season stats for the " + 2256 "2019 SUNY New Paltz MVB team (D3, ID: 453851)." 2257 ) 2258 df = get_volleyball_player_season_stats(453851) 2259 print(df) 2260 2261 ``` 2262 2263 Returns 2264 ---------- 2265 A pandas `DataFrame` object with the season batting stats for 2266 all players with a given NCAA volleyball team. 2267 """ 2268 2269 sport_id = "" 2270 load_from_cache = True 2271 stats_df = pd.DataFrame() 2272 stats_df_arr = [] 2273 temp_df = pd.DataFrame() 2274 2275 stat_columns = [ 2276 "season", 2277 "season_name", 2278 "sport_id", 2279 "team_id", 2280 "team_conference_name", 2281 "school_id", 2282 "school_name", 2283 "ncaa_division", 2284 "ncaa_division_formatted", 2285 "player_id", 2286 "player_jersey_number", 2287 "player_last_name", 2288 "player_first_name", 2289 "player_full_name", 2290 "player_class", 2291 "player_position", 2292 "player_height", 2293 "GP", 2294 "GS", 2295 "sets_played", 2296 "MS", 2297 "kills", 2298 "errors", 2299 "total_attacks", 2300 "hit%", 2301 "assists", 2302 "aces", 2303 "serve_errors", 2304 "digs", 2305 "return_attacks", 2306 "return_errors", 2307 "solo_blocks", 2308 "assisted_blocks", 2309 "block_errors", 2310 "total_blocks", 2311 "points", 2312 "BHE", 2313 "serve_attempts", 2314 "DBL_DBL", 2315 "TRP_DBL", 2316 ] 2317 2318 try: 2319 team_df = load_volleyball_teams() 2320 2321 team_df = team_df[team_df["team_id"] == team_id] 2322 2323 season = team_df["season"].iloc[0] 2324 ncaa_division = int(team_df["ncaa_division"].iloc[0]) 2325 ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0] 2326 team_conference_name = team_df["team_conference_name"].iloc[0] 2327 school_name = team_df["school_name"].iloc[0] 2328 school_id = 
int(team_df["school_id"].iloc[0]) 2329 sport_id = "WVB" 2330 except Exception: 2331 team_df = load_volleyball_teams(get_mens_data=True) 2332 2333 team_df = team_df[team_df["team_id"] == team_id] 2334 2335 season = team_df["season"].iloc[0] 2336 ncaa_division = int(team_df["ncaa_division"].iloc[0]) 2337 ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0] 2338 team_conference_name = team_df["team_conference_name"].iloc[0] 2339 school_name = team_df["school_name"].iloc[0] 2340 school_id = int(team_df["school_id"].iloc[0]) 2341 sport_id = "MVB" 2342 2343 del team_df 2344 2345 home_dir = expanduser("~") 2346 home_dir = _format_folder_str(home_dir) 2347 2348 url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats" 2349 2350 if exists(f"{home_dir}/.ncaa_stats_py/"): 2351 pass 2352 else: 2353 mkdir(f"{home_dir}/.ncaa_stats_py/") 2354 2355 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"): 2356 pass 2357 else: 2358 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/") 2359 2360 if exists( 2361 f"{home_dir}/.ncaa_stats_py/" + 2362 f"volleyball_{sport_id}/player_season_stats/" 2363 ): 2364 pass 2365 else: 2366 mkdir( 2367 f"{home_dir}/.ncaa_stats_py/" + 2368 f"volleyball_{sport_id}/player_season_stats/" 2369 ) 2370 2371 if exists( 2372 f"{home_dir}/.ncaa_stats_py/" + 2373 f"volleyball_{sport_id}/player_season_stats/" 2374 + f"{season:00d}_{school_id:00d}_player_season_stats.csv" 2375 ): 2376 games_df = pd.read_csv( 2377 f"{home_dir}/.ncaa_stats_py/" + 2378 f"volleyball_{sport_id}/player_season_stats/" 2379 + f"{season:00d}_{school_id:00d}_player_season_stats.csv" 2380 ) 2381 file_mod_datetime = datetime.fromtimestamp( 2382 getmtime( 2383 f"{home_dir}/.ncaa_stats_py/" + 2384 f"volleyball_{sport_id}/player_season_stats/" 2385 + f"{season:00d}_{school_id:00d}_player_season_stats.csv" 2386 ) 2387 ) 2388 else: 2389 file_mod_datetime = datetime.today() 2390 load_from_cache = False 2391 2392 now = datetime.today() 2393 2394 age = now - 
file_mod_datetime 2395 2396 if ( 2397 age.days > 1 and 2398 season >= now.year 2399 ): 2400 load_from_cache = False 2401 2402 if load_from_cache is True: 2403 return games_df 2404 2405 response = _get_webpage(url=url) 2406 2407 soup = BeautifulSoup(response.text, features="lxml") 2408 2409 season_name = ( 2410 soup.find("select", {"id": "year_list"}) 2411 .find("option", {"selected": "selected"}) 2412 .text 2413 ) 2414 2415 if sport_id == "MVB": 2416 season = f"{season_name[0:2]}{season_name[-2:]}" 2417 season = int(season) 2418 elif sport_id == "WVB": 2419 season = f"{season_name[0:4]}" 2420 season = int(season) 2421 2422 table_data = soup.find( 2423 "table", 2424 {"id": "stat_grid", "class": "small_font dataTable table-bordered"}, 2425 ) 2426 2427 temp_table_headers = table_data.find("thead").find("tr").find_all("th") 2428 table_headers = [x.text for x in temp_table_headers] 2429 2430 del temp_table_headers 2431 2432 t_rows = table_data.find("tbody").find_all("tr", {"class": "text"}) 2433 for t in t_rows: 2434 p_last = "" 2435 p_first = "" 2436 t_cells = t.find_all("td") 2437 if "team" in t_cells[1].text.lower(): 2438 continue 2439 p_sortable = t_cells[1].get("data-order") 2440 if len(p_sortable) == 2: 2441 p_last, p_first = p_sortable.split(",") 2442 elif len(p_sortable) == 3: 2443 p_last, temp_name, p_first = p_sortable.split(",") 2444 p_last = f"{p_last} {temp_name}" 2445 2446 t_cells = [x.text.strip() for x in t_cells] 2447 t_cells = [x.replace(",", "") for x in t_cells] 2448 2449 temp_df = pd.DataFrame( 2450 data=[t_cells], 2451 columns=table_headers, 2452 # index=[0] 2453 ) 2454 2455 player_id = t.find("a").get("href") 2456 2457 # temp_df["player_url"] = f"https://stats.ncaa.org{player_id}" 2458 player_id = player_id.replace("/players", "").replace("/", "") 2459 2460 player_id = int(player_id) 2461 2462 temp_df["player_id"] = player_id 2463 temp_df["player_last_name"] = p_last.strip() 2464 temp_df["player_first_name"] = p_first.strip() 2465 2466 
stats_df_arr.append(temp_df) 2467 del temp_df 2468 2469 stats_df = pd.concat(stats_df_arr, ignore_index=True) 2470 stats_df = stats_df.replace("", None) 2471 2472 # stats_df["stat_id"] = stat_id 2473 stats_df["season"] = season 2474 stats_df["season_name"] = season_name 2475 stats_df["school_id"] = school_id 2476 stats_df["school_name"] = school_name 2477 stats_df["ncaa_division"] = ncaa_division 2478 stats_df["ncaa_division_formatted"] = ncaa_division_formatted 2479 stats_df["team_conference_name"] = team_conference_name 2480 stats_df["sport_id"] = sport_id 2481 stats_df["team_id"] = team_id 2482 2483 stats_df = stats_df.infer_objects() 2484 2485 stats_df.rename( 2486 columns={ 2487 "#": "player_jersey_number", 2488 "Player": "player_full_name", 2489 "Yr": "player_class", 2490 "Pos": "player_position", 2491 "Ht": "player_height", 2492 "S": "sets_played", 2493 "Kills": "kills", 2494 "Errors": "errors", 2495 "Total Attacks": "total_attacks", 2496 "Hit Pct": "hit%", 2497 "Assists": "assists", 2498 "Aces": "aces", 2499 "SErr": "serve_errors", 2500 "Digs": "digs", 2501 "RetAtt": "return_attacks", 2502 "RErr": "return_errors", 2503 "Block Solos": "solo_blocks", 2504 "Block Assists": "assisted_blocks", 2505 "BErr": "block_errors", 2506 "PTS": "points", 2507 "Trpl Dbl": "TRP_DBL", 2508 "Dbl Dbl": "DBL_DBL", 2509 "TB": "total_blocks", 2510 "SrvAtt": "serve_attempts", 2511 }, 2512 inplace=True, 2513 ) 2514 2515 for i in stats_df.columns: 2516 if i in stat_columns: 2517 pass 2518 elif "Attend" in stat_columns: 2519 pass 2520 else: 2521 raise ValueError( 2522 f"Unhandled column name {i}" 2523 ) 2524 stats_df = stats_df.reindex(columns=stat_columns) 2525 2526 stats_df = stats_df.infer_objects().fillna(0) 2527 stats_df = stats_df.astype( 2528 { 2529 "GP": "uint16", 2530 "GS": "uint16", 2531 "sets_played": "uint16", 2532 "kills": "uint16", 2533 "errors": "uint16", 2534 "total_attacks": "uint16", 2535 "hit%": "float32", 2536 "assists": "uint16", 2537 "aces": "uint16", 2538 
"serve_errors": "uint16", 2539 "digs": "uint16", 2540 "return_attacks": "uint16", 2541 "return_errors": "uint16", 2542 "solo_blocks": "uint16", 2543 "assisted_blocks": "uint16", 2544 "block_errors": "uint16", 2545 "points": "float32", 2546 "BHE": "uint16", 2547 "TRP_DBL": "uint16", 2548 "serve_attempts": "uint16", 2549 "total_blocks": "float32", 2550 "DBL_DBL": "uint16", 2551 "school_id": "uint32", 2552 } 2553 ) 2554 2555 stats_df["hit%"] = stats_df["hit%"].round(3) 2556 stats_df["points"] = stats_df["points"].round(1) 2557 2558 stats_df.to_csv( 2559 f"{home_dir}/.ncaa_stats_py/" + 2560 f"volleyball_{sport_id}/player_season_stats/" + 2561 f"{season:00d}_{school_id:00d}_player_season_stats.csv", 2562 index=False, 2563 ) 2564 2565 return stats_df 2566 2567 2568def get_volleyball_player_game_stats( 2569 player_id: int 2570) -> pd.DataFrame: 2571 """ 2572 Given a valid player ID and season, 2573 this function retrieves the game stats for this player at a game level. 2574 2575 Parameters 2576 ---------- 2577 `player_id` (int, mandatory): 2578 Required argument. 2579 Specifies the player you want game stats from. 2580 2581 `season` (int, mandatory): 2582 Required argument. 2583 Specifies the season you want game stats from. 2584 2585 Usage 2586 ---------- 2587 ```python 2588 2589 from ncaa_stats_py.volleyball import ( 2590 get_volleyball_player_game_stats 2591 ) 2592 2593 ######################################## 2594 # Women's volleyball # 2595 ######################################## 2596 2597 # Get the game stats of Zuzanna Wieczorek in 2024 (Idaho). 2598 print( 2599 "Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)." 2600 ) 2601 df = get_volleyball_player_game_stats(player_id=8432514) 2602 print(df) 2603 2604 # Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2). 2605 print( 2606 "Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)." 
def get_volleyball_player_game_stats(
    player_id: int
) -> pd.DataFrame:
    """
    Given a valid player ID,
    this function retrieves the game stats for this player at a game level.

    Parsed results are cached as a CSV file under
    `~/.ncaa_stats_py/volleyball_{sport_id}/player_game_stats/`.
    A cached copy (men's or women's) is returned
    if it is less than one day old.

    Parameters
    ----------
    `player_id` (int, mandatory):
        Required argument.
        Specifies the player you want game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import (
        get_volleyball_player_game_stats
    )

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
    df = get_volleyball_player_game_stats(player_id=8432514)
    print(df)

    # Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
    df = get_volleyball_player_game_stats(player_id=8145555)
    print(df)

    # Get the game stats of Lauren Gips in 2022 (Babson, D3).
    df = get_volleyball_player_game_stats(player_id=7876821)
    print(df)

    # Get the game stats of Rhett Robinson in 2021 (North Texas).
    df = get_volleyball_player_game_stats(player_id=7234089)
    print(df)

    # Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
    df = get_volleyball_player_game_stats(player_id=6822147)
    print(df)

    # Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
    df = get_volleyball_player_game_stats(player_id=6449807)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
    df = get_volleyball_player_game_stats(player_id=8253076)
    print(df)

    # Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
    df = get_volleyball_player_game_stats(player_id=7883459)
    print(df)

    # Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
    df = get_volleyball_player_game_stats(player_id=7413984)
    print(df)

    # Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
    df = get_volleyball_player_game_stats(player_id=7118023)
    print(df)

    # Get the game stats of Jaylen Jasper in 2020 (Stanford).
    df = get_volleyball_player_game_stats(player_id=6357146)
    print(df)

    # Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
    df = get_volleyball_player_game_stats(player_id=5816111)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a player's game logs
    in a given season.

    Raises
    ----------
    `ValueError`:
        If the stats table contains a column this function
        does not know how to handle.
    """
    # Function-scope import: the module only imports `mkdir` from `os`.
    from os import makedirs

    stat_columns = [
        "season",
        "sport_id",
        "game_id",
        "game_num",
        "player_id",
        "date",
        "opponent",
        "Result",
        "team_sets_won",
        "opponent_sets_won",
        "GP",
        # "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/players/{player_id}"

    # The sport is not known until the page is parsed, so look for a
    # cached file in both sport folders. A WVB file wins over a MVB file.
    games_df = None
    load_from_cache = False
    file_mod_datetime = datetime.today()
    for cached_sport in ("MVB", "WVB"):
        cache_dir = (
            f"{home_dir}/.ncaa_stats_py/volleyball_{cached_sport}/"
            + "player_game_stats/"
        )
        # Creates every missing parent directory in one race-free call.
        makedirs(cache_dir, exist_ok=True)
        cache_file = cache_dir + f"{player_id}_player_game_stats.csv"

        if exists(cache_file):
            games_df = pd.read_csv(cache_file).infer_objects()
            file_mod_datetime = datetime.fromtimestamp(
                getmtime(cache_file)
            )
            load_from_cache = True
        elif cached_sport == "WVB":
            logging.info("Could not find a WVB player game stats file")

    age = datetime.today() - file_mod_datetime

    if age.days >= 1:
        load_from_cache = False

    if load_from_cache:
        return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # The navigation tab links carry the sport code in their URLs.
    sport_id = ""
    table_navigation = soup.find("ul", {"class": "nav nav-tabs padding-nav"})
    for u in table_navigation.find_all("a"):
        url_str = (u.get("href") or "").upper()
        if "MVB" in url_str:
            sport_id = "MVB"
        elif "WVB" in url_str:
            sport_id = "WVB"

    if len(sport_id) == 0:
        # This should **never** be the case IRL,
        # but in case something weird happened and
        # we can't make a determination of if this is a
        # MVB player or a WVB player, set the sport ID to
        # "MVB" by default so we don't have other weirdness.
        logging.error(
            f"Could not determine if player ID {player_id} " +
            "is a MVB or a WVB player. " +
            "Because this cannot be determined, " +
            "we will make the automatic assumption that this is a MVB player."
        )
        sport_id = "MVB"

    # The game-by-game table is the second table with this class.
    table_data = soup.find_all(
        "table", {"class": "small_font dataTable table-bordered"}
    )[1]
    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]
    game_rows = table_data.find("tbody").find_all("tr")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    if sport_id == "MVB":
        # First two and last two characters of the season name,
        # e.g. a name shaped like "2023-24" becomes 2024.
        season = int(f"{season_name[0:2]}{season_name[-2:]}")
    else:
        # The first four characters of the season name.
        season = int(f"{season_name[0:4]}")

    # Characters stripped from every cell before it goes in the table.
    cell_cleanup = str.maketrans("", "", "*/\\")
    game_frames = []

    for t in game_rows:
        # Only rows whose `id` mentions a contest are actual games.
        # Rows without an `id` at all are skipped as well.
        row_id = t.get("id")
        if row_id is None or "contest" not in row_id:
            continue

        game_num = 1
        cell_tags = t.find_all("td")
        t_cells = [x.text.strip() for x in cell_tags]

        # A date such as "01/01/2024(2)" marks the
        # second game played on that day.
        g_date = t_cells[0]
        if "(" in g_date:
            g_date, game_num = g_date.split("(")
            g_date = g_date.strip()
            game_num = int(game_num.replace(")", ""))

        # NOTE: the opposing team's ID is intentionally not parsed.
        # It is not part of `stat_columns`, so it never reached the output.
        try:
            opp_team_name = cell_tags[1].find_all("img")[1].get("alt")
        except (AttributeError, IndexError):
            logging.info(
                "Couldn't find the opposition team name "
                + "for this row from an image element. "
                + "Attempting a backup method"
            )
            opp_team_name = t_cells[1]

        if opp_team_name is None or opp_team_name == "Defensive Stats":
            opp_team_name = t_cells[1]

        if "@" in opp_team_name:
            opp_team_name = opp_team_name.split("@")[0]

        # Strip the W/L/T marker so only the set score is left.
        # This also turns "canceled" into "canceed", hence the check below.
        result_str = (
            t_cells[2]
            .lower()
            .replace("w", "")
            .replace("l", "")
            .replace("t", "")
            .replace("\n", "")
            .replace("*", "")
            .strip()
        )

        if result_str in ("ppd", "", "canceed"):
            # Postponed, unplayed, or canceled game. Nothing to record.
            continue

        tm_score, opp_score = result_str.split("-")
        tm_score = int(tm_score)
        # Drop any trailing parenthetical; an extra column for it would
        # not be part of `stat_columns` and would be rejected below.
        opp_score = int(opp_score.split("(")[0].strip())

        temp_df = pd.DataFrame(
            data=[[x.translate(cell_cleanup) for x in t_cells]],
            columns=table_headers,
        )

        temp_df["team_sets_won"] = tm_score
        temp_df["opponent_sets_won"] = opp_score

        try:
            # The box score link looks like `/contests/{id}/box_score`.
            g_id = cell_tags[2].find("a").get("href")
            g_id = g_id.replace("/contests", "")
            g_id = g_id.replace("/box_score", "")
            g_id = g_id.replace("/", "")
            temp_df["game_id"] = int(g_id)
        except AttributeError:
            logging.warning(
                f"Could not find a game ID for a {g_date} game " +
                f"against {opp_team_name}."
            )
            temp_df["game_id"] = None

        temp_df.rename(
            columns={"Opponent": "opponent", "Date": "date"},
            inplace=True,
        )

        temp_df["date"] = datetime.strptime(g_date, "%m/%d/%Y").date()
        temp_df["game_num"] = game_num

        if len(opp_team_name) > 0:
            temp_df["opponent"] = opp_team_name

        duplicate_cols = temp_df.columns[temp_df.columns.duplicated()]
        temp_df.drop(columns=duplicate_cols, inplace=True)

        game_frames.append(temp_df)

    stats_df = pd.concat(game_frames, ignore_index=True)
    stats_df = stats_df.replace("/", "", regex=True)
    stats_df = stats_df.replace("", np.nan)
    stats_df = stats_df.infer_objects()

    stats_df["player_id"] = player_id
    stats_df["sport_id"] = sport_id
    stats_df["season"] = season

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "TotalAttacks": "total_attacks",
            "Hit Pct": "hit%",
            "HitPct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "BlockSolos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BlockAssists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # These stats *don't* exist in every season,
    # so add them as empty columns when they are missing.
    if "serve_attempts" not in stats_df.columns:
        stats_df["serve_attempts"] = None

    if "return_attacks" not in stats_df.columns:
        stats_df["return_attacks"] = None

    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "serve_attempts": "uint16",
        }
    )

    # Without a total blocks column in the table, start from 0 so games
    # without any blocks are not left as NaN.
    if "total_blocks" not in stats_df.columns:
        stats_df["total_blocks"] = 0.0

    # An assisted block counts as half a block.
    stats_df.loc[
        (stats_df["solo_blocks"] > 0) | (stats_df["assisted_blocks"] > 0),
        "total_blocks"
    ] = (
        stats_df["solo_blocks"] +
        (stats_df["assisted_blocks"] / 2)
    )
    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")

    # Columns used to calculate double doubles and triple doubles.
    # Credits:
    # https://en.wikipedia.org/wiki/Double_(volleyball)
    # https://stackoverflow.com/a/54381918
    double_stats_arr = [
        "aces",
        "kills",
        "total_blocks",
        "digs",
        "assists",
    ]
    # Number of those categories with 10 or more in each game.
    double_digit_count = (stats_df[double_stats_arr] >= 10).sum(axis=1)
    stats_df["DBL_DBL"] = (double_digit_count >= 2).astype(int)
    stats_df["TRP_DBL"] = (double_digit_count >= 3).astype(int)

    # Fail loudly on columns we do not recognize, so a site change
    # is noticed. Attendance columns are tolerated and dropped below.
    for i in stats_df.columns:
        if i in stat_columns or "Attend" in str(i):
            continue
        raise ValueError(
            f"Unhandled column name {i}"
        )
    stats_df = stats_df.reindex(columns=stat_columns)

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"
        + "player_game_stats/"
        + f"{player_id}_player_game_stats.csv",
        index=False,
    )
    return stats_df
3217 print( 3218 "Get the game stats of the " 3219 + "2024 NCAA D1 Women's volleyball National Championship game" 3220 ) 3221 df = get_volleyball_game_player_stats(6080706) 3222 print(df) 3223 3224 # Get the game stats of a September 14th, 2024 3225 # game between the UNC Asheville Bulldogs and the Iona Gaels. 3226 print( 3227 "Get the game stats of a September 14th, 2024 " 3228 + "game between the UNC Asheville Bulldogs " 3229 + "and the Iona Gaels" 3230 ) 3231 df = get_volleyball_game_player_stats(5670752) 3232 print(df) 3233 3234 # Get the game stats of a September 16th, 2023 3235 # game between the Saginaw Valley Cardinals 3236 # and the Lake Superior St. Lakes. 3237 print( 3238 "Get the game stats of a September 16th, 2023 " 3239 + "game between the Saginaw Valley Cardinals " 3240 + "and the Lake Superior St. Lakes." 3241 ) 3242 df = get_volleyball_game_player_stats(3243563) 3243 print(df) 3244 3245 # Get the game stats of a October 15th, 2022 3246 # game between the Macalester Scots 3247 # and the St. Scholastica Saints (D3). 3248 print( 3249 "Get the game stats of a October 15th, 2022 " 3250 + "game between the Macalester Scots and " 3251 + "the St. Scholastica Saints (D3)." 3252 ) 3253 df = get_volleyball_game_player_stats(2307684) 3254 print(df) 3255 3256 # Get the game stats of a October 24th, 2021 3257 # game between the Howard Bison and the UMES Hawks. 3258 print( 3259 "Get the game stats of a October 24th, 2021 " 3260 + "game between the Howard Bison and the UMES Hawks." 3261 ) 3262 df = get_volleyball_game_player_stats(2113627) 3263 print(df) 3264 3265 # Get the game stats of a March 5th, 2021 3266 # game between the Notre Dame (OH) Falcons 3267 # and the Alderson Broaddus Battlers. 3268 print( 3269 "Get the game stats of a March 5th, 2021 " 3270 + "game between the Notre Dame (OH) Falcons " 3271 + "and the Alderson Broaddus Battlers." 
3272 ) 3273 df = get_volleyball_game_player_stats(2005442) 3274 print(df) 3275 3276 # Get the game stats of a November 14th, 2019 3277 # game between the Wittenberg Tigers 3278 # and the Muskingum Fighting Muskies (D3). 3279 print( 3280 "Get the game stats of a November 14th, 2019 " 3281 + "game between the Wittenberg Tigers and " 3282 + "the Muskingum Fighting Muskies (D3)." 3283 ) 3284 df = get_volleyball_game_player_stats(1815514) 3285 print(df) 3286 3287 ######################################## 3288 # Men's volleyball # 3289 ######################################## 3290 3291 # Get the game stats of the 3292 # 2024 NCAA D1 Men's Volleyball National Championship game. 3293 print( 3294 "Get the game stats of the " 3295 + "2024 NCAA D1 Men's volleyball National Championship game" 3296 ) 3297 df = get_volleyball_game_player_stats(5282845) 3298 print(df) 3299 3300 # Get the game stats of a January 14th, 2025 3301 # game between the Kean Cougars and the Arcadia Knights. 3302 print( 3303 "Get the game stats of a January 14th, 2025 " 3304 + "game between the UNC Asheville Bulldogs " 3305 + "and the Iona Gaels" 3306 ) 3307 df = get_volleyball_game_player_stats(6081598) 3308 print(df) 3309 3310 # Get the game stats of a January 13th, 2024 3311 # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders. 3312 print( 3313 "Get the game stats of a September 14th, 2024 " 3314 + "game between the Purdue Fort Wayne Mastodons " 3315 + "and the NJIT Highlanders." 3316 ) 3317 df = get_volleyball_game_player_stats(4473231) 3318 print(df) 3319 3320 # Get the game stats of a January 21st, 2023 3321 # game between the Baruch Bearcats and the Widener Pride. 3322 print( 3323 "Get the game stats of a January 21st, 2023 " 3324 + "game between the Baruch Bearcats and the Widener Pride." 3325 ) 3326 df = get_volleyball_game_player_stats(2355323) 3327 print(df) 3328 3329 # Get the game stats of a February 24th, 2022 3330 # game between the Ball St. 
Cardinals and the Lindenwood Lions. 3331 print( 3332 "Get the game stats of a February 24th, 2022 " 3333 + "game between the Ball St. Cardinals and the Lindenwood Lions." 3334 ) 3335 df = get_volleyball_game_player_stats(2162239) 3336 print(df) 3337 3338 # Get the game stats of a March 20th, 2021 3339 # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals. 3340 print( 3341 "Get the game stats of a March 20th, 2021 " 3342 + "game between the SUNY New Paltz Hawks " 3343 + "and the St. John Fisher Cardinals." 3344 ) 3345 df = get_volleyball_game_player_stats(2059180) 3346 print(df) 3347 3348 # Get the game stats of a March 1th, 2020 3349 # game between the USC Trojans and the CUI Golden Eagles. 3350 print( 3351 "Get the game stats of a March 1th, 2020 " 3352 + "game between the USC Trojans and the CUI Golden Eagles." 3353 ) 3354 df = get_volleyball_game_player_stats(1820058) 3355 print(df) 3356 3357 # Get the game stats of an April 4th, 2019 3358 # game between the Lesly Lynx and the Pine Manor Gators (D3). 3359 print( 3360 "Get the game stats of an April 4th, 2019 " 3361 + "game between the Lesly Lynx and the Pine Manor Gators (D3)." 3362 ) 3363 df = get_volleyball_game_player_stats(1723131) 3364 print(df) 3365 3366 3367 ``` 3368 3369 Returns 3370 ---------- 3371 A pandas `DataFrame` object with player game stats in a given game. 
3372 3373 """ 3374 load_from_cache = True 3375 3376 sport_id = "" 3377 season = 0 3378 3379 MVB_teams_df = load_volleyball_teams(get_mens_data=True) 3380 MVB_team_ids_arr = MVB_teams_df["team_id"].to_list() 3381 3382 WVB_teams_df = load_volleyball_teams(get_mens_data=False) 3383 WVB_team_ids_arr = WVB_teams_df["team_id"].to_list() 3384 3385 stats_df = pd.DataFrame() 3386 stats_df_arr = [] 3387 3388 temp_df = pd.DataFrame() 3389 home_dir = expanduser("~") 3390 home_dir = _format_folder_str(home_dir) 3391 3392 stat_columns = [ 3393 "season", 3394 "sport_id", 3395 "game_datetime", 3396 "game_id", 3397 "team_id", 3398 "team_name", 3399 "player_id", 3400 "player_num", 3401 "player_full_name", 3402 "player_position", 3403 "GP", 3404 "sets_played", 3405 "kills", 3406 "errors", 3407 "total_attacks", 3408 "hit%", 3409 "assists", 3410 "aces", 3411 "serve_errors", 3412 "digs", 3413 "return_attacks", 3414 "return_errors", 3415 "solo_blocks", 3416 "assisted_blocks", 3417 "block_errors", 3418 "total_blocks", 3419 "points", 3420 "BHE", 3421 "DBL_DBL", 3422 "TRP_DBL", 3423 ] 3424 3425 url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats" 3426 3427 if exists(f"{home_dir}/.ncaa_stats_py/"): 3428 pass 3429 else: 3430 mkdir(f"{home_dir}/.ncaa_stats_py/") 3431 3432 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"): 3433 pass 3434 else: 3435 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/") 3436 3437 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/"): 3438 pass 3439 else: 3440 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/") 3441 3442 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"): 3443 pass 3444 else: 3445 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/") 3446 3447 if exists( 3448 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 3449 + f"{game_id}_player_game_stats.csv" 3450 ): 3451 games_df = pd.read_csv( 3452 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 
3453 + f"{game_id}_player_game_stats.csv" 3454 ) 3455 games_df = games_df.infer_objects() 3456 file_mod_datetime = datetime.fromtimestamp( 3457 getmtime( 3458 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 3459 + f"{game_id}_player_game_stats.csv" 3460 ) 3461 ) 3462 load_from_cache = True 3463 else: 3464 file_mod_datetime = datetime.today() 3465 load_from_cache = False 3466 3467 if exists(f"{home_dir}/.ncaa_stats_py/"): 3468 pass 3469 else: 3470 mkdir(f"{home_dir}/.ncaa_stats_py/") 3471 3472 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"): 3473 pass 3474 else: 3475 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/") 3476 3477 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/"): 3478 pass 3479 else: 3480 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/") 3481 3482 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"): 3483 pass 3484 else: 3485 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/") 3486 3487 if exists( 3488 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3489 + f"{game_id}_player_game_stats.csv" 3490 ): 3491 games_df = pd.read_csv( 3492 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3493 + f"{game_id}_player_game_stats.csv" 3494 ) 3495 games_df = games_df.infer_objects() 3496 file_mod_datetime = datetime.fromtimestamp( 3497 getmtime( 3498 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3499 + f"{game_id}_player_game_stats.csv" 3500 ) 3501 ) 3502 load_from_cache = True 3503 else: 3504 logging.info("Could not find a WVB player game stats file") 3505 3506 now = datetime.today() 3507 3508 age = now - file_mod_datetime 3509 3510 if age.days >= 35: 3511 load_from_cache = False 3512 3513 if load_from_cache is True: 3514 return games_df 3515 3516 response = _get_webpage(url=url) 3517 soup = BeautifulSoup(response.text, features="lxml") 3518 3519 info_table = soup.find( 3520 "td", 3521 { 3522 "style": "padding: 0px 30px 0px 
30px", 3523 "class": "d-none d-md-table-cell" 3524 } 3525 ).find( 3526 "table", 3527 {"style": "border-collapse: collapse"} 3528 ) 3529 3530 info_table_rows = info_table.find_all("tr") 3531 3532 game_date_str = info_table_rows[3].find("td").text 3533 if "TBA" in game_date_str: 3534 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 3535 elif "tba" in game_date_str: 3536 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 3537 elif "TBD" in game_date_str: 3538 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 3539 elif "tbd" in game_date_str: 3540 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 3541 elif ( 3542 "tbd" not in game_date_str.lower() and 3543 ":" not in game_date_str.lower() 3544 ): 3545 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 3546 else: 3547 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p') 3548 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 3549 game_date_str = game_datetime.isoformat() 3550 del game_datetime 3551 3552 table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"}) 3553 3554 for box in table_boxes: 3555 t_header = box.find( 3556 "div", {"class": "card-header"} 3557 ).find( 3558 "div", {"class": "row"} 3559 ) 3560 3561 t_header_str = t_header.text 3562 t_header_str = t_header_str.replace("Period Stats", "") 3563 t_header_str = t_header_str.replace("\n", "") 3564 t_header_str = t_header_str.strip() 3565 3566 team_id = t_header.find("a").get("href") 3567 team_id = team_id.replace("/teams", "") 3568 team_id = team_id.replace("/", "") 3569 team_id = int(team_id) 3570 3571 table_data = box.find( 3572 "table", 3573 {"class": "display dataTable small_font"} 3574 ) 3575 table_headers = box.find("thead").find_all("th") 3576 table_headers = [x.text for x in table_headers] 3577 3578 temp_t_rows = table_data.find("tbody") 3579 temp_t_rows = temp_t_rows.find_all("tr") 3580 3581 spec_stats_df = pd.DataFrame() 3582 
spec_stats_df_arr = [] 3583 for t in temp_t_rows: 3584 # row_id = t.get("id") 3585 game_played = 1 3586 # game_started = 0 3587 3588 try: 3589 player_id = t.find("a").get("href") 3590 player_id = player_id.replace("/players", "") 3591 player_id = player_id.replace("/player", "") 3592 player_id = player_id.replace("/", "") 3593 except Exception as e: 3594 logging.debug( 3595 "Could not replace player IDs. " + 3596 f"Full exception: `{e}`" 3597 ) 3598 3599 t_cells = t.find_all("td") 3600 p_name = t_cells[1].text.replace("\n", "") 3601 p_name = p_name.strip() 3602 3603 if t_header_str in p_name: 3604 continue 3605 elif p_name.lower() == "team": 3606 continue 3607 # if "\xa0" in p_name: 3608 # game_started = 0 3609 3610 t_cells = [x.text.strip() for x in t_cells] 3611 player_id = int(player_id) 3612 3613 temp_df = pd.DataFrame( 3614 data=[t_cells], 3615 columns=table_headers 3616 ) 3617 3618 duplicate_cols = temp_df.columns[temp_df.columns.duplicated()] 3619 temp_df.drop(columns=duplicate_cols, inplace=True) 3620 3621 temp_df["player_id"] = player_id 3622 temp_df["GP"] = game_played 3623 # temp_df["GS"] = game_started 3624 3625 spec_stats_df_arr.append(temp_df) 3626 del temp_df 3627 3628 spec_stats_df = pd.concat( 3629 spec_stats_df_arr, 3630 ignore_index=True 3631 ) 3632 3633 if team_id in MVB_team_ids_arr: 3634 sport_id = "MVB" 3635 df = MVB_teams_df[MVB_teams_df["team_id"] == team_id] 3636 season = df["season"].iloc[0] 3637 elif team_id in WVB_team_ids_arr: 3638 sport_id = "WVB" 3639 df = WVB_teams_df[WVB_teams_df["team_id"] == team_id] 3640 season = df["season"].iloc[0] 3641 else: 3642 raise ValueError( 3643 f"Unhandled team ID {team_id}" 3644 ) 3645 3646 spec_stats_df["team_id"] = team_id 3647 spec_stats_df["team_name"] = t_header_str 3648 stats_df_arr.append(spec_stats_df) 3649 del spec_stats_df 3650 3651 stats_df = pd.concat(stats_df_arr) 3652 stats_df["season"] = season 3653 stats_df.rename( 3654 columns={ 3655 "#": "player_num", 3656 "Name": 
"player_full_name", 3657 "P": "player_position", 3658 "Ht": "player_height", 3659 "S": "sets_played", 3660 "Kills": "kills", 3661 "Errors": "errors", 3662 "Total Attacks": "total_attacks", 3663 "TotalAttacks": "total_attacks", 3664 "Hit Pct": "hit%", 3665 "HitPct": "hit%", 3666 "Assists": "assists", 3667 "Aces": "aces", 3668 "SErr": "serve_errors", 3669 "Digs": "digs", 3670 "RetAtt": "return_attacks", 3671 "RErr": "return_errors", 3672 "Block Solos": "solo_blocks", 3673 "BlockSolos": "solo_blocks", 3674 "Block Assists": "assisted_blocks", 3675 "BlockAssists": "assisted_blocks", 3676 "BErr": "block_errors", 3677 "PTS": "points", 3678 "Trpl Dbl": "TRP_DBL", 3679 "Dbl Dbl": "DBL_DBL", 3680 "TB": "total_blocks", 3681 "SrvAtt": "serve_attempts", 3682 }, 3683 inplace=True, 3684 ) 3685 3686 if "return_attacks" not in stats_df.columns: 3687 stats_df["return_attacks"] = None 3688 3689 if "serve_attempts" not in stats_df.columns: 3690 stats_df["serve_attempts"] = None 3691 3692 stats_df = stats_df.infer_objects().fillna(0) 3693 stats_df = stats_df.astype( 3694 { 3695 "GP": "uint16", 3696 "sets_played": "uint16", 3697 # "MS": "uint16", 3698 "kills": "uint16", 3699 "errors": "uint16", 3700 "total_attacks": "uint16", 3701 "hit%": "float32", 3702 "assists": "uint16", 3703 "aces": "uint16", 3704 "serve_errors": "uint16", 3705 "digs": "uint16", 3706 "return_attacks": "uint16", 3707 "return_errors": "uint16", 3708 "solo_blocks": "uint16", 3709 "assisted_blocks": "uint16", 3710 "block_errors": "uint16", 3711 # "total_blocks": "uint16", 3712 "points": "float32", 3713 "BHE": "uint16", 3714 "serve_attempts": "uint16", 3715 # "DBL_DBL": "uint8", 3716 # "TRP_DBL": "uint8", 3717 } 3718 ) 3719 # print(stats_df.columns) 3720 stats_df["game_datetime"] = game_date_str 3721 stats_df["sport_id"] = sport_id 3722 3723 stats_df["game_id"] = game_id 3724 3725 stats_df["total_blocks"] = ( 3726 stats_df["solo_blocks"] + 3727 (stats_df["assisted_blocks"] / 2) 3728 ) 3729 stats_df["total_blocks"] = 
def get_volleyball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all team game stats, if possible.

    Team stats are built by summing the player game stats returned by
    `get_volleyball_game_player_stats()` for each team in the game,
    and then recalculating the team hitting percentage (`hit%`)
    from the summed kills, errors, and total attacks.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_game_team_stats

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(6080706)
    print(df)

    # Get the game stats of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the game stats of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_game_team_stats(5670752)
    print(df)

    # Get the game stats of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakers.
    print(
        "Get the game stats of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakers."
    )
    df = get_volleyball_game_team_stats(3243563)
    print(df)

    # Get the game stats of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the game stats of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_game_team_stats(2307684)
    print(df)

    # Get the game stats of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the game stats of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_game_team_stats(2113627)
    print(df)

    # Get the game stats of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the game stats of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_game_team_stats(2005442)
    print(df)

    # Get the game stats of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the game stats of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_game_team_stats(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(5282845)
    print(df)

    # Get the game stats of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the game stats of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_game_team_stats(6081598)
    print(df)

    # Get the game stats of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the game stats of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_game_team_stats(4473231)
    print(df)

    # Get the game stats of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the game stats of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_game_team_stats(2355323)
    print(df)

    # Get the game stats of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the game stats of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_game_team_stats(2162239)
    print(df)

    # Get the game stats of a March 20th, 2021
    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
    print(
        "Get the game stats of a March 20th, 2021 "
        + "game between the SUNY New Paltz Hawks "
        + "and the St. John Fisher Cardinals."
    )
    df = get_volleyball_game_team_stats(2059180)
    print(df)

    # Get the game stats of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the game stats of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_game_team_stats(1820058)
    print(df)

    # Get the game stats of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the game stats of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_game_team_stats(1723131)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.
    There is one row per team, identified by the
    `season`, `sport_id`, `game_datetime`, `game_id`,
    `team_id`, and `team_name` columns,
    followed by the summed counting stats and a recalculated `hit%` column.
    `hit%` is `NaN` for a team with zero total attacks.

    """
    # Columns that identify a single team in a single game.
    group_columns = [
        "season",
        "sport_id",
        "game_datetime",
        "game_id",
        "team_id",
        "team_name",
    ]
    # Counting stats that can be added up across players.
    # `hit%` is intentionally not in this list: it is a rate stat,
    # and must be recalculated from the team totals instead of summed.
    summed_columns = [
        "sets_played",
        "kills",
        "errors",
        "total_attacks",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "DBL_DBL",
        "TRP_DBL",
    ]

    df = get_volleyball_game_player_stats(game_id=game_id)
    df = df.infer_objects()

    stats_df = df.groupby(
        group_columns,
        as_index=False,
    ).agg({column: "sum" for column in summed_columns})

    # Freshly downloaded player stats use unsigned integer columns,
    # and the sums stay unsigned. Subtracting unsigned integers wraps around
    # when a team has more errors than kills, which would turn a negative
    # hitting percentage into a huge positive number.
    # Converting to floats first keeps the subtraction signed.
    kills = stats_df["kills"].astype("float64")
    errors = stats_df["errors"].astype("float64")
    total_attacks = stats_df["total_attacks"].astype("float64")

    # A team with zero attacks has no defined hitting percentage.
    # Masking the zero out yields `NaN` instead of `inf`/`-inf`.
    total_attacks = total_attacks.where(total_attacks != 0)

    stats_df["hit%"] = (kills - errors) / total_attacks
    return stats_df
4025 print( 4026 "Get the play-by-play data of a September 14th, 2024 " 4027 + "game between the UNC Asheville Bulldogs " 4028 + "and the Iona Gaels" 4029 ) 4030 df = get_volleyball_raw_pbp(5670752) 4031 print(df) 4032 4033 # Get the play-by-play data of a September 16th, 2023 4034 # game between the Saginaw Valley Cardinals 4035 # and the Lake Superior St. Lakes. 4036 print( 4037 "Get the play-by-play data of a September 16th, 2023 " 4038 + "game between the Saginaw Valley Cardinals " 4039 + "and the Lake Superior St. Lakes." 4040 ) 4041 df = get_volleyball_raw_pbp(3243563) 4042 print(df) 4043 4044 # Get the play-by-play data of a October 15th, 2022 4045 # game between the Macalester Scots 4046 # and the St. Scholastica Saints (D3). 4047 print( 4048 "Get the play-by-play data of a October 15th, 2022 " 4049 + "game between the Macalester Scots and " 4050 + "the St. Scholastica Saints (D3)." 4051 ) 4052 df = get_volleyball_raw_pbp(2307684) 4053 print(df) 4054 4055 # Get the play-by-play data of a October 24th, 2021 4056 # game between the Howard Bison and the UMES Hawks. 4057 print( 4058 "Get the play-by-play data of a October 24th, 2021 " 4059 + "game between the Howard Bison and the UMES Hawks." 4060 ) 4061 df = get_volleyball_raw_pbp(2113627) 4062 print(df) 4063 4064 # Get the play-by-play data of a March 5th, 2021 4065 # game between the Notre Dame (OH) Falcons 4066 # and the Alderson Broaddus Battlers. 4067 print( 4068 "Get the play-by-play data of a March 5th, 2021 " 4069 + "game between the Notre Dame (OH) Falcons " 4070 + "and the Alderson Broaddus Battlers." 4071 ) 4072 df = get_volleyball_raw_pbp(2005442) 4073 print(df) 4074 4075 # Get the play-by-play data of a November 14th, 2019 4076 # game between the Wittenberg Tigers 4077 # and the Muskingum Fighting Muskies (D3). 4078 print( 4079 "Get the play-by-play data of a November 14th, 2019 " 4080 + "game between the Wittenberg Tigers and " 4081 + "the Muskingum Fighting Muskies (D3)." 
4082 ) 4083 df = get_volleyball_raw_pbp(1815514) 4084 print(df) 4085 4086 ######################################## 4087 # Men's volleyball # 4088 ######################################## 4089 4090 # Get the play-by-play data of the 4091 # 2024 NCAA D1 Men's Volleyball National Championship game. 4092 print( 4093 "Get the play-by-play data of the " 4094 + "2024 NCAA D1 Men's volleyball National Championship game" 4095 ) 4096 df = get_volleyball_raw_pbp(5282845) 4097 print(df) 4098 4099 # Get the play-by-play data of a January 14th, 2025 4100 # game between the Kean Cougars and the Arcadia Knights. 4101 print( 4102 "Get the play-by-play data of a January 14th, 2025 " 4103 + "game between the UNC Asheville Bulldogs " 4104 + "and the Iona Gaels" 4105 ) 4106 df = get_volleyball_raw_pbp(6081598) 4107 print(df) 4108 4109 # Get the play-by-play data of a January 13th, 2024 4110 # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders. 4111 print( 4112 "Get the play-by-play data of a September 14th, 2024 " 4113 + "game between the Purdue Fort Wayne Mastodons " 4114 + "and the NJIT Highlanders." 4115 ) 4116 df = get_volleyball_raw_pbp(4473231) 4117 print(df) 4118 4119 # Get the play-by-play data of a January 21st, 2023 4120 # game between the Baruch Bearcats and the Widener Pride. 4121 print( 4122 "Get the play-by-play data of a January 21st, 2023 " 4123 + "game between the Baruch Bearcats and the Widener Pride." 4124 ) 4125 df = get_volleyball_raw_pbp(2355323) 4126 print(df) 4127 4128 # Get the play-by-play data of a February 24th, 2022 4129 # game between the Ball St. Cardinals and the Lindenwood Lions. 4130 print( 4131 "Get the play-by-play data of a February 24th, 2022 " 4132 + "game between the Ball St. Cardinals and the Lindenwood Lions." 4133 ) 4134 df = get_volleyball_raw_pbp(2162239) 4135 print(df) 4136 4137 # Get the play-by-play data of a March 7th, 2021 4138 # game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets. 
4139 print( 4140 "Get the play-by-play data of a March 7th, 2021 " 4141 + "game between the Adrian Bulldogs " 4142 + "and the Baldwin Wallace Yellow Jackets." 4143 ) 4144 df = get_volleyball_raw_pbp(1998844) 4145 print(df) 4146 4147 # Get the play-by-play data of a March 1th, 2020 4148 # game between the USC Trojans and the CUI Golden Eagles. 4149 print( 4150 "Get the play-by-play data of a March 1th, 2020 " 4151 + "game between the USC Trojans and the CUI Golden Eagles." 4152 ) 4153 df = get_volleyball_raw_pbp(1820058) 4154 print(df) 4155 4156 # Get the play-by-play data of an April 4th, 2019 4157 # game between the Lesly Lynx and the Pine Manor Gators (D3). 4158 print( 4159 "Get the play-by-play data of an April 4th, 2019 " 4160 + "game between the Lesly Lynx and the Pine Manor Gators (D3)." 4161 ) 4162 df = get_volleyball_raw_pbp(1723131) 4163 print(df) 4164 4165 ``` 4166 4167 Returns 4168 ---------- 4169 A pandas `DataFrame` object with a play-by-play (PBP) data in a given game. 4170 4171 """ 4172 load_from_cache = True 4173 # is_overtime = False 4174 4175 sport_id = "" 4176 season = 0 4177 away_score = 0 4178 home_score = 0 4179 4180 home_sets_won = 0 4181 away_sets_won = 0 4182 4183 home_set_1_score = 0 4184 away_set_1_score = 0 4185 4186 home_set_2_score = 0 4187 away_set_2_score = 0 4188 4189 home_set_3_score = 0 4190 away_set_3_score = 0 4191 4192 home_set_4_score = 0 4193 away_set_4_score = 0 4194 4195 home_set_5_score = 0 4196 away_set_5_score = 0 4197 4198 home_cumulative_score = 0 4199 away_cumulative_score = 0 4200 4201 MVB_teams_df = load_volleyball_teams(get_mens_data=True) 4202 MVB_team_ids_arr = MVB_teams_df["team_id"].to_list() 4203 4204 WVB_teams_df = load_volleyball_teams(get_mens_data=False) 4205 WVB_team_ids_arr = WVB_teams_df["team_id"].to_list() 4206 4207 pbp_df = pd.DataFrame() 4208 pbp_df_arr = [] 4209 temp_df = pd.DataFrame() 4210 4211 temp_df = pd.DataFrame() 4212 home_dir = expanduser("~") 4213 home_dir = _format_folder_str(home_dir) 
4214 4215 stat_columns = [ 4216 "season", 4217 "game_id", 4218 "sport_id", 4219 "game_datetime", 4220 "set_num", 4221 "event_num", 4222 "event_team", 4223 "event_text", 4224 "is_scoring_play", 4225 "home_set_score", 4226 "away_set_score", 4227 "is_extra_points", 4228 "home_cumulative_score", 4229 "away_cumulative_score", 4230 "home_sets_won", 4231 "away_sets_won", 4232 "stadium_name", 4233 "attendance", 4234 "away_team_id", 4235 "away_team_name", 4236 "home_team_id", 4237 "home_team_name", 4238 "home_set_1_score", 4239 "away_set_1_score", 4240 "home_set_2_score", 4241 "away_set_2_score", 4242 "home_set_3_score", 4243 "away_set_3_score", 4244 "home_set_4_score", 4245 "away_set_4_score", 4246 "home_set_5_score", 4247 "away_set_5_score", 4248 ] 4249 4250 url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play" 4251 4252 if exists(f"{home_dir}/.ncaa_stats_py/"): 4253 pass 4254 else: 4255 mkdir(f"{home_dir}/.ncaa_stats_py/") 4256 4257 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"): 4258 pass 4259 else: 4260 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/") 4261 4262 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"): 4263 pass 4264 else: 4265 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/") 4266 4267 if exists( 4268 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4269 + f"{game_id}_raw_pbp.csv" 4270 ): 4271 games_df = pd.read_csv( 4272 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4273 + f"{game_id}_raw_pbp.csv" 4274 ) 4275 games_df = games_df.infer_objects() 4276 file_mod_datetime = datetime.fromtimestamp( 4277 getmtime( 4278 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4279 + f"{game_id}_raw_pbp.csv" 4280 ) 4281 ) 4282 load_from_cache = True 4283 else: 4284 file_mod_datetime = datetime.today() 4285 load_from_cache = False 4286 4287 if exists(f"{home_dir}/.ncaa_stats_py/"): 4288 pass 4289 else: 4290 mkdir(f"{home_dir}/.ncaa_stats_py/") 4291 4292 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"): 4293 
pass 4294 else: 4295 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/") 4296 4297 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"): 4298 pass 4299 else: 4300 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/") 4301 4302 if exists( 4303 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4304 + f"{game_id}_raw_pbp.csv" 4305 ): 4306 games_df = pd.read_csv( 4307 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4308 + f"{game_id}_raw_pbp.csv" 4309 ) 4310 games_df = games_df.infer_objects() 4311 file_mod_datetime = datetime.fromtimestamp( 4312 getmtime( 4313 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4314 + f"{game_id}_raw_pbp.csv" 4315 ) 4316 ) 4317 load_from_cache = True 4318 else: 4319 logging.info("Could not find a WVB player game stats file") 4320 4321 now = datetime.today() 4322 4323 age = now - file_mod_datetime 4324 4325 if age.days >= 35: 4326 load_from_cache = False 4327 4328 if load_from_cache is True: 4329 return games_df 4330 4331 response = _get_webpage(url=url) 4332 soup = BeautifulSoup(response.text, features="lxml") 4333 4334 info_table = soup.find( 4335 "td", 4336 { 4337 "style": "padding: 0px 30px 0px 30px", 4338 "class": "d-none d-md-table-cell" 4339 } 4340 ).find( 4341 "table", 4342 {"style": "border-collapse: collapse"} 4343 ) 4344 4345 info_table_rows = info_table.find_all("tr") 4346 4347 game_date_str = info_table_rows[3].find("td").text 4348 if "TBA" in game_date_str: 4349 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 4350 elif "tba" in game_date_str: 4351 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 4352 elif "TBD" in game_date_str: 4353 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 4354 elif "tbd" in game_date_str: 4355 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 4356 elif ( 4357 "tbd" not in game_date_str.lower() and 4358 ":" not in game_date_str.lower() 4359 ): 4360 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 
4361 else: 4362 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p') 4363 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 4364 game_date_str = game_datetime.isoformat() 4365 # del game_datetime 4366 4367 stadium_str = info_table_rows[4].find("td").text 4368 4369 attendance_str = info_table_rows[5].find("td").text 4370 attendance_int = re.findall( 4371 r"([0-9\,]+)", 4372 attendance_str 4373 )[0] 4374 attendance_int = attendance_int.replace(",", "") 4375 attendance_int = int(attendance_int) 4376 4377 del attendance_str 4378 team_cards = soup.find_all( 4379 "td", 4380 { 4381 "valign": "center", 4382 "class": "grey_text d-none d-sm-table-cell" 4383 } 4384 ) 4385 4386 away_url = team_cards[0].find_all("a") 4387 away_url = away_url[0] 4388 home_url = team_cards[1].find_all("a") 4389 home_url = home_url[0] 4390 4391 away_team_name = away_url.text 4392 home_team_name = home_url.text 4393 4394 away_team_id = away_url.get("href") 4395 home_team_id = home_url.get("href") 4396 4397 away_team_id = away_team_id.replace("/teams", "") 4398 away_team_id = away_team_id.replace("/team", "") 4399 away_team_id = away_team_id.replace("/", "") 4400 away_team_id = int(away_team_id) 4401 4402 home_team_id = home_team_id.replace("/teams", "") 4403 home_team_id = home_team_id.replace("/team", "") 4404 home_team_id = home_team_id.replace("/", "") 4405 home_team_id = int(home_team_id) 4406 4407 if home_team_id in MVB_team_ids_arr: 4408 sport_id = "MVB" 4409 temp_df = MVB_teams_df[MVB_teams_df["team_id"] == home_team_id] 4410 season = temp_df["season"].iloc[0] 4411 del temp_df 4412 elif home_team_id in WVB_team_ids_arr: 4413 sport_id = "WVB" 4414 temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id] 4415 season = temp_df["season"].iloc[0] 4416 del temp_df 4417 # This should never be the case, 4418 # but if something goes very horribly wrong, 4419 # double check the away team ID to 4420 # the MVB and WVB team ID list. 
4421 elif away_team_id in MVB_team_ids_arr: 4422 sport_id = "MVB" 4423 temp_df = MVB_teams_df[MVB_teams_df["team_id"] == away_team_id] 4424 season = temp_df["season"].iloc[0] 4425 del temp_df 4426 elif away_team_id in WVB_team_ids_arr: 4427 sport_id = "WVB" 4428 temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id] 4429 season = temp_df["season"].iloc[0] 4430 del temp_df 4431 # If we get to this, we are in a code red situation. 4432 # "SHUT IT DOWN" - Gordon Ramsay 4433 else: 4434 raise ValueError( 4435 "Could not identify if this is a " + 4436 "MVB or WVB game based on team IDs. " 4437 ) 4438 4439 section_cards = soup.find_all( 4440 "div", 4441 {"class": "row justify-content-md-center w-100"} 4442 ) 4443 4444 if len(section_cards) == 0: 4445 logging.warning( 4446 f"Could not find any plays for game ID `{game_id}`. " + 4447 "Returning empty DataFrame." 4448 ) 4449 df = pd.DataFrame(columns=stat_columns) 4450 return df 4451 4452 # play_id = 0 4453 for card in section_cards: 4454 is_extra_points = False 4455 event_text = "" 4456 4457 set_num_str = card.find( 4458 "div", 4459 {"class": "card-header"} 4460 ).text 4461 set_num = re.findall( 4462 r"([0-9]+)", 4463 set_num_str 4464 ) 4465 4466 set_num = int(set_num[0]) 4467 4468 table_body = card.find("table").find("tbody").find_all("tr") 4469 4470 # pbp rows 4471 for row in table_body: 4472 is_scoring_play = True 4473 t_cells = row.find_all("td") 4474 t_cells = [x.text.strip() for x in t_cells] 4475 game_time_str = t_cells[0] 4476 4477 if len(t_cells[0]) > 0: 4478 event_team = away_team_id 4479 event_text = t_cells[0] 4480 elif len(t_cells[2]) > 0: 4481 event_team = home_team_id 4482 event_text = t_cells[2] 4483 4484 if "+" in event_text: 4485 temp = event_text.split("\n") 4486 if len(temp) >= 2: 4487 event_text = temp[1] 4488 else: 4489 raise Exception( 4490 "Unhandled situation " + 4491 f"when parsing a scoring play: `{temp}`" 4492 ) 4493 # print() 4494 else: 4495 event_text = event_text.replace("\n", "") 
4496 4497 event_text = event_text.replace(" ", " ") 4498 event_text = event_text.strip() 4499 4500 if len(t_cells) == 3: 4501 try: 4502 away_score, home_score = t_cells[1].split("-") 4503 4504 away_score = int(away_score) 4505 home_score = int(home_score) 4506 is_scoring_play = True 4507 except ValueError: 4508 logging.info( 4509 "Could not extract a score " + 4510 f"from the following play `{event_text}`" 4511 ) 4512 is_scoring_play = False 4513 except Exception as e: 4514 logging.warning( 4515 f"An unhandled exception has occurred: `{e}`" 4516 ) 4517 raise e 4518 # scoring_play = False 4519 elif len(t_cells) > 3: 4520 raise SyntaxError( 4521 f"Unhandled PBP row format in game ID `{game_id}`" 4522 ) 4523 4524 if set_num <= 4 and home_score == 24 and away_score == 24: 4525 is_extra_points = True 4526 elif set_num == 5 and home_score == 14 and away_score == 14: 4527 is_extra_points = True 4528 4529 temp_home_cumulative_score = home_cumulative_score + home_score 4530 temp_away_cumulative_score = away_cumulative_score + away_score 4531 4532 temp_df = pd.DataFrame( 4533 { 4534 # "season": season, 4535 # "game_id": game_id, 4536 # "sport_id": sport_id, 4537 # "away_team_id": away_team_id, 4538 # "away_team_name": away_team_name, 4539 # "home_team_id": home_team_id, 4540 # "home_team_name": home_team_name, 4541 "game_time_str": game_time_str, 4542 "set_num": set_num, 4543 "away_set_score": away_score, 4544 "home_set_score": home_score, 4545 "event_team": event_team, 4546 "event_text": event_text, 4547 "is_scoring_play": is_scoring_play, 4548 "is_extra_points": is_extra_points, 4549 "home_cumulative_score": temp_home_cumulative_score, 4550 "away_cumulative_score": temp_away_cumulative_score, 4551 "home_sets_won": home_sets_won, 4552 "away_sets_won": away_sets_won, 4553 }, 4554 index=[0], 4555 ) 4556 pbp_df_arr.append(temp_df) 4557 4558 if set_num == 1: 4559 home_set_1_score = home_score 4560 away_set_1_score = away_score 4561 home_cumulative_score = home_set_1_score 4562 
away_cumulative_score = away_set_1_score 4563 elif set_num == 2: 4564 home_set_2_score = home_score 4565 away_set_2_score = away_score 4566 home_cumulative_score += home_set_2_score 4567 away_cumulative_score += away_set_2_score 4568 elif set_num == 3: 4569 home_set_3_score = home_score 4570 away_set_3_score = away_score 4571 home_cumulative_score += home_set_3_score 4572 away_cumulative_score += away_set_3_score 4573 elif set_num == 4: 4574 home_set_4_score = home_score 4575 away_set_4_score = away_score 4576 home_cumulative_score += home_set_4_score 4577 away_cumulative_score += away_set_4_score 4578 elif set_num == 5: 4579 home_set_5_score = home_score 4580 away_set_5_score = away_score 4581 home_cumulative_score += home_set_4_score 4582 away_cumulative_score += away_set_4_score 4583 4584 if temp_away_cumulative_score > home_cumulative_score: 4585 away_sets_won += 1 4586 elif temp_away_cumulative_score < home_cumulative_score: 4587 home_sets_won += 1 4588 4589 # End of set play 4590 temp_df = pd.DataFrame( 4591 { 4592 # "season": season, 4593 # "game_id": game_id, 4594 # "sport_id": sport_id, 4595 # "away_team_id": away_team_id, 4596 # "away_team_name": away_team_name, 4597 # "home_team_id": home_team_id, 4598 # "home_team_name": home_team_name, 4599 "game_time_str": game_time_str, 4600 "set_num": set_num, 4601 "away_set_score": away_score, 4602 "home_set_score": home_score, 4603 "event_team": event_team, 4604 "event_text": f"END SET {set_num}", 4605 "is_scoring_play": is_scoring_play, 4606 "is_extra_points": is_extra_points, 4607 "home_cumulative_score": temp_home_cumulative_score, 4608 "away_cumulative_score": temp_away_cumulative_score, 4609 "home_sets_won": home_sets_won, 4610 "away_sets_won": away_sets_won, 4611 }, 4612 index=[0], 4613 ) 4614 pbp_df_arr.append(temp_df) 4615 4616 # End of game play 4617 temp_df = pd.DataFrame( 4618 { 4619 # "season": season, 4620 # "game_id": game_id, 4621 # "sport_id": sport_id, 4622 # "away_team_id": away_team_id, 4623 # 
"away_team_name": away_team_name, 4624 # "home_team_id": home_team_id, 4625 # "home_team_name": home_team_name, 4626 "game_time_str": game_time_str, 4627 "set_num": set_num, 4628 "away_set_score": away_score, 4629 "home_set_score": home_score, 4630 "event_team": event_team, 4631 "event_text": "END MATCH", 4632 "is_scoring_play": is_scoring_play, 4633 "is_extra_points": is_extra_points, 4634 "home_cumulative_score": temp_home_cumulative_score, 4635 "away_cumulative_score": temp_away_cumulative_score, 4636 "home_sets_won": home_sets_won, 4637 "away_sets_won": away_sets_won, 4638 }, 4639 index=[0], 4640 ) 4641 pbp_df_arr.append(temp_df) 4642 pbp_df = pd.concat(pbp_df_arr, ignore_index=True) 4643 pbp_df["event_num"] = pbp_df.index + 1 4644 pbp_df["game_datetime"] = game_date_str 4645 pbp_df["season"] = season 4646 pbp_df["game_id"] = game_id 4647 pbp_df["sport_id"] = sport_id 4648 pbp_df["stadium_name"] = stadium_str 4649 pbp_df["attendance"] = attendance_int 4650 pbp_df["away_team_id"] = away_team_id 4651 pbp_df["away_team_name"] = away_team_name 4652 pbp_df["home_team_id"] = home_team_id 4653 pbp_df["home_team_name"] = home_team_name 4654 4655 pbp_df["home_set_1_score"] = home_set_1_score 4656 pbp_df["away_set_1_score"] = away_set_1_score 4657 4658 pbp_df["home_set_2_score"] = home_set_2_score 4659 pbp_df["away_set_2_score"] = away_set_2_score 4660 4661 pbp_df["home_set_3_score"] = home_set_3_score 4662 pbp_df["away_set_3_score"] = away_set_3_score 4663 4664 pbp_df["home_set_4_score"] = home_set_4_score 4665 pbp_df["away_set_4_score"] = away_set_4_score 4666 4667 pbp_df["home_set_5_score"] = home_set_5_score 4668 pbp_df["away_set_5_score"] = away_set_5_score 4669 4670 # print(pbp_df.columns) 4671 pbp_df = pbp_df.reindex(columns=stat_columns) 4672 pbp_df = pbp_df.infer_objects() 4673 4674 if sport_id == "MVB": 4675 pbp_df.to_csv( 4676 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4677 + f"{game_id}_raw_pbp.csv", 4678 index=False 4679 ) 4680 elif sport_id == 
"WVB": 4681 pbp_df.to_csv( 4682 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4683 + f"{game_id}_raw_pbp.csv", 4684 index=False 4685 ) 4686 else: 4687 raise ValueError( 4688 f"Improper Sport ID: `{sport_id}`" 4689 ) 4690 4691 return pbp_df 4692 4693 4694def get_parsed_volleyball_pbp(game_id: int) -> pd.DataFrame: 4695 """ 4696 Given a valid game ID, 4697 this function will attempt to parse play-by-play (PBP) 4698 data for that game. 4699 4700 Parameters 4701 ---------- 4702 `game_id` (int, mandatory): 4703 Required argument. 4704 Specifies the game you want play-by-play data (PBP) from. 4705 4706 Usage 4707 ---------- 4708 ```python 4709 ``` 4710 4711 Returns 4712 ---------- 4713 A pandas `DataFrame` object with a play-by-play (PBP) data in a given game. 4714 4715 """ 4716 home_team_id = 0 4717 away_team_id = 0 4718 sport_id = "" 4719 4720 home_roster_df = pd.DataFrame() 4721 away_roster_df = pd.DataFrame() 4722 4723 home_dir = expanduser("~") 4724 home_dir = _format_folder_str(home_dir) 4725 4726 if exists(f"{home_dir}/.ncaa_stats_py/"): 4727 pass 4728 else: 4729 mkdir(f"{home_dir}/.ncaa_stats_py/") 4730 4731 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"): 4732 pass 4733 else: 4734 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/") 4735 4736 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/parsed_pbp/"): 4737 pass 4738 else: 4739 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/parsed_pbp/") 4740 4741 if exists( 4742 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/parsed_pbp/" 4743 + f"{game_id}_parsed_pbp.csv" 4744 ): 4745 games_df = pd.read_csv( 4746 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/parsed_pbp/" 4747 + f"{game_id}_parsed_pbp.csv" 4748 ) 4749 games_df = games_df.infer_objects() 4750 file_mod_datetime = datetime.fromtimestamp( 4751 getmtime( 4752 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/parsed_pbp/" 4753 + f"{game_id}_parsed_pbp.csv" 4754 ) 4755 ) 4756 load_from_cache = True 4757 else: 4758 file_mod_datetime = datetime.today() 4759 
load_from_cache = False 4760 4761 if exists(f"{home_dir}/.ncaa_stats_py/"): 4762 pass 4763 else: 4764 mkdir(f"{home_dir}/.ncaa_stats_py/") 4765 4766 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"): 4767 pass 4768 else: 4769 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/") 4770 4771 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/parsed_pbp/"): 4772 pass 4773 else: 4774 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/parsed_pbp/") 4775 4776 if exists( 4777 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/parsed_pbp/" 4778 + f"{game_id}_parsed_pbp.csv" 4779 ): 4780 games_df = pd.read_csv( 4781 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/parsed_pbp/" 4782 + f"{game_id}_parsed_pbp.csv" 4783 ) 4784 games_df = games_df.infer_objects() 4785 file_mod_datetime = datetime.fromtimestamp( 4786 getmtime( 4787 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/parsed_pbp/" 4788 + f"{game_id}_parsed_pbp.csv" 4789 ) 4790 ) 4791 load_from_cache = True 4792 else: 4793 logging.info("Could not find a WVB player game stats file") 4794 4795 now = datetime.today() 4796 4797 age = now - file_mod_datetime 4798 4799 if age.days > 1: 4800 load_from_cache = False 4801 4802 if load_from_cache is True: 4803 return games_df 4804 4805 raw_df = get_volleyball_raw_pbp(game_id=game_id) 4806 4807 sport_id = raw_df["sport_id"].iloc[0] 4808 home_team_id = raw_df["home_team_id"].iloc[0] 4809 away_team_id = raw_df["away_team_id"].iloc[0] 4810 4811 pbp_df = _volleyball_pbp_helper(raw_df=raw_df) 4812 4813 home_roster_df = get_volleyball_team_roster(team_id=home_team_id) 4814 home_roster_df["Name"] = home_roster_df["Name"].str.lower() 4815 4816 away_roster_df = get_volleyball_team_roster(team_id=away_team_id) 4817 away_roster_df["Name"] = away_roster_df["Name"].str.lower() 4818 4819 home_players_arr = dict( 4820 zip( 4821 home_roster_df["Name"], home_roster_df["player_id"] 4822 ) 4823 ) 4824 away_players_arr = dict( 4825 zip( 4826 away_roster_df["Name"], away_roster_df["player_id"] 4827 ) 4828 ) 4829 
players_arr = home_players_arr | away_players_arr 4830 name_cols = [ 4831 "substitution_player_1_name", 4832 "substitution_player_2_name", 4833 "substitution_player_3_name", 4834 "substitution_player_4_name", 4835 "serve_player_name", 4836 "reception_player_name", 4837 "set_player_name", 4838 "set_error_player_name", 4839 "attack_player_name", 4840 "dig_player_name", 4841 "kill_player_name", 4842 "block_player_1_name", 4843 "block_player_2_name", 4844 "ball_handling_error_player_name", 4845 "dig_error_player_name", 4846 ] 4847 id_cols = [ 4848 "substitution_player_1_id", 4849 "substitution_player_2_id", 4850 "substitution_player_3_id", 4851 "substitution_player_4_id", 4852 "serve_player_id", 4853 "reception_player_id", 4854 "set_player_id", 4855 "set_error_player_id", 4856 "attack_player_id", 4857 "dig_player_id", 4858 "kill_player_id", 4859 "block_player_1_id", 4860 "block_player_2_id", 4861 "ball_handling_error_player_id", 4862 "dig_error_player_id", 4863 ] 4864 4865 for i in range(0, len(id_cols)): 4866 name_column = name_cols[i] 4867 id_column = id_cols[i] 4868 pbp_df[name_column] = pbp_df[name_column].str.replace("3a", "") 4869 pbp_df[name_column] = pbp_df[name_column].str.replace(".", "") 4870 pbp_df[id_column] = pbp_df[name_column].str.lower() 4871 pbp_df.loc[pbp_df[id_column].notnull(), id_column] = pbp_df[ 4872 id_column 4873 ].map(_name_smother) 4874 pbp_df[id_column] = pbp_df[id_column].map(players_arr) 4875 4876 pbp_df.to_csv( 4877 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/parsed_pbp/" 4878 + f"{game_id}_parsed_pbp.csv", 4879 index=False 4880 ) 4881 return pbp_df
36def get_volleyball_teams( 37 season: int, 38 level: str | int, 39 get_mens_data: bool = False 40) -> pd.DataFrame: 41 """ 42 Retrieves a list of volleyball teams from the NCAA. 43 44 Parameters 45 ---------- 46 `season` (int, mandatory): 47 Required argument. 48 Specifies the season you want NCAA volleyball team information from. 49 50 `level` (int, mandatory): 51 Required argument. 52 Specifies the level/division you want 53 NCAA volleyball team information from. 54 This can either be an integer (1-3) or a string ("I"-"III"). 55 56 `get_mens_data` (bool, optional): 57 Optional argument. 58 If you want men's volleyball data instead of women's volleyball data, 59 set this to `True`. 60 61 Usage 62 ---------- 63 ```python 64 65 from ncaa_stats_py.volleyball import get_volleyball_teams 66 67 ######################################## 68 # Men's volleyball # 69 ######################################## 70 71 # Get all D1 men's volleyball teams for the 2024 season. 72 print("Get all D1 men's volleyball teams for the 2024 season.") 73 df = get_volleyball_teams(2024, 1) 74 print(df) 75 76 # Get all D2 men's volleyball teams for the 2023 season. 77 print("Get all D2 men's volleyball teams for the 2023 season.") 78 df = get_volleyball_teams(2023, 2) 79 print(df) 80 81 # Get all D3 men's volleyball teams for the 2022 season. 82 print("Get all D3 men's volleyball teams for the 2022 season.") 83 df = get_volleyball_teams(2022, 3) 84 print(df) 85 86 # Get all D1 men's volleyball teams for the 2021 season. 87 print("Get all D1 men's volleyball teams for the 2021 season.") 88 df = get_volleyball_teams(2021, "I") 89 print(df) 90 91 # Get all D2 men's volleyball teams for the 2020 season. 92 print("Get all D2 men's volleyball teams for the 2020 season.") 93 df = get_volleyball_teams(2020, "II") 94 print(df) 95 96 # Get all D3 men's volleyball teams for the 2019 season. 
97 print("Get all D3 men's volleyball teams for the 2019 season.") 98 df = get_volleyball_teams(2019, "III") 99 print(df) 100 101 ######################################## 102 # Women's volleyball # 103 ######################################## 104 105 # Get all D1 women's volleyball teams for the 2024 season. 106 print( 107 "Get all D1 women's volleyball teams for the 2024 season." 108 ) 109 df = get_volleyball_teams(2024, 1) 110 print(df) 111 112 # Get all D2 women's volleyball teams for the 2023 season. 113 print( 114 "Get all D2 women's volleyball teams for the 2023 season." 115 ) 116 df = get_volleyball_teams(2023, 2) 117 print(df) 118 119 # Get all D3 women's volleyball teams for the 2022 season. 120 print( 121 "Get all D3 women's volleyball teams for the 2022 season." 122 ) 123 df = get_volleyball_teams(2022, 3) 124 print(df) 125 126 # Get all D1 women's volleyball teams for the 2021 season. 127 print( 128 "Get all D1 women's volleyball teams for the 2021 season." 129 ) 130 df = get_volleyball_teams(2021, "I") 131 print(df) 132 133 # Get all D2 women's volleyball teams for the 2020 season. 134 print( 135 "Get all D2 women's volleyball teams for the 2020 season." 136 ) 137 df = get_volleyball_teams(2020, "II") 138 print(df) 139 140 # Get all D3 women's volleyball teams for the 2019 season. 141 print( 142 "Get all D3 women's volleyball teams for the 2019 season." 143 ) 144 df = get_volleyball_teams(2019, "III") 145 print(df) 146 147 ``` 148 149 Returns 150 ---------- 151 A pandas `DataFrame` object with a list of college volleyball teams 152 in that season and NCAA level. 
153 """ 154 # def is_comment(elem): 155 # return isinstance(elem, Comment) 156 sport_id = "" 157 # stat_sequence = 0 158 load_from_cache = True 159 home_dir = expanduser("~") 160 home_dir = _format_folder_str(home_dir) 161 teams_df = pd.DataFrame() 162 teams_df_arr = [] 163 temp_df = pd.DataFrame() 164 formatted_level = "" 165 ncaa_level = 0 166 167 if get_mens_data is True: 168 sport_id = "MVB" 169 stat_sequence = 528 170 elif get_mens_data is False: 171 sport_id = "WVB" 172 stat_sequence = 48 173 174 if isinstance(level, int) and level == 1: 175 formatted_level = "I" 176 ncaa_level = 1 177 elif isinstance(level, int) and level == 2: 178 formatted_level = "II" 179 ncaa_level = 2 180 elif isinstance(level, int) and level == 3: 181 formatted_level = "III" 182 ncaa_level = 3 183 elif isinstance(level, str) and ( 184 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 185 ): 186 ncaa_level = 1 187 formatted_level = level.upper() 188 elif isinstance(level, str) and ( 189 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 190 ): 191 ncaa_level = 2 192 formatted_level = level.upper() 193 elif isinstance(level, str) and ( 194 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 195 ): 196 ncaa_level = 3 197 formatted_level = level.upper() 198 199 if exists(f"{home_dir}/.ncaa_stats_py/"): 200 pass 201 else: 202 mkdir(f"{home_dir}/.ncaa_stats_py/") 203 204 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"): 205 pass 206 else: 207 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/") 208 209 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/"): 210 pass 211 else: 212 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/") 213 214 if exists( 215 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 216 + f"{season}_{formatted_level}_teams.csv" 217 ): 218 teams_df = pd.read_csv( 219 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 220 + 
f"{season}_{formatted_level}_teams.csv" 221 ) 222 file_mod_datetime = datetime.fromtimestamp( 223 getmtime( 224 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 225 + f"{season}_{formatted_level}_teams.csv" 226 ) 227 ) 228 else: 229 file_mod_datetime = datetime.today() 230 load_from_cache = False 231 232 now = datetime.today() 233 234 age = now - file_mod_datetime 235 236 if ( 237 age.days > 1 and 238 season >= (now.year - 1) and 239 now.month <= 7 240 ): 241 load_from_cache = False 242 elif age.days >= 35: 243 load_from_cache = False 244 245 if load_from_cache is True: 246 return teams_df 247 248 logging.warning( 249 f"Either we could not load {season} D{level} schools from cache, " 250 + "or it's time to refresh the cached data." 251 ) 252 schools_df = _get_schools() 253 254 # Volleyball 255 if sport_id == "MVB": 256 url = ( 257 "https://stats.ncaa.org/rankings/change_sport_year_div?" 258 + f"academic_year={season}.0&division={ncaa_level}.0" + 259 f"&sport_code={sport_id}" 260 ) 261 elif sport_id == "WVB": 262 url = ( 263 "https://stats.ncaa.org/rankings/change_sport_year_div?" 264 + f"academic_year={season+1}.0&division={ncaa_level}.0" + 265 f"&sport_code={sport_id}" 266 ) 267 268 response = _get_webpage(url=url) 269 270 soup = BeautifulSoup(response.text, features="lxml") 271 ranking_periods = soup.find("select", {"name": "rp", "id": "rp"}) 272 ranking_periods = ranking_periods.find_all("option") 273 274 rp_value = 0 275 found_value = False 276 277 while found_value is False: 278 # print("check") 279 for rp in ranking_periods: 280 if "final" in rp.text.lower(): 281 rp_value = rp.get("value") 282 found_value = True 283 break 284 # pass 285 elif "-" in rp.text.lower(): 286 pass 287 else: 288 rp_value = rp.get("value") 289 found_value = True 290 break 291 292 if sport_id == "MVB": 293 url = ( 294 "https://stats.ncaa.org/rankings/institution_trends?" 
295 + f"academic_year={season}.0&division={ncaa_level}.0&" 296 + f"ranking_period={rp_value}&sport_code={sport_id}" 297 ) 298 elif sport_id == "WVB": 299 url = ( 300 "https://stats.ncaa.org/rankings/institution_trends?" 301 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 302 + f"ranking_period={rp_value}&sport_code={sport_id}" 303 ) 304 305 best_method = True 306 if ( 307 (season < 2017 and sport_id == "MVB") 308 ): 309 url = ( 310 "https://stats.ncaa.org/rankings/national_ranking?" 311 + f"academic_year={season}.0&division={ncaa_level}.0&" 312 + f"ranking_period={rp_value}&sport_code={sport_id}" 313 + f"&stat_seq={stat_sequence}.0" 314 ) 315 response = _get_webpage(url=url) 316 best_method = False 317 elif ( 318 (season < 2017 and sport_id == "WVB") 319 ): 320 url = ( 321 "https://stats.ncaa.org/rankings/national_ranking?" 322 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 323 + f"ranking_period={rp_value}&sport_code={sport_id}" 324 + f"&stat_seq={stat_sequence}.0" 325 ) 326 response = _get_webpage(url=url) 327 best_method = False 328 elif sport_id == "MVB": 329 try: 330 response = _get_webpage(url=url) 331 except Exception as e: 332 logging.info(f"Found exception when loading teams `{e}`") 333 logging.info("Attempting backup method.") 334 url = ( 335 "https://stats.ncaa.org/rankings/national_ranking?" 336 + f"academic_year={season}.0&division={ncaa_level}.0&" 337 + f"ranking_period={rp_value}&sport_code={sport_id}" 338 + f"&stat_seq={stat_sequence}.0" 339 ) 340 response = _get_webpage(url=url) 341 best_method = False 342 else: 343 try: 344 response = _get_webpage(url=url) 345 except Exception as e: 346 logging.info(f"Found exception when loading teams `{e}`") 347 logging.info("Attempting backup method.") 348 url = ( 349 "https://stats.ncaa.org/rankings/national_ranking?" 
350 + f"academic_year={season+1}.0&division={ncaa_level}.0&" 351 + f"ranking_period={rp_value}&sport_code={sport_id}" 352 + f"&stat_seq={stat_sequence}.0" 353 ) 354 response = _get_webpage(url=url) 355 best_method = False 356 357 soup = BeautifulSoup(response.text, features="lxml") 358 359 if best_method is True: 360 soup = soup.find( 361 "table", 362 {"id": "stat_grid"}, 363 ) 364 soup = soup.find("tbody") 365 t_rows = soup.find_all("tr") 366 367 for t in t_rows: 368 team_id = t.find("a") 369 team_id = team_id.get("href") 370 team_id = team_id.replace("/teams/", "") 371 team_id = int(team_id) 372 team_name = t.find_all("td")[0].text 373 team_conference_name = t.find_all("td")[1].text 374 # del team 375 temp_df = pd.DataFrame( 376 { 377 "season": season, 378 "ncaa_division": ncaa_level, 379 "ncaa_division_formatted": formatted_level, 380 "team_conference_name": team_conference_name, 381 "team_id": team_id, 382 "school_name": team_name, 383 "sport_id": sport_id, 384 }, 385 index=[0], 386 ) 387 teams_df_arr.append(temp_df) 388 del temp_df 389 else: 390 soup = soup.find( 391 "table", 392 {"id": "rankings_table"}, 393 ) 394 soup = soup.find("tbody") 395 t_rows = soup.find_all("tr") 396 397 for t in t_rows: 398 team_id = t.find("a") 399 team_id = team_id.get("href") 400 team_id = team_id.replace("/teams/", "") 401 team_id = int(team_id) 402 team = t.find_all("td")[1].get("data-order") 403 team_name, team_conference_name = team.split(",") 404 del team 405 temp_df = pd.DataFrame( 406 { 407 "season": season, 408 "ncaa_division": ncaa_level, 409 "ncaa_division_formatted": formatted_level, 410 "team_conference_name": team_conference_name, 411 "team_id": team_id, 412 "school_name": team_name, 413 "sport_id": sport_id, 414 }, 415 index=[0], 416 ) 417 teams_df_arr.append(temp_df) 418 del temp_df 419 420 teams_df = pd.concat(teams_df_arr, ignore_index=True) 421 teams_df = pd.merge( 422 left=teams_df, 423 right=schools_df, 424 on=["school_name"], 425 how="left" 426 ) 427 
teams_df.sort_values(by=["team_id"], inplace=True) 428 429 teams_df.to_csv( 430 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/teams/" 431 + f"{season}_{formatted_level}_teams.csv", 432 index=False, 433 ) 434 435 return teams_df
Retrieves a list of volleyball teams from the NCAA.
Parameters
season
(int, mandatory):
Required argument.
Specifies the season you want NCAA volleyball team information from.
level
(int, mandatory):
Required argument.
Specifies the level/division you want
NCAA volleyball team information from.
This can either be an integer (1-3) or a string ("I"-"III").
get_mens_data
(bool, optional):
Optional argument.
If you want men's volleyball data instead of women's volleyball data,
set this to `True`.
Usage
from ncaa_stats_py.volleyball import get_volleyball_teams
########################################
# Men's volleyball #
########################################
# Get all D1 men's volleyball teams for the 2024 season.
print("Get all D1 men's volleyball teams for the 2024 season.")
df = get_volleyball_teams(2024, 1, get_mens_data=True)
print(df)
# Get all D2 men's volleyball teams for the 2023 season.
print("Get all D2 men's volleyball teams for the 2023 season.")
df = get_volleyball_teams(2023, 2, get_mens_data=True)
print(df)
# Get all D3 men's volleyball teams for the 2022 season.
print("Get all D3 men's volleyball teams for the 2022 season.")
df = get_volleyball_teams(2022, 3, get_mens_data=True)
print(df)
# Get all D1 men's volleyball teams for the 2021 season.
print("Get all D1 men's volleyball teams for the 2021 season.")
df = get_volleyball_teams(2021, "I", get_mens_data=True)
print(df)
# Get all D2 men's volleyball teams for the 2020 season.
print("Get all D2 men's volleyball teams for the 2020 season.")
df = get_volleyball_teams(2020, "II", get_mens_data=True)
print(df)
# Get all D3 men's volleyball teams for the 2019 season.
print("Get all D3 men's volleyball teams for the 2019 season.")
df = get_volleyball_teams(2019, "III", get_mens_data=True)
print(df)
########################################
# Women's volleyball #
########################################
# Get all D1 women's volleyball teams for the 2024 season.
print(
"Get all D1 women's volleyball teams for the 2024 season."
)
df = get_volleyball_teams(2024, 1)
print(df)
# Get all D2 women's volleyball teams for the 2023 season.
print(
"Get all D2 women's volleyball teams for the 2023 season."
)
df = get_volleyball_teams(2023, 2)
print(df)
# Get all D3 women's volleyball teams for the 2022 season.
print(
"Get all D3 women's volleyball teams for the 2022 season."
)
df = get_volleyball_teams(2022, 3)
print(df)
# Get all D1 women's volleyball teams for the 2021 season.
print(
"Get all D1 women's volleyball teams for the 2021 season."
)
df = get_volleyball_teams(2021, "I")
print(df)
# Get all D2 women's volleyball teams for the 2020 season.
print(
"Get all D2 women's volleyball teams for the 2020 season."
)
df = get_volleyball_teams(2020, "II")
print(df)
# Get all D3 women's volleyball teams for the 2019 season.
print(
"Get all D3 women's volleyball teams for the 2019 season."
)
df = get_volleyball_teams(2019, "III")
print(df)
Returns
A pandas DataFrame
object with a list of college volleyball teams
in that season and NCAA level.
def load_volleyball_teams(
    start_year: int = 2011,
    get_mens_data: bool = False
) -> pd.DataFrame:
    """
    Compiles a list of known NCAA volleyball teams in NCAA volleyball history.

    Parameters
    ----------
    `start_year` (int, optional):
        Optional argument.
        Specifies the first season you want
        NCAA volleyball team information from.

    `get_mens_data` (bool, optional):
        Optional argument.
        If you want men's volleyball data instead of women's volleyball data,
        set this to `True`.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import load_volleyball_teams

    # WARNING: Running this script "as-is" for the first time may
    #          take some time.
    #          The *N*th time you run this script will be faster.

    # Load in every women's volleyball team
    # from 2011 to present day.
    print(
        "Load in every women's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams()
    print(df)

    # Load in every men's volleyball team
    # from 2011 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2011 to present day."
    )
    df = load_volleyball_teams(get_mens_data=True)
    print(df)

    # Load in every men's volleyball team
    # from 2020 to present day.
    print(
        "Load in every men's volleyball team " +
        "from 2020 to present day."
    )
    df = load_volleyball_teams(start_year=2020, get_mens_data=True)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a list of
    all known college volleyball teams.
    An empty `DataFrame` is returned when `start_year` is later than
    the last season that would be loaded.

    """
    mens = bool(get_mens_data)
    now = datetime.now()

    if mens:
        # Only Divisions I and III are requested for men's volleyball.
        divisions = ["I", "III"]
        label = "men's"
        last_season = now.year
    else:
        divisions = ["I", "II", "III"]
        label = "women's"
        # From June onward the women's list also includes next year's
        # season number.
        # NOTE(review): confirm `now.year + 1` is a valid women's season
        # on stats.ncaa.org; the range is kept exactly as it was.
        last_season = now.year + 1 if now.month > 5 else now.year

    ncaa_seasons = range(start_year, last_season + 1)

    logging.info(
        "Loading in all NCAA volleyball teams. "
        + "If this is the first time you're seeing this message, "
        + "it may take some time (3-10 minutes) for this to load."
    )

    teams_df_arr = []
    for s in ncaa_seasons:
        logging.info(
            f"Loading in {label} volleyball teams for the {s} season."
        )
        for d in divisions:
            teams_df_arr.append(
                get_volleyball_teams(
                    season=s,
                    level=d,
                    get_mens_data=mens
                )
            )

    if not teams_df_arr:
        # `pd.concat` raises on an empty list; an empty season range
        # simply means there is nothing to load.
        return pd.DataFrame()

    teams_df = pd.concat(teams_df_arr, ignore_index=True)
    teams_df = teams_df.infer_objects()
    return teams_df
Compiles a list of known NCAA volleyball teams in NCAA volleyball history.
Parameters
start_year
(int, optional):
Optional argument.
Specifies the first season you want
NCAA volleyball team information from.
get_mens_data
(bool, optional):
Optional argument.
If you want men's volleyball data instead of women's volleyball data,
set this to `True`.
Usage
from ncaa_stats_py.volleyball import load_volleyball_teams
# WARNING: Running this script "as-is" for the first time may
# take some time.
# The *N*th time you run this script will be faster.
# Load in every women's volleyball team
# from 2011 to present day.
print(
"Load in every women's volleyball team " +
"from 2011 to present day."
)
df = load_volleyball_teams()
print(df)
# Load in every men's volleyball team
# from 2011 to present day.
print(
"Load in every men's volleyball team " +
"from 2011 to present day."
)
df = load_volleyball_teams(get_mens_data=True)
print(df)
# Load in every men's volleyball team
# from 2020 to present day.
print(
"Load in every men's volleyball team " +
"from 2020 to present day."
)
df = load_volleyball_teams(start_year=2020, get_mens_data=True)
print(df)
Returns
A pandas DataFrame
object with a list of
all known college volleyball teams.
def get_volleyball_team_schedule(team_id: int) -> pd.DataFrame:
    """
    Retrieves a team schedule, from a valid NCAA volleyball team ID.

    The schedule is cached as a CSV file in
    `~/.ncaa_stats_py/volleyball_{WVB|MVB}/team_schedule/`.
    The cached copy is returned as-is unless it is more than a day old
    and the team's season is the current year (or later), in which case
    the schedule is downloaded again from stats.ncaa.org.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a schedule from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_schedule

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the team schedule for the
    # 2024 Toledo WVB team (D1, ID: 585329).
    print(
        "Get the team schedule for the " +
        "2024 Toledo WVB team (D1, ID: 585329)."
    )
    df = get_volleyball_team_schedule(585329)
    print(df)

    # Get the team schedule for the
    # 2023 Black Hills St. WVB team (D2, ID: 559709).
    print(
        "Get the team schedule for the " +
        "2023 Black Hills St. WVB team (D2, ID: 559709)."
    )
    df = get_volleyball_team_schedule(559709)
    print(df)

    # Get the team schedule for the
    # 2022 Mount Mary WVB team (D3, ID: 539750).
    print(
        "Get the team schedule for the " +
        "2022 Mount Mary WVB team (D3, ID: 539750)."
    )
    df = get_volleyball_team_schedule(539750)
    print(df)

    # Get the team schedule for the
    # 2021 TCU WVB team (D1, ID: 522750).
    print(
        "Get the team schedule for the " +
        "2021 TCU WVB team (D1, ID: 522750)."
    )
    df = get_volleyball_team_schedule(522750)
    print(df)

    # Get the team schedule for the
    # 2020 Purdue Northwest WVB team (D2, ID: 504832).
    print(
        "Get the team schedule for the " +
        "2020 Purdue Northwest WVB team (D2, ID: 504832)."
    )
    df = get_volleyball_team_schedule(504832)
    print(df)

    # Get the team schedule for the
    # 2019 Juniata WVB team (D3, ID: 482642).
    print(
        "Get the team schedule for the " +
        "2019 Juniata WVB team (D3, ID: 482642)."
    )
    df = get_volleyball_team_schedule(482642)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the team schedule for the
    # 2024 Missouri S&T MVB team (D1, ID: 573720).
    print(
        "Get the team schedule for the " +
        "2024 Missouri S&T MVB team (D1, ID: 573720)."
    )
    df = get_volleyball_team_schedule(573720)
    print(df)

    # Get the team schedule for the
    # 2023 Rockford MVB team (D3, ID: 550890).
    print(
        "Get the team schedule for the " +
        "2023 Rockford MVB team (D3, ID: 550890)."
    )
    df = get_volleyball_team_schedule(550890)
    print(df)

    # Get the team schedule for the
    # 2022 McKendree MVB team (D1, ID: 529896).
    print(
        "Get the team schedule for the " +
        "2022 McKendree MVB team (D1, ID: 529896)."
    )
    df = get_volleyball_team_schedule(529896)
    print(df)

    # Get the team schedule for the
    # 2021 Concordia Chicago MVB team (D3, ID: 508505).
    print(
        "Get the team schedule for the " +
        "2021 Concordia Chicago MVB team (D3, ID: 508505)."
    )
    df = get_volleyball_team_schedule(508505)
    print(df)

    # Get the team schedule for the
    # 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
    print(
        "Get the team schedule for the " +
        "2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
    )
    df = get_volleyball_team_schedule(487992)
    print(df)

    # Get the team schedule for the
    # 2019 Loras MVB team (D3, ID: 453845).
    print(
        "Get the team schedule for the " +
        "2019 Loras MVB team (D3, ID: 453845)."
    )
    df = get_volleyball_team_schedule(453845)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with an NCAA volleyball team's schedule.
    An empty `DataFrame` is returned (and nothing is cached) if the team
    page has no schedule table, or if no schedule rows could be parsed.

    Raises
    ----------
    `IndexError`:
        If `team_id` is not found in either the women's
        or the men's volleyball team lists.

    """

    sport_id = ""
    schools_df = _get_schools()
    games_df = pd.DataFrame()
    games_df_arr = []
    season = 0
    load_from_cache = True

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/teams/{team_id}"

    # Look for the team in the women's list first. Any failure there
    # (including "team ID not found") falls back to the men's list.
    try:
        team_df = load_volleyball_teams()
        team_df = team_df[team_df["team_id"] == team_id]
        season = team_df["season"].iloc[0]
        ncaa_division = team_df["ncaa_division"].iloc[0]
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        sport_id = "WVB"
    except Exception:
        team_df = load_volleyball_teams(get_mens_data=True)
        team_df = team_df[team_df["team_id"] == team_id]
        if team_df.empty:
            # Same exception type `.iloc[0]` would raise on an empty
            # selection, but with a message that names the real problem.
            raise IndexError(
                "Could not find a women's or men's volleyball team "
                + f"with a team ID of `{team_id}`."
            ) from None
        season = team_df["season"].iloc[0]
        ncaa_division = team_df["ncaa_division"].iloc[0]
        ncaa_division_formatted = team_df["ncaa_division_formatted"].iloc[0]
        sport_id = "MVB"

    del team_df

    # Create the cache folders one level at a time, parents first.
    cache_dir = (
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/team_schedule/"
    )
    cache_file = cache_dir + f"{team_id}_team_schedule.csv"

    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/",
        cache_dir,
    ):
        if not exists(folder):
            mkdir(folder)

    if exists(cache_file):
        games_df = pd.read_csv(cache_file)
        file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
    else:
        file_mod_datetime = datetime.today()
        load_from_cache = False

    now = datetime.today()

    # Only schedules for the current (or a future) season can still change,
    # so only those get refreshed once the cached copy is over a day old.
    age = now - file_mod_datetime
    if age.days > 1 and season >= now.year:
        load_from_cache = False

    if load_from_cache is True:
        return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    school_name = soup.find("div", {"class": "card"}).find("img").get("alt")
    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    cards = soup.find_all(
        "div",
        {"class": "col p-0"},
    )

    # Stays `None` if none of the cards on the page is the schedule card.
    table_data = None
    for s in cards:
        try:
            temp_name = s.find("div", {"class": "card-header"})
            temp_name = temp_name.text
        except Exception as e:
            logging.warning(
                f"Could not parse card header. Full exception `{e}`. "
                + "Attempting alternate method."
            )
            temp_name = s.find("tr", {"class": "heading"}).find("td").text

        if "schedule" in temp_name.lower():
            table_data = s.find("table")

    if table_data is None:
        logging.warning(
            f"Could not find a schedule table for team ID `{team_id}`."
        )
        return pd.DataFrame()

    t_rows = table_data.find_all("tr", {"class": "underline_rows"})

    if len(t_rows) == 0:
        t_rows = table_data.find_all("tr")

    for g in t_rows:
        game_num = 1
        ot_periods = 0
        is_home_game = True
        is_neutral_game = False

        cells = g.find_all("td")
        if len(cells) <= 1:
            # If we had to fall back to every `<tr>` in the table,
            # we need to skip rows where every element is a
            # table header (`<th>`) instead of a
            # table data cell (`<td>`).
            continue

        game_date = cells[0].text.strip()

        # If "(" is in the same cell as the date, the number encased
        # in `()` is stored as `game_num`.
        # We need to remove that from the date,
        # and move it into a separate variable.
        if "(" in game_date:
            game_date = game_date.replace(")", "")
            game_date, game_num = game_date.split("(")
            game_date = game_date.strip()
            game_num = int(game_num.strip())

        if ":" in game_date and ("PM" in game_date or "AM" in game_date):
            game_date = datetime.strptime(
                game_date,
                "%m/%d/%Y %I:%M %p"
            ).date()
        else:
            game_date = datetime.strptime(
                game_date,
                "%m/%d/%Y"
            ).date()

        # The guard above guarantees that `cells[1]` exists.
        opp_cell = cells[1]

        try:
            opp_team_id = opp_cell.find("a").get("href")
        except AttributeError as e:
            logging.info(
                "Could not extract a team ID for this game. " +
                f"Full exception {e}"
            )
            # No link in the cell; "-1" marks an unknown opponent ID.
            opp_team_id = "-1"
        except Exception as e:
            logging.warning(
                "An unhandled exception has occurred when "
                + "trying to get the opposition team ID for this game. "
                f"Full exception `{e}`."
            )
            raise

        if opp_team_id is not None:
            opp_team_id = opp_team_id.replace("/teams/", "")
            opp_team_id = int(opp_team_id)

            try:
                opp_team_name = opp_cell.find("img").get("alt")
            except AttributeError:
                logging.info(
                    "Couldn't find the opposition team name "
                    + "for this row from an image element. "
                    + "Attempting a backup method"
                )
                opp_team_name = opp_cell.text
            except Exception as e:
                logging.info(
                    "Unhandled exception when trying to get the "
                    + "opposition team name from this game. "
                    + f"Full exception `{e}`"
                )
                raise
        else:
            opp_team_name = opp_cell.text

        if not opp_team_name:
            # Missing or empty `alt` text; use the cell text instead.
            opp_team_name = opp_cell.text

        # Strip *before* looking at the first character: the raw cell text
        # is usually padded with whitespace, which would otherwise hide a
        # leading "@" and leave an empty team name behind.
        opp_team_name = opp_team_name.strip()
        if opp_team_name.startswith("@"):
            # The logic for determining if this game was a
            # neutral site game doesn't care if that info is in
            # `opp_team_name`.
            opp_team_name = opp_team_name.replace("@", "").strip()
        elif "@" in opp_team_name:
            opp_team_name = opp_team_name.split("@")[0].strip()

        opp_text = opp_cell.text.strip()
        opp_text_lower = opp_text.lower()
        if opp_text.startswith("@"):
            # "@ Opponent": played at the opponent's venue.
            is_home_game = False
        elif "@" in opp_text:
            # "Opponent @ Location": played at a neutral site.
            is_neutral_game = True
            is_home_game = False
        elif "championship" in opp_text_lower or "ncaa" in opp_text_lower:
            # This is just to cover conference and NCAA championship
            # tournaments.
            is_neutral_game = True
            is_home_game = False

        del opp_text, opp_text_lower

        score = cells[2].text.strip()
        if len(score) == 0:
            score_1 = 0
            score_2 = 0
        elif (
            "canceled" not in score.lower() and
            "ppd" not in score.lower()
        ):
            score_1, score_2 = score.split("-")

            # `score_1` should be "W `n`", "L `n`", or "T `n`",
            # with `n` representing the number of sets this team
            # won in this game.
            # Let's remove the "W", "L", or "T" from `score_1`.
            if any(x in score_1 for x in ["W", "L", "T"]):
                score_1 = score_1.split(" ")[1]

            if "(" in score_2:
                score_2 = score_2.replace(")", "")
                score_2, ot_periods = score_2.split("(")
                ot_periods = ot_periods.replace("OT", "")
                ot_periods = ot_periods.replace(" ", "")
                ot_periods = int(ot_periods)

            score_1 = int(score_1)
            score_2 = int(score_2)
        else:
            score_1 = None
            score_2 = None

        try:
            game_id = cells[2].find("a").get("href")
            game_id = game_id.replace("/contests", "")
            game_id = game_id.replace("/box_score", "")
            game_id = game_id.replace("/", "")
            game_id = int(game_id)
            game_url = (
                f"https://stats.ncaa.org/contests/{game_id}/box_score"
            )
        except AttributeError as e:
            logging.info(
                "Could not parse a game ID for this game. "
                + f"Full exception `{e}`."
            )
            game_id = None
            game_url = None
        except Exception as e:
            logging.info(
                "An unhandled exception occurred when trying "
                + "to find a game ID for this game. "
                + f"Full exception `{e}`."
            )
            raise

        if is_home_game is True:
            team_is_home = True
        elif is_neutral_game is True:
            # For the sake of simplicity, order both team IDs and list
            # the team with the lower ID as the "home" team in this
            # neutral site game, just so there's no confusion if someone
            # combines a ton of these team schedule `DataFrame`s,
            # and wants to remove duplicates afterwards.
            team_is_home = team_id <= opp_team_id
        else:
            team_is_home = False

        if team_is_home:
            home_side = (team_id, school_name, score_1)
            away_side = (opp_team_id, opp_team_name, score_2)
        else:
            home_side = (opp_team_id, opp_team_name, score_2)
            away_side = (team_id, school_name, score_1)

        # One single-row `DataFrame` per game, concatenated after the loop.
        games_df_arr.append(
            pd.DataFrame(
                {
                    "season": season,
                    "season_name": season_name,
                    "game_id": game_id,
                    "game_date": game_date,
                    "game_num": game_num,
                    "ot_periods": ot_periods,
                    "home_team_id": home_side[0],
                    "home_team_name": home_side[1],
                    "away_team_id": away_side[0],
                    "away_team_name": away_side[1],
                    "home_team_sets_won": home_side[2],
                    "away_team_sets_won": away_side[2],
                    "is_neutral_game": is_neutral_game,
                    "game_url": game_url,
                },
                index=[0],
            )
        )

    if len(games_df_arr) == 0:
        # `pd.concat()` raises a `ValueError` when given an empty list.
        logging.warning(
            f"Could not find any games in the schedule of team ID `{team_id}`."
        )
        return pd.DataFrame()

    games_df = pd.concat(games_df_arr, ignore_index=True)

    # Attach the school ID of both teams by matching on the school name.
    temp_df = schools_df.rename(
        columns={
            "school_name": "home_team_name",
            "school_id": "home_school_id"
        }
    )
    games_df = games_df.merge(right=temp_df, on="home_team_name", how="left")

    temp_df = schools_df.rename(
        columns={
            "school_name": "away_team_name",
            "school_id": "away_school_id"
        }
    )
    games_df = games_df.merge(right=temp_df, on="away_team_name", how="left")
    games_df["ncaa_division"] = ncaa_division
    games_df["ncaa_division_formatted"] = ncaa_division_formatted

    games_df.to_csv(cache_file, index=False)

    return games_df
Retrieves a team schedule, from a valid NCAA volleyball team ID.
Parameters
team_id
(int, mandatory):
Required argument.
Specifies the team you want a schedule from.
This is separate from a school ID, which identifies the institution.
A team ID should be unique to a school, and a season.
Usage
from ncaa_stats_py.volleyball import get_volleyball_team_schedule
########################################
# Women's volleyball #
########################################
# Get the team schedule for the
# 2024 Toledo WVB team (D1, ID: 585329).
print(
"Get the team schedule for the " +
"2024 Toledo WVB team (D1, ID: 585329)."
)
df = get_volleyball_team_schedule(585329)
print(df)
# Get the team schedule for the
# 2023 Black Hills St. WVB team (D2, ID: 559709).
print(
"Get the team schedule for the " +
"2023 Black Hills St. WVB team (D2, ID: 559709)."
)
df = get_volleyball_team_schedule(559709)
print(df)
# Get the team schedule for the
# 2022 Mount Mary WVB team (D3, ID: 539750).
print(
"Get the team schedule for the " +
"2022 Mount Mary WVB team (D3, ID: 539750)."
)
df = get_volleyball_team_schedule(539750)
print(df)
# Get the team schedule for the
# 2021 TCU WVB team (D1, ID: 522750).
print(
"Get the team schedule for the " +
"2021 TCU WVB team (D1, ID: 522750)."
)
df = get_volleyball_team_schedule(522750)
print(df)
# Get the team schedule for the
# 2020 Purdue Northwest WVB team (D2, ID: 504832).
print(
"Get the team schedule for the " +
"2020 Purdue Northwest WVB team (D2, ID: 504832)."
)
df = get_volleyball_team_schedule(504832)
print(df)
# Get the team schedule for the
# 2019 Juniata WVB team (D3, ID: 482642).
print(
"Get the team schedule for the " +
"2019 Juniata WVB team (D3, ID: 482642)."
)
df = get_volleyball_team_schedule(482642)
print(df)
########################################
# Men's volleyball #
########################################
# Get the team schedule for the
# 2024 Missouri S&T MVB team (D1, ID: 573720).
print(
"Get the team schedule for the " +
"2024 Missouri S&T MVB team (D1, ID: 573720)."
)
df = get_volleyball_team_schedule(573720)
print(df)
# Get the team schedule for the
# 2023 Rockford MVB team (D3, ID: 550890).
print(
"Get the team schedule for the " +
"2023 Rockford MVB team (D3, ID: 550890)."
)
df = get_volleyball_team_schedule(550890)
print(df)
# Get the team schedule for the
# 2022 McKendree MVB team (D1, ID: 529896).
print(
"Get the team schedule for the " +
"2022 McKendree MVB team (D1, ID: 529896)."
)
df = get_volleyball_team_schedule(529896)
print(df)
# Get the team schedule for the
# 2021 Concordia Chicago MVB team (D3, ID: 508505).
print(
"Get the team schedule for the " +
"2021 Concordia Chicago MVB team (D3, ID: 508505)."
)
df = get_volleyball_team_schedule(508505)
print(df)
# Get the team schedule for the
# 2020 St. Francis Brooklyn MVB team (D1, ID: 487992).
print(
"Get the team schedule for the " +
"2020 St. Francis Brooklyn MVB team (D1, ID: 487992)."
)
df = get_volleyball_team_schedule(487992)
print(df)
# Get the team schedule for the
# 2019 Loras MVB team (D3, ID: 453845).
print(
"Get the team schedule for the " +
"2019 Loras MVB team (D3, ID: 453845)."
)
df = get_volleyball_team_schedule(453845)
print(df)
Returns
A pandas DataFrame
object with an NCAA volleyball team's schedule.
1155def get_volleyball_day_schedule( 1156 game_date: str | date | datetime, 1157 level: str | int = "I", 1158 get_mens_data: bool = False 1159): 1160 """ 1161 Given a date and NCAA level, this function retrieves volleyball every game 1162 for that date. 1163 1164 Parameters 1165 ---------- 1166 `game_date` (int, mandatory): 1167 Required argument. 1168 Specifies the date you want a volleyball schedule from. 1169 For best results, pass a string formatted as "YYYY-MM-DD". 1170 1171 `level` (int, mandatory): 1172 Required argument. 1173 Specifies the level/division you want a 1174 NCAA volleyball schedule from. 1175 This can either be an integer (1-3) or a string ("I"-"III"). 1176 1177 `get_mens_data` (bool, optional): 1178 Optional argument. 1179 If you want men's volleyball data instead of women's volleyball data, 1180 set this to `True`. 1181 1182 Usage 1183 ---------- 1184 ```python 1185 1186 from ncaa_stats_py.volleyball import get_volleyball_day_schedule 1187 1188 ######################################## 1189 # Women's Volleyball # 1190 ######################################## 1191 1192 # Get all DI games (if any) that were played on December 22th, 2024. 1193 print("Get all games (if any) that were played on December 22th, 2024.") 1194 df = get_volleyball_day_schedule("2024-12-22", level=1) 1195 print(df) 1196 1197 # Get all division II games that were played on November 24th, 2024. 1198 print("Get all division II games that were played on November 24th, 2024.") 1199 df = get_volleyball_day_schedule("2024-11-24", level="II") 1200 print(df) 1201 1202 # Get all DIII games that were played on October 27th, 2024. 1203 print("Get all DIII games that were played on October 27th, 2024.") 1204 df = get_volleyball_day_schedule("2024-10-27", level="III") 1205 print(df) 1206 1207 # Get all DI games (if any) that were played on September 29th, 2024. 1208 print( 1209 "Get all DI games (if any) that were played on September 29th, 2024." 
1210 ) 1211 df = get_volleyball_day_schedule("2024-09-29") 1212 print(df) 1213 1214 # Get all DII games played on August 30th, 2024. 1215 print("Get all DI games played on August 30th, 2024.") 1216 df = get_volleyball_day_schedule("2024-08-30") 1217 print(df) 1218 1219 # Get all division III games played on September 23rd, 2023. 1220 print("Get all division III games played on September 23rd, 2023.") 1221 df = get_volleyball_day_schedule("2023-09-23", level="III") 1222 print(df) 1223 1224 ######################################## 1225 # Men's Volleyball # 1226 ######################################## 1227 1228 # Get all DI games that will be played on April 12th, 2025. 1229 print("Get all games that will be played on April 12th, 2025.") 1230 df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True) 1231 print(df) 1232 1233 # Get all DI games that were played on January 30th, 2025. 1234 print("Get all games that were played on January 30th, 2025.") 1235 df = get_volleyball_day_schedule( 1236 "2025-01-30", level="I", get_mens_data=True 1237 ) 1238 print(df) 1239 1240 # Get all division III games that were played on April 6th, 2024. 1241 print("Get all division III games that were played on April 6th, 2024.") 1242 df = get_volleyball_day_schedule( 1243 "2025-04-05", level="III", get_mens_data=True 1244 ) 1245 print(df) 1246 1247 # Get all DI games (if any) that were played on March 30th, 2024. 1248 print("Get all DI games (if any) that were played on March 30th, 2024.") 1249 df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True) 1250 print(df) 1251 1252 # Get all DI games played on February 23rd, 2024. 1253 print("Get all DI games played on February 23rd, 2024.") 1254 df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True) 1255 print(df) 1256 1257 # Get all division III games played on February 11th, 2023. 
1258 print("Get all division III games played on February 11th, 2023.") 1259 df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True) 1260 print(df) 1261 1262 ``` 1263 1264 Returns 1265 ---------- 1266 A pandas `DataFrame` object with all volleyball games played on that day, 1267 for that NCAA division/level. 1268 1269 """ 1270 1271 season = 0 1272 sport_id = "WVB" 1273 1274 schedule_df = pd.DataFrame() 1275 schedule_df_arr = [] 1276 1277 if isinstance(game_date, date): 1278 game_datetime = datetime.combine( 1279 game_date, datetime.min.time() 1280 ) 1281 elif isinstance(game_date, datetime): 1282 game_datetime = game_date 1283 elif isinstance(game_date, str): 1284 game_datetime = parser.parse( 1285 game_date 1286 ) 1287 else: 1288 unhandled_datatype = type(game_date) 1289 raise ValueError( 1290 f"Unhandled datatype for `game_date`: `{unhandled_datatype}`" 1291 ) 1292 1293 if isinstance(level, int) and level == 1: 1294 formatted_level = "I" 1295 ncaa_level = 1 1296 elif isinstance(level, int) and level == 2: 1297 formatted_level = "II" 1298 ncaa_level = 2 1299 elif isinstance(level, int) and level == 3: 1300 formatted_level = "III" 1301 ncaa_level = 3 1302 elif isinstance(level, str) and ( 1303 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 1304 ): 1305 ncaa_level = 1 1306 formatted_level = level.upper() 1307 elif isinstance(level, str) and ( 1308 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 1309 ): 1310 ncaa_level = 2 1311 formatted_level = level.upper() 1312 elif isinstance(level, str) and ( 1313 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 1314 ): 1315 ncaa_level = 3 1316 formatted_level = level.upper() 1317 1318 del level 1319 1320 if get_mens_data is True: 1321 sport_id = "MVB" 1322 elif get_mens_data is False: 1323 sport_id = "WVB" 1324 else: 1325 raise ValueError( 1326 f"Unhandled value for `get_wbb_data`: `{get_mens_data}`" 1327 ) 1328 1329 season = 
game_datetime.year 1330 game_month = game_datetime.month 1331 game_day = game_datetime.day 1332 game_year = game_datetime.year 1333 1334 if game_month > 7: 1335 season += 1 1336 url = ( 1337 "https://stats.ncaa.org/contests/" + 1338 f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" + 1339 f"&academic_year={season}&division={ncaa_level}" + 1340 f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" + 1341 "&commit=Submit" 1342 ) 1343 else: 1344 url = ( 1345 "https://stats.ncaa.org/contests/" + 1346 f"livestream_scoreboards?utf8=%E2%9C%93&sport_code={sport_id}" + 1347 f"&academic_year={season}&division={ncaa_level}" + 1348 f"&game_date={game_month:00d}%2F{game_day:00d}%2F{game_year}" + 1349 "&commit=Submit" 1350 ) 1351 1352 response = _get_webpage(url=url) 1353 soup = BeautifulSoup(response.text, features="lxml") 1354 1355 game_boxes = soup.find_all("div", {"class": "table-responsive"}) 1356 1357 for box in game_boxes: 1358 game_id = None 1359 game_alt_text = None 1360 game_num = 1 1361 # t_box = box.find("table") 1362 table_box = box.find("table") 1363 table_rows = table_box.find_all("tr") 1364 1365 # Date/attendance 1366 game_date_str = table_rows[0].find("div", {"class": "col-6 p-0"}).text 1367 game_date_str = game_date_str.replace("\n", "") 1368 game_date_str = game_date_str.strip() 1369 game_date_str = game_date_str.replace("TBA ", "TBA") 1370 game_date_str = game_date_str.replace("TBD ", "TBD") 1371 game_date_str = game_date_str.replace("PM ", "PM") 1372 game_date_str = game_date_str.replace("AM ", "AM") 1373 game_date_str = game_date_str.strip() 1374 attendance_str = table_rows[0].find( 1375 "div", 1376 {"class": "col p-0 text-right"} 1377 ).text 1378 1379 attendance_str = attendance_str.replace("Attend:", "") 1380 attendance_str = attendance_str.replace(",", "") 1381 attendance_str = attendance_str.replace("\n", "") 1382 if ( 1383 "st" in attendance_str.lower() or 1384 "nd" in attendance_str.lower() or 1385 "rd" in attendance_str.lower() 
or 1386 "th" in attendance_str.lower() 1387 ): 1388 # This is not an attendance, 1389 # this is whatever quarter/half/inning this game is in. 1390 attendance_num = None 1391 elif "final" in attendance_str.lower(): 1392 attendance_num = None 1393 elif len(attendance_str) > 0: 1394 attendance_num = int(attendance_str) 1395 else: 1396 attendance_num = None 1397 1398 if "(" in game_date_str: 1399 game_date_str = game_date_str.replace(")", "") 1400 game_date_str, game_num = game_date_str.split("(") 1401 game_num = int(game_num) 1402 1403 if "TBA" in game_date_str: 1404 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 1405 elif "tba" in game_date_str: 1406 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 1407 elif "TBD" in game_date_str: 1408 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 1409 elif "tbd" in game_date_str: 1410 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 1411 elif ( 1412 "tbd" not in game_date_str.lower() and 1413 ":" not in game_date_str.lower() 1414 ): 1415 game_date_str = game_date_str.replace(" ", "") 1416 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 1417 else: 1418 game_datetime = datetime.strptime( 1419 game_date_str, 1420 '%m/%d/%Y %I:%M %p' 1421 ) 1422 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 1423 1424 game_alt_text = table_rows[1].find_all("td")[0].text 1425 if game_alt_text is not None and len(game_alt_text) > 0: 1426 game_alt_text = game_alt_text.replace("\n", "") 1427 game_alt_text = game_alt_text.strip() 1428 1429 if len(game_alt_text) == 0: 1430 game_alt_text = None 1431 1432 urls_arr = box.find_all("a") 1433 1434 for u in urls_arr: 1435 url_temp = u.get("href") 1436 if "contests" in url_temp: 1437 game_id = url_temp 1438 del url_temp 1439 1440 if game_id is None: 1441 for r in range(0, len(table_rows)): 1442 temp = table_rows[r] 1443 temp_id = temp.get("id") 1444 1445 if temp_id is not None and len(temp_id) > 0: 1446 game_id = 
temp_id 1447 1448 del urls_arr 1449 1450 game_id = game_id.replace("/contests", "") 1451 game_id = game_id.replace("/box_score", "") 1452 game_id = game_id.replace("/livestream_scoreboards", "") 1453 game_id = game_id.replace("/", "") 1454 game_id = game_id.replace("contest_", "") 1455 game_id = int(game_id) 1456 1457 table_rows = table_box.find_all("tr", {"id": f"contest_{game_id}"}) 1458 away_team_row = table_rows[0] 1459 home_team_row = table_rows[1] 1460 1461 # Away team 1462 td_arr = away_team_row.find_all("td") 1463 1464 try: 1465 away_team_name = td_arr[0].find("img").get("alt") 1466 except Exception: 1467 away_team_name = td_arr[1].text 1468 away_team_name = away_team_name.replace("\n", "") 1469 away_team_name = away_team_name.strip() 1470 1471 try: 1472 away_team_id = td_arr[1].find("a").get("href") 1473 away_team_id = away_team_id.replace("/teams/", "") 1474 away_team_id = int(away_team_id) 1475 except AttributeError: 1476 away_team_id = None 1477 logging.info("No team ID found for the away team") 1478 except Exception as e: 1479 raise e 1480 1481 away_sets_scored = td_arr[-1].text 1482 away_sets_scored = away_sets_scored.replace("\n", "") 1483 away_sets_scored = away_sets_scored.replace("\xa0", "") 1484 1485 if "ppd" in away_sets_scored.lower(): 1486 continue 1487 elif "cancel" in away_sets_scored.lower(): 1488 continue 1489 1490 if len(away_sets_scored) > 0: 1491 away_sets_scored = int(away_sets_scored) 1492 else: 1493 away_sets_scored = 0 1494 1495 del td_arr 1496 1497 # Home team 1498 td_arr = home_team_row.find_all("td") 1499 1500 try: 1501 home_team_name = td_arr[0].find("img").get("alt") 1502 except Exception: 1503 home_team_name = td_arr[1].text 1504 home_team_name = home_team_name.replace("\n", "") 1505 home_team_name = home_team_name.strip() 1506 1507 try: 1508 home_team_id = td_arr[1].find("a").get("href") 1509 home_team_id = home_team_id.replace("/teams/", "") 1510 home_team_id = int(home_team_id) 1511 except AttributeError: 1512 home_team_id 
= None 1513 logging.info("No team ID found for the home team") 1514 except Exception as e: 1515 raise e 1516 1517 home_sets_scored = td_arr[-1].text 1518 home_sets_scored = home_sets_scored.replace("\n", "") 1519 home_sets_scored = home_sets_scored.replace("\xa0", "") 1520 1521 if "ppd" in home_sets_scored.lower(): 1522 continue 1523 elif "cancel" in home_sets_scored.lower(): 1524 continue 1525 1526 if len(home_sets_scored) > 0: 1527 home_sets_scored = int(home_sets_scored) 1528 else: 1529 home_sets_scored = 0 1530 1531 temp_df = pd.DataFrame( 1532 { 1533 "season": season, 1534 "sport_id": sport_id, 1535 "game_date": game_datetime.strftime("%Y-%m-%d"), 1536 "game_datetime": game_datetime.isoformat(), 1537 "game_id": game_id, 1538 "formatted_level": formatted_level, 1539 "ncaa_level": ncaa_level, 1540 "game_alt_text": game_alt_text, 1541 "away_team_id": away_team_id, 1542 "away_team_name": away_team_name, 1543 "home_team_id": home_team_id, 1544 "home_team_name": home_team_name, 1545 "home_sets_scored": home_sets_scored, 1546 "away_sets_scored": away_sets_scored, 1547 "attendance": attendance_num 1548 }, 1549 index=[0] 1550 ) 1551 schedule_df_arr.append(temp_df) 1552 1553 del temp_df 1554 1555 if len(schedule_df_arr) >= 1: 1556 schedule_df = pd.concat(schedule_df_arr, ignore_index=True) 1557 else: 1558 logging.warning( 1559 "Could not find any game(s) for " 1560 + f"{game_datetime.year:00d}-{game_datetime.month:00d}" 1561 + f"-{game_datetime.day:00d}. " 1562 + "If you believe this is an error, " 1563 + "please raise an issue at " 1564 + "\n https://github.com/armstjc/ncaa_stats_py/issues \n" 1565 ) 1566 return schedule_df
Given a date and NCAA level, this function retrieves every volleyball game for that date.
Parameters
game_date
(str | date | datetime, mandatory):
Required argument.
Specifies the date you want a volleyball schedule from.
For best results, pass a string formatted as "YYYY-MM-DD".
level
(str | int, optional):
Optional argument. Defaults to "I".
Specifies the level/division you want a
NCAA volleyball schedule from.
This can either be an integer (1-3) or a string ("I"-"III").
get_mens_data
(bool, optional):
Optional argument.
If you want men's volleyball data instead of women's volleyball data,
set this to True
.
Usage
from ncaa_stats_py.volleyball import get_volleyball_day_schedule
########################################
# Women's Volleyball #
########################################
# Get all DI games (if any) that were played on December 22nd, 2024.
print("Get all games (if any) that were played on December 22nd, 2024.")
df = get_volleyball_day_schedule("2024-12-22", level=1)
print(df)
# Get all division II games that were played on November 24th, 2024.
print("Get all division II games that were played on November 24th, 2024.")
df = get_volleyball_day_schedule("2024-11-24", level="II")
print(df)
# Get all DIII games that were played on October 27th, 2024.
print("Get all DIII games that were played on October 27th, 2024.")
df = get_volleyball_day_schedule("2024-10-27", level="III")
print(df)
# Get all DI games (if any) that were played on September 29th, 2024.
print(
"Get all DI games (if any) that were played on September 29th, 2024."
)
df = get_volleyball_day_schedule("2024-09-29")
print(df)
# Get all DI games played on August 30th, 2024.
print("Get all DI games played on August 30th, 2024.")
df = get_volleyball_day_schedule("2024-08-30")
print(df)
# Get all division III games played on September 23rd, 2023.
print("Get all division III games played on September 23rd, 2023.")
df = get_volleyball_day_schedule("2023-09-23", level="III")
print(df)
########################################
# Men's Volleyball #
########################################
# Get all DI games that will be played on April 12th, 2025.
print("Get all games that will be played on April 12th, 2025.")
df = get_volleyball_day_schedule("2025-04-12", level=1, get_mens_data=True)
print(df)
# Get all DI games that were played on January 30th, 2025.
print("Get all games that were played on January 30th, 2025.")
df = get_volleyball_day_schedule(
"2025-01-30", level="I", get_mens_data=True
)
print(df)
# Get all division III games that were played on April 6th, 2024.
print("Get all division III games that were played on April 6th, 2024.")
df = get_volleyball_day_schedule(
"2025-04-05", level="III", get_mens_data=True
)
print(df)
# Get all DI games (if any) that were played on March 30th, 2024.
print("Get all DI games (if any) that were played on March 30th, 2024.")
df = get_volleyball_day_schedule("2024-03-30", get_mens_data=True)
print(df)
# Get all DI games played on February 23rd, 2024.
print("Get all DI games played on February 23rd, 2024.")
df = get_volleyball_day_schedule("2024-02-23", get_mens_data=True)
print(df)
# Get all division III games played on February 11th, 2023.
print("Get all division III games played on February 11th, 2023.")
df = get_volleyball_day_schedule("2024-02-11", level=3, get_mens_data=True)
print(df)
Returns
A pandas DataFrame
object with all volleyball games played on that day,
for that NCAA division/level.
1569def get_full_volleyball_schedule( 1570 season: int, 1571 level: str | int = "I", 1572 get_mens_data: bool = True 1573) -> pd.DataFrame: 1574 """ 1575 Retrieves a full volleyball schedule, 1576 from an NCAA level (`"I"`, `"II"`, `"III"`). 1577 The way this is done is by going through every team in a division, 1578 and parsing the schedules of every team in a division. 1579 1580 This function will take time when first run (30-60 minutes)! 1581 You have been warned. 1582 1583 Parameters 1584 ---------- 1585 `season` (int, mandatory): 1586 Specifies the season you want a schedule from. 1587 1588 `level` (int | str, mandatory): 1589 Specifies the team you want a schedule from. 1590 1591 `get_mens_data` (bool, optional): 1592 Optional argument. 1593 If you want men's volleyball data instead of women's volleyball data, 1594 set this to `True`. 1595 1596 Usage 1597 ---------- 1598 ```python 1599 1600 from ncaa_stats_py.volleyball import get_full_volleyball_schedule 1601 1602 ############################################################################## 1603 # NOTE 1604 # This function will easily take an hour or more 1605 # to run for the first time in a given season and NCAA level! 1606 # You have been warned! 1607 ############################################################################## 1608 1609 # Get the entire 2024 schedule for the 2024 women's D1 volleyball season. 1610 print( 1611 "Get the entire 2024 schedule " + 1612 "for the 2024 women's D1 volleyball season." 1613 ) 1614 df = get_full_volleyball_schedule(season=2024, level="I") 1615 print(df) 1616 1617 # Get the entire 2024 schedule for the 2024 men's D1 volleyball season. 1618 # print( 1619 # "Get the entire 2024 schedule for " + 1620 # "the 2024 men's D1 volleyball season." 1621 # ) 1622 # df = get_full_volleyball_schedule( 1623 # season=2024, 1624 # level="I", 1625 # get_mens_data=True 1626 # ) 1627 # print(df) 1628 1629 # You can also input `level` as an integer. 
1630 # In addition, this and other functions cache data, 1631 # so this should load very quickly 1632 # compared to the first run of this function. 1633 print("You can also input `level` as an integer.") 1634 print( 1635 "In addition, this and other functions cache data, " 1636 + "so this should load very quickly " 1637 + "compared to the first run of this function." 1638 ) 1639 df = get_full_volleyball_schedule(season=2024, level=1) 1640 print(df) 1641 1642 ``` 1643 1644 Returns 1645 ---------- 1646 A pandas `DataFrame` object with an NCAA volleyball 1647 schedule for a specific season and level. 1648 """ 1649 1650 sport_id = "" 1651 load_from_cache = True 1652 home_dir = expanduser("~") 1653 home_dir = _format_folder_str(home_dir) 1654 schedule_df = pd.DataFrame() 1655 schedule_df_arr = [] 1656 temp_df = pd.DataFrame() 1657 formatted_level = "" 1658 ncaa_level = 0 1659 1660 if get_mens_data is True: 1661 sport_id = "MVB" 1662 else: 1663 sport_id = "WVB" 1664 1665 if isinstance(level, int) and level == 1: 1666 formatted_level = "I" 1667 ncaa_level = 1 1668 elif isinstance(level, int) and level == 2: 1669 formatted_level = "II" 1670 ncaa_level = 2 1671 elif isinstance(level, int) and level == 3: 1672 formatted_level = "III" 1673 ncaa_level = 3 1674 elif isinstance(level, str) and ( 1675 level.lower() == "i" or level.lower() == "d1" or level.lower() == "1" 1676 ): 1677 ncaa_level = 1 1678 formatted_level = level.upper() 1679 elif isinstance(level, str) and ( 1680 level.lower() == "ii" or level.lower() == "d2" or level.lower() == "2" 1681 ): 1682 ncaa_level = 2 1683 formatted_level = level.upper() 1684 elif isinstance(level, str) and ( 1685 level.lower() == "iii" or level.lower() == "d3" or level.lower() == "3" 1686 ): 1687 ncaa_level = 3 1688 formatted_level = level.upper() 1689 1690 del level 1691 1692 if exists(f"{home_dir}/.ncaa_stats_py/"): 1693 pass 1694 else: 1695 mkdir(f"{home_dir}/.ncaa_stats_py/") 1696 1697 if 
exists(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"): 1698 pass 1699 else: 1700 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/") 1701 1702 if exists( 1703 f"{home_dir}/.ncaa_stats_py/" + 1704 f"volleyball_{sport_id}/full_schedule/" 1705 ): 1706 pass 1707 else: 1708 mkdir( 1709 f"{home_dir}/.ncaa_stats_py/" + 1710 f"volleyball_{sport_id}/full_schedule/" 1711 ) 1712 1713 if exists( 1714 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/" 1715 + f"{season}_{formatted_level}_full_schedule.csv" 1716 ): 1717 teams_df = pd.read_csv( 1718 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/full_schedule/" 1719 + f"{season}_{formatted_level}_full_schedule.csv" 1720 ) 1721 file_mod_datetime = datetime.fromtimestamp( 1722 getmtime( 1723 f"{home_dir}/.ncaa_stats_py/" + 1724 f"volleyball_{sport_id}/full_schedule/" 1725 + f"{season}_{formatted_level}_full_schedule.csv" 1726 ) 1727 ) 1728 else: 1729 file_mod_datetime = datetime.today() 1730 load_from_cache = False 1731 1732 now = datetime.today() 1733 1734 age = now - file_mod_datetime 1735 1736 if ( 1737 age.days > 1 and 1738 season >= now.year 1739 ): 1740 load_from_cache = False 1741 1742 if load_from_cache is True: 1743 return teams_df 1744 1745 teams_df = load_volleyball_teams() 1746 teams_df = teams_df[ 1747 (teams_df["season"] == season) & 1748 (teams_df["ncaa_division"] == ncaa_level) 1749 ] 1750 team_ids_arr = teams_df["team_id"].to_numpy() 1751 1752 for team_id in tqdm(team_ids_arr): 1753 temp_df = get_volleyball_team_schedule(team_id=team_id) 1754 schedule_df_arr.append(temp_df) 1755 1756 schedule_df = pd.concat(schedule_df_arr, ignore_index=True) 1757 schedule_df = schedule_df.drop_duplicates(subset="game_id", keep="first") 1758 schedule_df.to_csv( 1759 f"{home_dir}/.ncaa_stats_py/" 1760 + f"volleyball_{sport_id}/full_schedule/" 1761 + f"{season}_{formatted_level}_full_schedule.csv", 1762 index=False, 1763 ) 1764 return schedule_df
Retrieves a full volleyball schedule,
from an NCAA level ("I"
, "II"
, "III"
).
The way this is done is by going through every team in a division,
and parsing the schedules of every team in a division.
This function will take time when first run (30-60 minutes)! You have been warned.
Parameters
season
(int, mandatory):
Specifies the season you want a schedule from.
level
(int | str, mandatory):
Specifies the NCAA level/division you want a schedule from.
get_mens_data
(bool, optional):
Optional argument.
If you want men's volleyball data instead of women's volleyball data,
set this to True
.
Usage
from ncaa_stats_py.volleyball import get_full_volleyball_schedule
##############################################################################
# NOTE
# This function will easily take an hour or more
# to run for the first time in a given season and NCAA level!
# You have been warned!
##############################################################################
# Get the entire 2024 schedule for the 2024 women's D1 volleyball season.
print(
"Get the entire 2024 schedule " +
"for the 2024 women's D1 volleyball season."
)
df = get_full_volleyball_schedule(season=2024, level="I")
print(df)
# Get the entire 2024 schedule for the 2024 men's D1 volleyball season.
# print(
# "Get the entire 2024 schedule for " +
# "the 2024 men's D1 volleyball season."
# )
# df = get_full_volleyball_schedule(
# season=2024,
# level="I",
# get_mens_data=True
# )
# print(df)
# You can also input `level` as an integer.
# In addition, this and other functions cache data,
# so this should load very quickly
# compared to the first run of this function.
print("You can also input `level` as an integer.")
print(
"In addition, this and other functions cache data, "
+ "so this should load very quickly "
+ "compared to the first run of this function."
)
df = get_full_volleyball_schedule(season=2024, level=1)
print(df)
Returns
A pandas DataFrame
object with an NCAA volleyball
schedule for a specific season and level.
def get_volleyball_team_roster(team_id: int) -> pd.DataFrame:
    """
    Downloads and parses the roster page of a single
    NCAA volleyball team, identified by its team ID.

    The team is first looked up in the women's team list,
    and then in the men's team list if that lookup fails.
    Parsed rosters are cached as a CSV file in the user's home directory.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want a roster from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_team_roster

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the volleyball roster for the
    # 2024 Weber St. WVB team (D1, ID: 585347).
    print(
        "Get the volleyball roster for the " +
        "2024 Weber St. WVB team (D1, ID: 585347)."
    )
    df = get_volleyball_team_roster(585347)
    print(df)

    # Get the volleyball roster for the
    # 2023 Montevallo WVB team (D2, ID: 559599).
    print(
        "Get the volleyball roster for the " +
        "2023 Montevallo WVB team (D2, ID: 559599)."
    )
    df = get_volleyball_team_roster(559599)
    print(df)

    # Get the volleyball roster for the
    # 2022 Millsaps team (D3, ID: 539944).
    print(
        "Get the volleyball roster for the " +
        "2022 Millsaps team (D3, ID: 539944)."
    )
    df = get_volleyball_team_roster(539944)
    print(df)

    # Get the volleyball roster for the
    # 2021 Binghamton WVB team (D1, ID: 522893).
    print(
        "Get the volleyball roster for the " +
        "2021 Binghamton WVB team (D1, ID: 522893)."
    )
    df = get_volleyball_team_roster(522893)
    print(df)

    # Get the volleyball roster for the
    # 2020 Holy Family WVB team (D2, ID: 504760).
    print(
        "Get the volleyball roster for the " +
        "2020 Holy Family WVB team (D2, ID: 504760)."
    )
    df = get_volleyball_team_roster(504760)
    print(df)

    # Get the volleyball roster for the
    # 2019 Franciscan team (D3, ID: 482939).
    print(
        "Get the volleyball roster for the " +
        "2019 Franciscan team (D3, ID: 482939)."
    )
    df = get_volleyball_team_roster(482939)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the volleyball roster for the
    # 2024 Hawaii MVB team (D1, ID: 573674).
    print(
        "Get the volleyball roster for the " +
        "2024 Hawaii MVB team (D1, ID: 573674)."
    )
    df = get_volleyball_team_roster(573674)
    print(df)

    # Get the volleyball roster for the
    # 2023 Widener MVB team (D3, ID: 550860).
    print(
        "Get the volleyball roster for the " +
        "2023 Widener MVB team (D3, ID: 550860)."
    )
    df = get_volleyball_team_roster(550860)
    print(df)

    # Get the volleyball roster for the
    # 2022 Alderson Broaddus MVB team (D1, ID: 529880).
    print(
        "Get the volleyball roster for the " +
        "2022 Alderson Broaddus MVB team (D1, ID: 529880)."
    )
    df = get_volleyball_team_roster(529880)
    print(df)

    # Get the volleyball roster for the
    # 2021 Geneva MVB team (D3, ID: 508506).
    print(
        "Get the volleyball roster for the " +
        "2021 Geneva MVB team (D3, ID: 508506)."
    )
    df = get_volleyball_team_roster(508506)
    print(df)

    # Get the volleyball roster for the
    # 2020 Urbana MVB team (D1, ID: 484975).
    print(
        "Get the volleyball roster for the " +
        "2020 Urbana MVB team (D1, ID: 484975)."
    )
    df = get_volleyball_team_roster(484975)
    print(df)

    # Get the volleyball roster for the
    # 2019 Eastern Nazarene MVB team (D3, ID: 453876).
    print(
        "Get the volleyball roster for the " +
        "2019 Eastern Nazarene MVB team (D3, ID: 453876)."
    )
    df = get_volleyball_team_roster(453876)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with
    an NCAA volleyball team's roster for that season.

    Raises
    ----------
    `ValueError`:
        If the parsed roster contains a column
        this function does not know how to handle.
    """
    roster_url = f"https://stats.ncaa.org/teams/{team_id}/roster"
    home_dir = _format_folder_str(expanduser("~"))
    player_frames = []

    # The only columns the finished roster is allowed to contain,
    # in their final order.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "ncaa_division",
        "ncaa_division_formatted",
        "team_conference_name",
        "school_id",
        "school_name",
        "player_id",
        "player_jersey_num",
        "player_full_name",
        "player_first_name",
        "player_last_name",
        "player_class",
        "player_positions",
        "player_height_string",
        "player_weight",
        "player_hometown",
        "player_high_school",
        "player_G",
        "player_GS",
        "player_url",
    ]

    def _team_details(all_teams):
        # Pull the metadata of `team_id` out of a teams DataFrame.
        # `.iloc[0]` raises if the team is not in `all_teams`.
        matched = all_teams[all_teams["team_id"] == team_id]
        return (
            matched["season"].iloc[0],
            matched["ncaa_division"].iloc[0],
            matched["ncaa_division_formatted"].iloc[0],
            matched["team_conference_name"].iloc[0],
            matched["school_name"].iloc[0],
            int(matched["school_id"].iloc[0]),
        )

    # Any failure in the women's lookup means
    # "try the men's team list instead".
    try:
        details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        details = _team_details(load_volleyball_teams(get_mens_data=True))
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_name,
        school_id,
    ) = details

    # Create the cache folders one level at a time, parents first.
    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/rosters/",
    ):
        if not exists(folder):
            mkdir(folder)

    cache_file = (
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/rosters/"
        + f"{team_id}_roster.csv"
    )

    use_cache = exists(cache_file)
    if use_cache:
        cached_roster = pd.read_csv(cache_file)
        last_modified = datetime.fromtimestamp(getmtime(cache_file))
    else:
        last_modified = datetime.today()

    current_time = datetime.today()
    cache_age = current_time - last_modified

    # A cache that is two weeks old (or older) is refreshed,
    # but only for a season that has not ended yet.
    if cache_age.days >= 14 and season >= current_time.year:
        use_cache = False

    if use_cache:
        return cached_roster

    response = _get_webpage(url=roster_url)
    soup = BeautifulSoup(response.text, features="lxml")

    # Prefer the alt text of the school's logo. Fall back to the
    # link text, with the last space-separated word removed.
    try:
        school_name = soup.find(
            "div", {"class": "card"}
        ).find("img").get("alt")
    except Exception:
        school_name = soup.find("div", {"class": "card"}).find("a").text
        school_name = school_name.rsplit(" ", maxsplit=1)[0]

    selected_season = soup.find("select", {"id": "year_list"}).find(
        "option", {"selected": "selected"}
    )
    season_name = selected_season.text

    def _roster_table(css_class):
        # Locate the roster table by its CSS class, plus its header cells.
        found = soup.find("table", {"class": css_class})
        return found, found.find("thead").find_all("th")

    try:
        table, header_cells = _roster_table("dataTable small_font")
    except Exception:
        table, header_cells = _roster_table(
            "dataTable small_font no_padding"
        )
    table_headers = [cell.text for cell in header_cells]

    for row in table.find("tbody").find_all("tr"):
        cell_text = [cell.text for cell in row.find_all("td")]
        row_df = pd.DataFrame(
            data=[cell_text],
            columns=table_headers,
        )

        # The player's link looks like `/players/<player ID>`.
        player_href = row.find("a").get("href")
        row_df["player_url"] = f"https://stats.ncaa.org{player_href}"
        row_df["player_id"] = int(
            player_href.replace("/players", "").replace("/", "")
        )

        player_frames.append(row_df)

    roster_df = pd.concat(player_frames, ignore_index=True)
    roster_df = roster_df.infer_objects()

    # Values that are the same for every player on this roster.
    shared_values = {
        "season": season,
        "season_name": season_name,
        "ncaa_division": ncaa_division,
        "ncaa_division_formatted": ncaa_division_formatted,
        "team_conference_name": team_conference_name,
        "school_id": school_id,
        "school_name": school_name,
        "sport_id": sport_id,
    }
    for column_name, column_value in shared_values.items():
        roster_df[column_name] = column_value

    roster_df = roster_df.rename(
        columns={
            "GP": "player_G",
            "GS": "player_GS",
            "#": "player_jersey_num",
            "Name": "player_full_name",
            "Class": "player_class",
            "Position": "player_positions",
            "Height": "player_height_string",
            "Bats": "player_batting_hand",
            "Throws": "player_throwing_hand",
            "Hometown": "player_hometown",
            "High School": "player_high_school",
        }
    )

    # Everything before the first space is the first name,
    # and everything after it is the last name.
    split_names = roster_df["player_full_name"].str.split(
        " ", n=1, expand=True
    )
    roster_df[["player_first_name", "player_last_name"]] = split_names
    roster_df = roster_df.infer_objects()

    # Refuse to silently drop a column this function does not know about.
    for i in roster_df.columns:
        if i not in stat_columns:
            raise ValueError(
                f"Unhandled column name {i}"
            )

    roster_df = roster_df.infer_objects().reindex(columns=stat_columns)

    roster_df.to_csv(cache_file, index=False)
    return roster_df
Retrieves a volleyball team's roster from a given team ID.
Parameters
team_id
(int, mandatory):
Required argument.
Specifies the team you want a roster from.
This is separate from a school ID, which identifies the institution.
A team ID should be unique to a school, and a season.
Usage
from ncaa_stats_py.volleyball import get_volleyball_team_roster
########################################
# Women's volleyball #
########################################
# Get the volleyball roster for the
# 2024 Weber St. WVB team (D1, ID: 585347).
print(
"Get the volleyball roster for the " +
"2024 Weber St. WVB team (D1, ID: 585347)."
)
df = get_volleyball_team_roster(585347)
print(df)
# Get the volleyball roster for the
# 2023 Montevallo WVB team (D2, ID: 559599).
print(
"Get the volleyball roster for the " +
"2023 Montevallo WVB team (D2, ID: 559599)."
)
df = get_volleyball_team_roster(559599)
print(df)
# Get the volleyball roster for the
# 2022 Millsaps team (D3, ID: 539944).
print(
"Get the volleyball roster for the " +
"2022 Millsaps team (D3, ID: 539944)."
)
df = get_volleyball_team_roster(539944)
print(df)
# Get the volleyball roster for the
# 2021 Binghamton WVB team (D1, ID: 522893).
print(
"Get the volleyball roster for the " +
"2021 Binghamton WVB team (D1, ID: 522893)."
)
df = get_volleyball_team_roster(522893)
print(df)
# Get the volleyball roster for the
# 2020 Holy Family WVB team (D2, ID: 504760).
print(
"Get the volleyball roster for the " +
"2020 Holy Family WVB team (D2, ID: 504760)."
)
df = get_volleyball_team_roster(504760)
print(df)
# Get the volleyball roster for the
# 2019 Franciscan team (D3, ID: 482939).
print(
"Get the volleyball roster for the " +
"2019 Franciscan team (D3, ID: 482939)."
)
df = get_volleyball_team_roster(482939)
print(df)
########################################
# Men's volleyball #
########################################
# Get the volleyball roster for the
# 2024 Hawaii MVB team (D1, ID: 573674).
print(
"Get the volleyball roster for the " +
"2024 Hawaii MVB team (D1, ID: 573674)."
)
df = get_volleyball_team_roster(573674)
print(df)
# Get the volleyball roster for the
# 2023 Widener MVB team (D3, ID: 550860).
print(
"Get the volleyball roster for the " +
"2023 Widener MVB team (D3, ID: 550860)."
)
df = get_volleyball_team_roster(550860)
print(df)
# Get the volleyball roster for the
# 2022 Alderson Broaddus MVB team (D1, ID: 529880).
print(
"Get the volleyball roster for the " +
"2022 Alderson Broaddus MVB team (D1, ID: 529880)."
)
df = get_volleyball_team_roster(529880)
print(df)
# Get the volleyball roster for the
# 2021 Geneva MVB team (D3, ID: 508506).
print(
"Get the volleyball roster for the " +
"2021 Geneva MVB team (D3, ID: 508506)."
)
df = get_volleyball_team_roster(508506)
print(df)
# Get the volleyball roster for the
# 2020 Urbana MVB team (D1, ID: 484975).
print(
"Get the volleyball roster for the " +
"2020 Urbana MVB team (D1, ID: 484975)."
)
df = get_volleyball_team_roster(484975)
print(df)
# Get the volleyball roster for the
# 2019 Eastern Nazarene MVB team (D3, ID: 453876).
print(
"Get the volleyball roster for the " +
"2019 Eastern Nazarene MVB team (D3, ID: 453876)."
)
df = get_volleyball_team_roster(453876)
print(df)
Returns
A pandas DataFrame
object with
an NCAA volleyball team's roster for that season.
def get_volleyball_player_season_stats(
    team_id: int,
) -> pd.DataFrame:
    """
    Given a team ID, this function retrieves and parses
    the season stats for all of the players in a given volleyball team.

    Parameters
    ----------
    `team_id` (int, mandatory):
        Required argument.
        Specifies the team you want volleyball stats from.
        This is separate from a school ID, which identifies the institution.
        A team ID should be unique to a school, and a season.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_player_season_stats


    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the season stats for the
    # 2024 Ohio St. team (D1, ID: 585398).
    print(
        "Get the season stats for the " +
        "2024 Ohio St. WVB team (D1, ID: 585398)."
    )
    df = get_volleyball_player_season_stats(585398)
    print(df)

    # Get the season stats for the
    # 2023 Emory & Henry WVB team (D2, ID: 559738).
    print(
        "Get the season stats for the " +
        "2023 Emory & Henry WVB team (D2, ID: 559738)."
    )
    df = get_volleyball_player_season_stats(559738)
    print(df)

    # Get the season stats for the
    # 2022 Fredonia WVB team (D3, ID: 539881).
    print(
        "Get the season stats for the " +
        "2022 Fredonia WVB team (D3, ID: 539881)."
    )
    df = get_volleyball_player_season_stats(539881)
    print(df)

    # Get the season stats for the
    # 2021 Oklahoma WVB team (D1, ID: 523163).
    print(
        "Get the season stats for the " +
        "2021 Oklahoma WVB team (D1, ID: 523163)."
    )
    df = get_volleyball_player_season_stats(523163)
    print(df)

    # Get the season stats for the
    # 2020 North Greenville WVB team (D2, ID: 504820).
    print(
        "Get the season stats for the " +
        "2020 North Greenville WVB team (D2, ID: 504820)."
    )
    df = get_volleyball_player_season_stats(504820)
    print(df)

    # Get the season stats for the
    # 2019 SUNY Potsdam team (D3, ID: 482714).
    print(
        "Get the season stats for the " +
        "2019 SUNY Potsdam team (D3, ID: 482714)."
    )
    df = get_volleyball_player_season_stats(482714)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the season stats for the
    # 2024 Lees-McRae MVB team (D1, ID: 573699).
    print(
        "Get the season stats for the " +
        "2024 Lees-McRae MVB team (D1, ID: 573699)."
    )
    df = get_volleyball_player_season_stats(573699)
    print(df)

    # Get the season stats for the
    # 2023 Elizabethtown MVB team (D3, ID: 550871).
    print(
        "Get the season stats for the " +
        "2023 Elizabethtown MVB team (D3, ID: 550871)."
    )
    df = get_volleyball_player_season_stats(550871)
    print(df)

    # Get the season stats for the
    # 2022 Limestone MVB team (D1, ID: 529884).
    print(
        "Get the season stats for the " +
        "2022 Limestone MVB team (D1, ID: 529884)."
    )
    df = get_volleyball_player_season_stats(529884)
    print(df)

    # Get the season stats for the
    # 2021 Maranatha Baptist MVB team (D3, ID: 508471).
    print(
        "Get the season stats for the " +
        "2021 Maranatha Baptist MVB team (D3, ID: 508471)."
    )
    df = get_volleyball_player_season_stats(508471)
    print(df)

    # Get the season stats for the
    # 2020 CUI MVB team (D1, ID: 484972).
    print(
        "Get the season stats for the " +
        "2020 CUI MVB team (D1, ID: 484972)."
    )
    df = get_volleyball_player_season_stats(484972)
    print(df)

    # Get the season stats for the
    # 2019 SUNY New Paltz MVB team (D3, ID: 453851).
    print(
        "Get the season stats for the " +
        "2019 SUNY New Paltz MVB team (D3, ID: 453851)."
    )
    df = get_volleyball_player_season_stats(453851)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the season stats for
    all players with a given NCAA volleyball team.

    Raises
    ----------
    `ValueError`:
        If the parsed stats table contains a column
        this function does not know how to handle.
    """
    stats_df_arr = []

    # The columns of the finished DataFrame, in their final order.
    stat_columns = [
        "season",
        "season_name",
        "sport_id",
        "team_id",
        "team_conference_name",
        "school_id",
        "school_name",
        "ncaa_division",
        "ncaa_division_formatted",
        "player_id",
        "player_jersey_number",
        "player_last_name",
        "player_first_name",
        "player_full_name",
        "player_class",
        "player_position",
        "player_height",
        "GP",
        "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    def _team_details(all_teams):
        # Pull the metadata of `team_id` out of a teams DataFrame.
        # `.iloc[0]` raises if the team is not in `all_teams`.
        matched = all_teams[all_teams["team_id"] == team_id]
        return (
            matched["season"].iloc[0],
            int(matched["ncaa_division"].iloc[0]),
            matched["ncaa_division_formatted"].iloc[0],
            matched["team_conference_name"].iloc[0],
            matched["school_name"].iloc[0],
            int(matched["school_id"].iloc[0]),
        )

    # Any failure in the women's lookup means
    # "try the men's team list instead".
    try:
        details = _team_details(load_volleyball_teams())
        sport_id = "WVB"
    except Exception:
        details = _team_details(load_volleyball_teams(get_mens_data=True))
        sport_id = "MVB"

    (
        season,
        ncaa_division,
        ncaa_division_formatted,
        team_conference_name,
        school_name,
        school_id,
    ) = details

    home_dir = expanduser("~")
    home_dir = _format_folder_str(home_dir)

    url = f"https://stats.ncaa.org/teams/{team_id}/season_to_date_stats"

    cache_dir = (
        f"{home_dir}/.ncaa_stats_py/"
        + f"volleyball_{sport_id}/player_season_stats/"
    )

    # Create the cache folders one level at a time, parents first.
    for folder in (
        f"{home_dir}/.ncaa_stats_py/",
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/",
        cache_dir,
    ):
        if not exists(folder):
            mkdir(folder)

    cache_file = (
        cache_dir
        + f"{season:00d}_{school_id:00d}_player_season_stats.csv"
    )

    now = datetime.today()

    if exists(cache_file):
        age = now - datetime.fromtimestamp(getmtime(cache_file))
        # Only a current (or future) season can still change,
        # so older seasons are always served from the cache.
        cache_is_stale = age.days > 1 and season >= now.year
        if not cache_is_stale:
            return pd.read_csv(cache_file)

    response = _get_webpage(url=url)

    soup = BeautifulSoup(response.text, features="lxml")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # The season is re-derived from the season name on the page.
    # Men's seasons use the first two and last two characters
    # (e.g. "2023-24" becomes 2024); women's seasons use the first four.
    if sport_id == "MVB":
        season = f"{season_name[0:2]}{season_name[-2:]}"
        season = int(season)
    elif sport_id == "WVB":
        season = f"{season_name[0:4]}"
        season = int(season)

    table_data = soup.find(
        "table",
        {"id": "stat_grid", "class": "small_font dataTable table-bordered"},
    )

    temp_table_headers = table_data.find("thead").find("tr").find_all("th")
    table_headers = [x.text for x in temp_table_headers]

    del temp_table_headers

    t_rows = table_data.find("tbody").find_all("tr", {"class": "text"})
    for t in t_rows:
        p_last = ""
        p_first = ""
        t_cells = t.find_all("td")

        # Skip the team totals row(s); only players are wanted.
        if "team" in t_cells[1].text.lower():
            continue

        # The sortable name is comma-separated with the first name last
        # (e.g. "Last,First" or "Last,Suffix,First").
        # Count the comma-separated parts, not the characters. The old
        # character-length check never matched a real name, which left
        # both name columns blank.
        # A missing `data-order` attribute leaves both names blank.
        p_sortable = t_cells[1].get("data-order") or ""
        name_parts = [x.strip() for x in p_sortable.split(",")]
        if len(name_parts) >= 2:
            p_first = name_parts[-1]
            p_last = " ".join(name_parts[:-1])

        t_cells = [x.text.strip() for x in t_cells]
        # Remove thousands separators (e.g. "1,234") from the cells.
        t_cells = [x.replace(",", "") for x in t_cells]

        temp_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        # The player's link looks like `/players/<player ID>`.
        player_id = t.find("a").get("href")
        player_id = player_id.replace("/players", "").replace("/", "")
        player_id = int(player_id)

        temp_df["player_id"] = player_id
        temp_df["player_last_name"] = p_last.strip()
        temp_df["player_first_name"] = p_first.strip()

        stats_df_arr.append(temp_df)
        del temp_df

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("", None)

    stats_df["season"] = season
    stats_df["season_name"] = season_name
    stats_df["school_id"] = school_id
    stats_df["school_name"] = school_name
    stats_df["ncaa_division"] = ncaa_division
    stats_df["ncaa_division_formatted"] = ncaa_division_formatted
    stats_df["team_conference_name"] = team_conference_name
    stats_df["sport_id"] = sport_id
    stats_df["team_id"] = team_id

    stats_df = stats_df.infer_objects()

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "Hit Pct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    for i in stats_df.columns:
        if i in stat_columns:
            pass
        elif "Attend" in str(i):
            # Attendance columns are tolerated here, and are dropped
            # by the `reindex()` call below. The old check tested the
            # constant `stat_columns` list and could never be true.
            pass
        else:
            raise ValueError(
                f"Unhandled column name {i}"
            )
    stats_df = stats_df.reindex(columns=stat_columns)

    # Stats missing from the page are treated as zero.
    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "GS": "uint16",
            "sets_played": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "TRP_DBL": "uint16",
            "serve_attempts": "uint16",
            "total_blocks": "float32",
            "DBL_DBL": "uint16",
            "school_id": "uint32",
        }
    )

    stats_df["hit%"] = stats_df["hit%"].round(3)
    stats_df["points"] = stats_df["points"].round(1)

    # NOTE: `season` may have been re-derived from the page above,
    # so the file name is rebuilt here instead of reusing `cache_file`.
    stats_df.to_csv(
        cache_dir
        + f"{season:00d}_{school_id:00d}_player_season_stats.csv",
        index=False,
    )

    return stats_df
Given a team ID, this function retrieves and parses the season stats for all of the players in a given volleyball team.
Parameters
team_id
(int, mandatory):
Required argument.
Specifies the team you want volleyball stats from.
This is separate from a school ID, which identifies the institution.
A team ID should be unique to a school, and a season.
Usage
from ncaa_stats_py.volleyball import get_volleyball_player_season_stats
########################################
# Women's volleyball #
########################################
# Get the season stats for the
# 2024 Ohio St. team (D1, ID: 585398).
print(
"Get the season stats for the " +
"2024 Ohio St. WVB team (D1, ID: 585398)."
)
df = get_volleyball_player_season_stats(585398)
print(df)
# Get the season stats for the
# 2023 Emory & Henry WVB team (D2, ID: 559738).
print(
"Get the season stats for the " +
"2023 Emory & Henry WVB team (D2, ID: 559738)."
)
df = get_volleyball_player_season_stats(559738)
print(df)
# Get the season stats for the
# 2022 Fredonia WVB team (D3, ID: 539881).
print(
"Get the season stats for the " +
"2022 Fredonia WVB team (D3, ID: 539881)."
)
df = get_volleyball_player_season_stats(539881)
print(df)
# Get the season stats for the
# 2021 Oklahoma WVB team (D1, ID: 523163).
print(
"Get the season stats for the " +
"2021 Oklahoma WVB team (D1, ID: 523163)."
)
df = get_volleyball_player_season_stats(523163)
print(df)
# Get the season stats for the
# 2020 North Greenville WVB team (D2, ID: 504820).
print(
"Get the season stats for the " +
"2020 North Greenville WVB team (D2, ID: 504820)."
)
df = get_volleyball_player_season_stats(504820)
print(df)
# Get the season stats for the
# 2019 SUNY Potsdam team (D3, ID: 482714).
print(
"Get the season stats for the " +
"2019 SUNY Potsdam team (D3, ID: 482714)."
)
df = get_volleyball_player_season_stats(482714)
print(df)
########################################
# Men's volleyball #
########################################
# Get the season stats for the
# 2024 Lees-McRae MVB team (D1, ID: 573699).
print(
"Get the season stats for the " +
"2024 Lees-McRae MVB team (D1, ID: 573699)."
)
df = get_volleyball_player_season_stats(573699)
print(df)
# Get the season stats for the
# 2023 Elizabethtown MVB team (D3, ID: 550871).
print(
"Get the season stats for the " +
"2023 Elizabethtown MVB team (D3, ID: 550871)."
)
df = get_volleyball_player_season_stats(550871)
print(df)
# Get the season stats for the
# 2022 Limestone MVB team (D1, ID: 529884).
print(
"Get the season stats for the " +
"2022 Limestone MVB team (D1, ID: 529884)."
)
df = get_volleyball_player_season_stats(529884)
print(df)
# Get the season stats for the
# 2021 Maranatha Baptist MVB team (D3, ID: 508471).
print(
"Get the season stats for the " +
"2021 Maranatha Baptist MVB team (D3, ID: 508471)."
)
df = get_volleyball_player_season_stats(508471)
print(df)
# Get the season stats for the
# 2020 CUI MVB team (D1, ID: 484972).
print(
"Get the season stats for the " +
"2020 CUI MVB team (D1, ID: 484972)."
)
df = get_volleyball_player_season_stats(484972)
print(df)
# Get the season stats for the
# 2019 SUNY New Paltz MVB team (D3, ID: 453851).
print(
"Get the season stats for the " +
"2019 SUNY New Paltz MVB team (D3, ID: 453851)."
)
df = get_volleyball_player_season_stats(453851)
print(df)
Returns
A pandas DataFrame
object with the season stats for
all players on a given NCAA volleyball team.
def get_volleyball_player_game_stats(
    player_id: int
) -> pd.DataFrame:
    """
    Given a valid player ID,
    this function retrieves the stats for this player at a game level.

    Parameters
    ----------
    `player_id` (int, mandatory):
        Required argument.
        Specifies the player you want game stats from.
        A season is not passed in. The season, and whether this is a
        men's (MVB) or women's (WVB) volleyball player,
        are both read from the player's page.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import (
        get_volleyball_player_game_stats
    )

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
    print(
        "Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)."
    )
    df = get_volleyball_player_game_stats(player_id=8432514)
    print(df)

    # Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
    print(
        "Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)."
    )
    df = get_volleyball_player_game_stats(player_id=8145555)
    print(df)

    # Get the game stats of Lauren Gips in 2022 (Babson, D3).
    print(
        "Get the game stats of Lauren Gips in 2022 (Babson, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7876821)
    print(df)

    # Get the game stats of Rhett Robinson in 2021 (North Texas).
    print(
        "Get the game stats of Rhett Robinson in 2021 (North Texas)."
    )
    df = get_volleyball_player_game_stats(player_id=7234089)
    print(df)

    # Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
    print(
        "Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2)."
    )
    df = get_volleyball_player_game_stats(player_id=6822147)
    print(df)

    # Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
    print(
        "Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=6449807)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
    print(
        "Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial)."
    )
    df = get_volleyball_player_game_stats(player_id=8253076)
    print(df)

    # Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
    print(
        "Get the game stats of Ray Rodriguez in 2023 (Lehman, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7883459)
    print(df)

    # Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
    print(
        "Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus)."
    )
    df = get_volleyball_player_game_stats(player_id=7413984)
    print(df)

    # Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
    print(
        "Get the game stats of Tyler Anderson in 2021 (Alvernia, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=7118023)
    print(df)

    # Get the game stats of Jaylen Jasper in 2020 (Stanford).
    print(
        "Get the game stats of Jaylen Jasper in 2020 (Stanford)."
    )
    df = get_volleyball_player_game_stats(player_id=6357146)
    print(df)

    # Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
    print(
        "Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3)."
    )
    df = get_volleyball_player_game_stats(player_id=5816111)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with a player's game logs
    in a given season.

    Raises
    ----------
    `ValueError`:
        Raised when the parsed stats table contains a column
        this function does not know how to handle.

    Notes
    ----------
    Results are cached as a CSV file in
    `~/.ncaa_stats_py/volleyball_{sport_id}/player_game_stats/`.
    A cached file that is less than one day old is returned as-is,
    without downloading the player's page again.
    """
    # Local import: this module only imports `mkdir` from `os`,
    # and `makedirs` creates the full folder chain in one call.
    from os import makedirs

    stat_columns = [
        "season",
        "sport_id",
        "game_id",
        "game_num",
        "player_id",
        "date",
        "opponent",
        "Result",
        "team_sets_won",
        "opponent_sets_won",
        "GP",
        # "GS",
        "sets_played",
        "MS",
        "kills",
        "errors",
        "total_attacks",
        "hit%",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "serve_attempts",
        "DBL_DBL",
        "TRP_DBL",
    ]

    home_dir = _format_folder_str(expanduser("~"))
    url = f"https://stats.ncaa.org/players/{player_id}"

    # The sport is not known until the player's page has been parsed,
    # so both the MVB and the WVB cache folders are checked.
    # If a file exists in both, the WVB file is the one that is used.
    games_df = None
    file_mod_datetime = None
    for cached_sport_id in ("MVB", "WVB"):
        cache_dir = (
            f"{home_dir}/.ncaa_stats_py/volleyball_{cached_sport_id}/"
            + "player_game_stats/"
        )
        makedirs(cache_dir, exist_ok=True)
        cache_file = cache_dir + f"{player_id}_player_game_stats.csv"

        if exists(cache_file):
            games_df = pd.read_csv(cache_file).infer_objects()
            file_mod_datetime = datetime.fromtimestamp(
                getmtime(cache_file)
            )
        else:
            logging.info(
                f"Could not find a {cached_sport_id} "
                + "player game stats file"
            )

    if games_df is not None:
        # Only reuse a cached file that is less than a day old.
        cache_age = datetime.today() - file_mod_datetime
        if cache_age.days < 1:
            return games_df

    response = _get_webpage(url=url)
    soup = BeautifulSoup(response.text, features="lxml")

    # Work out the sport from the links in the page's navigation tabs.
    sport_id = ""
    table_navigation = soup.find(
        "ul", {"class": "nav nav-tabs padding-nav"}
    )
    for nav_link in table_navigation.find_all("a"):
        # A link without an `href` is treated as an empty string.
        url_str = (nav_link.get("href") or "").upper()
        if "MVB" in url_str:
            sport_id = "MVB"
        elif "WVB" in url_str:
            sport_id = "WVB"

    if not sport_id:
        # This should **never** be the case IRL,
        # but in case something weird happened and
        # we can't make a determination of if this is a
        # MVB player or a WVB player, and we somehow haven't
        # crashed by this point, set the sport ID to
        # "MVB" by default so we don't have other weirdness.
        logging.error(
            f"Could not determine if player ID {player_id} " +
            "is a MVB or a WVB player. " +
            "Because this cannot be determined, " +
            "we will make the automatic assumption that this is a MVB player."
        )
        sport_id = "MVB"

    # The game log is the second table with this class on the page.
    table_data = soup.find_all(
        "table", {"class": "small_font dataTable table-bordered"}
    )[1]
    table_headers = [
        x.text for x in table_data.find("thead").find("tr").find_all("th")
    ]
    table_rows = table_data.find("tbody").find_all("tr")

    season_name = (
        soup.find("select", {"id": "year_list"})
        .find("option", {"selected": "selected"})
        .text
    )

    # NOTE(review): presumably `season_name` looks like "2023-24".
    # MVB uses the first two and last two characters,
    # WVB uses the first four characters. Confirm against the site.
    if sport_id == "MVB":
        season = int(f"{season_name[0:2]}{season_name[-2:]}")
    else:
        season = int(f"{season_name[0:4]}")

    stats_df_arr = []

    for t in table_rows:
        row_id = t.get("id")
        # Rows without "contest" in their ID (or without an ID at all)
        # are skipped.
        if row_id is None or "contest" not in row_id:
            continue

        td_tags = t.find_all("td")
        t_cells = [x.text.strip() for x in td_tags]

        # The date cell may carry a number in parentheses,
        # which is stored as the game number.
        game_num = 1
        g_date = t_cells[0]
        if "(" in g_date:
            g_date, game_num = g_date.split("(")
            g_date = g_date.strip()
            game_num = int(game_num.replace(")", ""))

        # The opponent's team ID is intentionally not parsed here.
        # It was previously written to a data frame that did not exist
        # yet for this row, so it never reached the output, and
        # "opponent_team_id" is not one of the columns in `stat_columns`.
        try:
            opp_team_name = td_tags[1].find_all("img")[1].get("alt")
        except (AttributeError, IndexError):
            logging.info(
                "Couldn't find the opposition team name "
                + "for this row from an image element. "
                + "Attempting a backup method"
            )
            opp_team_name = t_cells[1]

        if opp_team_name is None or opp_team_name == "Defensive Stats":
            opp_team_name = t_cells[1]

        if "@" in opp_team_name:
            opp_team_name = opp_team_name.split("@")[0]

        # Remove the W/L/T letters so only the set score is left.
        # Because every "l" is removed, a "canceled" game
        # shows up here as "canceed".
        result_str = (
            t_cells[2].lower()
            .replace("w", "")
            .replace("l", "")
            .replace("t", "")
        )
        if result_str in ("ppd", "", "canceed"):
            continue

        result_str = result_str.replace("\n", "").replace("*", "")
        tm_score, opp_score = result_str.split("-")

        t_cells = [
            x.replace("*", "").replace("/", "").replace("\\", "")
            for x in t_cells
        ]
        row_df = pd.DataFrame(
            data=[t_cells],
            columns=table_headers,
        )

        if "(" in opp_score:
            # Drop anything in parentheses after the score.
            # It is not stored, because an extra column would be
            # rejected by the column check at the end of this function.
            opp_score = opp_score.split("(")[0]

        row_df["team_sets_won"] = int(tm_score)
        row_df["opponent_sets_won"] = int(opp_score)

        try:
            g_id = td_tags[2].find("a").get("href")
            g_id = g_id.replace("/contests", "")
            g_id = g_id.replace("/box_score", "")
            g_id = g_id.replace("/", "")
            row_df["game_id"] = int(g_id)
        except AttributeError:
            # No link in the result cell, so there is no game ID.
            logging.warning(
                f"Could not find a game ID for a {g_date} game " +
                f"against {opp_team_name}."
            )
            row_df["game_id"] = None

        row_df.rename(
            columns={"Opponent": "opponent", "Date": "date"},
            inplace=True,
        )
        row_df["date"] = datetime.strptime(g_date, "%m/%d/%Y").date()
        row_df["game_num"] = game_num

        if len(opp_team_name) > 0:
            row_df["opponent"] = opp_team_name

        duplicate_cols = row_df.columns[row_df.columns.duplicated()]
        row_df.drop(columns=duplicate_cols, inplace=True)

        stats_df_arr.append(row_df)

    stats_df = pd.concat(stats_df_arr, ignore_index=True)
    stats_df = stats_df.replace("/", "", regex=True)
    stats_df = stats_df.replace("", np.nan)
    stats_df = stats_df.infer_objects()

    stats_df["player_id"] = player_id
    stats_df["sport_id"] = sport_id
    stats_df["season"] = season

    stats_df.rename(
        columns={
            "#": "player_jersey_number",
            "Player": "player_full_name",
            "Yr": "player_class",
            "Pos": "player_position",
            "Ht": "player_height",
            "S": "sets_played",
            "Kills": "kills",
            "Errors": "errors",
            "Total Attacks": "total_attacks",
            "TotalAttacks": "total_attacks",
            "Hit Pct": "hit%",
            "HitPct": "hit%",
            "Assists": "assists",
            "Aces": "aces",
            "SErr": "serve_errors",
            "Digs": "digs",
            "RetAtt": "return_attacks",
            "RErr": "return_errors",
            "Block Solos": "solo_blocks",
            "BlockSolos": "solo_blocks",
            "Block Assists": "assisted_blocks",
            "BlockAssists": "assisted_blocks",
            "BErr": "block_errors",
            "PTS": "points",
            "Trpl Dbl": "TRP_DBL",
            "Dbl Dbl": "DBL_DBL",
            "TB": "total_blocks",
            "SrvAtt": "serve_attempts",
        },
        inplace=True,
    )

    # These stats *don't* exist in every season,
    # so they are added as empty columns when they are missing.
    if "serve_attempts" not in stats_df.columns:
        stats_df["serve_attempts"] = None

    if "return_attacks" not in stats_df.columns:
        stats_df["return_attacks"] = None

    stats_df = stats_df.infer_objects().fillna(0)
    stats_df = stats_df.astype(
        {
            "GP": "uint16",
            "sets_played": "uint16",
            # "MS": "uint16",
            "kills": "uint16",
            "errors": "uint16",
            "total_attacks": "uint16",
            "hit%": "float32",
            "assists": "uint16",
            "aces": "uint16",
            "serve_errors": "uint16",
            "digs": "uint16",
            "return_attacks": "uint16",
            "return_errors": "uint16",
            "solo_blocks": "uint16",
            "assisted_blocks": "uint16",
            "block_errors": "uint16",
            # "total_blocks": "uint16",
            "points": "float32",
            "BHE": "uint16",
            "serve_attempts": "uint16",
            # "DBL_DBL": "uint8",
            # "TRP_DBL": "uint8",
        }
    )

    # Recalculate total blocks for rows with any blocks:
    # a solo block counts as 1, an assisted block counts as 0.5.
    stats_df.loc[
        (stats_df["solo_blocks"] > 0) | (stats_df["assisted_blocks"] > 0),
        "total_blocks"
    ] = (
        stats_df["solo_blocks"] +
        (stats_df["assisted_blocks"] / 2)
    )
    stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32")

    # Columns used to calculate double doubles and triple doubles.
    # Credits:
    # https://en.wikipedia.org/wiki/Double_(volleyball)
    # https://stackoverflow.com/a/54381918
    double_stats_arr = [
        "aces",
        "kills",
        "total_blocks",
        "digs",
        "assists",
    ]
    # Number of the above stats that are 10 or higher, per game.
    double_digit_count = (stats_df[double_stats_arr] >= 10).sum(1)
    stats_df["DBL_DBL"] = (double_digit_count >= 2).astype(int)
    stats_df["TRP_DBL"] = (double_digit_count >= 3).astype(int)

    for col_name in stats_df.columns:
        # Any column with "Attend" in its name is tolerated here,
        # and is dropped by the `reindex()` call below.
        if col_name in stat_columns or "Attend" in col_name:
            continue
        raise ValueError(
            f"Unhandled column name {col_name}"
        )
    stats_df = stats_df.reindex(columns=stat_columns)

    stats_df.to_csv(
        f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/"
        + "player_game_stats/"
        + f"{player_id}_player_game_stats.csv",
        index=False,
    )
    return stats_df
Given a valid player ID, this function retrieves the stats for this player at a game level.
Parameters
player_id
(int, mandatory):
Required argument.
Specifies the player you want game stats from.
season
(not an argument):
This function does not accept a season.
The season is read from the player's page.
Usage
from ncaa_stats_py.volleyball import (
get_volleyball_player_game_stats
)
########################################
# Women's volleyball #
########################################
# Get the game stats of Zuzanna Wieczorek in 2024 (Idaho).
print(
"Get the game stats of Zuzanna Wieczorek in 2024 (Idaho)."
)
df = get_volleyball_player_game_stats(player_id=8432514)
print(df)
# Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2).
print(
"Get the game stats of Jalyn Stevenson in 2023 (Washburn, D2)."
)
df = get_volleyball_player_game_stats(player_id=8145555)
print(df)
# Get the game stats of Lauren Gips in 2022 (Babson, D3).
print(
"Get the game stats of Lauren Gips in 2022 (Babson, D3)."
)
df = get_volleyball_player_game_stats(player_id=7876821)
print(df)
# Get the game stats of Rhett Robinson in 2021 (North Texas).
print(
"Get the game stats of Rhett Robinson in 2021 (North Texas)."
)
df = get_volleyball_player_game_stats(player_id=7234089)
print(df)
# Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2).
print(
"Get the game stats of Audrey Keenan in 2020 (Florida Tech, D2)."
)
df = get_volleyball_player_game_stats(player_id=6822147)
print(df)
# Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3).
print(
"Get the game stats of Ta'korya Green in 2019 (Oglethorpe, D3)."
)
df = get_volleyball_player_game_stats(player_id=6449807)
print(df)
########################################
# Men's volleyball #
########################################
# Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial).
print(
"Get the game stats of Matthew Gentry in 2024 (Lincoln Memorial)."
)
df = get_volleyball_player_game_stats(player_id=8253076)
print(df)
# Get the game stats of Ray Rodriguez in 2023 (Lehman, D3).
print(
"Get the game stats of Ray Rodriguez in 2023 (Lehman, D3)."
)
df = get_volleyball_player_game_stats(player_id=7883459)
print(df)
# Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus).
print(
"Get the game stats of Gannon Chinen in 2022 (Alderson Broaddus)."
)
df = get_volleyball_player_game_stats(player_id=7413984)
print(df)
# Get the game stats of Tyler Anderson in 2021 (Alvernia, D3).
print(
"Get the game stats of Tyler Anderson in 2021 (Alvernia, D3)."
)
df = get_volleyball_player_game_stats(player_id=7118023)
print(df)
# Get the game stats of Jaylen Jasper in 2020 (Stanford).
print(
"Get the game stats of Jaylen Jasper in 2020 (Stanford)."
)
df = get_volleyball_player_game_stats(player_id=6357146)
print(df)
# Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3).
print(
"Get the game stats of Brian Sheddy in 2019 (Penn St.-Altoona, D3)."
)
df = get_volleyball_player_game_stats(player_id=5816111)
print(df)
Returns
A pandas DataFrame
object with a player's game logs
in a given season.
3195def get_volleyball_game_player_stats(game_id: int) -> pd.DataFrame: 3196 """ 3197 Given a valid game ID, 3198 this function will attempt to get all player game stats, if possible. 3199 3200 Parameters 3201 ---------- 3202 `game_id` (int, mandatory): 3203 Required argument. 3204 Specifies the game you want player game stats from. 3205 3206 Usage 3207 ---------- 3208 ```python 3209 3210 from ncaa_stats_py.volleyball import get_volleyball_game_player_stats 3211 3212 ######################################## 3213 # Women's volleyball # 3214 ######################################## 3215 3216 # Get the game stats of the 3217 # 2024 NCAA D1 Women's Volleyball National Championship game. 3218 print( 3219 "Get the game stats of the " 3220 + "2024 NCAA D1 Women's volleyball National Championship game" 3221 ) 3222 df = get_volleyball_game_player_stats(6080706) 3223 print(df) 3224 3225 # Get the game stats of a September 14th, 2024 3226 # game between the UNC Asheville Bulldogs and the Iona Gaels. 3227 print( 3228 "Get the game stats of a September 14th, 2024 " 3229 + "game between the UNC Asheville Bulldogs " 3230 + "and the Iona Gaels" 3231 ) 3232 df = get_volleyball_game_player_stats(5670752) 3233 print(df) 3234 3235 # Get the game stats of a September 16th, 2023 3236 # game between the Saginaw Valley Cardinals 3237 # and the Lake Superior St. Lakes. 3238 print( 3239 "Get the game stats of a September 16th, 2023 " 3240 + "game between the Saginaw Valley Cardinals " 3241 + "and the Lake Superior St. Lakes." 3242 ) 3243 df = get_volleyball_game_player_stats(3243563) 3244 print(df) 3245 3246 # Get the game stats of a October 15th, 2022 3247 # game between the Macalester Scots 3248 # and the St. Scholastica Saints (D3). 3249 print( 3250 "Get the game stats of a October 15th, 2022 " 3251 + "game between the Macalester Scots and " 3252 + "the St. Scholastica Saints (D3)." 
3253 ) 3254 df = get_volleyball_game_player_stats(2307684) 3255 print(df) 3256 3257 # Get the game stats of a October 24th, 2021 3258 # game between the Howard Bison and the UMES Hawks. 3259 print( 3260 "Get the game stats of a October 24th, 2021 " 3261 + "game between the Howard Bison and the UMES Hawks." 3262 ) 3263 df = get_volleyball_game_player_stats(2113627) 3264 print(df) 3265 3266 # Get the game stats of a March 5th, 2021 3267 # game between the Notre Dame (OH) Falcons 3268 # and the Alderson Broaddus Battlers. 3269 print( 3270 "Get the game stats of a March 5th, 2021 " 3271 + "game between the Notre Dame (OH) Falcons " 3272 + "and the Alderson Broaddus Battlers." 3273 ) 3274 df = get_volleyball_game_player_stats(2005442) 3275 print(df) 3276 3277 # Get the game stats of a November 14th, 2019 3278 # game between the Wittenberg Tigers 3279 # and the Muskingum Fighting Muskies (D3). 3280 print( 3281 "Get the game stats of a November 14th, 2019 " 3282 + "game between the Wittenberg Tigers and " 3283 + "the Muskingum Fighting Muskies (D3)." 3284 ) 3285 df = get_volleyball_game_player_stats(1815514) 3286 print(df) 3287 3288 ######################################## 3289 # Men's volleyball # 3290 ######################################## 3291 3292 # Get the game stats of the 3293 # 2024 NCAA D1 Men's Volleyball National Championship game. 3294 print( 3295 "Get the game stats of the " 3296 + "2024 NCAA D1 Men's volleyball National Championship game" 3297 ) 3298 df = get_volleyball_game_player_stats(5282845) 3299 print(df) 3300 3301 # Get the game stats of a January 14th, 2025 3302 # game between the Kean Cougars and the Arcadia Knights. 
3303 print( 3304 "Get the game stats of a January 14th, 2025 " 3305 + "game between the UNC Asheville Bulldogs " 3306 + "and the Iona Gaels" 3307 ) 3308 df = get_volleyball_game_player_stats(6081598) 3309 print(df) 3310 3311 # Get the game stats of a January 13th, 2024 3312 # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders. 3313 print( 3314 "Get the game stats of a September 14th, 2024 " 3315 + "game between the Purdue Fort Wayne Mastodons " 3316 + "and the NJIT Highlanders." 3317 ) 3318 df = get_volleyball_game_player_stats(4473231) 3319 print(df) 3320 3321 # Get the game stats of a January 21st, 2023 3322 # game between the Baruch Bearcats and the Widener Pride. 3323 print( 3324 "Get the game stats of a January 21st, 2023 " 3325 + "game between the Baruch Bearcats and the Widener Pride." 3326 ) 3327 df = get_volleyball_game_player_stats(2355323) 3328 print(df) 3329 3330 # Get the game stats of a February 24th, 2022 3331 # game between the Ball St. Cardinals and the Lindenwood Lions. 3332 print( 3333 "Get the game stats of a February 24th, 2022 " 3334 + "game between the Ball St. Cardinals and the Lindenwood Lions." 3335 ) 3336 df = get_volleyball_game_player_stats(2162239) 3337 print(df) 3338 3339 # Get the game stats of a March 20th, 2021 3340 # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals. 3341 print( 3342 "Get the game stats of a March 20th, 2021 " 3343 + "game between the SUNY New Paltz Hawks " 3344 + "and the St. John Fisher Cardinals." 3345 ) 3346 df = get_volleyball_game_player_stats(2059180) 3347 print(df) 3348 3349 # Get the game stats of a March 1th, 2020 3350 # game between the USC Trojans and the CUI Golden Eagles. 3351 print( 3352 "Get the game stats of a March 1th, 2020 " 3353 + "game between the USC Trojans and the CUI Golden Eagles." 
3354 ) 3355 df = get_volleyball_game_player_stats(1820058) 3356 print(df) 3357 3358 # Get the game stats of an April 4th, 2019 3359 # game between the Lesly Lynx and the Pine Manor Gators (D3). 3360 print( 3361 "Get the game stats of an April 4th, 2019 " 3362 + "game between the Lesly Lynx and the Pine Manor Gators (D3)." 3363 ) 3364 df = get_volleyball_game_player_stats(1723131) 3365 print(df) 3366 3367 3368 ``` 3369 3370 Returns 3371 ---------- 3372 A pandas `DataFrame` object with player game stats in a given game. 3373 3374 """ 3375 load_from_cache = True 3376 3377 sport_id = "" 3378 season = 0 3379 3380 MVB_teams_df = load_volleyball_teams(get_mens_data=True) 3381 MVB_team_ids_arr = MVB_teams_df["team_id"].to_list() 3382 3383 WVB_teams_df = load_volleyball_teams(get_mens_data=False) 3384 WVB_team_ids_arr = WVB_teams_df["team_id"].to_list() 3385 3386 stats_df = pd.DataFrame() 3387 stats_df_arr = [] 3388 3389 temp_df = pd.DataFrame() 3390 home_dir = expanduser("~") 3391 home_dir = _format_folder_str(home_dir) 3392 3393 stat_columns = [ 3394 "season", 3395 "sport_id", 3396 "game_datetime", 3397 "game_id", 3398 "team_id", 3399 "team_name", 3400 "player_id", 3401 "player_num", 3402 "player_full_name", 3403 "player_position", 3404 "GP", 3405 "sets_played", 3406 "kills", 3407 "errors", 3408 "total_attacks", 3409 "hit%", 3410 "assists", 3411 "aces", 3412 "serve_errors", 3413 "digs", 3414 "return_attacks", 3415 "return_errors", 3416 "solo_blocks", 3417 "assisted_blocks", 3418 "block_errors", 3419 "total_blocks", 3420 "points", 3421 "BHE", 3422 "DBL_DBL", 3423 "TRP_DBL", 3424 ] 3425 3426 url = f"https://stats.ncaa.org/contests/{game_id}/individual_stats" 3427 3428 if exists(f"{home_dir}/.ncaa_stats_py/"): 3429 pass 3430 else: 3431 mkdir(f"{home_dir}/.ncaa_stats_py/") 3432 3433 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"): 3434 pass 3435 else: 3436 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/") 3437 3438 if 
exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/"): 3439 pass 3440 else: 3441 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/") 3442 3443 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/"): 3444 pass 3445 else: 3446 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/") 3447 3448 if exists( 3449 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 3450 + f"{game_id}_player_game_stats.csv" 3451 ): 3452 games_df = pd.read_csv( 3453 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 3454 + f"{game_id}_player_game_stats.csv" 3455 ) 3456 games_df = games_df.infer_objects() 3457 file_mod_datetime = datetime.fromtimestamp( 3458 getmtime( 3459 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/game_stats/player/" 3460 + f"{game_id}_player_game_stats.csv" 3461 ) 3462 ) 3463 load_from_cache = True 3464 else: 3465 file_mod_datetime = datetime.today() 3466 load_from_cache = False 3467 3468 if exists(f"{home_dir}/.ncaa_stats_py/"): 3469 pass 3470 else: 3471 mkdir(f"{home_dir}/.ncaa_stats_py/") 3472 3473 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"): 3474 pass 3475 else: 3476 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/") 3477 3478 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/"): 3479 pass 3480 else: 3481 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/") 3482 3483 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/"): 3484 pass 3485 else: 3486 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/") 3487 3488 if exists( 3489 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3490 + f"{game_id}_player_game_stats.csv" 3491 ): 3492 games_df = pd.read_csv( 3493 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3494 + f"{game_id}_player_game_stats.csv" 3495 ) 3496 games_df = games_df.infer_objects() 3497 file_mod_datetime = datetime.fromtimestamp( 3498 getmtime( 3499 
f"{home_dir}/.ncaa_stats_py/volleyball_WVB/game_stats/player/" 3500 + f"{game_id}_player_game_stats.csv" 3501 ) 3502 ) 3503 load_from_cache = True 3504 else: 3505 logging.info("Could not find a WVB player game stats file") 3506 3507 now = datetime.today() 3508 3509 age = now - file_mod_datetime 3510 3511 if age.days >= 35: 3512 load_from_cache = False 3513 3514 if load_from_cache is True: 3515 return games_df 3516 3517 response = _get_webpage(url=url) 3518 soup = BeautifulSoup(response.text, features="lxml") 3519 3520 info_table = soup.find( 3521 "td", 3522 { 3523 "style": "padding: 0px 30px 0px 30px", 3524 "class": "d-none d-md-table-cell" 3525 } 3526 ).find( 3527 "table", 3528 {"style": "border-collapse: collapse"} 3529 ) 3530 3531 info_table_rows = info_table.find_all("tr") 3532 3533 game_date_str = info_table_rows[3].find("td").text 3534 if "TBA" in game_date_str: 3535 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 3536 elif "tba" in game_date_str: 3537 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 3538 elif "TBD" in game_date_str: 3539 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 3540 elif "tbd" in game_date_str: 3541 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 3542 elif ( 3543 "tbd" not in game_date_str.lower() and 3544 ":" not in game_date_str.lower() 3545 ): 3546 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 3547 else: 3548 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p') 3549 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 3550 game_date_str = game_datetime.isoformat() 3551 del game_datetime 3552 3553 table_boxes = soup.find_all("div", {"class": "card p-0 table-responsive"}) 3554 3555 for box in table_boxes: 3556 t_header = box.find( 3557 "div", {"class": "card-header"} 3558 ).find( 3559 "div", {"class": "row"} 3560 ) 3561 3562 t_header_str = t_header.text 3563 t_header_str = t_header_str.replace("Period Stats", "") 3564 
t_header_str = t_header_str.replace("\n", "") 3565 t_header_str = t_header_str.strip() 3566 3567 team_id = t_header.find("a").get("href") 3568 team_id = team_id.replace("/teams", "") 3569 team_id = team_id.replace("/", "") 3570 team_id = int(team_id) 3571 3572 table_data = box.find( 3573 "table", 3574 {"class": "display dataTable small_font"} 3575 ) 3576 table_headers = box.find("thead").find_all("th") 3577 table_headers = [x.text for x in table_headers] 3578 3579 temp_t_rows = table_data.find("tbody") 3580 temp_t_rows = temp_t_rows.find_all("tr") 3581 3582 spec_stats_df = pd.DataFrame() 3583 spec_stats_df_arr = [] 3584 for t in temp_t_rows: 3585 # row_id = t.get("id") 3586 game_played = 1 3587 # game_started = 0 3588 3589 try: 3590 player_id = t.find("a").get("href") 3591 player_id = player_id.replace("/players", "") 3592 player_id = player_id.replace("/player", "") 3593 player_id = player_id.replace("/", "") 3594 except Exception as e: 3595 logging.debug( 3596 "Could not replace player IDs. 
" + 3597 f"Full exception: `{e}`" 3598 ) 3599 3600 t_cells = t.find_all("td") 3601 p_name = t_cells[1].text.replace("\n", "") 3602 p_name = p_name.strip() 3603 3604 if t_header_str in p_name: 3605 continue 3606 elif p_name.lower() == "team": 3607 continue 3608 # if "\xa0" in p_name: 3609 # game_started = 0 3610 3611 t_cells = [x.text.strip() for x in t_cells] 3612 player_id = int(player_id) 3613 3614 temp_df = pd.DataFrame( 3615 data=[t_cells], 3616 columns=table_headers 3617 ) 3618 3619 duplicate_cols = temp_df.columns[temp_df.columns.duplicated()] 3620 temp_df.drop(columns=duplicate_cols, inplace=True) 3621 3622 temp_df["player_id"] = player_id 3623 temp_df["GP"] = game_played 3624 # temp_df["GS"] = game_started 3625 3626 spec_stats_df_arr.append(temp_df) 3627 del temp_df 3628 3629 spec_stats_df = pd.concat( 3630 spec_stats_df_arr, 3631 ignore_index=True 3632 ) 3633 3634 if team_id in MVB_team_ids_arr: 3635 sport_id = "MVB" 3636 df = MVB_teams_df[MVB_teams_df["team_id"] == team_id] 3637 season = df["season"].iloc[0] 3638 elif team_id in WVB_team_ids_arr: 3639 sport_id = "WVB" 3640 df = WVB_teams_df[WVB_teams_df["team_id"] == team_id] 3641 season = df["season"].iloc[0] 3642 else: 3643 raise ValueError( 3644 f"Unhandled team ID {team_id}" 3645 ) 3646 3647 spec_stats_df["team_id"] = team_id 3648 spec_stats_df["team_name"] = t_header_str 3649 stats_df_arr.append(spec_stats_df) 3650 del spec_stats_df 3651 3652 stats_df = pd.concat(stats_df_arr) 3653 stats_df["season"] = season 3654 stats_df.rename( 3655 columns={ 3656 "#": "player_num", 3657 "Name": "player_full_name", 3658 "P": "player_position", 3659 "Ht": "player_height", 3660 "S": "sets_played", 3661 "Kills": "kills", 3662 "Errors": "errors", 3663 "Total Attacks": "total_attacks", 3664 "TotalAttacks": "total_attacks", 3665 "Hit Pct": "hit%", 3666 "HitPct": "hit%", 3667 "Assists": "assists", 3668 "Aces": "aces", 3669 "SErr": "serve_errors", 3670 "Digs": "digs", 3671 "RetAtt": "return_attacks", 3672 "RErr": 
"return_errors", 3673 "Block Solos": "solo_blocks", 3674 "BlockSolos": "solo_blocks", 3675 "Block Assists": "assisted_blocks", 3676 "BlockAssists": "assisted_blocks", 3677 "BErr": "block_errors", 3678 "PTS": "points", 3679 "Trpl Dbl": "TRP_DBL", 3680 "Dbl Dbl": "DBL_DBL", 3681 "TB": "total_blocks", 3682 "SrvAtt": "serve_attempts", 3683 }, 3684 inplace=True, 3685 ) 3686 3687 if "return_attacks" not in stats_df.columns: 3688 stats_df["return_attacks"] = None 3689 3690 if "serve_attempts" not in stats_df.columns: 3691 stats_df["serve_attempts"] = None 3692 3693 stats_df = stats_df.infer_objects().fillna(0) 3694 stats_df = stats_df.astype( 3695 { 3696 "GP": "uint16", 3697 "sets_played": "uint16", 3698 # "MS": "uint16", 3699 "kills": "uint16", 3700 "errors": "uint16", 3701 "total_attacks": "uint16", 3702 "hit%": "float32", 3703 "assists": "uint16", 3704 "aces": "uint16", 3705 "serve_errors": "uint16", 3706 "digs": "uint16", 3707 "return_attacks": "uint16", 3708 "return_errors": "uint16", 3709 "solo_blocks": "uint16", 3710 "assisted_blocks": "uint16", 3711 "block_errors": "uint16", 3712 # "total_blocks": "uint16", 3713 "points": "float32", 3714 "BHE": "uint16", 3715 "serve_attempts": "uint16", 3716 # "DBL_DBL": "uint8", 3717 # "TRP_DBL": "uint8", 3718 } 3719 ) 3720 # print(stats_df.columns) 3721 stats_df["game_datetime"] = game_date_str 3722 stats_df["sport_id"] = sport_id 3723 3724 stats_df["game_id"] = game_id 3725 3726 stats_df["total_blocks"] = ( 3727 stats_df["solo_blocks"] + 3728 (stats_df["assisted_blocks"] / 2) 3729 ) 3730 stats_df["total_blocks"] = stats_df["total_blocks"].astype("float32") 3731 3732 # Columns used to calculate double doubles and triple doubles. 
3733 # Credits: 3734 # https://en.wikipedia.org/wiki/Double_(volleyball) 3735 # https://stackoverflow.com/a/54381918 3736 double_stats_arr = [ 3737 "aces", 3738 "kills", 3739 "total_blocks", 3740 "digs", 3741 "assists", 3742 ] 3743 stats_df["DBL_DBL"] = ((stats_df[double_stats_arr] >= 10).sum(1)) >= 2 3744 stats_df["DBL_DBL"] = stats_df["DBL_DBL"].astype(int) 3745 3746 stats_df["TRP_DBL"] = ((stats_df[double_stats_arr] >= 10).sum(1)) >= 3 3747 stats_df["TRP_DBL"] = stats_df["TRP_DBL"].astype(int) 3748 3749 for i in stats_df.columns: 3750 if i in stat_columns: 3751 pass 3752 elif "Attend" in stat_columns: 3753 pass 3754 else: 3755 raise ValueError( 3756 f"Unhandled column name {i}" 3757 ) 3758 3759 stats_df = stats_df.reindex( 3760 columns=stat_columns 3761 ) 3762 3763 # print(stats_df.columns) 3764 stats_df.to_csv( 3765 f"{home_dir}/.ncaa_stats_py/volleyball_{sport_id}/game_stats/player/" 3766 + f"{game_id}_player_game_stats.csv", 3767 index=False 3768 ) 3769 return stats_df
Given a valid game ID, this function will attempt to get all player game stats, if possible.
Parameters
game_id
(int, mandatory):
Required argument.
Specifies the game you want player game stats from.
Usage
from ncaa_stats_py.volleyball import get_volleyball_game_player_stats
########################################
# Women's volleyball #
########################################
# Get the game stats of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
"Get the game stats of the "
+ "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_game_player_stats(6080706)
print(df)
# Get the game stats of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
"Get the game stats of a September 14th, 2024 "
+ "game between the UNC Asheville Bulldogs "
+ "and the Iona Gaels"
)
df = get_volleyball_game_player_stats(5670752)
print(df)
# Get the game stats of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakes.
print(
"Get the game stats of a September 16th, 2023 "
+ "game between the Saginaw Valley Cardinals "
+ "and the Lake Superior St. Lakes."
)
df = get_volleyball_game_player_stats(3243563)
print(df)
# Get the game stats of a October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
"Get the game stats of a October 15th, 2022 "
+ "game between the Macalester Scots and "
+ "the St. Scholastica Saints (D3)."
)
df = get_volleyball_game_player_stats(2307684)
print(df)
# Get the game stats of a October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
"Get the game stats of a October 24th, 2021 "
+ "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_game_player_stats(2113627)
print(df)
# Get the game stats of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
"Get the game stats of a March 5th, 2021 "
+ "game between the Notre Dame (OH) Falcons "
+ "and the Alderson Broaddus Battlers."
)
df = get_volleyball_game_player_stats(2005442)
print(df)
# Get the game stats of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
"Get the game stats of a November 14th, 2019 "
+ "game between the Wittenberg Tigers and "
+ "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_game_player_stats(1815514)
print(df)
########################################
# Men's volleyball #
########################################
# Get the game stats of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
"Get the game stats of the "
+ "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_game_player_stats(5282845)
print(df)
# Get the game stats of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
"Get the game stats of a January 14th, 2025 "
+ "game between the Kean Cougars "
+ "and the Arcadia Knights."
)
df = get_volleyball_game_player_stats(6081598)
print(df)
# Get the game stats of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
"Get the game stats of a January 13th, 2024 "
+ "game between the Purdue Fort Wayne Mastodons "
+ "and the NJIT Highlanders."
)
df = get_volleyball_game_player_stats(4473231)
print(df)
# Get the game stats of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
"Get the game stats of a January 21st, 2023 "
+ "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_game_player_stats(2355323)
print(df)
# Get the game stats of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
"Get the game stats of a February 24th, 2022 "
+ "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_game_player_stats(2162239)
print(df)
# Get the game stats of a March 20th, 2021
# game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
print(
"Get the game stats of a March 20th, 2021 "
+ "game between the SUNY New Paltz Hawks "
+ "and the St. John Fisher Cardinals."
)
df = get_volleyball_game_player_stats(2059180)
print(df)
# Get the game stats of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
"Get the game stats of a March 1st, 2020 "
+ "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_game_player_stats(1820058)
print(df)
# Get the game stats of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
"Get the game stats of an April 4th, 2019 "
+ "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_game_player_stats(1723131)
print(df)
Returns
A pandas DataFrame
object with player game stats in a given game.
def get_volleyball_game_team_stats(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to get all team game stats, if possible.

    The team totals are built by fetching the player game stats for the
    game with `get_volleyball_game_player_stats()`, and then summing the
    player rows for each team.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want team game stats from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_volleyball_game_team_stats

    ########################################
    #          Women's volleyball          #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(6080706)
    print(df)

    # Get the game stats of a September 14th, 2024
    # game between the UNC Asheville Bulldogs and the Iona Gaels.
    print(
        "Get the game stats of a September 14th, 2024 "
        + "game between the UNC Asheville Bulldogs "
        + "and the Iona Gaels"
    )
    df = get_volleyball_game_team_stats(5670752)
    print(df)

    # Get the game stats of a September 16th, 2023
    # game between the Saginaw Valley Cardinals
    # and the Lake Superior St. Lakes.
    print(
        "Get the game stats of a September 16th, 2023 "
        + "game between the Saginaw Valley Cardinals "
        + "and the Lake Superior St. Lakes."
    )
    df = get_volleyball_game_team_stats(3243563)
    print(df)

    # Get the game stats of an October 15th, 2022
    # game between the Macalester Scots
    # and the St. Scholastica Saints (D3).
    print(
        "Get the game stats of an October 15th, 2022 "
        + "game between the Macalester Scots and "
        + "the St. Scholastica Saints (D3)."
    )
    df = get_volleyball_game_team_stats(2307684)
    print(df)

    # Get the game stats of an October 24th, 2021
    # game between the Howard Bison and the UMES Hawks.
    print(
        "Get the game stats of an October 24th, 2021 "
        + "game between the Howard Bison and the UMES Hawks."
    )
    df = get_volleyball_game_team_stats(2113627)
    print(df)

    # Get the game stats of a March 5th, 2021
    # game between the Notre Dame (OH) Falcons
    # and the Alderson Broaddus Battlers.
    print(
        "Get the game stats of a March 5th, 2021 "
        + "game between the Notre Dame (OH) Falcons "
        + "and the Alderson Broaddus Battlers."
    )
    df = get_volleyball_game_team_stats(2005442)
    print(df)

    # Get the game stats of a November 14th, 2019
    # game between the Wittenberg Tigers
    # and the Muskingum Fighting Muskies (D3).
    print(
        "Get the game stats of a November 14th, 2019 "
        + "game between the Wittenberg Tigers and "
        + "the Muskingum Fighting Muskies (D3)."
    )
    df = get_volleyball_game_team_stats(1815514)
    print(df)

    ########################################
    #          Men's volleyball            #
    ########################################

    # Get the game stats of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the game stats of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_volleyball_game_team_stats(5282845)
    print(df)

    # Get the game stats of a January 14th, 2025
    # game between the Kean Cougars and the Arcadia Knights.
    print(
        "Get the game stats of a January 14th, 2025 "
        + "game between the Kean Cougars "
        + "and the Arcadia Knights."
    )
    df = get_volleyball_game_team_stats(6081598)
    print(df)

    # Get the game stats of a January 13th, 2024
    # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
    print(
        "Get the game stats of a January 13th, 2024 "
        + "game between the Purdue Fort Wayne Mastodons "
        + "and the NJIT Highlanders."
    )
    df = get_volleyball_game_team_stats(4473231)
    print(df)

    # Get the game stats of a January 21st, 2023
    # game between the Baruch Bearcats and the Widener Pride.
    print(
        "Get the game stats of a January 21st, 2023 "
        + "game between the Baruch Bearcats and the Widener Pride."
    )
    df = get_volleyball_game_team_stats(2355323)
    print(df)

    # Get the game stats of a February 24th, 2022
    # game between the Ball St. Cardinals and the Lindenwood Lions.
    print(
        "Get the game stats of a February 24th, 2022 "
        + "game between the Ball St. Cardinals and the Lindenwood Lions."
    )
    df = get_volleyball_game_team_stats(2162239)
    print(df)

    # Get the game stats of a March 20th, 2021
    # game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
    print(
        "Get the game stats of a March 20th, 2021 "
        + "game between the SUNY New Paltz Hawks "
        + "and the St. John Fisher Cardinals."
    )
    df = get_volleyball_game_team_stats(2059180)
    print(df)

    # Get the game stats of a March 1st, 2020
    # game between the USC Trojans and the CUI Golden Eagles.
    print(
        "Get the game stats of a March 1st, 2020 "
        + "game between the USC Trojans and the CUI Golden Eagles."
    )
    df = get_volleyball_game_team_stats(1820058)
    print(df)

    # Get the game stats of an April 4th, 2019
    # game between the Lesley Lynx and the Pine Manor Gators (D3).
    print(
        "Get the game stats of an April 4th, 2019 "
        + "game between the Lesley Lynx and the Pine Manor Gators (D3)."
    )
    df = get_volleyball_game_team_stats(1723131)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with team game stats in a given game.
    There is one row per team, holding the identifying columns
    (`season`, `sport_id`, `game_datetime`, `game_id`, `team_id`,
    `team_name`), the summed counting stats, and a recomputed `hit%`
    (`(kills - errors) / total_attacks`).
    A team with zero total attacks gets a non-finite `hit%`
    from the division.

    """
    player_stats_df = get_volleyball_game_player_stats(game_id=game_id)
    player_stats_df = player_stats_df.infer_objects()

    # Columns that identify a single team within a single game.
    team_key_columns = [
        "season",
        "sport_id",
        "game_datetime",
        "game_id",
        "team_id",
        "team_name",
    ]

    # Every counting stat is summed across the players of a team.
    # `hit%` is deliberately left out: a sum of player rates is meaningless,
    # so it is recomputed from the team totals below.
    summed_columns = [
        "sets_played",
        "kills",
        "errors",
        "total_attacks",
        "assists",
        "aces",
        "serve_errors",
        "digs",
        "return_attacks",
        "return_errors",
        "solo_blocks",
        "assisted_blocks",
        "block_errors",
        "total_blocks",
        "points",
        "BHE",
        "DBL_DBL",
        "TRP_DBL",
    ]

    stats_df = player_stats_df.groupby(
        team_key_columns,
        as_index=False,
    ).agg({column: "sum" for column in summed_columns})

    # Cast to float before subtracting. Freshly scraped player stats store
    # these counts as unsigned integers, and unsigned subtraction silently
    # wraps around to a huge positive number when a team has
    # more errors than kills (a negative hitting percentage).
    attack_margin = (
        stats_df["kills"].astype("float64")
        - stats_df["errors"].astype("float64")
    )
    stats_df["hit%"] = attack_margin / stats_df["total_attacks"]
    return stats_df
Given a valid game ID, this function will attempt to get all team game stats, if possible.
Parameters
game_id
(int, mandatory):
Required argument.
Specifies the game you want team game stats from.
Usage
from ncaa_stats_py.volleyball import get_volleyball_game_team_stats
########################################
# Women's volleyball #
########################################
# Get the game stats of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
"Get the game stats of the "
+ "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_game_team_stats(6080706)
print(df)
# Get the game stats of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
"Get the game stats of a September 14th, 2024 "
+ "game between the UNC Asheville Bulldogs "
+ "and the Iona Gaels"
)
df = get_volleyball_game_team_stats(5670752)
print(df)
# Get the game stats of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakes.
print(
"Get the game stats of a September 16th, 2023 "
+ "game between the Saginaw Valley Cardinals "
+ "and the Lake Superior St. Lakes."
)
df = get_volleyball_game_team_stats(3243563)
print(df)
# Get the game stats of a October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
"Get the game stats of a October 15th, 2022 "
+ "game between the Macalester Scots and "
+ "the St. Scholastica Saints (D3)."
)
df = get_volleyball_game_team_stats(2307684)
print(df)
# Get the game stats of a October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
"Get the game stats of a October 24th, 2021 "
+ "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_game_team_stats(2113627)
print(df)
# Get the game stats of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
"Get the game stats of a March 5th, 2021 "
+ "game between the Notre Dame (OH) Falcons "
+ "and the Alderson Broaddus Battlers."
)
df = get_volleyball_game_team_stats(2005442)
print(df)
# Get the game stats of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
"Get the game stats of a November 14th, 2019 "
+ "game between the Wittenberg Tigers and "
+ "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_game_team_stats(1815514)
print(df)
########################################
# Men's volleyball #
########################################
# Get the game stats of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
"Get the game stats of the "
+ "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_game_team_stats(5282845)
print(df)
# Get the game stats of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
"Get the game stats of a January 14th, 2025 "
+ "game between the Kean Cougars "
+ "and the Arcadia Knights."
)
df = get_volleyball_game_team_stats(6081598)
print(df)
# Get the game stats of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
"Get the game stats of a January 13th, 2024 "
+ "game between the Purdue Fort Wayne Mastodons "
+ "and the NJIT Highlanders."
)
df = get_volleyball_game_team_stats(4473231)
print(df)
# Get the game stats of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
"Get the game stats of a January 21st, 2023 "
+ "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_game_team_stats(2355323)
print(df)
# Get the game stats of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
"Get the game stats of a February 24th, 2022 "
+ "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_game_team_stats(2162239)
print(df)
# Get the game stats of a March 20th, 2021
# game between the SUNY New Paltz Hawks and the St. John Fisher Cardinals.
print(
"Get the game stats of a March 20th, 2021 "
+ "game between the SUNY New Paltz Hawks "
+ "and the St. John Fisher Cardinals."
)
df = get_volleyball_game_team_stats(2059180)
print(df)
# Get the game stats of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
"Get the game stats of a March 1st, 2020 "
+ "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_game_team_stats(1820058)
print(df)
# Get the game stats of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
"Get the game stats of an April 4th, 2019 "
+ "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_game_team_stats(1723131)
print(df)
Returns
A pandas DataFrame
object with team game stats in a given game.
3993def get_volleyball_raw_pbp(game_id: int) -> pd.DataFrame: 3994 """ 3995 Given a valid game ID, 3996 this function will attempt to get the raw play-by-play (PBP) 3997 data for that game. 3998 3999 Parameters 4000 ---------- 4001 `game_id` (int, mandatory): 4002 Required argument. 4003 Specifies the game you want play-by-play data (PBP) from. 4004 4005 Usage 4006 ---------- 4007 ```python 4008 4009 from ncaa_stats_py.volleyball import get_volleyball_raw_pbp 4010 4011 ######################################## 4012 # Women's volleyball # 4013 ######################################## 4014 4015 # Get the play-by-play data of the 4016 # 2024 NCAA D1 Women's Volleyball National Championship game. 4017 print( 4018 "Get the play-by-play data of the " 4019 + "2024 NCAA D1 Women's volleyball National Championship game" 4020 ) 4021 df = get_volleyball_raw_pbp(6080706) 4022 print(df) 4023 4024 # Get the play-by-play data of a September 14th, 2024 4025 # game between the UNC Asheville Bulldogs and the Iona Gaels. 4026 print( 4027 "Get the play-by-play data of a September 14th, 2024 " 4028 + "game between the UNC Asheville Bulldogs " 4029 + "and the Iona Gaels" 4030 ) 4031 df = get_volleyball_raw_pbp(5670752) 4032 print(df) 4033 4034 # Get the play-by-play data of a September 16th, 2023 4035 # game between the Saginaw Valley Cardinals 4036 # and the Lake Superior St. Lakes. 4037 print( 4038 "Get the play-by-play data of a September 16th, 2023 " 4039 + "game between the Saginaw Valley Cardinals " 4040 + "and the Lake Superior St. Lakes." 4041 ) 4042 df = get_volleyball_raw_pbp(3243563) 4043 print(df) 4044 4045 # Get the play-by-play data of a October 15th, 2022 4046 # game between the Macalester Scots 4047 # and the St. Scholastica Saints (D3). 4048 print( 4049 "Get the play-by-play data of a October 15th, 2022 " 4050 + "game between the Macalester Scots and " 4051 + "the St. Scholastica Saints (D3)." 
4052 ) 4053 df = get_volleyball_raw_pbp(2307684) 4054 print(df) 4055 4056 # Get the play-by-play data of a October 24th, 2021 4057 # game between the Howard Bison and the UMES Hawks. 4058 print( 4059 "Get the play-by-play data of a October 24th, 2021 " 4060 + "game between the Howard Bison and the UMES Hawks." 4061 ) 4062 df = get_volleyball_raw_pbp(2113627) 4063 print(df) 4064 4065 # Get the play-by-play data of a March 5th, 2021 4066 # game between the Notre Dame (OH) Falcons 4067 # and the Alderson Broaddus Battlers. 4068 print( 4069 "Get the play-by-play data of a March 5th, 2021 " 4070 + "game between the Notre Dame (OH) Falcons " 4071 + "and the Alderson Broaddus Battlers." 4072 ) 4073 df = get_volleyball_raw_pbp(2005442) 4074 print(df) 4075 4076 # Get the play-by-play data of a November 14th, 2019 4077 # game between the Wittenberg Tigers 4078 # and the Muskingum Fighting Muskies (D3). 4079 print( 4080 "Get the play-by-play data of a November 14th, 2019 " 4081 + "game between the Wittenberg Tigers and " 4082 + "the Muskingum Fighting Muskies (D3)." 4083 ) 4084 df = get_volleyball_raw_pbp(1815514) 4085 print(df) 4086 4087 ######################################## 4088 # Men's volleyball # 4089 ######################################## 4090 4091 # Get the play-by-play data of the 4092 # 2024 NCAA D1 Men's Volleyball National Championship game. 4093 print( 4094 "Get the play-by-play data of the " 4095 + "2024 NCAA D1 Men's volleyball National Championship game" 4096 ) 4097 df = get_volleyball_raw_pbp(5282845) 4098 print(df) 4099 4100 # Get the play-by-play data of a January 14th, 2025 4101 # game between the Kean Cougars and the Arcadia Knights. 
4102 print( 4103 "Get the play-by-play data of a January 14th, 2025 " 4104 + "game between the UNC Asheville Bulldogs " 4105 + "and the Iona Gaels" 4106 ) 4107 df = get_volleyball_raw_pbp(6081598) 4108 print(df) 4109 4110 # Get the play-by-play data of a January 13th, 2024 4111 # game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders. 4112 print( 4113 "Get the play-by-play data of a September 14th, 2024 " 4114 + "game between the Purdue Fort Wayne Mastodons " 4115 + "and the NJIT Highlanders." 4116 ) 4117 df = get_volleyball_raw_pbp(4473231) 4118 print(df) 4119 4120 # Get the play-by-play data of a January 21st, 2023 4121 # game between the Baruch Bearcats and the Widener Pride. 4122 print( 4123 "Get the play-by-play data of a January 21st, 2023 " 4124 + "game between the Baruch Bearcats and the Widener Pride." 4125 ) 4126 df = get_volleyball_raw_pbp(2355323) 4127 print(df) 4128 4129 # Get the play-by-play data of a February 24th, 2022 4130 # game between the Ball St. Cardinals and the Lindenwood Lions. 4131 print( 4132 "Get the play-by-play data of a February 24th, 2022 " 4133 + "game between the Ball St. Cardinals and the Lindenwood Lions." 4134 ) 4135 df = get_volleyball_raw_pbp(2162239) 4136 print(df) 4137 4138 # Get the play-by-play data of a March 7th, 2021 4139 # game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets. 4140 print( 4141 "Get the play-by-play data of a March 7th, 2021 " 4142 + "game between the Adrian Bulldogs " 4143 + "and the Baldwin Wallace Yellow Jackets." 4144 ) 4145 df = get_volleyball_raw_pbp(1998844) 4146 print(df) 4147 4148 # Get the play-by-play data of a March 1th, 2020 4149 # game between the USC Trojans and the CUI Golden Eagles. 4150 print( 4151 "Get the play-by-play data of a March 1th, 2020 " 4152 + "game between the USC Trojans and the CUI Golden Eagles." 
4153 ) 4154 df = get_volleyball_raw_pbp(1820058) 4155 print(df) 4156 4157 # Get the play-by-play data of an April 4th, 2019 4158 # game between the Lesly Lynx and the Pine Manor Gators (D3). 4159 print( 4160 "Get the play-by-play data of an April 4th, 2019 " 4161 + "game between the Lesly Lynx and the Pine Manor Gators (D3)." 4162 ) 4163 df = get_volleyball_raw_pbp(1723131) 4164 print(df) 4165 4166 ``` 4167 4168 Returns 4169 ---------- 4170 A pandas `DataFrame` object with a play-by-play (PBP) data in a given game. 4171 4172 """ 4173 load_from_cache = True 4174 # is_overtime = False 4175 4176 sport_id = "" 4177 season = 0 4178 away_score = 0 4179 home_score = 0 4180 4181 home_sets_won = 0 4182 away_sets_won = 0 4183 4184 home_set_1_score = 0 4185 away_set_1_score = 0 4186 4187 home_set_2_score = 0 4188 away_set_2_score = 0 4189 4190 home_set_3_score = 0 4191 away_set_3_score = 0 4192 4193 home_set_4_score = 0 4194 away_set_4_score = 0 4195 4196 home_set_5_score = 0 4197 away_set_5_score = 0 4198 4199 home_cumulative_score = 0 4200 away_cumulative_score = 0 4201 4202 MVB_teams_df = load_volleyball_teams(get_mens_data=True) 4203 MVB_team_ids_arr = MVB_teams_df["team_id"].to_list() 4204 4205 WVB_teams_df = load_volleyball_teams(get_mens_data=False) 4206 WVB_team_ids_arr = WVB_teams_df["team_id"].to_list() 4207 4208 pbp_df = pd.DataFrame() 4209 pbp_df_arr = [] 4210 temp_df = pd.DataFrame() 4211 4212 temp_df = pd.DataFrame() 4213 home_dir = expanduser("~") 4214 home_dir = _format_folder_str(home_dir) 4215 4216 stat_columns = [ 4217 "season", 4218 "game_id", 4219 "sport_id", 4220 "game_datetime", 4221 "set_num", 4222 "event_num", 4223 "event_team", 4224 "event_text", 4225 "is_scoring_play", 4226 "home_set_score", 4227 "away_set_score", 4228 "is_extra_points", 4229 "home_cumulative_score", 4230 "away_cumulative_score", 4231 "home_sets_won", 4232 "away_sets_won", 4233 "stadium_name", 4234 "attendance", 4235 "away_team_id", 4236 "away_team_name", 4237 "home_team_id", 4238 
"home_team_name", 4239 "home_set_1_score", 4240 "away_set_1_score", 4241 "home_set_2_score", 4242 "away_set_2_score", 4243 "home_set_3_score", 4244 "away_set_3_score", 4245 "home_set_4_score", 4246 "away_set_4_score", 4247 "home_set_5_score", 4248 "away_set_5_score", 4249 ] 4250 4251 url = f"https://stats.ncaa.org/contests/{game_id}/play_by_play" 4252 4253 if exists(f"{home_dir}/.ncaa_stats_py/"): 4254 pass 4255 else: 4256 mkdir(f"{home_dir}/.ncaa_stats_py/") 4257 4258 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/"): 4259 pass 4260 else: 4261 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/") 4262 4263 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/"): 4264 pass 4265 else: 4266 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/") 4267 4268 if exists( 4269 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4270 + f"{game_id}_raw_pbp.csv" 4271 ): 4272 games_df = pd.read_csv( 4273 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4274 + f"{game_id}_raw_pbp.csv" 4275 ) 4276 games_df = games_df.infer_objects() 4277 file_mod_datetime = datetime.fromtimestamp( 4278 getmtime( 4279 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4280 + f"{game_id}_raw_pbp.csv" 4281 ) 4282 ) 4283 load_from_cache = True 4284 else: 4285 file_mod_datetime = datetime.today() 4286 load_from_cache = False 4287 4288 if exists(f"{home_dir}/.ncaa_stats_py/"): 4289 pass 4290 else: 4291 mkdir(f"{home_dir}/.ncaa_stats_py/") 4292 4293 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/"): 4294 pass 4295 else: 4296 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/") 4297 4298 if exists(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/"): 4299 pass 4300 else: 4301 mkdir(f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/") 4302 4303 if exists( 4304 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4305 + f"{game_id}_raw_pbp.csv" 4306 ): 4307 games_df = pd.read_csv( 4308 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4309 + f"{game_id}_raw_pbp.csv" 4310 ) 
4311 games_df = games_df.infer_objects() 4312 file_mod_datetime = datetime.fromtimestamp( 4313 getmtime( 4314 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4315 + f"{game_id}_raw_pbp.csv" 4316 ) 4317 ) 4318 load_from_cache = True 4319 else: 4320 logging.info("Could not find a WVB player game stats file") 4321 4322 now = datetime.today() 4323 4324 age = now - file_mod_datetime 4325 4326 if age.days >= 35: 4327 load_from_cache = False 4328 4329 if load_from_cache is True: 4330 return games_df 4331 4332 response = _get_webpage(url=url) 4333 soup = BeautifulSoup(response.text, features="lxml") 4334 4335 info_table = soup.find( 4336 "td", 4337 { 4338 "style": "padding: 0px 30px 0px 30px", 4339 "class": "d-none d-md-table-cell" 4340 } 4341 ).find( 4342 "table", 4343 {"style": "border-collapse: collapse"} 4344 ) 4345 4346 info_table_rows = info_table.find_all("tr") 4347 4348 game_date_str = info_table_rows[3].find("td").text 4349 if "TBA" in game_date_str: 4350 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBA') 4351 elif "tba" in game_date_str: 4352 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tba') 4353 elif "TBD" in game_date_str: 4354 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y TBD') 4355 elif "tbd" in game_date_str: 4356 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y tbd') 4357 elif ( 4358 "tbd" not in game_date_str.lower() and 4359 ":" not in game_date_str.lower() 4360 ): 4361 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y') 4362 else: 4363 game_datetime = datetime.strptime(game_date_str, '%m/%d/%Y %I:%M %p') 4364 game_datetime = game_datetime.astimezone(timezone("US/Eastern")) 4365 game_date_str = game_datetime.isoformat() 4366 # del game_datetime 4367 4368 stadium_str = info_table_rows[4].find("td").text 4369 4370 attendance_str = info_table_rows[5].find("td").text 4371 attendance_int = re.findall( 4372 r"([0-9\,]+)", 4373 attendance_str 4374 )[0] 4375 attendance_int = 
attendance_int.replace(",", "") 4376 attendance_int = int(attendance_int) 4377 4378 del attendance_str 4379 team_cards = soup.find_all( 4380 "td", 4381 { 4382 "valign": "center", 4383 "class": "grey_text d-none d-sm-table-cell" 4384 } 4385 ) 4386 4387 away_url = team_cards[0].find_all("a") 4388 away_url = away_url[0] 4389 home_url = team_cards[1].find_all("a") 4390 home_url = home_url[0] 4391 4392 away_team_name = away_url.text 4393 home_team_name = home_url.text 4394 4395 away_team_id = away_url.get("href") 4396 home_team_id = home_url.get("href") 4397 4398 away_team_id = away_team_id.replace("/teams", "") 4399 away_team_id = away_team_id.replace("/team", "") 4400 away_team_id = away_team_id.replace("/", "") 4401 away_team_id = int(away_team_id) 4402 4403 home_team_id = home_team_id.replace("/teams", "") 4404 home_team_id = home_team_id.replace("/team", "") 4405 home_team_id = home_team_id.replace("/", "") 4406 home_team_id = int(home_team_id) 4407 4408 if home_team_id in MVB_team_ids_arr: 4409 sport_id = "MVB" 4410 temp_df = MVB_teams_df[MVB_teams_df["team_id"] == home_team_id] 4411 season = temp_df["season"].iloc[0] 4412 del temp_df 4413 elif home_team_id in WVB_team_ids_arr: 4414 sport_id = "WVB" 4415 temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id] 4416 season = temp_df["season"].iloc[0] 4417 del temp_df 4418 # This should never be the case, 4419 # but if something goes very horribly wrong, 4420 # double check the away team ID to 4421 # the MVB and WVB team ID list. 4422 elif away_team_id in MVB_team_ids_arr: 4423 sport_id = "MVB" 4424 temp_df = MVB_teams_df[MVB_teams_df["team_id"] == away_team_id] 4425 season = temp_df["season"].iloc[0] 4426 del temp_df 4427 elif away_team_id in WVB_team_ids_arr: 4428 sport_id = "WVB" 4429 temp_df = WVB_teams_df[WVB_teams_df["team_id"] == home_team_id] 4430 season = temp_df["season"].iloc[0] 4431 del temp_df 4432 # If we get to this, we are in a code red situation. 
4433 # "SHUT IT DOWN" - Gordon Ramsay 4434 else: 4435 raise ValueError( 4436 "Could not identify if this is a " + 4437 "MVB or WVB game based on team IDs. " 4438 ) 4439 4440 section_cards = soup.find_all( 4441 "div", 4442 {"class": "row justify-content-md-center w-100"} 4443 ) 4444 4445 if len(section_cards) == 0: 4446 logging.warning( 4447 f"Could not find any plays for game ID `{game_id}`. " + 4448 "Returning empty DataFrame." 4449 ) 4450 df = pd.DataFrame(columns=stat_columns) 4451 return df 4452 4453 # play_id = 0 4454 for card in section_cards: 4455 is_extra_points = False 4456 event_text = "" 4457 4458 set_num_str = card.find( 4459 "div", 4460 {"class": "card-header"} 4461 ).text 4462 set_num = re.findall( 4463 r"([0-9]+)", 4464 set_num_str 4465 ) 4466 4467 set_num = int(set_num[0]) 4468 4469 table_body = card.find("table").find("tbody").find_all("tr") 4470 4471 # pbp rows 4472 for row in table_body: 4473 is_scoring_play = True 4474 t_cells = row.find_all("td") 4475 t_cells = [x.text.strip() for x in t_cells] 4476 game_time_str = t_cells[0] 4477 4478 if len(t_cells[0]) > 0: 4479 event_team = away_team_id 4480 event_text = t_cells[0] 4481 elif len(t_cells[2]) > 0: 4482 event_team = home_team_id 4483 event_text = t_cells[2] 4484 4485 if "+" in event_text: 4486 temp = event_text.split("\n") 4487 if len(temp) >= 2: 4488 event_text = temp[1] 4489 else: 4490 raise Exception( 4491 "Unhandled situation " + 4492 f"when parsing a scoring play: `{temp}`" 4493 ) 4494 # print() 4495 else: 4496 event_text = event_text.replace("\n", "") 4497 4498 event_text = event_text.replace(" ", " ") 4499 event_text = event_text.strip() 4500 4501 if len(t_cells) == 3: 4502 try: 4503 away_score, home_score = t_cells[1].split("-") 4504 4505 away_score = int(away_score) 4506 home_score = int(home_score) 4507 is_scoring_play = True 4508 except ValueError: 4509 logging.info( 4510 "Could not extract a score " + 4511 f"from the following play `{event_text}`" 4512 ) 4513 is_scoring_play = False 
4514 except Exception as e: 4515 logging.warning( 4516 f"An unhandled exception has occurred: `{e}`" 4517 ) 4518 raise e 4519 # scoring_play = False 4520 elif len(t_cells) > 3: 4521 raise SyntaxError( 4522 f"Unhandled PBP row format in game ID `{game_id}`" 4523 ) 4524 4525 if set_num <= 4 and home_score == 24 and away_score == 24: 4526 is_extra_points = True 4527 elif set_num == 5 and home_score == 14 and away_score == 14: 4528 is_extra_points = True 4529 4530 temp_home_cumulative_score = home_cumulative_score + home_score 4531 temp_away_cumulative_score = away_cumulative_score + away_score 4532 4533 temp_df = pd.DataFrame( 4534 { 4535 # "season": season, 4536 # "game_id": game_id, 4537 # "sport_id": sport_id, 4538 # "away_team_id": away_team_id, 4539 # "away_team_name": away_team_name, 4540 # "home_team_id": home_team_id, 4541 # "home_team_name": home_team_name, 4542 "game_time_str": game_time_str, 4543 "set_num": set_num, 4544 "away_set_score": away_score, 4545 "home_set_score": home_score, 4546 "event_team": event_team, 4547 "event_text": event_text, 4548 "is_scoring_play": is_scoring_play, 4549 "is_extra_points": is_extra_points, 4550 "home_cumulative_score": temp_home_cumulative_score, 4551 "away_cumulative_score": temp_away_cumulative_score, 4552 "home_sets_won": home_sets_won, 4553 "away_sets_won": away_sets_won, 4554 }, 4555 index=[0], 4556 ) 4557 pbp_df_arr.append(temp_df) 4558 4559 if set_num == 1: 4560 home_set_1_score = home_score 4561 away_set_1_score = away_score 4562 home_cumulative_score = home_set_1_score 4563 away_cumulative_score = away_set_1_score 4564 elif set_num == 2: 4565 home_set_2_score = home_score 4566 away_set_2_score = away_score 4567 home_cumulative_score += home_set_2_score 4568 away_cumulative_score += away_set_2_score 4569 elif set_num == 3: 4570 home_set_3_score = home_score 4571 away_set_3_score = away_score 4572 home_cumulative_score += home_set_3_score 4573 away_cumulative_score += away_set_3_score 4574 elif set_num == 4: 4575 
home_set_4_score = home_score 4576 away_set_4_score = away_score 4577 home_cumulative_score += home_set_4_score 4578 away_cumulative_score += away_set_4_score 4579 elif set_num == 5: 4580 home_set_5_score = home_score 4581 away_set_5_score = away_score 4582 home_cumulative_score += home_set_4_score 4583 away_cumulative_score += away_set_4_score 4584 4585 if temp_away_cumulative_score > home_cumulative_score: 4586 away_sets_won += 1 4587 elif temp_away_cumulative_score < home_cumulative_score: 4588 home_sets_won += 1 4589 4590 # End of set play 4591 temp_df = pd.DataFrame( 4592 { 4593 # "season": season, 4594 # "game_id": game_id, 4595 # "sport_id": sport_id, 4596 # "away_team_id": away_team_id, 4597 # "away_team_name": away_team_name, 4598 # "home_team_id": home_team_id, 4599 # "home_team_name": home_team_name, 4600 "game_time_str": game_time_str, 4601 "set_num": set_num, 4602 "away_set_score": away_score, 4603 "home_set_score": home_score, 4604 "event_team": event_team, 4605 "event_text": f"END SET {set_num}", 4606 "is_scoring_play": is_scoring_play, 4607 "is_extra_points": is_extra_points, 4608 "home_cumulative_score": temp_home_cumulative_score, 4609 "away_cumulative_score": temp_away_cumulative_score, 4610 "home_sets_won": home_sets_won, 4611 "away_sets_won": away_sets_won, 4612 }, 4613 index=[0], 4614 ) 4615 pbp_df_arr.append(temp_df) 4616 4617 # End of game play 4618 temp_df = pd.DataFrame( 4619 { 4620 # "season": season, 4621 # "game_id": game_id, 4622 # "sport_id": sport_id, 4623 # "away_team_id": away_team_id, 4624 # "away_team_name": away_team_name, 4625 # "home_team_id": home_team_id, 4626 # "home_team_name": home_team_name, 4627 "game_time_str": game_time_str, 4628 "set_num": set_num, 4629 "away_set_score": away_score, 4630 "home_set_score": home_score, 4631 "event_team": event_team, 4632 "event_text": "END MATCH", 4633 "is_scoring_play": is_scoring_play, 4634 "is_extra_points": is_extra_points, 4635 "home_cumulative_score": temp_home_cumulative_score, 
4636 "away_cumulative_score": temp_away_cumulative_score, 4637 "home_sets_won": home_sets_won, 4638 "away_sets_won": away_sets_won, 4639 }, 4640 index=[0], 4641 ) 4642 pbp_df_arr.append(temp_df) 4643 pbp_df = pd.concat(pbp_df_arr, ignore_index=True) 4644 pbp_df["event_num"] = pbp_df.index + 1 4645 pbp_df["game_datetime"] = game_date_str 4646 pbp_df["season"] = season 4647 pbp_df["game_id"] = game_id 4648 pbp_df["sport_id"] = sport_id 4649 pbp_df["stadium_name"] = stadium_str 4650 pbp_df["attendance"] = attendance_int 4651 pbp_df["away_team_id"] = away_team_id 4652 pbp_df["away_team_name"] = away_team_name 4653 pbp_df["home_team_id"] = home_team_id 4654 pbp_df["home_team_name"] = home_team_name 4655 4656 pbp_df["home_set_1_score"] = home_set_1_score 4657 pbp_df["away_set_1_score"] = away_set_1_score 4658 4659 pbp_df["home_set_2_score"] = home_set_2_score 4660 pbp_df["away_set_2_score"] = away_set_2_score 4661 4662 pbp_df["home_set_3_score"] = home_set_3_score 4663 pbp_df["away_set_3_score"] = away_set_3_score 4664 4665 pbp_df["home_set_4_score"] = home_set_4_score 4666 pbp_df["away_set_4_score"] = away_set_4_score 4667 4668 pbp_df["home_set_5_score"] = home_set_5_score 4669 pbp_df["away_set_5_score"] = away_set_5_score 4670 4671 # print(pbp_df.columns) 4672 pbp_df = pbp_df.reindex(columns=stat_columns) 4673 pbp_df = pbp_df.infer_objects() 4674 4675 if sport_id == "MVB": 4676 pbp_df.to_csv( 4677 f"{home_dir}/.ncaa_stats_py/volleyball_MVB/raw_pbp/" 4678 + f"{game_id}_raw_pbp.csv", 4679 index=False 4680 ) 4681 elif sport_id == "WVB": 4682 pbp_df.to_csv( 4683 f"{home_dir}/.ncaa_stats_py/volleyball_WVB/raw_pbp/" 4684 + f"{game_id}_raw_pbp.csv", 4685 index=False 4686 ) 4687 else: 4688 raise ValueError( 4689 f"Improper Sport ID: `{sport_id}`" 4690 ) 4691 4692 return pbp_df
Given a valid game ID, this function will attempt to get the raw play-by-play (PBP) data for that game.
Parameters
game_id
(int, mandatory):
Required argument.
Specifies the game you want play-by-play data (PBP) from.
Usage
from ncaa_stats_py.volleyball import get_volleyball_raw_pbp
########################################
# Women's volleyball #
########################################
# Get the play-by-play data of the
# 2024 NCAA D1 Women's Volleyball National Championship game.
print(
"Get the play-by-play data of the "
+ "2024 NCAA D1 Women's volleyball National Championship game"
)
df = get_volleyball_raw_pbp(6080706)
print(df)
# Get the play-by-play data of a September 14th, 2024
# game between the UNC Asheville Bulldogs and the Iona Gaels.
print(
"Get the play-by-play data of a September 14th, 2024 "
+ "game between the UNC Asheville Bulldogs "
+ "and the Iona Gaels"
)
df = get_volleyball_raw_pbp(5670752)
print(df)
# Get the play-by-play data of a September 16th, 2023
# game between the Saginaw Valley Cardinals
# and the Lake Superior St. Lakers.
print(
"Get the play-by-play data of a September 16th, 2023 "
+ "game between the Saginaw Valley Cardinals "
+ "and the Lake Superior St. Lakers."
)
df = get_volleyball_raw_pbp(3243563)
print(df)
# Get the play-by-play data of an October 15th, 2022
# game between the Macalester Scots
# and the St. Scholastica Saints (D3).
print(
"Get the play-by-play data of an October 15th, 2022 "
+ "game between the Macalester Scots and "
+ "the St. Scholastica Saints (D3)."
)
df = get_volleyball_raw_pbp(2307684)
print(df)
# Get the play-by-play data of an October 24th, 2021
# game between the Howard Bison and the UMES Hawks.
print(
"Get the play-by-play data of an October 24th, 2021 "
+ "game between the Howard Bison and the UMES Hawks."
)
df = get_volleyball_raw_pbp(2113627)
print(df)
# Get the play-by-play data of a March 5th, 2021
# game between the Notre Dame (OH) Falcons
# and the Alderson Broaddus Battlers.
print(
"Get the play-by-play data of a March 5th, 2021 "
+ "game between the Notre Dame (OH) Falcons "
+ "and the Alderson Broaddus Battlers."
)
df = get_volleyball_raw_pbp(2005442)
print(df)
# Get the play-by-play data of a November 14th, 2019
# game between the Wittenberg Tigers
# and the Muskingum Fighting Muskies (D3).
print(
"Get the play-by-play data of a November 14th, 2019 "
+ "game between the Wittenberg Tigers and "
+ "the Muskingum Fighting Muskies (D3)."
)
df = get_volleyball_raw_pbp(1815514)
print(df)
########################################
# Men's volleyball #
########################################
# Get the play-by-play data of the
# 2024 NCAA D1 Men's Volleyball National Championship game.
print(
"Get the play-by-play data of the "
+ "2024 NCAA D1 Men's volleyball National Championship game"
)
df = get_volleyball_raw_pbp(5282845)
print(df)
# Get the play-by-play data of a January 14th, 2025
# game between the Kean Cougars and the Arcadia Knights.
print(
"Get the play-by-play data of a January 14th, 2025 "
+ "game between the Kean Cougars "
+ "and the Arcadia Knights."
)
df = get_volleyball_raw_pbp(6081598)
print(df)
# Get the play-by-play data of a January 13th, 2024
# game between the Purdue Fort Wayne Mastodons and the NJIT Highlanders.
print(
"Get the play-by-play data of a January 13th, 2024 "
+ "game between the Purdue Fort Wayne Mastodons "
+ "and the NJIT Highlanders."
)
df = get_volleyball_raw_pbp(4473231)
print(df)
# Get the play-by-play data of a January 21st, 2023
# game between the Baruch Bearcats and the Widener Pride.
print(
"Get the play-by-play data of a January 21st, 2023 "
+ "game between the Baruch Bearcats and the Widener Pride."
)
df = get_volleyball_raw_pbp(2355323)
print(df)
# Get the play-by-play data of a February 24th, 2022
# game between the Ball St. Cardinals and the Lindenwood Lions.
print(
"Get the play-by-play data of a February 24th, 2022 "
+ "game between the Ball St. Cardinals and the Lindenwood Lions."
)
df = get_volleyball_raw_pbp(2162239)
print(df)
# Get the play-by-play data of a March 7th, 2021
# game between the Adrian Bulldogs and the Baldwin Wallace Yellow Jackets.
print(
"Get the play-by-play data of a March 7th, 2021 "
+ "game between the Adrian Bulldogs "
+ "and the Baldwin Wallace Yellow Jackets."
)
df = get_volleyball_raw_pbp(1998844)
print(df)
# Get the play-by-play data of a March 1st, 2020
# game between the USC Trojans and the CUI Golden Eagles.
print(
"Get the play-by-play data of a March 1st, 2020 "
+ "game between the USC Trojans and the CUI Golden Eagles."
)
df = get_volleyball_raw_pbp(1820058)
print(df)
# Get the play-by-play data of an April 4th, 2019
# game between the Lesley Lynx and the Pine Manor Gators (D3).
print(
"Get the play-by-play data of an April 4th, 2019 "
+ "game between the Lesley Lynx and the Pine Manor Gators (D3)."
)
df = get_volleyball_raw_pbp(1723131)
print(df)
Returns
A pandas DataFrame
object with the play-by-play (PBP) data for a given game.
def get_parsed_volleyball_pbp(game_id: int) -> pd.DataFrame:
    """
    Given a valid game ID,
    this function will attempt to parse play-by-play (PBP)
    data for that game.

    Parsed results are cached as CSV files in
    `~/.ncaa_stats_py/volleyball_MVB/parsed_pbp/` or
    `~/.ncaa_stats_py/volleyball_WVB/parsed_pbp/`.
    A cached file is returned as-is unless it is two or more days old,
    in which case the game is parsed again and the cache is rewritten.

    Parameters
    ----------
    `game_id` (int, mandatory):
        Required argument.
        Specifies the game you want play-by-play data (PBP) from.

    Usage
    ----------
    ```python

    from ncaa_stats_py.volleyball import get_parsed_volleyball_pbp

    # Get the parsed play-by-play data of the
    # 2024 NCAA D1 Women's Volleyball National Championship game.
    print(
        "Get the parsed play-by-play data of the "
        + "2024 NCAA D1 Women's volleyball National Championship game"
    )
    df = get_parsed_volleyball_pbp(6080706)
    print(df)

    # Get the parsed play-by-play data of the
    # 2024 NCAA D1 Men's Volleyball National Championship game.
    print(
        "Get the parsed play-by-play data of the "
        + "2024 NCAA D1 Men's volleyball National Championship game"
    )
    df = get_parsed_volleyball_pbp(5282845)
    print(df)

    ```

    Returns
    ----------
    A pandas `DataFrame` object with the parsed play-by-play (PBP) data
    for a given game. If no raw play-by-play data could be found
    for the game, an empty `DataFrame` is returned
    and nothing is written to the cache.

    """
    home_dir = _format_folder_str(expanduser("~"))
    cache_root = f"{home_dir}/.ncaa_stats_py/"

    games_df = None
    file_mod_datetime = None

    # The sport of a game is not known until the raw PBP is loaded,
    # so both the men's and women's caches are checked.
    # WVB is checked last, so it wins if both files happen to exist.
    for cached_sport_id in ("MVB", "WVB"):
        sport_dir = f"{cache_root}volleyball_{cached_sport_id}/"
        parsed_pbp_dir = f"{sport_dir}parsed_pbp/"

        for folder in (cache_root, sport_dir, parsed_pbp_dir):
            if not exists(folder):
                mkdir(folder)

        cache_file = f"{parsed_pbp_dir}{game_id}_parsed_pbp.csv"
        if exists(cache_file):
            games_df = pd.read_csv(cache_file).infer_objects()
            file_mod_datetime = datetime.fromtimestamp(getmtime(cache_file))
        else:
            logging.info(
                f"Could not find a cached {cached_sport_id} "
                + f"parsed PBP file for game ID `{game_id}`."
            )

    if games_df is not None:
        cache_age = datetime.today() - file_mod_datetime
        if cache_age.days <= 1:
            return games_df

    raw_df = get_volleyball_raw_pbp(game_id=game_id)

    # `get_volleyball_raw_pbp()` returns an empty DataFrame when a game
    # has no plays; `.iloc[0]` below would raise an `IndexError` on it.
    if raw_df.empty:
        logging.warning(
            f"Could not find any plays for game ID `{game_id}`. "
            + "Returning empty DataFrame."
        )
        return raw_df

    sport_id = raw_df["sport_id"].iloc[0]
    home_team_id = raw_df["home_team_id"].iloc[0]
    away_team_id = raw_df["away_team_id"].iloc[0]

    pbp_df = _volleyball_pbp_helper(raw_df=raw_df)

    home_roster_df = get_volleyball_team_roster(team_id=home_team_id)
    home_roster_df["Name"] = home_roster_df["Name"].str.lower()

    away_roster_df = get_volleyball_team_roster(team_id=away_team_id)
    away_roster_df["Name"] = away_roster_df["Name"].str.lower()

    # Lower-cased roster name -> player ID, for both teams.
    # On a name collision, the away team's player ID is kept.
    players_arr = dict(
        zip(home_roster_df["Name"], home_roster_df["player_id"])
    ) | dict(
        zip(away_roster_df["Name"], away_roster_df["player_id"])
    )

    # Every `{prefix}_name` column gets a matching `{prefix}_id` column.
    player_column_prefixes = (
        "substitution_player_1",
        "substitution_player_2",
        "substitution_player_3",
        "substitution_player_4",
        "serve_player",
        "reception_player",
        "set_player",
        "set_error_player",
        "attack_player",
        "dig_player",
        "kill_player",
        "block_player_1",
        "block_player_2",
        "ball_handling_error_player",
        "dig_error_player",
    )

    for prefix in player_column_prefixes:
        name_column = f"{prefix}_name"
        id_column = f"{prefix}_id"

        # `regex=False` is required here: on pandas versions where
        # `regex` defaults to `True`, "." matches (and removes)
        # every character in the name.
        cleaned_names = pbp_df[name_column].str.replace(
            "3a", "", regex=False
        )
        cleaned_names = cleaned_names.str.replace(".", "", regex=False)
        pbp_df[name_column] = cleaned_names

        # NOTE(review): `_name_smother` presumably normalizes a name into
        # the same form used by the roster's "Name" column - confirm.
        # Missing names are skipped instead of being passed to it.
        pbp_df[id_column] = (
            cleaned_names.str.lower()
            .map(_name_smother, na_action="ignore")
            .map(players_arr)
        )

    pbp_df.to_csv(
        f"{cache_root}volleyball_{sport_id}/parsed_pbp/"
        + f"{game_id}_parsed_pbp.csv",
        index=False
    )
    return pbp_df
Given a valid game ID, this function will attempt to parse play-by-play (PBP) data for that game.
Parameters
game_id
(int, mandatory):
Required argument.
Specifies the game you want play-by-play data (PBP) from.
Usage
Returns
A pandas DataFrame
object with the play-by-play (PBP) data for a given game.