Get PWHL Play-By-Play Data

 

Introduction

This document provides a function for getting the play-by-play data for a specified PWHL game. The function does not return all of the data available through the PWHL’s API, and it adds variables that are not included in the source data. You can take this function and modify it to fit your own needs.

I'm updating the function as I explore this new API. If you find a glitch please let me know about it.

This function does not work for game_id < 28. I have a function for those game_ids (plus some other helpful stuff) on GitHub: https://github.com/ma-barbour/pwhl  

Basic Setup

#install.packages("tidyverse")
#install.packages("jsonlite")
#install.packages("httr")

library(tidyverse)
library(jsonlite)
library(httr)

Function: Get Play-By-Play Data

This function gets the play-by-play data for a specified PWHL game_id. The data are pulled from the PWHL’s API. The function includes explanatory comments (lines that begin with a hash symbol “#”).

A preview of the data returned by the function is provided below under the heading “Example Data”.

Here’s the function.

#' Get play-by-play data for one PWHL game
#'
#' Downloads the game summary feed and the play-by-play feed for `game_id`
#' from lscluster.hockeytech.com, flattens the nested event details, and
#' returns one row per event. "goalie_change" events are dropped.
#'
#' Shootout events are assigned period 5 and have `game_seconds = NA`;
#' "OT1" is recoded to period 4.
#'
#' @param game_id A single game id (anything `paste0()` can embed in a URL
#'   and `as.integer()` can convert).
#' @return A tibble with the game info (`game_id`, `date`, home/away team),
#'   period and clock columns, the event with its team and player, location,
#'   shot, goal, assist, goalie, faceoff and penalty columns, followed by the
#'   `plus_players_id_*` / `minus_players_id_*` columns. Output columns whose
#'   source is missing from the feed are omitted.
get_pbp_data <- function(game_id) {

  stopifnot(length(game_id) == 1L, !is.na(game_id))

  # Local helper (kept inside the function so that a serialised copy of
  # get_pbp_data stays self-contained): download a JSONP feed, fail loudly on
  # an HTTP error, strip the callback wrapper and parse the JSON.
  read_jsonp <- function(url, callback) {
    response <- GET(url)
    stop_for_status(response)
    payload <- content(response, "text", encoding = "UTF-8")
    payload <- sub(paste0(callback, "("), "", payload, fixed = TRUE)
    payload <- sub("\\)$", "", payload)
    fromJSON(payload, flatten = TRUE)
  }

  # Get basic game data

  url_1 <- paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=gameSummary&game_id=", game_id, "&key=694cfeed58c932ee&site_id=2&client_code=pwhl&lang=en&league_id=&callback=angular.callbacks._6")

  game_data <- read_jsonp(url_1, "angular.callbacks._6")

  game_date <- game_data[["details"]][["GameDateISO8601"]]
  home_id <- game_data[["homeTeam"]][["info"]][["id"]]
  home_abbr <- game_data[["homeTeam"]][["info"]][["abbreviation"]]
  away_id <- game_data[["visitingTeam"]][["info"]][["id"]]
  away_abbr <- game_data[["visitingTeam"]][["info"]][["abbreviation"]]

  # Get the raw pbp data

  url_2 <- paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=gameCenterPlayByPlay&game_id=", game_id, "&key=694cfeed58c932ee&client_code=pwhl&lang=en&league_id=&callback=angular.callbacks._8")

  raw_data <- read_jsonp(url_2, "angular.callbacks._8")

  # Unnest the data
  #
  # details.assists is unpacked first (without a name prefix), which exposes
  # the per-player fields; every other list column is then widened in turn
  # with "_<n>" suffixes. The order matters: the "details.plus_players_*" and
  # "details.minus_players_*" columns only exist once their parent column has
  # been widened.

  player_fields <- c("id",
                     "firstName",
                     "lastName",
                     "jerseyNumber",
                     "position",
                     "birthDate",
                     "playerImageURL")

  nested_cols <- c(player_fields,
                   "details.assistNumbers",
                   "details.plus_players",
                   paste0("details.plus_players_", player_fields),
                   "details.minus_players",
                   paste0("details.minus_players_", player_fields))

  pbp_data <- unnest_wider(raw_data, details.assists)

  for (nested_col in nested_cols) {
    pbp_data <- unnest_wider(pbp_data,
                             all_of(nested_col),
                             names_sep = "_")
  }

  # Add basic game data to play-by-play data (date converted to Date).
  # .env$ makes it explicit that these values come from the function's own
  # variables and not from a same-named column in the feed.

  pbp_data <- pbp_data |>
    mutate(game_id = .env$game_id,
           date = as_date(.env$game_date),
           home_team_id = .env$home_id,
           home_team = .env$home_abbr,
           away_team_id = .env$away_id,
           away_team = .env$away_abbr)

  # Tidy period_ids (fix OT and shootout data)

  pbp_data <- pbp_data |>
    mutate(details.period.id = if_else(event == "shootout", "5", details.period.id),
           details.period.id = if_else(details.period.id == "OT1", "4", details.period.id),
           details.period.id = as.integer(details.period.id))

  # Convert period time to seconds and add game clock.
  # Periods 1-4 are offset by 1200 seconds per completed period; any other
  # period (e.g. the shootout) gets NA.

  pbp_data <- pbp_data |>
    separate_wider_delim(cols = details.time,
                         delim = ":",
                         names = c("min", "sec")) |>
    mutate(period_seconds = (as.numeric(min) * 60) + as.numeric(sec),
           .after = sec) |>
    mutate(game_seconds = case_when(
             details.period.id %in% 1:4 ~ period_seconds + (details.period.id - 1L) * 1200,
             TRUE ~ NA),
           .after = period_seconds) |>
    select(-min, -sec) |>
    mutate(period_seconds = as.integer(period_seconds),
           game_seconds = as.integer(game_seconds))

  # Remove "details." from column names (literal match, so "." is not
  # treated as a regex wildcard)

  names(pbp_data) <- gsub("details.", "", names(pbp_data), fixed = TRUE)

  # Remove the goalie_change events

  pbp_data <- pbp_data |>
    filter(event != "goalie_change")

  # Tidy the assists data. The secondary-assist columns only exist when at
  # least one goal in the game had two assists.

  pbp_data <- pbp_data |>
    rename(primary_assist_id = id_1) |>
    mutate(primary_assist_player = na_if(paste(firstName_1, lastName_1), "NA NA"))

  if ("id_2" %in% colnames(pbp_data)) {
    pbp_data <- pbp_data |>
      rename(secondary_assist_id = id_2) |>
      mutate(secondary_assist_player = na_if(paste(firstName_2, lastName_2), "NA NA"))
  }

  # Consolidate player ids and names across events, add the goal scorer to
  # the shooter columns, add the faceoff loser, and tidy the goalie name

  shooter_events <- c("shot", "blocked_shot", "shootout", "penaltyshot")

  pbp_data <- pbp_data |>
    mutate(event_player_id = case_when(
             event == "faceoff" & homeWin == 1 ~ homePlayer.id,
             event == "faceoff" & homeWin == 0 ~ visitingPlayer.id,
             event == "hit" ~ player.id,
             event == "shot" ~ shooter.id,
             event == "blocked_shot" ~ blocker.id,
             event == "goal" ~ scoredBy.id,
             event == "penalty" ~ takenBy.id,
             event == "shootout" ~ shooter.id,
             event == "penaltyshot" ~ shooter.id),
           shooter_id = case_when(
             event %in% shooter_events ~ shooter.id,
             event == "goal" ~ scoredBy.id,
             TRUE ~ NA),
           faceoff_loser_id = case_when(
             event == "faceoff" & homeWin == 0 ~ homePlayer.id,
             event == "faceoff" & homeWin == 1 ~ visitingPlayer.id,
             TRUE ~ NA),
           event_player = case_when(
             event == "faceoff" & homeWin == 1 ~ paste(homePlayer.firstName, homePlayer.lastName),
             event == "faceoff" & homeWin == 0 ~ paste(visitingPlayer.firstName, visitingPlayer.lastName),
             event == "hit" ~ paste(player.firstName, player.lastName),
             event == "shot" ~ paste(shooter.firstName, shooter.lastName),
             event == "blocked_shot" ~ paste(blocker.firstName, blocker.lastName),
             event == "goal" ~ paste(scoredBy.firstName, scoredBy.lastName),
             event == "penalty" ~ paste(takenBy.firstName, takenBy.lastName),
             event == "shootout" ~ paste(shooter.firstName, shooter.lastName),
             event == "penaltyshot" ~ paste(shooter.firstName, shooter.lastName)),
           shooter = case_when(
             event %in% shooter_events ~ paste(shooter.firstName, shooter.lastName),
             event == "goal" ~ paste(scoredBy.firstName, scoredBy.lastName),
             TRUE ~ NA),
           faceoff_loser = case_when(
             event == "faceoff" & homeWin == 0 ~ paste(homePlayer.firstName, homePlayer.lastName),
             event == "faceoff" & homeWin == 1 ~ paste(visitingPlayer.firstName, visitingPlayer.lastName),
             TRUE ~ NA),
           goalie = na_if(paste(goalie.firstName, goalie.lastName), "NA NA"))

  # Add event teams. For a blocked shot the event team is the blocking team,
  # i.e. the opponent of the shooter's team.

  pbp_data <- pbp_data |>
    mutate(event_team_id = case_when(
             event == "faceoff" & homeWin == 1 ~ home_team_id,
             event == "faceoff" & homeWin == 0 ~ away_team_id,
             event == "hit" ~ as.integer(teamId),
             event == "shot" ~ as.integer(shooterTeamId),
             event == "blocked_shot" & shooterTeamId == home_team_id ~ away_team_id,
             event == "blocked_shot" & shooterTeamId == away_team_id ~ home_team_id,
             event == "goal" ~ as.integer(team.id),
             event == "penalty" ~ as.integer(againstTeam.id)))

  # NOTE(review): penalty shots read "shooter_team.id" while shootout
  # attempts read "shooterTeam.id" - kept exactly as in the original; confirm
  # against the feed that both spellings are intended.

  if ("penaltyshot" %in% pbp_data$event) {
    pbp_data <- pbp_data |>
      mutate(event_team_id = if_else(event == "penaltyshot", shooter_team.id, event_team_id))
  }

  if (5 %in% pbp_data$period.id) {
    pbp_data <- pbp_data |>
      mutate(event_team_id = if_else(period.id == 5, shooterTeam.id, event_team_id))
  }

  # Convert event team ids and penalty minutes to integer. any_of() skips
  # "minutes" when the column is absent instead of raising an error.

  pbp_data <- pbp_data |>
    mutate(event_team_id = as.integer(event_team_id)) |>
    mutate(across(any_of("minutes"), as.integer))

  # Get plus/minus (on-ice) data

  pm_data <- pbp_data |>
    select(starts_with("plus_players_id") | starts_with("minus_players_id"))

  # Shrink the data

  keep_cols <- c("game_id",
                 "date",
                 "home_team_id",
                 "home_team",
                 "away_team_id",
                 "away_team",
                 "period.id",
                 "period_seconds",
                 "game_seconds",
                 "event",
                 "event_team_id",
                 "event_player_id",
                 "event_player",
                 "xLocation",
                 "yLocation",
                 "isGoal",
                 "properties.isPowerPlay",
                 "properties.isShortHanded",
                 "properties.isEmptyNet",
                 "properties.isPenaltyShot",
                 "shotType",
                 "shotQuality",
                 "primary_assist_id",
                 "primary_assist_player",
                 "secondary_assist_id",
                 "secondary_assist_player",
                 "goalie.id",
                 "goalie",
                 "shooter_id",
                 "shooter",
                 "faceoff_loser_id",
                 "faceoff_loser",
                 "minutes",
                 "description",
                 "isPowerPlay",
                 "isBench")

  pbp_data <- pbp_data |>
    mutate(game_id = as.integer(game_id)) |>
    select(any_of(keep_cols))

  # Rename some columns

  col_names <- c(period = "period.id",
                 x_location = "xLocation",
                 y_location = "yLocation",
                 is_goal = "isGoal",
                 goal_is_pp = "properties.isPowerPlay",
                 goal_is_sh = "properties.isShortHanded",
                 goal_is_en = "properties.isEmptyNet",
                 goal_is_ps = "properties.isPenaltyShot",
                 shot_type = "shotType",
                 shot_quality = "shotQuality",
                 goalie_id = "goalie.id",
                 penalty_minutes = "minutes",
                 penalty_description = "description",
                 is_power_play = "isPowerPlay",
                 is_bench = "isBench")

  pbp_data <- rename(pbp_data, any_of(col_names))

  # Join plus/minus (on-ice) data

  pbp_data <- pbp_data |>
    bind_cols(pm_data)

  # Convert goal descriptions to logicals (TRUE when the flag equals 1, NA
  # stays NA). any_of() keeps this consistent with the optional selection
  # above: a flag column that is not present is simply skipped.

  goal_flags <- c("goal_is_pp", "goal_is_sh", "goal_is_en", "goal_is_ps")

  pbp_data <- pbp_data |>
    mutate(across(any_of(goal_flags), \(flag) flag == 1))

  # Add event team abbreviation

  pbp_data <- pbp_data |>
    mutate(event_team = if_else(event_team_id == home_team_id, home_team, away_team),
           .after = event_team_id)

  pbp_data

}

#write_rds(get_pbp_data, "pbp_function.rds")

Example Data

The function returns a tibble that (usually) has 47 variables. The variables (and their data types) are displayed here.

# Fetch the play-by-play data for game 28 and display its structure
example_data <- get_pbp_data(28)

str(example_data)
tibble [117 × 47] (S3: tbl_df/tbl/data.frame)
 $ game_id                : int [1:117] 28 28 28 28 28 28 28 28 28 28 ...
 $ date                   : Date[1:117], format: "2024-02-15" "2024-02-15" ...
 $ home_team_id           : int [1:117] 2 2 2 2 2 2 2 2 2 2 ...
 $ home_team              : chr [1:117] "MIN" "MIN" "MIN" "MIN" ...
 $ away_team_id           : int [1:117] 5 5 5 5 5 5 5 5 5 5 ...
 $ away_team              : chr [1:117] "OTT" "OTT" "OTT" "OTT" ...
 $ period                 : int [1:117] 1 1 1 1 1 1 1 1 1 1 ...
 $ period_seconds         : int [1:117] 0 21 42 104 161 191 271 296 320 329 ...
 $ game_seconds           : int [1:117] 0 21 42 104 161 191 271 296 320 329 ...
 $ event                  : chr [1:117] "faceoff" "faceoff" "blocked_shot" "hit" ...
 $ event_team_id          : int [1:117] 2 2 5 5 5 5 2 5 2 5 ...
 $ event_team             : chr [1:117] "MIN" "MIN" "OTT" "OTT" ...
 $ event_player_id        : int [1:117] 23 115 50 142 58 62 105 55 24 51 ...
 $ event_player           : chr [1:117] "Kelly Pannek" "Michela Cava" "Ashton Bell" "Natalie Snodgrass" ...
 $ x_location             : int [1:117] 300 457 120 413 410 115 183 555 511 459 ...
 $ y_location             : int [1:117] 150 49 165 273 58 46 12 104 205 194 ...
 $ is_goal                : logi [1:117] NA NA NA NA FALSE NA ...
 $ goal_is_pp             : logi [1:117] NA NA NA NA NA NA ...
 $ goal_is_sh             : logi [1:117] NA NA NA NA NA NA ...
 $ goal_is_en             : logi [1:117] NA NA NA NA NA NA ...
 $ goal_is_ps             : logi [1:117] NA NA NA NA NA NA ...
 $ shot_type              : chr [1:117] NA NA "Wrist" NA ...
 $ shot_quality           : chr [1:117] NA NA "Quality blocked" NA ...
 $ primary_assist_id      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ primary_assist_player  : chr [1:117] NA NA NA NA ...
 $ secondary_assist_id    : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ secondary_assist_player: chr [1:117] NA NA NA NA ...
 $ goalie_id              : int [1:117] NA NA 59 NA 123 59 NA 123 123 123 ...
 $ goalie                 : chr [1:117] NA NA "Emerance Maschmeyer" NA ...
 $ shooter_id             : int [1:117] NA NA 129 NA 58 20 NA 55 55 51 ...
 $ shooter                : chr [1:117] NA NA "Mellissa Channell" NA ...
 $ faceoff_loser_id       : int [1:117] 58 57 NA NA NA NA NA NA NA NA ...
 $ faceoff_loser          : chr [1:117] "Brianne Jenner" "Gabbie Hughes" NA NA ...
 $ penalty_minutes        : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ penalty_description    : chr [1:117] NA NA NA NA ...
 $ is_power_play          : logi [1:117] NA NA NA NA NA NA ...
 $ is_bench               : logi [1:117] NA NA NA NA NA NA ...
 $ plus_players_id_1      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ plus_players_id_2      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ plus_players_id_3      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ plus_players_id_4      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ plus_players_id_5      : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ minus_players_id_1     : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ minus_players_id_2     : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ minus_players_id_3     : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ minus_players_id_4     : int [1:117] NA NA NA NA NA NA NA NA NA NA ...
 $ minus_players_id_5     : int [1:117] NA NA NA NA NA NA NA NA NA NA ...

Note About X|Y Locations

The x_location and y_location variables have not been modified from the original. The data points for these variables are on the following scales:

  • x = 0 - 600; and

  • y = 0 - 300.

The data points are “fixed” such that the same team is always “shooting to the left” or “shooting to the right” (example shown below under the heading “Example Shot Plot”). It is worth noting that the 2:1 axis ratio does not reflect the dimensions of a normal hockey rink.

Note About “Plus” And “Minus” Players

The data include “plus” and “minus” players for each goal. The labels “plus” and “minus” appear in the source data. I kept those labels, but it might be helpful to think of the data as “on-ice” data rather than “plus/minus” data. A power play goal has “plus” and “minus” players.

Example Shot Plot