WHKYHAC: End Of Possession Viz

 

The Viz Launchpad Competition

WHKYHAC and Sportlogiq announced a visualization competition using data from the 2023 Season of the Professional Women’s Hockey Players Association. The entry deadline for the competition is July 2, 2023. This will be my final “trial” data visualization before the entry deadline.

How Did Teams Lose Possession Of The Puck?

This viz is intended to be clear and concise. The viewer should immediately understand that successful passing is important when it comes to maintaining possession of the puck.

I know some people don’t like this type of data viz (i.e., anything that resembles a pie chart). In this case I think it works to deliver the simple message that passing is important when it comes to maintaining puck possession.

Open the image in a new tab if you want to see a larger version.

This simple data viz doesn’t provide any useful information about how to improve passing. For a look at the success rate of a pass attempt based on its angle and the zone in which it originated check out my previous post.

The Code

Here’s the code for this data viz.

# SETUP ########################################################################

setwd("~/18_skaters/r_studio/whkyhac")

library(tidyverse)
library(stringr)
library(lubridate)
library(geomtextpath)


# LOAD DATA ####################################################################

raw_pbp_data <- read_csv("23_PBP_WHKYHAC_SPORTLOGIQ.csv", 
                         locale = locale(encoding = "ISO-8859-1"))

# EXPLORE DATA #################################################################

#print(str(raw_pbp_data))

# Players

player_names <- unique(raw_pbp_data$player)

# Events

event_names <- unique(raw_pbp_data$eventname)

event_outcomes <- unique(raw_pbp_data$outcome)

event_types <- unique(raw_pbp_data$type)

# Strength states

strength_states <- unique(raw_pbp_data$strengthstate)

# CLEAN AND MANIPULATE DATA (AREAS OF INTEREST ONLY) ###########################

# Note: much of this script was originally written for a different viz that looked at all shot attempt assists

clean_pbp_data <- raw_pbp_data

# Fix name for Kristin O’Neill

clean_pbp_data$player <- str_replace_all(clean_pbp_data$player, "\\031", "’")

# Add game_id

clean_pbp_data$game_id <- paste(clean_pbp_data$game, clean_pbp_data$date)

# Add event_id

clean_pbp_data$event_id <- seq(1:length(clean_pbp_data$seasonstage))

# Add empty_net to opposing_goalie variable

clean_pbp_data$opposing_goalie <- ifelse(
        is.na(clean_pbp_data$opposing_goalie),
        "empty_net", 
        clean_pbp_data$opposing_goalie)

# Reorganize

clean_pbp_data <- select(clean_pbp_data, c(30:31,1:29))

# EXPLORE POSSESSION DATA ######################################################

# Isolate a single game

exploration_data <- filter(clean_pbp_data, 
                           game_id == "ADI-HAR 2022-10-15")

# Get final possession event_ids and add them to play-by-play data

end_possession_data <- exploration_data %>%
        group_by(currentpossession) %>% 
        summarise(end_possession_event_id = last(event_id)) %>%
        ungroup()

end_possession_ids <- end_possession_data$end_possession_event_id

exploration_data$end_possession <- ifelse(
        exploration_data$event_id %in% end_possession_ids,
        TRUE,
        FALSE)

exploration_data <- exploration_data %>%
        select(c(1:10, 32, 11:31))

# Filter for end possession events and summarize

end_possession_events_explore <- exploration_data %>%
        filter(end_possession == TRUE) %>%
        group_by(eventname) %>%
        summarize(end_events = n(),
                  fails = sum(outcome == "failed"),
                  successes = sum(outcome == "successful"),
                  undetermined = sum (outcome == "undetermined")) %>%
        arrange(desc(end_events)) %>%
        ungroup()

# Shrink data to make it easier to explore relationship between shot/pass/block

exploration_data <- exploration_data %>%
        select(end_possession,
               eventname,
               type, 
               outcome)

# Extend this approach to all play-by-play data and summarize

all_end_possession_data <- clean_pbp_data %>%
        group_by(game_id, currentpossession) %>% 
        summarise(end_possession_event_id = last(event_id)) %>%
        ungroup()

all_end_possession_ids <- all_end_possession_data$end_possession_event_id

clean_pbp_data$end_possession <- ifelse(
        clean_pbp_data$event_id %in% all_end_possession_ids,
        TRUE,
        FALSE)

clean_pbp_data <- clean_pbp_data %>%
        select(c(1:10, 32, 11:31))

end_possession_events_all <- clean_pbp_data %>%
        filter(end_possession == TRUE) %>%
        group_by(eventname) %>%
        summarize(end_events = n(),
                  fails = sum(outcome == "failed"),
                  successes = sum(outcome == "successful"),
                  undetermined = sum (outcome == "undetermined")) %>%
        arrange(desc(end_events)) %>%
        ungroup()

# Make some adjustments to this summary
# Start with the 151 FAILED blocks classified as end_events

failed_blocks <- clean_pbp_data %>%
        filter(end_possession == TRUE,
               eventname == "block",
               outcome == "failed")

failed_block_types <- unique(failed_blocks$type)

# It looks like the failed blocks should be attributed to shots and dump outs

failed_block_ids <- failed_blocks$event_id

failed_block_prior_events_id <- c(failed_block_ids -1, 
                                  failed_block_ids -2, 
                                  failed_block_ids -3)

failed_block_prior_events <- clean_pbp_data %>%
        filter(event_id %in% failed_block_prior_events_id) %>%
        group_by(eventname) %>%
        summarize (prior_events = n()) %>%
        arrange(desc(prior_events)) %>%
        ungroup()

# There were 115 shots and 36 dump outs in prior events, which equals 151 :)

# Examine events around successful lpr and puck protection

success_lpr_pp_ids <- clean_pbp_data %>%
        filter(end_possession == TRUE,
               eventname == "lpr" | eventname == "puckprotection",
               outcome == "successful")

success_lpr_pp_ids <- success_lpr_pp_ids$event_id

success_lpr_pp_other_event_ids <- c(success_lpr_pp_ids -3, 
                                    success_lpr_pp_ids -2, 
                                    success_lpr_pp_ids -1, 
                                    success_lpr_pp_ids, 
                                    success_lpr_pp_ids +1, 
                                    success_lpr_pp_ids + 2, 
                                    success_lpr_pp_ids +3)

success_lpr_pp_other_event <- clean_pbp_data %>%
        filter(event_id %in% success_lpr_pp_other_event_ids) %>%
        select(event_id,
               end_possession,
               eventname,
               type,
               outcome) %>%
        arrange()

# It looks like most of the successful events were followed by a faceoff

success_lpr_pp_next_event <- clean_pbp_data %>%
        filter(event_id %in% (success_lpr_pp_ids +1)) %>%
        group_by(eventname) %>%
        summarize(next_events = n()) %>%
        ungroup()

# For current purposes I will simply assume that the successful events should not be treated as a loss of possession

# There are 18 passes with an undetermined outcome
# According to the data dictionary this happens when there is a game stoppage - I'll assume this should not be treated as a loss of possession

# Breakdown the successful blocks

blocks_breakdown <- clean_pbp_data %>%
        filter(end_possession == TRUE,
               eventname == "block",
               outcome == "successful") %>%
        group_by(type) %>%
        summarize(count = n()) %>%
        arrange(desc(count)) %>%
        ungroup()

blocks_breakdown <- blocks_breakdown %>%
        mutate(proportion = count / sum(count))

# Roughly 75% of blocks are attributable to blocked passes

# Manually adjust some of the end_event totals:

# Subtract failed blocks (added to shot and dumpout)
# Add 115 shots
# Add 36 dump outs
# Subtract successful lpr outcomes 
# Subtract successful puck protection outcomes 
# Subtract undetermined pass by outcomes

end_possession_events_all_adjusted <- end_possession_events_all %>%
        mutate(end_events = case_when(
        eventname == "block" ~ successes,
        eventname == "shot" ~ end_events + 115,
        eventname == "dumpout" ~ end_events + 36,
        eventname == "lpr" ~ fails,
        eventname == "puckprotection" ~ fails,
        eventname == "pass" ~ fails,
        TRUE ~ end_events)) 

# Breakdown successful blocks by type and remove generic blocks

blocks_bolt_on <- clean_pbp_data %>%
        filter(end_possession == TRUE,
               eventname == "block",
               outcome == "successful") %>%
        group_by(type) %>%
        summarize(end_events = n(),
                  fails = sum(outcome == "failed"),
                  successes = sum(outcome == "successful"),
                  undetermined = sum (outcome == "undetermined")) %>%
        arrange(desc(end_events)) %>%
        ungroup() %>%
        rename(eventname = type)

blocks_bolt_on$eventname <- paste0("blocked_", blocks_bolt_on$eventname)

end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
        bind_rows(blocks_bolt_on) %>%
        arrange(desc(end_events))

end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
        filter(eventname != "block")

# Finalize the data
# I'll exclude shots from the list because they're usually a "good" way to end possession
# I'll also filter for events with 1,000+ instances

plot_data <- end_possession_events_all_adjusted %>%
        filter(end_events >= 1000,
               eventname != "shot") %>%
        select(c(1:2)) 

plot_data <- plot_data %>%
        mutate(perc = round(end_events / sum(end_events),2) * 100) %>%
        mutate(label = paste0(perc, "%"))

plot_data[1,1] <- "FAILED PASS"
plot_data[2,1] <- "BLOCKED PASS"
plot_data[3,1] <- "FAILED \n PUCK RECOVERY"
plot_data[4,1] <- "FAILED \n PUCK PROTECTION"
plot_data[5,1] <- "DUMP IN"
plot_data[6,1] <- "DUMP OUT"
plot_data[7,1] <- "OPPONENT \n CHECK"

plot_data <- plot_data %>%
        mutate(eventname = fct_reorder(eventname, end_events))

# PLOT THE DATA ################################################################

plot <- ggplot(plot_data, 
               aes(x = eventname, 
                   y = end_events, 
                   fill = eventname,
                   label = label)) +
        geom_col(show.legend = FALSE,
                 width = 1,
                 colour = "grey38") +
        theme_minimal() +
        geom_text(aes(colour = eventname),
                  nudge_y = -450,
                  nudge_x = 0.03,
                  show.legend = FALSE,
                  fontface = "bold",
                  size = 5) +
        theme(aspect.ratio = 1,
              axis.text.x = element_text(size = 18),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.title = element_text(size = 20,
                                        face = "bold"),
              plot.subtitle = element_text(size = 16),
              plot.title.position = "plot",
              plot.caption = element_text(size = 14)) +
        labs(x = NULL, 
             y = NULL,
             title = "How did teams lose possession of the puck?",
             subtitle = "PWHPA 2023 Season",
             caption = "Data by Sportlogiq, viz by 18 Skaters, #WHKYHAC") + 
        scale_fill_manual(values = c("grey90", "grey90", "grey90", "grey90", "grey90", "purple", "purple3")) +
        scale_colour_manual(values = c("grey20", "grey20", "grey20", "grey20", "grey20", "yellow", "yellow")) +
        coord_curvedpolar() 
#plot      

The End

That’s it. I’ll post my submission to the Viz Launchpad Competition in the next few days.

Mark (18 Skaters)