WHKYHAC: Pass Direction Success Rate Viz

 

The Viz Launchpad Competition

WHKYHAC and Sportlogiq announced a visualization competition using data from the 2023 Season of the Professional Women’s Hockey Players Association. The entry deadline for the competition is July 2, 2023. Prior to that deadline I’ll post some “trial” data visualizations here.

Today’s Data Viz

This post is about successful (and failed) passing plays. I’ll look at the success rate for a pass attempt based on its angle and the zone in which it originated.

Successful Pass Attempts Based On Pass Angle and Zone

This viz shows successful pass attempts (blue) and failed pass attempts (red) based on the angle of the pass attempt. For this purpose each pass attempt was put into one of eighteen groups based on its angle, and each group was separated into the zone from which the pass attempt originated. The pass attempts on the far right of each plot were “straight forward” from the position of the person making the pass towards the far end of the attacking zone. The pass attempts rotate from “straight forward” to “sideways” to “straight backward” as you move from the right to the left side of each zone plot.

Open the image in a new tab if you want to see a larger version.

It was essentially a coin flip as to whether a forward pass attempt was completed in many cases. Sideways and backward pass attempts succeeded more often, though backward pass attempts obviously do not have the merit of being in the direction of the opponent’s net.

The Code

Here’s the code for this data viz.

# SETUP ########################################################################

setwd("~/18_skaters/r_studio/whkyhac")

library(tidyverse)
library(stringr)
library(lubridate)

# LOAD DATA ####################################################################

raw_pbp_data <- read_csv("23_PBP_WHKYHAC_SPORTLOGIQ.csv", 
                         locale = locale(encoding = "ISO-8859-1"))

# EXPLORE DATA #################################################################

# Players

player_names <- unique(raw_pbp_data$player)

# Events

event_names <- unique(raw_pbp_data$eventname)

event_outcomes <- unique(raw_pbp_data$outcome)

event_types <- unique(raw_pbp_data$type)

# Strength states

strength_states <- unique(raw_pbp_data$strengthstate)

# CLEAN AND MANIPULATE DATA (AREAS OF INTEREST ONLY) ###########################

clean_pbp_data <- raw_pbp_data

# Fix name for Kristin O’Neill

clean_pbp_data$player <- str_replace_all(clean_pbp_data$player, "\\031", "’")

# Add game_id

clean_pbp_data$game_id <- paste(clean_pbp_data$game, clean_pbp_data$date)

# Add event_id

clean_pbp_data$event_id <- seq(1:length(clean_pbp_data$seasonstage))

# Add empty_net to opposing_goalie variable

clean_pbp_data$opposing_goalie <- ifelse(
  is.na(clean_pbp_data$opposing_goalie),
  "empty_net", 
  clean_pbp_data$opposing_goalie)

# Reorganize a little

clean_pbp_data <- select(clean_pbp_data, c(30:31,1:29))

# GET PASS DATA ################################################################

# Shrink the data need for this viz

clean_pbp_data_viz <- clean_pbp_data %>%
  select(event_id,
         teamname,
         eventname,
         outcome,
         type,
         xadjcoord,
         yadjcoord)

# Get successful pass / reception data (pass -> reception)

pass_data <- clean_pbp_data_viz %>%
  filter(eventname == "pass")

successful_pass_data <- pass_data %>%
  filter(outcome == "successful")

successful_pass_ids <- successful_pass_data$event_id

loop_list <- list()

for (i in 1:4) {
  
  loop_data <- clean_pbp_data_viz %>%
    filter(event_id %in% (successful_pass_ids + i)) %>%
    filter(eventname == "reception")
  
  loop_list[[i]] <- loop_data$event_id
  
  successful_pass_ids <- setdiff(successful_pass_ids, 
                                 (loop_data$event_id - i))
  
}

successful_reception_ids <- unlist(loop_list)

successful_pass_ids <- successful_pass_data$event_id

target_successful_data <- clean_pbp_data_viz %>%
  filter(event_id %in% successful_pass_ids | 
           event_id %in% successful_reception_ids) %>%
  arrange(event_id)

# Quick check for pass > reception pattern in the target data
# This should print a repeating pattern of "1 2"

#options(max.print=100000)
#match(target_successful_data$eventname, 
#c("pass", "reception"))
#options(max.print=1000)

# Add new event_ids

target_successful_data <- target_successful_data %>%
  mutate(new_event_id = seq(1:length(target_successful_data$event_id)))

# Add zones based on location of "pass"

target_successful_data <- target_successful_data %>%
  mutate(zone = case_when(
    xadjcoord <= -25 ~ "DEFENSIVE ZONE",
    xadjcoord >= 25 ~ "ATTACKING ZONE",
    TRUE ~ "NEUTRAL ZONE"))

# Add the angle of the pass
# Angle = 0 for straight ahead
# Angle = 90 for straight sideways (either direction)
# Angle = 180 for straight backwards

target_successful_data <- target_successful_data %>%
  mutate(angle = ifelse(
    eventname == "pass",
    abs((180 / pi) * atan((yadjcoord - lead(yadjcoord, n = 1)) / (xadjcoord - lead(xadjcoord, n = 1)))),
    NA))

target_successful_data <- target_successful_data %>%
  mutate(angle = ifelse(
    eventname == "pass" & 
      xadjcoord > lead(xadjcoord, n = 1),
    180 - angle,
    angle))

# In cases where the pass and reception coordinates are identical the angle is NaN 
# There are relatively few cases of this (32) so for current purposes I will simply filter them out

nan_data <- target_successful_data %>%
  filter(angle == "NaN")

nan_ids <- nan_data$new_event_id

nan_filter_ids <- c(nan_ids, (nan_ids + 1))

target_successful_data <- target_successful_data %>%
  filter(!new_event_id %in% nan_filter_ids)

# Group pass angles into 18 bins

target_successful_data <- target_successful_data %>%
  mutate(angle_group = case_when(
    angle > 170 ~ "group_A",
    angle > 160 ~ "group_B",
    angle > 150 ~ "group_C",
    angle > 140 ~ "group_D",
    angle > 130 ~ "group_E",
    angle > 120 ~ "group_F",
    angle > 110 ~ "group_G",
    angle > 100 ~ "group_H",
    angle > 90 ~ "group_I",
    angle > 80 ~ "group_J",
    angle > 70 ~ "group_K",
    angle > 60 ~ "group_L",
    angle > 50 ~ "group_M",
    angle > 40 ~ "group_N",
    angle > 30 ~ "group_O",
    angle > 20 ~ "group_P",
    angle > 10 ~ "group_Q",
    angle >= 0 ~ "group_R"))

# Now do something similar for failed pass attempts

failed_pass_data <- pass_data %>%
  filter(outcome == "failed")

failed_pass_ids <- failed_pass_data$event_id

# Explore subsequent events

failed_pass_subsequent_events <- clean_pbp_data_viz %>%
  filter(event_id %in% (failed_pass_ids + 1))

summary_failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
  group_by(eventname) %>%
  summarize(events = n()) %>%
  ungroup() %>%
  arrange(desc(events))

# Look a little more closely at controlled exits

failed_controlled_exits <- failed_pass_subsequent_events %>%
  filter(eventname == "controlledexit")

failed_controlled_exits_ids <- failed_controlled_exits$event_id

failed_controlled_exits_data <- clean_pbp_data_viz %>%
  filter(event_id %in% c((failed_controlled_exits_ids - 1),
                         (failed_controlled_exits_ids),
                         (failed_controlled_exits_ids + 1),
                         failed_controlled_exits_ids + 2))

# This is a mixed bag - I will make the following assumptions:
# The location of blocks and failed receptions can be used to find pass angle
# The location of failed controlled exits is the same as failed receptions and therefore can be used to find pass angle
# The location of lpr is less reliable but I'll treat it as "good enough" - it's too bad this makes up the majority of events as it introduces more uncertainty about the pass angle
# The above items represent most of the failed pass attempts - I'll simply filter out the remaining items for current purposes

failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
  filter(eventname == "lpr" |
           eventname == "block" |
           eventname == "reception" |
           eventname == "controlledexit")

# Revised summary just to make sure things went smoothly

rev_summary_failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
  group_by(eventname) %>%
  summarize(events = n()) %>%
  ungroup() %>%
  arrange(desc(events))

# Build out target failed pass data similar to successful pass data

target_failed_pass_ids <- failed_pass_subsequent_events$event_id

target_failed_data <- clean_pbp_data_viz %>%
  filter(event_id %in% target_failed_pass_ids |
           event_id %in% (target_failed_pass_ids - 1)) %>%
  arrange(event_id)

# Add new event_ids

target_failed_data <- target_failed_data %>%
  mutate(new_event_id = seq(1:length(target_failed_data$event_id)))

# Add zones based on location of "pass"

target_failed_data <- target_failed_data %>%
  mutate(zone = case_when(
    xadjcoord <= -25 ~ "DEFENSIVE ZONE",
    xadjcoord >= 25 ~ "ATTACKING ZONE",
    TRUE ~ "NEUTRAL ZONE"))

# Need to flip coordinates when the subsequent event team is not the same as the passing team

target_failed_data <- target_failed_data %>%
  mutate(flip = ifelse(
    eventname != "pass" &
      teamname != lag(teamname),
    TRUE,
    FALSE))

target_failed_data <- target_failed_data %>%
  mutate(xadjcoord = ifelse(
    flip == TRUE,
    xadjcoord * -1,
    xadjcoord))

target_failed_data <- target_failed_data %>%
  mutate(yadjcoord = ifelse(
    flip == TRUE,
    yadjcoord * -1,
    yadjcoord))

# Add the angle of the pass
# Angle = 0 for straight ahead
# Angle = 90 for straight sideways (either direction)
# Angle = 180 for straight backwards

target_failed_data <- target_failed_data %>%
  mutate(angle = ifelse(
    eventname == "pass",
    abs((180 / pi) * atan((yadjcoord - lead(yadjcoord, n = 1)) / (xadjcoord - lead(xadjcoord, n = 1)))),
    NA))

target_failed_data <- target_failed_data %>%
  mutate(angle = ifelse(
    eventname == "pass" & 
      xadjcoord > lead(xadjcoord, n = 1),
    180 - angle,
    angle))

# Group pass angles into 18 bins

target_failed_data <- target_failed_data %>%
  mutate(angle_group = case_when(
    angle > 170 ~ "group_A",
    angle > 160 ~ "group_B",
    angle > 150 ~ "group_C",
    angle > 140 ~ "group_D",
    angle > 130 ~ "group_E",
    angle > 120 ~ "group_F",
    angle > 110 ~ "group_G",
    angle > 100 ~ "group_H",
    angle > 90 ~ "group_I",
    angle > 80 ~ "group_J",
    angle > 70 ~ "group_K",
    angle > 60 ~ "group_L",
    angle > 50 ~ "group_M",
    angle > 40 ~ "group_N",
    angle > 30 ~ "group_O",
    angle > 20 ~ "group_P",
    angle > 10 ~ "group_Q",
    angle >= 0 ~ "group_R"))

# Filter out NaN cases (3) 

nan_data_2 <- target_failed_data %>%
  filter(angle == "NaN")

nan_ids_2 <- nan_data_2$new_event_id

nan_filter_ids_2 <- c(nan_ids_2, (nan_ids_2 + 1))

target_failed_data <- target_failed_data %>%
  filter(!new_event_id %in% nan_filter_ids_2)

# PLOT #########################################################################

# Prep the plot data

plot_data_successful <- target_successful_data %>%
  filter(eventname == "pass") %>%
  group_by(angle_group, zone) %>%
  summarize(successful_passes = n()) %>%
  ungroup()


plot_data_failed <- target_failed_data %>%
  filter(eventname == "pass") %>%
  group_by(angle_group, zone) %>%
  summarize(failed_passes = n()) %>%
  ungroup()

plot_data <- plot_data_successful
plot_data$failed_passes <- plot_data_failed$failed_passes

plot_data <- plot_data %>%
  mutate(success_rate = round((successful_passes / (successful_passes + failed_passes)) * 100))

# Plot the results

plot <- ggplot() +
  geom_col(data = group_by(plot_data, angle_group),
           aes(x = angle_group,
               y = successful_passes),
           fill = "blue3") + 
  geom_col(data = group_by(plot_data, angle_group),
           aes(x = angle_group,
               y = (failed_passes * -1)),
           fill = "red3") +
  geom_point(data = group_by(plot_data, angle_group),
             aes(x = angle_group,
                 y = 1200,
                 colour = success_rate),
             shape = "square",
             size = 8) + 
  geom_text(data = group_by(plot_data, angle_group),
            aes(x = angle_group,
                y = 1205,
                label = success_rate),
            colour = "white",
            size = 3.8,
            fontface = "bold") +
  annotate("text",
           x = 16.5,
           y = 1300,
           label = "FORWARD") +
  annotate("text",
           x = 9.5,
           y = 1300,
           label = "SIDEWAYS") +
  annotate("text",
           x = 2.5,
           y = 1300,
           label = "BACKWARD") +
  annotate("text",
           x = 9.5,
           y = 1120,
           label = "Success Rate (%)") +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        plot.title = element_text(size = 20,
                                  face = "bold"),
        plot.subtitle = element_text(size = 16),
        plot.caption = element_text(size = 14),
        axis.title.x = element_blank(),
        axis.text = element_blank(),
        legend.position = "none",
        strip.background = element_rect(fill = "grey18"),
        strip.text = element_text(colour =  "white",
                                  size = 10,
                                  face = "bold")) +
  labs(title = "Direction Of Successful And Failed Pass Attempts",
       subtitle = "The pass direction rotates from stright forward (on the right) to straight backward (on the left) in each zone plot",
       y = "Number of Pass Attempts",
       caption = "Data by Sportlogiq, viz by 18 Skaters, #WHKYHAC") +
  scale_colour_gradient(low = "red", high = "blue") +
  facet_wrap(vars(zone))
#plot

The End

That’s it. I’ll post more data visualizations in the days leading up to the July 2 deadline for the Viz Launchpad Competition.

Mark (18 Skaters)