options(stringsAsFactors = FALSE)
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE)
library(dplyr)
library(ggplot2)
library(tidyverse)

This document lays out the R code for making plots in R from Michael Schuckers’ talk at the 2019 RIT Sports Analytics Conference. Slides from that talk can be found here: http://myslu.stlawu.edu/~msch/Schuckers_Joy_RITSAC2019.pdflink. The idea of these plots is to take a morph the usual ice hockey rink heatmaps so that areas of the rink that are of less importance are given less area on the plot. The form that these plots take is a perspective plot since it is areas that are further from the goal (and goal line) that have less importance both in terms of the number of shots but in the probability that those shots will lead to a goal. As a consequence we will make those areas smaller because they have measureably less importance.

For the code below we need to specify the scale variable. To do this you comment out one of the lines in the following chunk. By using n.x we are scaling the plot by the number of shots and then the color variable will represent the shot probability. By using prob we are scaling the plot by the shot probability and then the color variable will represent the number of shots from each location.

#scaleVariable = "Shotlocationprobability$n.x"
scaleVariable = "Shotlocationprobability$prob"

Loading the data

Data here comes from <moneypuck.com>link and you’ll need to have the following files in the appropriate folder: shots_2007-2017.csv and shots_2018.csv. Note that this file was created before the collapsed file through 2018 was created.

Cleaning the data

We next filter the data to include only regular season, even strength, regulation shots from the offensive zone when neither net is empty.

shots2017 <- shots2017 %>%
  filter(isPlayoffGame==0) %>%
  filter(event=="SHOT"|event=="GOAL") %>%
  filter(period<4) %>%
  filter(homeSkatersOnIce==5)%>%
  filter(awaySkatersOnIce==5)%>%
  filter(shotOnEmptyNet==0) %>%
  filter(location !="Neu. Zone") %>%
  filter(arenaAdjustedXCordABS<91)%>%
  filter(arenaAdjustedXCordABS>25)

#table(shots2017$location)

Making probabilities and shot counts by location

This section makes the counts of shots from each location and finds the probability of each shot being a goal from that location.

shotsummary<-shots2017 %>%  #total shot count 
  group_by(arenaAdjustedXCordABS,arenaAdjustedYCord) %>%
  tally()

goalsummary<-shots2017 %>%  #goal count
  filter(event=="GOAL") %>%
  group_by(arenaAdjustedXCordABS,arenaAdjustedYCord) %>%
  tally() 

shotprobxy<-full_join(shotsummary,goalsummary,                  by=c("arenaAdjustedXCordABS","arenaAdjustedYCord"))
shotprobxy$n.y[is.na(shotprobxy$n.y)]=0
shotprobxy$prob=shotprobxy$n.y/shotprobxy$n.x

This next chunk of code tallies the shots totals both counts and probabilities by the X-coordinate shot location assuming that the ice is horizontal from goal line to goal line. It then calculates a smoothed line for those totals via a regression. Some temporary files are written and two histograms are made with the smoothed lines. These lines are made by using a simple regression but this is something that could be done in other ways and might be more representative of the relationship between distance from the goal and importance of that shot.

outshottot<-tapply(shotprobxy$n.x,shotprobxy$arenaAdjustedXCordABS,sum)
outshottot2<-tapply(shotprobxy$prob,shotprobxy$arenaAdjustedXCordABS,sum)

lm1<-lm(outshottot~as.numeric(names(outshottot)))
lm2<-lm(outshottot2~as.numeric(names(outshottot2)))

#write the csv for general shot prob
write.csv(shotprobxy,"Shotlocationprobability.csv",row.names = FALSE)
ggplot(shotprobxy,aes(arenaAdjustedXCordABS,weights=prob))+geom_histogram(binwidth = 1) + ggtitle("Sum of Shot Prob by X location")+geom_abline(slope=lm2$coeff[2],intercept=lm2$coeff[1])

ggplot(shotprobxy,aes(arenaAdjustedXCordABS,weights=n.x))+geom_histogram(binwidth = 1)+ ggtitle("Sum of Shot count by X location")+geom_abline(slope=lm1$coeff[2],intercept=lm1$coeff[1])

#library(tidyverse)

Shotlocationprobability<-read.csv("Shotlocationprobability.csv")

Assigning the Color Variable and the Scaling Variable

In this next chunk of code we specify the data and the variables that we will use for the plots below.

if(scaleVariable =="Shotlocationprobability$n.x"){
  mapData = shotprobxy
  colorVariable = shotprobxy$prob
  outshottot<-tapply(Shotlocationprobability$n.x,shotprobxy$arenaAdjustedXCordABS,sum)  #sum the prob of shots at each position
} else if(scaleVariable == "Shotlocationprobability$prob"){
  mapData = shotprobxy
  colorVariable = Shotlocationprobability$n.x
  outshottot<-tapply(Shotlocationprobability$prob,shotprobxy$arenaAdjustedXCordABS,sum)  #sum the number of shots at each position
} 

Getting the Scaling Factor

So the code below gives us the scaling factor, called gamma that is used to scale the pixels for the heatmaps below.

outshot=data.frame(x=as.numeric(names(outshottot)),y=outshottot)
lm1<-lm(outshottot~as.numeric(names(outshottot)))
pred25<-lm1$coeff[1]+lm1$coeff[2]*25  #get first predicted value
pred90<-lm1$coeff[1]+lm1$coeff[2]*90  #get last predicted value
gamma=pred25/pred90  #get scaling factor

Building the data for the faceoff circles

The code below create the data frames, df.circle1 and df.circle2 for the faceoff circles. Much of this code is borrowed from work by Sam Ventura, Andrew Thomas and Brian Macdonald.

#creating the circles
radius=15

x0=22  #center face off circle on the right
y0=-69

x1=-22  #center face off circle on the left
y1=-69

#First Circle
    angle<-seq(0,2*pi,length=1000)
      xPosition = x0+radius * sin(angle)
      a = (gamma)*45
      yPosition = y0+radius * cos(angle)
      xPositionPrime=(a+(45-a)/(-90+25)*(yPosition+25))* xPosition/45  
      #add perspective
#plot(xPosition,yPosition,pch=".",xlim=c(-45,45),ylim=c(-90,-20))  
#points(xPositionPrime,yPosition,pch=".",col="red")  #plot the right circle

#Second Circle
      xPosition1 = x1+radius * sin(angle)
      a = (gamma)*45 
      yPosition1 = y1+radius * cos(angle)
      xPositionPrime1=(a+(45-a)/(-90+25)*(yPosition1+25))* xPosition1/45  #add perspective
#points(xPositionPrime1,yPosition1,pch=".",col="blue") #plot the left circle

df.circle1 <- data.frame(xPositionPrime, yPosition) #data frame for the first circle
df.circle2<- data.frame(xPositionPrime1, yPosition1) #second circle

Perspective Lines (hockey boards)

To emphasize the perspective nature of our plots we created some lines that emulate hockey boards. This section creates the data for those. Here will also switch the original x and y coordinates from the <moneypuck.com> data as we want to have the Blue Line at the top of our graph not at the left.

newarenaAdjustedXCordABS = Shotlocationprobability$arenaAdjustedXCordABS
newarenaAdjustedYCord = Shotlocationprobability$arenaAdjustedYCord

x = newarenaAdjustedYCord
y = -newarenaAdjustedXCordABS

#perspective height of rink posts
height= 10

##line one is the bottom left
perx1= -44   #x start of line one
pery1= -89   #y start of line one
perx2= -44   #x end of line one
pery2= -89+height   #yend of line one

##line two is the top left
perx3= -44*gamma #x start of line two
pery3= max(y)    #y start of line two
perx4= -44*gamma #x end of line two
pery4= -25+(height*gamma)     #yend of line two

##line three connects one and two
perx5= perx2  #x start of line three
pery5= pery2  #y start of line three
perx6= perx4  #x end of line three
pery6= pery4  #yend of line three

##line four is the bottom right
perx7= 44     #x start of line four
pery7= -89    #y start of line four
perx8= 44     #x end of line four
pery8= -89+height    #yend of line four

##line five is the top right
perx9= 44*gamma   #x start of line five
pery9= -25     #y start of line five
perx10= 44*gamma  #x end of line five
pery10= -25+(height*gamma)     #y end of line five

##line six connects four and five
perx11= perx8   #x start of line six
pery11= pery8   #y start of line six
perx12= perx10  #x end of line six
pery12= pery10  #yend of line six

Rescaling Points

To make our perspective plot using ggplot we are going to use geom_rect, so we can control the size of each pixel or region that is plotted. For the viewer we will keep the y-dimension (height) the same, but adjust the x-dimension (width) by the scaling variable that we chose at the start of the code. The next chunk of code creates a data frame, hockeymap, that has added the minimum and maximum values for making the rectangles that will be plotted as part of the perspective plot below.

#shifting the points
a = (gamma)*45
xprime =(a+(45-a)/(-90+25)*(y+25))* x/45
x.min=(a+(45-a)/(-90+25)*(y+25))* (x-0.5)/45
x.max=(a+(45-a)/(-90+25)*(y+25))* (x+0.5)/45
y.min=y-0.5
y.max=y+0.5
hockeymap=data.frame(x,y,xprime,x.min,x.max,y.min,y.max,probs=Shotlocationprobability$prob,n.x=Shotlocationprobability$n.x)

Defining the Color Palette

Here we defining the color palette and steps in the color scaling. By default we have the center of our scaling to be the color white at the median of the color variable. We use a palette here with brown representing high values and blue representing low values. This palette should be visible by individuals who are colorblind.

# source: https://menugget.blogspot.com/2011/11/define-color-steps-for-colorramppalette.html#more
#This is a wrapper function for colorRampPalette. It allows for the
#definition of the number of intermediate colors between the main colors.
#Using this option one can stretch out colors that should predominate
#the palette spectrum. Additional arguments of colorRampPalette can also
#be added regarding the type and bias of the subsequent interpolation.
color.palette <- function(steps, n.steps.between=NULL, ...){
 
 if(is.null(n.steps.between)) n.steps.between <- rep(0, (length(steps)-1))
 if(length(n.steps.between) != length(steps)-1) stop("Must have one less n.steps.between value than steps")
 
 fill.steps <- cumsum(rep(1, length(steps))+c(0,n.steps.between))
 RGB <- matrix(NA, nrow=3, ncol=fill.steps[length(fill.steps)])
 RGB[,fill.steps] <- col2rgb(steps)
 
 for(i in which(n.steps.between>0)){
  col.start=RGB[,fill.steps[i]]
  col.end=RGB[,fill.steps[i+1]]
  for(j in seq(3)){
   vals <- seq(col.start[j], col.end[j], length.out=n.steps.between[i]+2)[2:(2+n.steps.between[i]-1)]  
   RGB[j,(fill.steps[i]+1):(fill.steps[i+1]-1)] <- vals
  }
 }
 
 new.steps <- rgb(RGB[1,], RGB[2,], RGB[3,], maxColorValue = 255)
 pal <- colorRampPalette(new.steps, ...)
 return(pal)
}

#colfunc2<-colorRampPalette(c("blue","white","pink","red", "red", "red", "dark red","black", "black", "black"))
steps1=c("blue","lightblue","white","tan", "brown")

if (scaleVariable == "Shotlocationprobability$n.x"){
  between1=diff(round(summary(hockeymap$probs)[c(1,2,3,5,6)]*100,0))+4
  color.name="Probability"
  scale.name="Shot Count"
}

if (scaleVariable == "Shotlocationprobability$prob"){
  between1=diff(round(summary(hockeymap$n.x)[c(1,2,3,5,6)],0))+4
  color.name="Shot Count"
  scale.name="Probability"
}

colfunc2<-color.palette(steps =steps1,
                        n.steps.between=between1)

#colfunc2<-colorRampPalette(c("blue","white","pink","red", "red", "red", "dark red"))
#plot(rep(1,50),col=(colfunc2(50)), pch=19,cex=2)

Making the Perspective Plot

Plotting the perspective heatmap via geom_rect which we might call a Conehead plot.

newg2<-ggplot(mapData, aes(x=xprime, y=y, 
                           #fill = colorVariable,
                           col=colorVariable))+
geom_rect(aes(xmin=x.min,xmax=x.max,ymin=y.min,ymax=y.max,fill=colorVariable),data=hockeymap)+  scale_color_gradientn(colours = colfunc2(70))+
 #scale_color_manual(values=manualpalette,breaks=breaks1,guide=FALSE)+ 
  scale_fill_gradientn(colours=colfunc2(70),guide=FALSE)+
  #make circles
  geom_point(aes(x=xPositionPrime,y=yPosition,guide=FALSE),color="red",size=0.5,data= df.circle1)+ 
  geom_point(aes(x=xPositionPrime1,y=yPosition1),color="red",size=0.5,data= df.circle2)+
  labs(x="",y="",colour=color.name)+
  #side lines
  geom_segment(aes(x = -44, y = -89, xend = -44*gamma, yend = -25),color="black",size=1, data = NULL)+
  geom_segment(aes(x = 44, y = -89, xend = 44*gamma, yend = -25),color="black", size=1,data = NULL)+
  #top and bottom
  geom_segment(aes(x = -44*gamma, y = -25, xend = 44*gamma, yend = -25),color="blue" ,size=1,data = NULL)+
  geom_segment(aes(x = -44, y = -89, xend = 44, yend = -89),color="red", size=1.5,data = NULL)+
  #perspective lines
  geom_segment(aes(x = perx1, y = pery1, xend = perx2, yend = pery2), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx3, y = pery3, xend = perx4, yend = pery4), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx5, y = pery5, xend = perx6, yend = pery6), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx7, y = pery7, xend = perx8, yend = pery8),color="black", size=1,data = NULL)+
  geom_segment(aes(x = perx9, y = pery9, xend = perx10, yend = pery10), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx11, y = pery11, xend = perx12, yend = pery12), color="black",size=1,data = NULL)+
  #center dots
  geom_point(x=(a+(45-a)/(-90+25)*(-69+25))* -22/45,y=-69,colour="red",size=2,fill=1.5)+
geom_point(x=(a+(45-a)/(-90+25)*(-69+25))* 22/45,y=-69,colour="red",size=2,fill=1.5)+
  #goal
   annotate("rect", xmin=-5, xmax=5, ymin=-89, ymax=-85, alpha=0.8, fill="grey",col="red")+
#  geom_rect(xmin=-5, xmax=5, ymin=-89, ymax=-85,fill="grey",color="red",alpha=0.4)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

newg2

Making Unscaled Heatmap

We do this by redoing most of the above code but having taken the scaling factor out, ie gamma =1 .

gamma=1.00
#creating the circles
radius=15

x0=22  #center face off circle on the right
y0=-69

x1=-22  #center face off circle on the left
y1=-69

#First Circle
    angle<-seq(0,2*pi,length=1000)
      xPosition = x0+radius * sin(angle);
      a = (gamma)*45
      yPosition = y0+radius * cos(angle);
      xPositionPrime=(a+(45-a)/(-90+25)*(yPosition+25))* xPosition/45  #add perspective
#plot(xPosition,yPosition,pch=".",xlim=c(-45,45),ylim=c(-90,-20))  
#points(xPositionPrime,yPosition,pch=".",col="red")  #plot the right circle

#Second Circle
      xPosition1 = x1+radius * sin(angle);
      a = (gamma)*45 
      yPosition1 = y1+radius * cos(angle);
      xPositionPrime1=(a+(45-a)/(-90+25)*(yPosition1+25))* xPosition1/45  #add perspective
#points(xPositionPrime1,yPosition1,pch=".",col="blue") #plot the left circle

This shows you the adjusted circles and how perspective effects them.

Data Frame for Circles

df.circle1 <- data.frame(xPositionPrime, yPosition) #data frame for the first circle
df.circle2<- data.frame(xPositionPrime1, yPosition1) #second circle

Perspective Lines (hockey boards)

newarenaAdjustedXCordABS = Shotlocationprobability$arenaAdjustedXCordABS
newarenaAdjustedYCord = Shotlocationprobability$arenaAdjustedYCord

x = newarenaAdjustedYCord
y = -newarenaAdjustedXCordABS

#perspective height of rink posts
height= 10

##line one is the bottom left
perx1= -44   #x start of line one
pery1= -89   #y start of line one
perx2= -44   #x end of line one
pery2= -89+height   #yend of line one

##line two is the top left
perx3= -44*gamma #x start of line two
pery3= max(y)    #y start of line two
perx4= -44*gamma #x end of line two
pery4= -25+(height*gamma)     #yend of line two

##line three connects one and two
perx5= perx2  #x start of line three
pery5= pery2  #y start of line three
perx6= perx4  #x end of line three
pery6= pery4  #yend of line three

##line four is the bottom right
perx7= 44     #x start of line four
pery7= -89    #y start of line four
perx8= 44     #x end of line four
pery8= -89+height    #yend of line four

##line five is the top right
perx9= 44*gamma   #x start of line five
pery9= -25     #y start of line five
perx10= 44*gamma  #x end of line five
pery10= -25+(height*gamma)     #y end of line five

##line six connects four and five
perx11= perx8   #x start of line six
pery11= pery8   #y start of line six
perx12= perx10  #x end of line six
pery12= pery10  #yend of line six

Rescaling Points

newarenaAdjustedXCordABS = Shotlocationprobability$arenaAdjustedXCordABS
newarenaAdjustedYCord = Shotlocationprobability$arenaAdjustedYCord

#flipping x and y so the graph is in the correct orientation
x = newarenaAdjustedYCord
y = -newarenaAdjustedXCordABS
#shifting the points
a = (gamma)*45 
xprime =(a+(45-a)/(-90+25)*(y+25))* x/45
x.min=(a+(45-a)/(-90+25)*(y+25))* (x-0.5)/45
x.max=(a+(45-a)/(-90+25)*(y+25))* (x+0.5)/45
y.min=y-0.5
y.max=y+0.5
hockeymap=data.frame(x,y,xprime,x.min,x.max,y.min,y.max,probs=Shotlocationprobability$prob,n.x=Shotlocationprobability$n.x)

Defining the Color Palette

# source: https://menugget.blogspot.com/2011/11/define-color-steps-for-colorramppalette.html#more
#This is a wrapper function for colorRampPalette. It allows for the
#definition of the number of intermediate colors between the main colors.
#Using this option one can stretch out colors that should predominate
#the palette spectrum. Additional arguments of colorRampPalette can also
#be added regarding the type and bias of the subsequent interpolation.
color.palette <- function(steps, n.steps.between=NULL, ...){
 
 if(is.null(n.steps.between)) n.steps.between <- rep(0, (length(steps)-1))
 if(length(n.steps.between) != length(steps)-1) stop("Must have one less n.steps.between value than steps")
 
 fill.steps <- cumsum(rep(1, length(steps))+c(0,n.steps.between))
 RGB <- matrix(NA, nrow=3, ncol=fill.steps[length(fill.steps)])
 RGB[,fill.steps] <- col2rgb(steps)
 
 for(i in which(n.steps.between>0)){
  col.start=RGB[,fill.steps[i]]
  col.end=RGB[,fill.steps[i+1]]
  for(j in seq(3)){
   vals <- seq(col.start[j], col.end[j], length.out=n.steps.between[i]+2)[2:(2+n.steps.between[i]-1)]  
   RGB[j,(fill.steps[i]+1):(fill.steps[i+1]-1)] <- vals
  }
 }
 
 new.steps <- rgb(RGB[1,], RGB[2,], RGB[3,], maxColorValue = 255)
 pal <- colorRampPalette(new.steps, ...)
 return(pal)
}

#colfunc2<-colorRampPalette(c("blue","white","pink","red", "red", "red", "dark red","black", "black", "black"))
steps1=c("blue","lightblue","white","tan", "brown")

if (scaleVariable == "Shotlocationprobability$n.x"){
  between1=diff(round(summary(hockeymap$probs)[c(1,2,3,5,6)]*100,0))+4
  color.name="Probability"
  scale.name="Shot Count"
}

if (scaleVariable == "Shotlocationprobability$prob"){
  between1=diff(round(summary(hockeymap$n.x)[c(1,2,3,5,6)],0))+4
  color.name="Shot Count"
  scale.name="Probability"
}

colfunc2<-color.palette(steps =steps1,
                        n.steps.between=between1)

#colfunc2<-colorRampPalette(c("blue","white","pink","red", "red", "red", "dark red"))
#plot(rep(1,50),col=(colfunc2(50)), pch=19,cex=2)

Making the Unscaled plots

Again we use geom_rect

newg2<-ggplot(mapData, aes(x=xprime, y=y, 
                           #fill = colorVariable,
                           col=colorVariable))+
geom_rect(aes(xmin=x.min,xmax=x.max,ymin=y.min,ymax=y.max,fill=colorVariable),data=hockeymap)+  scale_color_gradientn(colours = colfunc2(70))+
 #scale_color_manual(values=manualpalette,breaks=breaks1,guide=FALSE)+ 
  scale_fill_gradientn(colours=colfunc2(70),guide=FALSE)+
  #makde circles
  geom_point(aes(x=xPositionPrime,y=yPosition,guide=FALSE),color="red",size=0.5,data= df.circle1)+ 
  geom_point(aes(x=xPositionPrime1,y=yPosition1),color="red",size=0.5,data= df.circle2)+
  labs(x="",y="",colour=color.name)+
  #side lines
  geom_segment(aes(x = -44, y = -89, xend = -44*gamma, yend = -25),color="black",size=1, data = NULL)+
  geom_segment(aes(x = 44, y = -89, xend = 44*gamma, yend = -25),color="black", size=1,data = NULL)+
  #top and bottom
  geom_segment(aes(x = -44*gamma, y = -25, xend = 44*gamma, yend = -25),color="blue" ,size=1,data = NULL)+
  geom_segment(aes(x = -44, y = -89, xend = 44, yend = -89),color="red", size=1.5,data = NULL)+
  #perspective lines
  geom_segment(aes(x = perx1, y = pery1, xend = perx2, yend = pery2), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx3, y = pery3, xend = perx4, yend = pery4), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx5, y = pery5, xend = perx6, yend = pery6), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx7, y = pery7, xend = perx8, yend = pery8),color="black", size=1,data = NULL)+
  geom_segment(aes(x = perx9, y = pery9, xend = perx10, yend = pery10), color="black",size=1,data = NULL)+
  geom_segment(aes(x = perx11, y = pery11, xend = perx12, yend = pery12), color="black",size=1,data = NULL)+
  #center dots
  geom_point(x=(a+(45-a)/(-90+25)*(-69+25))* -22/45,y=-69,colour="red",size=2,fill=1.5)+
geom_point(x=(a+(45-a)/(-90+25)*(-69+25))* 22/45,y=-69,colour="red",size=2,fill=1.5)+
  #goal
   annotate("rect", xmin=-5, xmax=5, ymin=-89, ymax=-85, alpha=0.8, fill="grey",col="red")+
#  geom_rect(xmin=-5, xmax=5, ymin=-89, ymax=-85,fill="grey",color="red",alpha=0.4)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

newg2

Note how different you process the importance of values near the blue line when comparing these plots.