options(warn = -1)
library(tidyverse)
library(baseballr)
library(ggthemes) # Themes for ggplot2.
library(ggimage) # Supports images in ggplot2.
library(gt) # Create tables.
library(readr) # Read in datasets.
library(scatterplot3d)
library(reshape)
library(ExcelFunctionsR)
# Custom plot settings.
# Shared ggplot2 theme for this script's plots.
#
# Starts from ggthemes::theme_fivethirtyeight() and layers on larger text,
# centred titles, padded axis titles, and a hidden legend.
#
# Returns the combined theme object, ready to be added to a plot with `+`.
theme_reach <- function() {
  centred <- 0.5

  overrides <- theme(
    # The legend is hidden; legend.text is still sized in case a plot
    # re-enables it.
    legend.position = "none",
    legend.text = element_text(size = 14),
    # Titles and caption.
    plot.title = element_text(size = 22, hjust = centred, face = "bold"),
    plot.subtitle = element_text(size = 18, hjust = centred),
    plot.caption = element_text(size = 16),
    # Axis titles get extra space between the title and the tick labels.
    axis.title.x = element_text(size = 18, margin = margin(t = 20)),
    axis.title.y = element_text(size = 18, margin = margin(r = 20)),
    axis.text = element_text(size = 14),
    # Facet strip labels.
    strip.text = element_text(size = 16, face = "bold")
  )

  theme_fivethirtyeight() + overrides
}
# Read the merged 2021 pitch-level data set from disk.
all_data_2021 <- read.csv(
  "C:/Users/chris/Documents/Datasets/BaseballR/2021/2021merged.csv"
)

# Keep only the pitches thrown by pitcher id 669302 and add a factor version
# of the pitch name, whose integer codes are used below to pick a colour.
logan_all <- mutate(
  filter(all_data_2021, pitcher == 669302),
  color_code = as.factor(pitch_name)
)

# One colour per pitch: the factor code indexes into a four-colour palette.
# NOTE(review): a fifth pitch_name level would index past the palette and
# yield NA - confirm this pitcher only has four pitch types.
logan_all$colors <- c("#000000", "#1f78b4", "#e41a1c", "#33a02c")[
  as.numeric(logan_all$color_code)
]
colors <- logan_all$colors

# Split the season at 2021-08-21 (that date itself falls in the "pre" half).
# NOTE(review): game_date.y is compared against a string, so this presumably
# relies on the column holding yyyy-mm-dd text - confirm.
logan_pre <- filter(logan_all, game_date.y <= "2021-08-21")
logan_post <- filter(logan_all, game_date.y > "2021-08-21")
# Keep only the pitch-outcome columns that the summary tables need, one row
# per pitch, for each half of the season.
#
# select() replaces the original unnamed-argument summarize(): returning more
# than one row per group from summarize() is deprecated as of dplyr 1.1.0.
# The original also listed hitData.trajectory twice; it is listed once here,
# which yields the same columns in the same order.
pitch_outcome_cols <- c(
  "pitch_name",
  "description",
  "launch_speed",
  "launch_angle",
  "spin_axis",
  "hitData.trajectory",
  "details.isBall",
  "details.isStrike",
  "details.isInPlay",
  "estimated_ba_using_speedangle",
  "launch_speed_angle"
)

logan_pitch_stats_pre <- logan_pre %>%
  select(all_of(pitch_outcome_cols))

logan_pitch_stats_post <- logan_post %>%
  select(all_of(pitch_outcome_cols))
# Warnings are silenced for the rest of the script (same setting as at the
# top of the file).
options(warn = -1)

# Summarise pitch-level rows into one row per pitch type.
#
# `pitches` needs the columns pitch_name, description, launch_speed,
# launch_angle, launch_speed_angle, estimated_ba_using_speedangle and
# hitData.trajectory.
#
# Returns one row per pitch_name with these columns (rates are percentages
# rounded to one decimal):
#   strike_rate    - pitches whose description is a strike event / all pitches
#   swing_rate     - pitches whose description is a swing event / all pitches
#   whiff_rate     - swinging strikes / swings
#   exit_velo      - mean launch_speed over "hit_into_play" rows
#   launch_angle   - mean launch_angle over "hit_into_play" rows
#   barrel_rate    - launch_speed_angle == 6 / launch_speed_angle in 1..6
#   expected_avg   - mean estimated_ba_using_speedangle over "hit_into_play"
#                    rows, rounded to three decimals
#   gb_rate, fb_rate, ld_rate - share of ground balls / fly balls / line
#                    drives among rows with one of the four trajectories
#   pitches_thrown - number of rows for the pitch type
#   freq           - pitches_thrown as a percentage of all rows in `pitches`
summarize_pitch_outcomes <- function(pitches) {
  whiff_events <- c("swinging_strike", "swinging_strike_blocked")
  swing_events <- c(whiff_events, "foul", "foul_bunt", "foul_tip", "hit_into_play")
  strike_events <- c(swing_events, "called_strike")
  # NOTE(review): swing_rate counts "foul_bunt" as a swing but the whiff_rate
  # denominator does not - kept as in the original; confirm which is intended.
  whiff_denominator_events <- c(whiff_events, "hit_into_play", "foul", "foul_tip")
  trajectories <- c("ground_ball", "fly_ball", "line_drive", "popup")

  pitches %>%
    group_by(pitch_name) %>%
    summarize(
      strike_rate = round((sum(description %in% strike_events) / n()) * 100, 1),
      swing_rate = round((sum(description %in% swing_events) / n()) * 100, 1),
      whiff_rate = round(
        (sum(description %in% whiff_events) /
          sum(description %in% whiff_denominator_events)) * 100,
        1
      ),
      exit_velo = round(AVERAGEIF(description, "hit_into_play", launch_speed), 1),
      launch_angle = round(AVERAGEIF(description, "hit_into_play", launch_angle), 1),
      # Barrels as a share of ALL classified batted balls. The original divided
      # by codes 1-5 only, which left the barrels themselves out of the
      # denominator and overstated the rate (see the Statcast CSV
      # documentation for the launch_speed_angle codes).
      barrel_rate = round(
        (sum(launch_speed_angle %in% 6) / sum(launch_speed_angle %in% 1:6)) * 100,
        1
      ),
      expected_avg = round(
        AVERAGEIF(description, "hit_into_play", estimated_ba_using_speedangle),
        3
      ),
      gb_rate = round(
        (sum(hitData.trajectory %in% "ground_ball") /
          sum(hitData.trajectory %in% trajectories)) * 100,
        1
      ),
      fb_rate = round(
        (sum(hitData.trajectory %in% "fly_ball") /
          sum(hitData.trajectory %in% trajectories)) * 100,
        1
      ),
      ld_rate = round(
        (sum(hitData.trajectory %in% "line_drive") /
          sum(hitData.trajectory %in% trajectories)) * 100,
        1
      ),
      pitches_thrown = n()
    ) %>%
    # Usage share of each pitch type across the whole input.
    mutate(freq = round((pitches_thrown / sum(pitches_thrown)) * 100, 1))
}

logan_pitch_stats_pre <- summarize_pitch_outcomes(logan_pitch_stats_pre)
logan_pitch_stats_post <- summarize_pitch_outcomes(logan_pitch_stats_post)
# Drop pitches_thrown and launch_angle, and put freq right after pitch_name.
# Columns are picked by name rather than by position so that a change to the
# summary columns upstream fails loudly instead of silently shuffling the
# table. The resulting order is identical to the original positional reorder.
pitch_table_cols <- c(
  "pitch_name",
  "freq",
  "strike_rate",
  "swing_rate",
  "whiff_rate",
  "exit_velo",
  "barrel_rate",
  "expected_avg",
  "gb_rate",
  "fb_rate",
  "ld_rate"
)

logan_pitch_stats_pre <- logan_pitch_stats_pre %>%
  select(all_of(pitch_table_cols))

logan_pitch_stats_post <- logan_pitch_stats_post %>%
  select(all_of(pitch_table_cols))
# Post-minus-pre difference for every stat, one row per pitch type.
#
# The subtraction is positional, so both summaries must list the same pitch
# types in the same order; stop early instead of producing misaligned
# differences.
if (!identical(
  as.character(logan_pitch_stats_pre$pitch_name),
  as.character(logan_pitch_stats_post$pitch_name)
)) {
  stop(
    "Pre and post summaries do not contain the same pitch types in the same order.",
    call. = FALSE
  )
}

diff_matrix <- data.matrix(subset(logan_pitch_stats_post, select = -c(pitch_name))) -
  data.matrix(subset(logan_pitch_stats_pre, select = -c(pitch_name)))

logan_diff <- cbind(
  data.frame(pitch_name = logan_pitch_stats_pre$pitch_name),
  diff_matrix
)

# Label each table with the row group it will appear under. The label is
# repeated to the table's own row count rather than hard-coded four times,
# so this works for any number of pitch types.
logan_pitch_stats_pre$data_type <- rep("Through 8-21", nrow(logan_pitch_stats_pre))
logan_pitch_stats_post$data_type <- rep("After 8-21", nrow(logan_pitch_stats_post))
logan_diff$data_type <- rep("Differences", nrow(logan_diff))
# Format a numeric vector of differences as text with an explicit "+" on
# every value that is not negative (zero and NA also get a "+", as before).
#
# values: numeric vector of differences.
# digits: number of decimals to round to.
# fixed:  if TRUE, always print `digits` decimals (sprintf); if FALSE, use
#         as.character(), which drops trailing zeros.
#
# The sign is taken from the number itself. The original searched the
# number's character form for "-", which depends on how R prints the value
# and would also match the exponent of a value shown in scientific notation.
format_signed <- function(values, digits, fixed = FALSE) {
  rounded <- round(values, digits)
  shown <- if (fixed) {
    sprintf(paste0("%.", digits, "f"), rounded)
  } else {
    as.character(rounded)
  }
  is_negative <- !is.na(values) & values < 0
  ifelse(is_negative, shown, paste0("+", shown))
}

# Every difference column is shown with one decimal ...
one_decimal_cols <- c(
  "freq", "strike_rate", "swing_rate", "whiff_rate",
  "gb_rate", "fb_rate", "ld_rate", "exit_velo", "barrel_rate"
)
logan_diff[one_decimal_cols] <- lapply(
  logan_diff[one_decimal_cols],
  format_signed,
  digits = 1
)
# ... except expected_avg, which is shown with exactly three.
logan_diff$expected_avg <- format_signed(logan_diff$expected_avg, digits = 3, fixed = TRUE)

# Give expected_avg the same three-decimal text format in the pre/post tables.
logan_pitch_stats_pre$expected_avg <- sprintf("%.3f", round(logan_pitch_stats_pre$expected_avg, 3))
logan_pitch_stats_post$expected_avg <- sprintf("%.3f", round(logan_pitch_stats_post$expected_avg, 3))

# Stack the three tables. rbind() is kept on purpose: the difference columns
# are text while the pre/post columns are numeric, and rbind() coerces them
# to a common type.
logan_pitch_table <- rbind(logan_pitch_stats_pre, logan_pitch_stats_post, logan_diff)
# Colour the "Differences" cells of one stat column.
#
# gt_tbl:           a gt table whose data has a data_type column.
# stat:             name of the column to colour (string).
# increase_is_good: TRUE  -> values without a "-" are green, with a "-" red;
#                   FALSE -> the colours are swapped.
#
# Returns the gt table with both fills applied. Only rows whose data_type is
# "Differences" are touched.
shade_differences <- function(gt_tbl, stat, increase_is_good) {
  force(stat)
  good_fill <- "#90EE90"
  bad_fill <- "#ff726f"
  increase_fill <- if (increase_is_good) good_fill else bad_fill
  decrease_fill <- if (increase_is_good) bad_fill else good_fill

  gt_tbl %>%
    tab_style(
      style = cell_fill(color = increase_fill),
      locations = cells_body(
        columns = all_of(stat),
        rows = (!grepl("-", .data[[stat]])) & (data_type == "Differences")
      )
    ) %>%
    tab_style(
      style = cell_fill(color = decrease_fill),
      locations = cells_body(
        columns = all_of(stat),
        rows = (grepl("-", .data[[stat]])) & (data_type == "Differences")
      )
    )
}

# Last row of each data_type group; these rows get a heavy bottom border.
# Derived from the data instead of hard-coding rows 4, 8 and 12.
group_end_rows <- which(!duplicated(logan_pitch_table$data_type, fromLast = TRUE))

# Base table: labels, alignment, theme, title and borders.
logan_pitch_table_gt <- logan_pitch_table %>%
  gt(groupname_col = "data_type") %>%
  cols_label(
    pitch_name = "Pitch Name",
    strike_rate = "Strike Rate",
    swing_rate = "Swing Rate",
    whiff_rate = "Whiff Rate",
    exit_velo = "Exit Velocity",
    barrel_rate = "Barrel Rate",
    expected_avg = "Expected BABIP",
    gb_rate = "Groundball",
    fb_rate = "Flyball",
    ld_rate = "Line Drive",
    freq = "Frequency"
  ) %>%
  cols_align(
    align = "center",
    columns = c(
      "strike_rate", "whiff_rate", "exit_velo", "barrel_rate", "expected_avg",
      "gb_rate", "fb_rate", "ld_rate", "swing_rate", "freq"
    )
  ) %>%
  gtExtras::gt_theme_espn() %>%
  tab_header(title = "Logan Gilbert Pitch Outcome Table, 2021") %>%
  # Light vertical rules after the frequency, whiff-rate and expected-average
  # columns.
  tab_style(
    style = cell_borders(sides = "right", color = "#D3D3D3", weight = px(2)),
    locations = cells_body(
      columns = c(freq, whiff_rate, expected_avg),
      rows = everything()
    )
  ) %>%
  # Heavy rule under the last row of every group.
  tab_style(
    style = cell_borders(sides = "bottom", color = "#808080", weight = px(4)),
    locations = cells_body(columns = everything(), rows = group_end_rows)
  ) %>%
  # Heavy rule above the very first row.
  tab_style(
    style = cell_borders(sides = "top", color = "#808080", weight = px(4)),
    locations = cells_body(columns = everything(), rows = 1)
  )

# Which direction counts as an improvement for each stat, listed in the same
# order in which the fills were originally applied. freq is left uncoloured.
diff_shading <- c(
  strike_rate = TRUE,
  swing_rate = TRUE,
  whiff_rate = TRUE,
  exit_velo = FALSE,
  barrel_rate = FALSE,
  expected_avg = FALSE,
  gb_rate = TRUE,
  fb_rate = TRUE,
  ld_rate = FALSE
)
for (stat in names(diff_shading)) {
  logan_pitch_table_gt <- shade_differences(
    logan_pitch_table_gt,
    stat,
    diff_shading[[stat]]
  )
}

# Column spanners and overall width.
logan_pitch_table_gt <- logan_pitch_table_gt %>%
  tab_spanner(label = "Trajectory Rates", columns = c(gb_rate, fb_rate, ld_rate)) %>%
  tab_spanner(label = "Contact", columns = c(exit_velo, barrel_rate, expected_avg)) %>%
  tab_options(table.width = pct(100))

# Render the table to a PNG in the working directory.
gtsave(logan_pitch_table_gt, "logan-pitch-outcome-table.png", vwidth = 1200)