---
title: "ay23-24"
output: html_document
date: "2024-03-12"
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
# Default chunk option for the whole document: echo the R source of each chunk
# alongside its output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
# Nine-colour palette passed to the manual fill/colour scales of the plots in
# this document.
primary <- c(
  "#D1495B", "#EDAE49", "#00798C",
  "#003D5B", "#30638E", "#56B4E9",
  "#7F67B6", "#A4A8D1", "#380606"
)
```
# Cleaning
```{r}
# Build `survey_clean` from the raw `survey` data frame.
# NOTE(review): `survey` is not created anywhere in this document; it must
# already exist in the session before this chunk runs.
survey_clean <- survey |>
  # Keep only responses that answered Q1.
  filter(!is.na(Q1)) |>
  # Columns are chosen by position: column 9 plus columns 18 through 36.
  select(9, 18:36) |>
  mutate(
    # Strip "~" and "+" from Q1, then convert it to a number.
    Q1 = str_remove_all(Q1, "~"),
    Q1 = str_remove_all(Q1, "\\+"),
    Q1 = as.numeric(Q1),
    # This label contains commas, and Q2 is split on commas in the next step,
    # so it is shortened to "SLM" first.
    Q2 = str_replace_all(Q2, "Science, Logic, and Mathematics", "SLM")
  ) |>
  # Spread the comma-separated Q2 answers into Q2_sep1, Q2_sep2, ...;
  # rows with fewer pieces are padded on the right.
  separate_wider_delim(
    cols = Q2,
    delim = ",",
    names_sep = "_sep",
    too_few = "align_start"
  ) |>
  # Drop the 6th and 7th columns of the widened table (again by position).
  select(-6, -7)
```
# Intro numbers
```{r}
# Per-column summary of the cleaned survey data.
summary(survey_clean)

# Open the cleaned data in the viewer, sorted by Q1 in ascending order.
survey_clean |>
  arrange(Q1) |>
  view()
```
# Overviews
```{r}
# overview: counts
# Number of responses per Q3 value, rendered as a gt table whose rows follow
# the order Certificate, Bachelors, Masters, Doctorate.
survey_clean |>
  count(Q3) |>
  na.omit() |>
  mutate(
    Q3 = factor(
      Q3,
      levels = c("Certificate", "Bachelors", "Masters", "Doctorate")
    )
  ) |>
  rename(`Degree type` = Q3, Count = n) |>
  arrange(`Degree type`) |>
  gt::gt(caption = "Responses by program terminal degree")

# Number of responses per Q2_sep1 value, opened in the data viewer.
survey_clean |>
  count(Q2_sep1) |>
  view()
```
# Details: plot titles indicate what's being tracked
```{r}
# Bar chart of Q1 for every response, one facet per Q3 value other than
# "Certificate", with a smoothed trend line on top. Saved as a PNG.
vol_status_23 <- survey_clean |>
  select(Q1, Q3) |>
  filter(Q3 != "Certificate") |>
  na.omit() |>
  # Sort inside each Q3 group so that `rn` is the response's rank by Q1
  # within its facet.
  arrange(Q3, Q1) |>
  group_by(Q3) |>
  mutate(rn = row_number()) |>
  ungroup() |>
  ggplot(aes(rn, Q1)) +
  geom_bar(stat = "identity") +
  geom_smooth(linewidth = 2, se = FALSE) +
  facet_wrap(~Q3, scales = "free_x") +
  theme_minimal(base_size = 60) +
  # The x positions are only ranks, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications by program status",
    x = "",
    y = "counts"
  )

ggsave(
  plot = vol_status_23,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_status_23_b.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
# Bar chart of Q1 for responses whose Q3.1 matches "R" followed by a digit,
# bars ordered by ascending Q1 and filled by Q3.1. Saved as a PNG.
vol_status_phd_23 <- survey_clean |>
  filter(str_detect(Q3.1, "R[0-9]")) |>
  select(Q1, Q3.1) |>
  arrange(Q1) |>
  # `rn` is the bar position after sorting by Q1.
  mutate(rn = row_number()) |>
  rename(status = Q3.1, counts = Q1) |>
  ggplot(aes(rn, counts, fill = status)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  # The x positions are only ranks, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications to PhD-granting programs",
    x = "",
    y = "counts"
  )

ggsave(
  plot = vol_status_phd_23,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_status_phd_23_a.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
# Bar chart of Q1 for responses whose Q4 is "Assistant professor/tenure-track",
# bars ordered by ascending Q1 and filled by Q3. Saved as a PNG.
vol_TT_23 <- survey_clean |>
  filter(Q4 == "Assistant professor/tenure-track") |>
  select(Q1, Q3) |>
  na.omit() |>
  arrange(Q1) |>
  # `rn` is the bar position after sorting by Q1.
  mutate(rn = row_number()) |>
  rename(status = Q3, counts = Q1) |>
  ggplot(aes(rn, counts, fill = status)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  # The x positions are only ranks, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications for TT jobs",
    x = "",
    y = "counts"
  )

ggsave(
  plot = vol_TT_23,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_TT_23.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
# Mean, minimum and maximum of Q1 for every combination of Q2_sep1 (AOS) and
# Q3 (program status). Each facet is one AOS; within a facet every program
# status gets a point at the mean and a bar spanning min to max.
# Saved as a PNG.
aos_max_min_avg <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  # Rows missing any of the three columns are excluded from the statistics.
  na.omit() |>
  group_by(Q2_sep1, Q3) |>
  # `.groups = "drop"` returns an ungrouped table and avoids the regrouping
  # message that summarise() prints for multi-column groupings.
  summarise(
    avg = mean(Q1),
    max = max(Q1),
    min = min(Q1),
    .groups = "drop"
  ) |>
  mutate(
    Q2_sep1 = as_factor(Q2_sep1),
    # Mean of all group means; drawn as the same reference line in every facet.
    aos_avg = mean(avg)
  ) |>
  ggplot(aes(Q3, avg, color = Q2_sep1)) +
  geom_point(alpha = .8, size = 8) +
  geom_errorbar(aes(ymin = min, ymax = max), alpha = .7, linewidth = 5) +
  geom_hline(
    aes(yintercept = aos_avg),
    linetype = "solid",
    alpha = .3,
    linewidth = 5
  ) +
  # After the flip, Q3 runs along the vertical axis and Q1 values along the
  # horizontal one.
  coord_flip() +
  facet_wrap(~ Q2_sep1) +
  theme_minimal(base_size = 60) +
  # The x aesthetic is Q3 (program status); AOS is shown by the facet strips.
  xlab("program status") +
  ylab("counts") +
  ggtitle("Volume of applications by AOS and program status") +
  # Colour duplicates the facet variable, so the legend is redundant.
  theme(legend.position = "none") +
  scale_color_manual(values = primary)

ggsave(
  plot = aos_max_min_avg,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\aos_max_min_avg_b.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
# Bar chart of Q1 for every response that answered Q7, bars ordered by
# ascending Q1 and filled by the (shortened) Q7 answer. Saved as a PNG.
# The prepared data is piped straight into ggplot(); a view() call in the
# middle of the chain would leave ggplot() without a data argument.
num_imp_23 <- survey_clean |>
  filter(!is.na(Q7)) |>
  arrange(Q1) |>
  # `rn` is the bar position after sorting by Q1.
  mutate(rn = row_number()) |>
  rename(counts = Q1) |>
  # Shorten the three long Q7 answer texts to one-word legend labels.
  mutate(
    Q7 = str_replace_all(Q7, "There were about as many applicants as I was expecting", "Expected"),
    Q7 = str_replace_all(Q7, "There were fewer applicants than I was expecting", "Fewer"),
    Q7 = str_replace_all(Q7, "There were more applicants than I was expecting", "More")
  ) |>
  ggplot(aes(rn, counts, fill = Q7)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  # NOTE(review): this title is identical to the one on the tenure-track plot,
  # but no Q4 filter is applied here and the fill is Q7 -- confirm the title.
  ggtitle("Volume of applications for TT jobs") +
  # The x positions are only ranks, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  xlab("") +
  ylab("counts")

ggsave(
  plot = num_imp_23,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\num_imp_23.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```
# Tables
```{r}
# Standard deviation and mean of Q1 per Q3 value, rounded to whole numbers,
# rendered as a gt table and saved as a PNG.
overview_p23 <- survey_clean |>
  # Q2_sep1 is part of the selection, so rows missing it are dropped as well.
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  group_by(Q3) |>
  reframe(
    std = sd(Q1),
    average = mean(Q1)
  ) |>
  ungroup() |>
  rename(
    status = Q3,
    `standard deviation` = std
  ) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "Averages and dispersion")

gt::gtsave(
  overview_p23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\overview_p23.png",
  expand = 10
)
# Minimum, mean, median and maximum of Q1 per Q3 value, rounded to whole
# numbers, rendered as a gt table and saved as a PNG.
all_over_23 <- survey_clean |>
  select(Q1, Q3) |>
  na.omit() |>
  group_by(Q3) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  # The summary columns get their final names only here, so that `mean` and
  # `median` do not appear as column names inside summarize().
  rename(
    status = Q3,
    mean = avg,
    median = mid
  ) |>
  gt::gt(caption = "Overview of applications by program status")

gt::gtsave(
  all_over_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\all_over_23.png",
  expand = 10
)
# Minimum, mean, median and maximum of Q1 per Q2_sep1 value for responses
# whose Q4 is "Assistant professor/tenure-track"; rounded, rendered as a gt
# table and saved as a PNG.
tt_23 <- survey_clean |>
  filter(Q4 == "Assistant professor/tenure-track") |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  rename(
    AOS = Q2_sep1,
    mean = avg,
    median = mid
  ) |>
  gt::gt(caption = "TT position applications")

gt::gtsave(
  tt_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\tt_23.png",
  expand = 10
)
# Minimum, mean, median and maximum of Q1 per Q2_sep1 value for responses
# whose Q4 is "Postdoc" or contains "Fixed"; rounded, rendered as a gt table
# and saved as a PNG.
pd_fixed_23 <- survey_clean |>
  filter(Q4 == "Postdoc" | str_detect(Q4, "Fixed")) |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  rename(
    AOS = Q2_sep1,
    mean = avg,
    median = mid
  ) |>
  gt::gt(caption = "Postdoc and fixed-term position applications")

gt::gtsave(
  pd_fixed_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\pd_fixed_23.png",
  expand = 10
)
# Minimum, mean, median and maximum of Q1 per Q2_sep1 value for the remaining
# responses: Q4 is neither "Postdoc" nor "Assistant professor/tenure-track"
# and does not contain "Fixed". Rounded, rendered as a gt table and saved as
# a PNG.
open_23 <- survey_clean |>
  filter(
    Q4 != "Postdoc" &
      Q4 != "Assistant professor/tenure-track" &
      str_detect(Q4, "Fixed", negate = TRUE)
  ) |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  rename(
    AOS = Q2_sep1,
    mean = avg,
    median = mid
  ) |>
  gt::gt(caption = "Open and tenured position applications")

gt::gtsave(
  open_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\open_23.png",
  expand = 10
)
# Number of responses per Q7 answer, most frequent first, rendered as a gt
# table and saved as a PNG.
imp_app_23 <- survey_clean |>
  filter(!is.na(Q7)) |>
  count(Q7) |>
  rename(
    response = Q7,
    count = n
  ) |>
  arrange(-count) |>
  gt::gt(caption = "Impressions of applicant volume")

gt::gtsave(
  imp_app_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\imp_app_23.png",
  expand = 10
)
# Number of responses per combination of Q6 answer and Q3 value, most frequent
# first, rendered as a gt table and saved as a PNG.
# The chain runs uninterrupted from the data to gt::gt(); a view() call that
# ends the pipe part-way would leave the later steps without input.
time_job_23 <- survey_clean |>
  filter(!is.na(Q6)) |>
  count(Q6, Q3) |>
  rename(
    response = Q6,
    count = n
  ) |>
  arrange(desc(count)) |>
  gt::gt(caption = "Time allotment for job")

gt::gtsave(
  time_job_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\time_job_23.png",
  expand = 10
)
```