---
title: "ay23-24"
output: html_document
date: "2024-03-12"
editor_options:
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
library(tidyverse)

# Shared colour palette used by every manual colour/fill scale below.
primary <- c("#D1495B", "#EDAE49", "#00798C","#003D5B", "#30638E","#56B4E9",
             "#7F67B6", "#A4A8D1", "#380606")

# Directory that receives every exported figure and table.
out_dir <- "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs"

# Save a ggplot as a 40 x 30 inch, 300 dpi PNG with a white background.
# `file_name` is the bare file name; it is joined to `dir` with a backslash so
# the resulting path is identical to the previously hard-coded ones.
save_plot <- function(plot, file_name, dir = out_dir) {
  ggsave(
    plot = plot,
    device = "png",
    filename = file.path(dir, file_name, fsep = "\\"),
    width = 40,
    height = 30,
    units = "in",
    dpi = 300,
    limitsize = FALSE,
    bg = "white"
  )
}

# Save a gt table as an image in `dir`, with the padding used throughout.
save_table <- function(tbl, file_name, dir = out_dir) {
  gt::gtsave(tbl, file.path(dir, file_name, fsep = "\\"), expand = 10)
}
```

# cleaning

```{r}
survey <- read.csv("Survey about job applicants, AY24-25.csv")

survey_clean <- survey |>
  # Columns are dropped by position, so this depends on the export layout.
  select(-1:-6, -8, -10:-17, -X) |>
  mutate(
    # Strip the "~" and "+" decorations so the applicant count parses as a
    # number; anything still non-numeric becomes NA and is filtered out below.
    Q1 = str_remove_all(Q1, "[~+]"),
    Q1 = as.numeric(Q1),
    Q2 = str_replace_all(Q2, "Science, Logic, and Mathematics", "SLM"),
    # Blank answers are recoded to "not given".
    Q3 = str_replace_all(Q3, "^(?=\\s*$)", "not given"),
    Q2 = str_replace_all(Q2, "^(?=\\s*$)", "not given"),
    # Any Q3 value outside these three levels (including "not given")
    # becomes NA here.
    Q3 = factor(Q3, levels = c("Bachelors", "Masters", "Doctorate"))
  ) |>
  filter(!is.na(Q1)) |>
  # Split the comma-separated Q2 answer into Q2_sep1, Q2_sep2, ...
  separate_wider_delim(
    cols = Q2,
    delim = ",",
    names_sep = "_sep",
    too_few = "align_start"
  ) |>
  # NOTE(review): presumably drops the extra Q2_sep* columns -- confirm
  # against the column order of the export.
  select(-6, -7)
```

# intro numbers

```{r}
survey_clean |>
  summary()

survey_clean |>
  arrange(Q2_sep1, Q1) |>
  view()
```

# boxplots

```{r}
survey_clean |>
  ggplot(aes(x = Finished, y = Q1)) +
  geom_boxplot()

quantile(survey_clean$Q1)

survey_clean |>
  ggplot(aes(x = Q3, y = Q1)) +
  geom_boxplot()

survey_clean |>
  ggplot(aes(x = Q2_sep1, y = Q1)) +
  geom_boxplot()
```

# overviews

```{r}
# overview: counts
survey_clean |>
  count(Q3) |>
  na.omit() |>
  rename(`Degree type` = Q3, Count = n) |>
  arrange(`Degree type`) |>
  gt::gt(caption = "Responses by program terminal degree")

survey_clean |>
  count(Q2_sep1) |>
  view()

survey_clean |>
  arrange(Q3, Q1) |>
  view()
```

# details--plot titles indicate what's being tracked

```{r}
# One bar per response (sorted within each degree type), with the per-degree
# median drawn as a horizontal line.
ay24_vol_status <- survey_clean |>
  select(Q1, Q3) |>
  filter(Q3 != "not given") |>
  na.omit() |>
  arrange(Q3, Q1) |>
  group_by(Q3) |>
  mutate(
    rn = row_number(),
    med = median(Q1),
    avg = round(mean(Q1), 1)
  ) |>
  ungroup() |>
  ggplot(aes(rn, Q1)) +
  geom_col() +
  geom_hline(aes(yintercept = med, color = factor(med)), linewidth = 4) +
  # geom_line() +
  # geom_smooth(linewidth = 2, se = FALSE) +
  facet_wrap(~Q3, scales = "free_x") +
  theme_minimal(base_size = 60) +
  ggtitle("Volume of applications by program status") +
  theme(axis.text.x = element_blank()) +
  labs(color = "Median") +
  xlab("") +
  ylab("counts") +
  scale_color_manual(values = primary)

# Fixed: the plot object is `ay24_vol_status`; the previous call passed the
# undefined name `vol_status_24`.
save_plot(ay24_vol_status, "ay24_vol_status.png")

# Deviation of each response from its degree-type mean (printed, not saved).
survey_clean |>
  select(Q1, Q3) |>
  filter(Q3 != "not given") |>
  na.omit() |>
  arrange(Q3, Q1) |>
  group_by(Q3) |>
  mutate(
    rn = row_number(),
    med = median(Q1),
    avg = round(mean(Q1), 1),
    diff = avg - Q1
  ) |>
  ungroup() |>
  ggplot(aes(rn, diff)) +
  geom_col() +
  facet_wrap(~Q3, scales = "free_x")

# contract type
ay24_vol_contract_program <- survey_clean |>
  select(Q1, Q3, Q4, Q2_sep1) |>
  # Blank contract types are marked "x" so they can be filtered out.
  mutate(Q4 = str_replace_all(Q4, "^(?=\\s*$)", "x")) |>
  filter(
    Q4 != "x",
    Q4 != "Associate professor/tenured",
    Q4 != "Fixed-term with no option for renewal (e.g. sabbatical replacement)"
  ) |>
  na.omit() |>
  arrange(Q4, Q1) |>
  group_by(Q4) |>
  mutate(
    rn = row_number(),
    med = median(Q1),
    avg = round(mean(Q1), 1),
    diff = med - Q1,
    std = sd(Q1),
    upper = round(std, 1),
    lower = round(-1 * std, 1),
    dist = max(Q1) - min(Q1),
    # Shorten the facet label.
    Q4 = str_remove_all(Q4, "\\(e.g. rolling contract\\)")
  ) |>
  ungroup() |>
  ggplot(aes(rn, Q1, fill = Q3)) +
  geom_col(position = "dodge") +
  facet_wrap(~Q4, scales = "free_x") +
  theme_minimal(base_size = 60) +
  ggtitle("Volume of applications by contract type and program status") +
  theme(axis.text.x = element_blank()) +
  labs(fill = "Program\nstatus") +
  xlab("") +
  ylab("counts") +
  scale_fill_manual(values = primary)

save_plot(ay24_vol_contract_program, "vol_contract_program_24_a.png")

# Median / min / max applicant counts per AOS and degree type.
ay24_aos_max_min_avg <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  filter(Q2_sep1 != "not given") |>
  na.omit() |>
  arrange(desc(Q1)) |>
  group_by(Q2_sep1, Q3) |>
  summarise(
    avg = median(Q1), # `avg` holds the median; name kept for the aesthetics
    max = max(Q1),
    min = min(Q1),
    # Keep the Q2_sep1 grouping so `aos_avg` below is computed per AOS.
    .groups = "drop_last"
  ) |>
  mutate(
    Q2_sep1 = as_factor(Q2_sep1),
    aos_avg = median(avg)
  ) |>
  ungroup() |>
  ggplot(aes(Q3, avg, color = Q2_sep1)) +
  geom_point(alpha = .8, size = 8) +
  geom_errorbar(aes(ymin = min, ymax = max), alpha = .7, linewidth = 5) +
  geom_hline(
    aes(yintercept = aos_avg),
    linetype = "solid",
    alpha = .2,
    linewidth = 5
  ) +
  coord_flip() +
  facet_wrap(~ Q2_sep1) +
  theme_minimal(base_size = 60) +
  # NOTE(review): this axis carries Q3 (degree type); AOS is the facet.
  # Label left unchanged -- confirm whether "AOS" is intended here.
  xlab("AOS") +
  ylab("counts") +
  ggtitle("Volume of applications by AOS and program status") +
  theme(legend.position = "none") +
  scale_color_manual(values = primary)

save_plot(ay24_aos_max_min_avg, "ay_24_aos_program_max_min_avg_c.png")

# Median / min / max applicant counts per AOS and contract type.
ay24_aos_contract_max_min_avg <- survey_clean |>
  select(Q1, Q2_sep1, Q3, Q4) |>
  mutate(
    Q4 = str_replace_all(Q4, "^(?=\\s*$)", "x"),
    Q2_sep1 = str_replace_all(
      Q2_sep1,
      "Metaphysics and Epistemology",
      "Metaphysics, Epistemology"
    )
  ) |>
  filter(
    Q4 != "x",
    Q4 != "Associate professor/tenured",
    Q4 != "Fixed-term with no option for renewal (e.g. sabbatical replacement)"
  ) |>
  filter(Q2_sep1 != "not given") |>
  na.omit() |>
  arrange(desc(Q1)) |>
  group_by(Q2_sep1) |>
  mutate(aos_med = median(Q1)) |>
  ungroup() |>
  group_by(Q2_sep1, Q4) |>
  mutate(
    spec_med = median(Q1),
    max = max(Q1),
    min = min(Q1),
    # The label clean-up and factor conversion stay inside the grouped
    # mutate on purpose: moving them would change the factor level order.
    Q4 = str_remove_all(Q4, "\\(e.g. rolling contract\\)"),
    Q4 = as_factor(Q4)
  ) |>
  ungroup() |>
  ggplot(aes(Q4, spec_med, color = Q4)) +
  geom_point(alpha = .8, size = 8) +
  geom_errorbar(aes(ymin = min, ymax = max), alpha = .7, linewidth = 5) +
  geom_hline(
    aes(yintercept = aos_med),
    linetype = "solid",
    alpha = .2,
    linewidth = 5
  ) +
  coord_flip() +
  facet_wrap(~ Q2_sep1) +
  theme_minimal(base_size = 60) +
  # NOTE(review): this axis carries Q4 (contract type); AOS is the facet.
  xlab("AOS") +
  ylab("counts") +
  ggtitle("Volume of applications by AOS and contract type") +
  theme(legend.position = "none") +
  scale_color_manual(values = primary)

save_plot(
  ay24_aos_contract_max_min_avg,
  "ay_24_aos_contract_max_min_avg_d.png"
)

# Applicant counts coloured by the respondent's impression of the volume.
num_imp_23 <- survey_clean |>
  filter(!is.na(Q7)) |>
  arrange(Q1) |>
  mutate(rn = row_number()) |>
  rename(counts = Q1) |>
  mutate(
    Q7 = str_replace_all(
      Q7,
      "There were about as many applicants as I was expecting",
      "Expected"
    ),
    Q7 = str_replace_all(
      Q7,
      "There were fewer applicants than I was expecting",
      "Fewer"
    ),
    Q7 = str_replace_all(
      Q7,
      "There were more applicants than I was expecting",
      "More"
    )
  ) |>
  ggplot(aes(rn, counts, fill = Q7)) +
  geom_col() +
  scale_fill_manual(values = primary) +
  # Fixed: a missing `+` here used to end the plot early, so the theme and
  # title were never attached and `num_imp_23` did not hold the plot.
  facet_wrap(~Q3) +
  theme_minimal(base_size = 60) +
  # NOTE(review): the title says "TT jobs" but no contract-type filter is
  # applied above -- confirm.
  ggtitle("Volume of applications for TT jobs") +
  theme(axis.text.x = element_blank()) +
  xlab("") +
  ylab("counts")

save_plot(num_imp_23, "num_imp_23.png")
```

# tables: not used in AY24-25

```{r}
# Min / mean / median / max of applicant counts per AOS, as a gt table.
# `data` must contain Q1 (numeric counts) and Q2_sep1 (first listed AOS).
aos_summary_table <- function(data, caption) {
  data |>
    group_by(Q2_sep1) |>
    summarize(
      least = min(Q1),
      avg = mean(Q1),
      mid = median(Q1),
      most = max(Q1)
    ) |>
    ungroup() |>
    mutate(across(where(is.double), round)) |>
    rename(AOS = Q2_sep1, mean = avg, median = mid) |>
    gt::gt(caption = caption)
}

overview_p23 <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  group_by(Q3) |>
  reframe(std = sd(Q1), average = mean(Q1)) |>
  ungroup() |>
  rename(status = Q3, `standard deviation` = std) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "Averages and dispersion")

save_table(overview_p23, "overview_p23.png")

# Fixed: a leftover `|> view()` used to terminate this pipeline after
# `arrange()`, leaving `group_by(Q3)` without any data.
all_over_23 <- survey_clean |>
  select(Q1, Q3) |>
  na.omit() |>
  arrange(desc(Q1)) |>
  group_by(Q3) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  rename(status = Q3, mean = avg, median = mid) |>
  gt::gt(caption = "Overview of applications by program status")

save_table(all_over_23, "all_over_23.png")

tt_23 <- survey_clean |>
  filter(Q4 == "Assistant professor/tenure-track") |>
  aos_summary_table(caption = "TT position applications")

save_table(tt_23, "tt_23.png")

pd_fixed_23 <- survey_clean |>
  filter(Q4 == "Postdoc" | str_detect(Q4, "Fixed")) |>
  aos_summary_table(caption = "Postdoc and fixed-term position applications")

save_table(pd_fixed_23, "pd_fixed_23.png")

open_23 <- survey_clean |>
  filter(
    Q4 != "Postdoc" &
      Q4 != "Assistant professor/tenure-track" &
      str_detect(Q4, "Fixed", negate = TRUE)
  ) |>
  aos_summary_table(caption = "Open and tenured position applications")

save_table(open_23, "open_23.png")

imp_app_23 <- survey_clean |>
  filter(!is.na(Q7)) |>
  count(Q7) |>
  rename(response = Q7, count = n) |>
  arrange(desc(count)) |>
  gt::gt(caption = "Impressions of applicant volume")

save_table(imp_app_23, "imp_app_23.png")

# Fixed: two leftover `|> view()` calls used to split this pipeline, so
# `rename()` and `gt::gt()` were called without data and `time_job_23`
# was never a table.
time_job_23 <- survey_clean |>
  filter(!is.na(Q6)) |>
  count(Q6, Q3) |>
  rename(response = Q6, count = n) |>
  arrange(desc(count)) |>
  gt::gt(caption = "Time allotment for job")

save_table(time_job_23, "time_job_23.png")
```

# perceptions

```{r}
survey_clean |>
  select(Q3, Q4, Q7) |>
  arrange(Q3, Q4, Q7) |>
  view()
```

# top and bottom

```{r}
# Cut-offs for the "Bottom" (Q1 <= 65) and "Top" (Q1 >= 159) groups.
# NOTE(review): presumably the quartile boundaries printed by
# `quantile(survey_clean$Q1)` in the boxplots chunk -- confirm after any
# data refresh.
bottom_cutoff <- 65
top_cutoff <- 159

# For each AOS x degree-type combination: how many responses fall in the
# bottom / top group, and what share of that combination's responses it is.
top_bottom <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  arrange(Q1) |>
  add_count(Q2_sep1, Q3, name = "overall frequency") |>
  mutate(
    # Rows strictly between the cut-offs get NA and are removed next.
    end = case_when(
      Q1 <= bottom_cutoff ~ "Bottom",
      Q1 >= top_cutoff ~ "Top"
    )
  ) |>
  filter(!is.na(end)) |>
  add_count(Q2_sep1, Q3, end, name = "frequency") |>
  mutate(ratio = round(frequency / `overall frequency`, 2)) |>
  rename(AOS = Q2_sep1, `degree type` = Q3) |>
  distinct(AOS, `degree type`, frequency, end, .keep_all = TRUE) |>
  select(AOS, `degree type`, frequency, `overall frequency`, ratio, end) |>
  arrange(end, ratio, AOS, `degree type`) |>
  gt::gt(
    caption = "Characteristics of top and bottom quartile for applicant volume"
  )

save_table(top_bottom, "ay24_top_bottom_a.png")

# Dummy-coded AOS and degree type for the top group only.
# Fixed: the previous `end` label marked every row with Q1 > 65 as "Top", so
# `filter(end == "Top")` also kept the middle of the distribution.
# NOTE(review): the objects are named `bottom_*` although the filter selects
# the top group; names kept as they were -- confirm which end is intended.
top_dummies <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  filter(Q1 >= top_cutoff) |>
  select(Q2_sep1, Q3) |>
  fastDummies::dummy_cols() |>
  select(-Q2_sep1, -Q3)

bottom_cor <- cor(top_dummies)

# Fixed: cor.mtest() takes the data matrix, not the correlation matrix.
bottom_cor_sig <- corrplot::cor.mtest(top_dummies, conf.level = .95)

corrplot::corrplot(bottom_cor, method = "number")
# method = 'circle', type = 'lower', insig='blank',
# addCoef.col ='black', number.cex = 0.8, order = 'AOE', diag=FALSE
```

```{r}
# NOTE(review): `DN` is not defined anywhere in this document, so this chunk
# fails unless it already exists in the session. Possibly `survey_clean` was
# meant -- confirm.
DN |>
  count(Q3, Q2_sep1) |>
  group_by(Q3) |>
  mutate(tots = n / sum(n)) |>
  ungroup() |>
  view()

AOS_status <- DN |>
  select(Q2_sep1, Q3, Q1) |>
  filter(Q2_sep1 == "Open") |>
  select(-Q1) |>
  table()

(prop.table(AOS_status))
addmargins(AOS_status)
fisher.test(AOS_status)
chisq.test(AOS_status)

table(survey_clean$Q2_sep1)
```