---
title: "2022-2023 job cycle"
output: html_document
editor_options:
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

#load libraries & set theme
```{r}
library(tidyverse)
library(viridis)
library(lubridate)
library(ggthemes)
library(RColorBrewer)
library(tidygeocoder)
library(tidycensus)
library(maps)
library(ggsci)
library(ggmap)
library(gghighlight)
library(ggrepel) # geom_text_repel() is called without a namespace in the plotting chunks

# The Google key is read from the environment rather than stored in the
# notebook. Put `GGMAP_GOOGLE_API_KEY=<your key>` in ~/.Renviron.
# SECURITY: a literal key used to be committed on this line; that key should be
# revoked in the Google Cloud console and replaced.
google_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (nzchar(google_key)) {
  register_google(key = google_key)
} else {
  message("GGMAP_GOOGLE_API_KEY is not set; skipping register_google().")
}

theme_set(
  theme_minimal(base_size = 50) ## big base size for blog pics
)
```

#misc checking
```{r}
# Scratch check: which rows of the freshly cleaned download are missing from
# the combined file? The result is written out for manual inspection.
# NOTE(review): this chunk expects a `jobs_clean` that still has the raw
# `Date posted` column and a `master_temp` created by the rejoin chunk further
# down; the cleaning chunk below renames `Date posted` to `Post_date`, so
# confirm which version of `jobs_clean` this is meant to run against.
temp_clean <- jobs_clean %>%
  mutate(
    Post_date = mdy(`Date posted`),
    mth = month(Post_date),
    yr = year(Post_date)
  ) %>%
  select(-`Date posted`)

temp_prim <- anti_join(temp_clean, master_temp, by = "Id")
write_csv(temp_prim, "temp_prim.csv")
```

#getting & cleaning latest data
```{r}
##main idea: add on to the new file

#step 0: reload master file (if needed)
#last update: 19 July 2023
master_file_philjobs <- read_csv("master_file_philjobs_2023-07-19.csv")

#step 1: import latest data & clean up format
jobs_raw <- read_csv(
  "C:\\Users\\lassiter\\Downloads\\jobs.csv",
  col_types = cols(`Date posted` = col_date(format = "%m/%d/%Y"))
)

# Drop the link/how-to-apply columns, add placeholder AOS columns that are
# filled in by hand in step 3, and derive posting year and month.
jobs_clean <- jobs_raw %>%
  rename(Post_date = `Date posted`) %>%
  select(
    -`How to apply`, -`Info link`, -`Application link`,
    -`PhilJobs page`, -`Start time`
  ) %>%
  mutate(
    AOS_main = "XXXX",
    AOS_detail = "XXXX",
    other_keyword = "XXXX",
    yr = year(Post_date),
    mth = month(Post_date)
  )

#step 2: get only latest data
# Match on the posting Id only. Without `by`, anti_join() compares every shared
# column, including the "XXXX" placeholder AOS columns added above, which do not
# equal the hand-coded values in the master file, so already-stored posts would
# be returned as new.
jobs_latest <- jobs_clean %>%
  anti_join(master_file_philjobs, by = "Id")

#step 3: clean AOS
# Opens the interactive data editor; the AOS columns are coded by hand.
jobs_latest <- edit(jobs_latest)

#jobs_latest <- jobs_secondary
```

#rejoining to main file
```{r}
#this gets everything together into a temp file
master_temp <- rbind(jobs_latest, master_file_philjobs)

#if everything checks out, switch temp file to permanent
master_file_philjobs <- master_temp

#to save the updated file with the date
d <- paste0("master_file_philjobs_", Sys.Date(), ".csv")
write_csv(master_file_philjobs, d)

# NOTE(review): the import chunk already reads the posting date with col_date(),
# so this conversion is presumably a no-op on a Date column. If Post_date ever
# arrives as ISO text ("2023-07-19") this format string would not match it;
# confirm the column type before relying on this line.
master_file_philjobs <- master_file_philjobs %>%
  mutate(Post_date = as.Date(Post_date, format = "%m/%d/%Y"))
```

#primary cycle hiring
##creating df for primary cycle
```{r}
# Primary cycle = posts dated July through December.
jobs_primary <- master_file_philjobs %>%
  filter(mth >= 7 & mth <= 12)
```

##all job posts with historical context, primary cycle
```{r}
# One row per job type and year; `n` is the number of posts.
# The plot is assigned so the ggsave() call below saves this plot rather than a
# stale object left over from an earlier session.
job_plots_history <- jobs_primary %>%
  count(`Job type`, yr) %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Job type`, label = n)) +
  geom_point(size = 3) +
  geom_line(linewidth = 3) +
  scale_color_viridis(discrete = TRUE) +
  labs(x = "Year", y = "Posts", title = "")

ggsave(
  plot = job_plots_history, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\all_jobs_22_history_big.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##adding labels for a handful of job types
```{r}
some_job_plots_history <- jobs_primary %>%
  filter(`Job type` %in% c(
    "Junior faculty",
    "Postdoc or similar",
    "Other (non-academic)",
    "Faculty (open rank)"
  )) %>%
  count(`Job type`, yr) %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Job type`, label = n)) +
  geom_point(size = 3) +
  geom_line(linewidth = 3) +
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10, nudge_x = 0, max.overlaps = 100, size = 10
  ) +
  #geom_text(hjust = 1, vjust = -1)+
  scale_color_viridis(discrete = TRUE) +
  labs(x = "Year", y = "Posts", title = "")

ggsave(
  plot = some_job_plots_history, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\some_jobs_22_history_big.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##filtering for junior and postdoc
```{r}
junior <- jobs_primary %>%
  filter(`Job type` %in% c("Junior faculty", "Postdoc or similar"))
```

##looking at junior trends
```{r}
junior_plots_history <- junior %>%
  #filter(yr != 2020)%>%
  count(`Contract type`, yr) %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Contract type`)) +
  geom_point(size = 2) +
  geom_line(linewidth = 2) +
  #geom_smooth(method = "lm", se = FALSE)+
  scale_color_viridis(discrete = TRUE) +
  labs(x = "Year", y = "Posts", title = "") +
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10, nudge_x = 0, max.overlaps = 100, size = 10
  )

ggsave(
  plot = junior_plots_history, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_22_history_labels_big.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##deadlines with historical context, primary cycle
```{r}
# Parse the free-text deadline, keep autumn deadlines (day of year >= 260), and
# plot, per deadline year, each day's count divided by that year's total.
deadlines_2022_history <- jobs_primary %>%
  filter(Deadline != "---") %>%
  mutate(
    Deadline = str_remove_all(Deadline, "\\(.*\\)"), # drop parenthetical notes
    Deadline = str_replace_all(Deadline, "/", "-"),
    Deadline_clean = parse_date_time(Deadline, orders = c("mdY", "Ymd"))
  ) %>%
  arrange(Deadline_clean) %>%
  mutate(
    Deadline_day = yday(Deadline_clean),
    Deadline_yr = factor(year(Deadline_clean))
  ) %>%
  filter(Deadline_yr != 2015 & Deadline_yr != 2023) %>%
  filter(Deadline_day >= 260) %>% ##change to <= 50 to get beginning of year deadlines
  add_count(Deadline_yr, Deadline_clean) %>%
  select(Deadline_day, Deadline_clean, Deadline_yr, n) %>%
  distinct() %>%
  group_by(Deadline_yr) %>%
  mutate(
    totals = cumsum(n),
    maximums = max(totals)
  ) %>%
  ungroup() %>%
  select(Deadline_day, n, maximums, Deadline_yr) %>%
  distinct() %>%
  mutate(st_apps = n / maximums) %>%
  ggplot(mapping = aes(x = Deadline_day, y = st_apps, color = Deadline_yr, group = Deadline_yr)) +
  geom_line(linewidth = 1, alpha = .5) +
  labs(x = "Deadlines Sept 17 to Dec 31", y = "") +
  theme_minimal(base_size = 11)

ggsave(
  plot = deadlines_2022_history, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\deadlines_22_history.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##daily posts, all and junior
```{r}
#ALL
# For each calendar day, keep only the year(s) whose cumulative share of the
# cycle's posts is the highest on that day, then plot those points.
jobs_primary %>%
  mutate(DayMonth = format(as.Date(Post_date), "%m-%d")) %>%
  #filter(mth == 9 | mth == 10 | mth == 11)%>%
  add_count(yr, Post_date) %>%
  select(DayMonth, yr, n) %>%
  unique() %>%
  arrange(yr, DayMonth) %>%
  group_by(yr) %>%
  mutate(
    c = cumsum(n),
    d = c / max(c) ## d relativizes to total postings for the cycle, use n for daily posts and c for cummulative posts
  ) %>%
  ungroup() %>%
  mutate(yr = factor(yr)) %>%
  group_by(DayMonth) %>%
  mutate(
    big = max(d),
    # Convert the factor label itself to a year. Adding 2014 to the factor's
    # integer code is only right when the first year in the data is 2015.
    big_yr = ifelse(d == big, as.integer(as.character(yr)), NA_integer_)
  ) %>%
  ungroup() %>%
  na.omit() %>%
  add_count(big_yr) %>%
  ggplot() +
  aes(x = DayMonth, y = d, group = yr, color = yr) +
  #geom_line(linewidth = 1.5)+
  geom_point() +
  #gghighlight(max(d))+
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_blank()) +
  labs(x = "July 1 to December 31", y = "") +
  scale_color_viridis(discrete = TRUE)

#JUNIOR
junior %>%
  mutate(DayMonth = format(as.Date(Post_date), "%m-%d")) %>%
  #filter(mth == 9 | mth == 10 | mth == 11)%>%
  add_count(yr, Post_date) %>%
  select(DayMonth, yr, n) %>%
  unique() %>%
  arrange(yr, DayMonth) %>%
  group_by(yr) %>%
  mutate(
    c = cumsum(n),
    d = c / max(c)
  ) %>%
  ungroup() %>%
  mutate(yr = factor(yr)) %>%
  ggplot() +
  aes(x = DayMonth, y = d, color = yr, group = yr) +
  geom_point() +
  #geom_line(linewidth = 1.5)+
  #gghighlight(yr == 2022 | yr == 2015)+
  gghighlight(yr == 2022) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_blank()) +
  labs(x = "July 1 to December 31", y = "")
```

#secondary cycle hiring
##creating df for secondary cycle
```{r}
# Secondary cycle = posts dated January through June.
jobs_secondary <- master_file_philjobs %>%
  filter(mth >= 1 & mth <= 6)
```

##number of jobs, present year
```{r}
js <- jobs_secondary %>%
  filter(yr == 2023) %>%
  count(`Job type`, yr)

jsp <- ggplot(data = js, mapping = aes(x = `Job type`, y = n))
jsp_f <- jsp +
  geom_col() +
  coord_flip() +
  labs(x = "Job type", y = "Posts", title = "")

ggsave(
  plot = jsp_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_secondary_2023.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##number of jobs, historical context
```{r}
jsh <- jobs_secondary %>%
  count(`Job type`, yr)

jshp <- ggplot(data = jsh, mapping = aes(x = yr, y = n, color = `Job type`))
jshp_f <- jshp +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "")

ggsave(
  plot = jshp_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_secondary_history.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##select positions labeled
```{r}
jobs_secondary %>%
  filter(`Job type` %in% c(
    "Junior faculty",
    "Postdoc or similar",
    "Faculty (open rank)"
  )) %>%
  count(`Job type`, yr) %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Job type`)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 3) +
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10, nudge_x = 0, max.overlaps = 100, size = 10
  ) +
  #geom_text(hjust = 1, vjust = -1)+
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "")
```

##filtering for junior and postdoc
```{r}
# Kept under its own name so the primary-cycle `junior` data frame is not
# overwritten when this chunk runs.
junior_secondary <- jobs_secondary %>%
  filter(`Job type` %in% c("Junior faculty", "Postdoc or similar"))
```

##looking at junior trends
```{r}
jun_second <- junior_secondary %>%
  #filter(yr != 2020)%>%
  count(`Contract type`, yr)

jun_second_p <- ggplot(data = jun_second, mapping = aes(x = yr, y = n, color = `Contract type`))
jun_second_p1 <- jun_second_p +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5)
# The dev.new(width = 50000, height = 30000, unit = "px") call that used to sit
# here was removed: ggsave() below is given the plot, file and size explicitly,
# and the extra device was never closed.
jun_second_p1_f <- jun_second_p1 +
  ggrepel::geom_text_repel(
    aes(label = n),
    size = 8, color = "black", hjust = 0, vjust = 0,
    nudge_x = .1, nudge_y = .5
  ) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "")

ggsave(
  plot = jun_second_p1_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_secondary_23_history.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

#comparing primary and secondary and across entire year
```{r}
# One row per contract type, year and cycle:
#   n            = posts of that contract type in that year and cycle
#   annual_posts = all posts in that year and cycle
#   rel_posts    = n / annual_posts
p_s <- master_file_philjobs %>%
  filter(yr != 2023) %>%
  mutate(cycle = ifelse(mth >= 7 & mth <= 12, "primary", "secondary")) %>%
  add_count(yr, cycle, name = "annual_posts") %>%
  add_count(`Contract type`, yr, cycle) %>%
  select(`Contract type`, yr, cycle, n, annual_posts) %>%
  distinct() %>%
  mutate(rel_posts = n / annual_posts)

p_s_p <- ggplot(data = p_s, mapping = aes(x = yr, y = n, color = cycle))
p_s_p_f <- p_s_p +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "") +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)

ggsave(
  plot = p_s_p_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

#############
# Same table with 2020 removed, as the object name and plot caption say.
# The counts are per year, so dropping 2020 afterwards leaves the other years'
# values unchanged.
p_s_no_2020 <- p_s %>%
  filter(yr != 2020)

# Absolute number of posts, 2020 omitted.
p_s_p_n <- ggplot(data = p_s_no_2020, mapping = aes(x = yr, y = n, color = cycle))
p_s_p_n_f <- p_s_p_n +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "", caption = "Omitting 2020 job data") +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)

ggsave(
  plot = p_s_p_n_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles_no_2020.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

# Relative share of posts, 2020 omitted. Saved under its own file name so it
# no longer overwrites the absolute-count figure written just above.
p_s_rel_p_n <- ggplot(data = p_s_no_2020, mapping = aes(x = yr, y = rel_posts, color = cycle))
p_s_rel_p_n_f <- p_s_rel_p_n +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Share of posts", title = "", caption = "Omitting 2020 job data") +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)

ggsave(
  plot = p_s_rel_p_n_f, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles_no_2020_rel.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

#AOS analyses
```{r}
#for these, you'll need to update jobs_primary and jobs_secondary as needed

# Single switch point for every AOS chunk below. The chunks used to start from
# an undefined `jobs_` placeholder (and two of them from `jobs_clean`, whose
# AOS columns are all "XXXX" placeholders).
# NOTE(review): defaulting to jobs_primary is an assumption; point this at
# jobs_secondary, or filter to one year, for the analysis you want.
aos_jobs <- jobs_primary

# Harmonise the two spellings of the combined AOS_main labels.
fix_aos_main <- function(df) {
  df %>%
    mutate(
      AOS_main = str_replace_all(AOS_main, "epist_meta", "meta_epist"),
      AOS_main = str_replace_all(AOS_main, "science_math_logic", "science_logic_math")
    )
}
```

##AOS, all
```{r}
# A post can list up to three main areas joined by " or "; count each area.
aos_all <- aos_jobs %>%
  fix_aos_main() %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  select(AOS1, AOS2, AOS3) %>%
  pivot_longer(cols = c(AOS1, AOS2, AOS3), names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  count(values) %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_col() +
  labs(x = "areas", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  plot = aos_all, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_22_all_2.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##AOS, value theory
```{r}
# The pipe now runs straight into ggplot(); a view() call used to end it early,
# so `aos_vt` was built from an empty ggplot().
aos_vt <- aos_jobs %>%
  fix_aos_main() %>%
  filter(AOS_main %in% "value_theory") %>%
  separate(AOS_detail, into = c("AOSd1", "AOSd2", "AOSd3"), sep = " or ") %>%
  select(`Job type`, `Contract type`, AOSd1, AOSd2, AOSd3) %>% #, other_keyword
  pivot_longer(cols = c(AOSd1, AOSd2, AOSd3), names_to = "names", values_to = "values") %>%
  na.omit() %>%
  count(values) %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_col() +
  labs(x = "Value theory detail", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  plot = aos_vt, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_22_vt_1.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

## VT, other
aos_jobs %>%
  fix_aos_main() %>%
  filter(AOS_main %in% "value_theory") %>%
  filter(AOS_detail %in% "value_other") %>%
  count(other_keyword) %>%
  view()
```

##AOS, hist/trad
```{r}
aos_hist_trad_22 <- aos_jobs %>%
  fix_aos_main() %>%
  filter(AOS_main %in% "hist_trad") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4", "A5", "A6"), sep = " or ") %>%
  pivot_longer(cols = c(A1, A2, A3, A4, A5, A6), names_to = "names", values_to = "values") %>%
  na.omit() %>%
  count(values) %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(x = "History and traditions detail", y = "")

ggsave(
  plot = aos_hist_trad_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_hist_trad_22_1.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##AOS, open
```{r}
aos_jobs %>%
  fix_aos_main() %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  select(AOS1, AOS2, AOS3, `Job type`) %>%
  #filter(`Job type` == "Junior" | `Job type` == "Postdoc or similar")%>%
  pivot_longer(cols = c(AOS1, AOS2, AOS3), names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  ggplot() +
  aes(x = `Job type`) +
  geom_bar() +
  theme_minimal(base_size = 11) +
  facet_wrap(~values) +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))

#open, junior and postdoc
aos_jobs %>%
  fix_aos_main() %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  filter(`Job type` %in% c("Junior faculty", "Postdoc or similar")) %>%
  filter(`Contract type` == "Tenure-track or similar") %>%
  filter(AOS1 == "open") %>%
  # AOS2 and AOS3 must survive the select(): pivot_longer() below names them.
  select(AOS1, AOS2, AOS3, Institution, `Job type`) %>%
  pivot_longer(cols = c(AOS1, AOS2, AOS3), names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  ggplot() +
  aes(x = `Job type`) +
  geom_bar() +
  facet_wrap(~values) +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))
```

##AOS, science, logic, math
```{r}
aos_slm_22 <- aos_jobs %>%
  fix_aos_main() %>%
  mutate(AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci")) %>%
  filter(AOS_main %in% "science_logic_math") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4"), sep = " or ") %>%
  pivot_longer(cols = c(A1, A2, A3, A4), names_to = "names", values_to = "values") %>%
  na.omit() %>%
  count(values) %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_col() +
  labs(x = "Science, logic, math detail", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  plot = aos_slm_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_slm_22_1.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

##slm_other
aos_jobs %>%
  fix_aos_main() %>%
  mutate(AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci")) %>%
  filter(AOS_detail %in% "slm_other") %>%
  select(other_keyword) %>%
  view()
```

##AOS, epist_meta
```{r}
aos_me_22_2 <- aos_jobs %>%
  fix_aos_main() %>%
  mutate(
    AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci"),
    AOS_detail = str_replace_all(AOS_detail, "epis.*_meta_other", "meta_epist_other"),
    AOS_detail = str_replace_all(AOS_detail, "phil_mind", "mind")
  ) %>%
  filter(AOS_main %in% "meta_epist") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4"), sep = " or ") %>%
  pivot_longer(cols = c(A1, A2, A3, A4), names_to = "names", values_to = "values") %>%
  na.omit() %>%
  count(values) %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_col() +
  labs(x = "Metaphysics and epistemology detail", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  plot = aos_me_22_2, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_me_22_2.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

##me_other
aos_jobs %>%
  fix_aos_main() %>%
  mutate(
    AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci"),
    AOS_detail = str_replace_all(AOS_detail, "epis.*_meta_other", "meta_epist_other"),
    AOS_detail = str_replace_all(AOS_detail, "phil_mind", "mind")
  ) %>%
  filter(AOS_detail %in% "meta_epist_other") %>%
  select(other_keyword) %>%
  view()
```

##AOS, junior
```{r}
aos_junior <- aos_jobs %>%
  fix_aos_main() %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  filter(`Job type` %in% c("Junior faculty", "Postdoc or similar")) %>%
  select(AOS1, AOS2, AOS3, `Job type`) %>%
  pivot_longer(cols = c(AOS1, AOS2, AOS3), names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  count(values, `Job type`) %>%
  ggplot() +
  aes(x = values, y = n) +
  geom_col() +
  facet_wrap(~`Job type`) +
  labs(x = "areas", y = "") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))

ggsave(
  plot = aos_junior, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_junior_22.png",
  width = 30, height = 20, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```

##AOS, misc
```{r}
# Posts whose AOS text contains "AI", counted per year.
master_file_philjobs %>%
  filter(str_detect(AOS, ".*AI.*")) %>%
  add_count(yr) %>%
  ggplot() +
  aes(x = yr, y = n) +
  geom_point() +
  geom_smooth(method = "lm")
```

#census analyses
```{r}
v20 <- load_variables(2020, "acs5", cache = TRUE)
view(v20)
v20 %>%
  filter(
    str_detect(concept, "POPULATION"),
    str_detect(name, ".*[0-9]_001")
  ) %>%
  view()

# Variables requested in the chunks below:
#B25071_001 -- median gross rent as percentage of household income
#B25064_001 -- median gross rent
#B19013_001 -- median household income
#B01003_001 -- population
```

##population
```{r}
pop_df <- get_acs(
  geography = "county",
  variables = "B01003_001",
  survey = "acs5",
  year = 2020
)

# `fill` is a fixed colour, so it is set on the geom rather than mapped in aes().
pop_df %>%
  ggplot() +
  aes(x = estimate) +
  geom_histogram(bins = 1000, fill = "red")

# Split "County, State" into lower-case subregion/region so the table can be
# joined to map_data("county"), and bin the estimate into five quantile groups.
pop_clean <- pop_df %>%
  filter(estimate >= 36700) %>%
  mutate(population = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  select(region, subregion, estimate, population)

levels(pop_clean$population)
# NOTE(review): these level names look like the quintiles of the unfiltered
# data; after the estimate >= 36700 filter above the actual levels may differ.
# Check the levels() output before trusting the relabelling.
pop_clean$population <- recode(pop_clean$population,
  "[117,8.94e+03]" = "117-8.94k",
  "(8.94e+03,1.9e+04]" = "8.94k-19k",
  "(1.9e+04,3.67e+04]" = "19k-36.7k",
  "(3.67e+04,9.22e+04]" = "36.7k-92.2k",
  "(9.22e+04,1e+07]" = "92.2k-10m"
)
```

##median income
```{r}
income_df <- get_acs(
  geography = "county",
  variables = "B19013_001",
  survey = "acs5",
  year = 2020
)

income_df %>%
  ggplot() +
  aes(x = estimate) +
  geom_histogram(bins = 50)

income_clean <- income_df %>%
  mutate(income = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  select(region, subregion, estimate, income)

# The binned column is `income`; `estimate_disc` does not exist in this table.
levels(income_clean$income)
income_clean$income <- recode(income_clean$income,
  "[1.23e+04,4.29e+04]" = "12.3k-42.9k",
  "(4.29e+04,5e+04]" = "42.9k-50k",
  "(5e+04,5.55e+04]" = "50k-55k",
  "(5.55e+04,6.39e+04]" = "55.5k-63.9k",
  "(6.39e+04,1.47e+05]" = "63.9k-147k"
)
```

##median rent
```{r}
rent_df <- get_acs(
  geography = "county",
  variables = "B25064_001",
  survey = "acs5",
  year = 2020
)

rent_clean <- rent_df %>%
  filter(estimate >= 765) %>%
  mutate(rent = dvmisc::quant_groups(estimate, groups = 5)) %>%
  #rename(region = NAME)%>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>% #for county level
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  select(region, subregion, rent) %>%
  na.omit()

levels(rent_clean$rent)
# NOTE(review): as with population, these level names may not match the
# quintiles computed after the estimate >= 765 filter; check levels() first.
rent_clean$rent <- recode(rent_clean$rent,
  "[275,621]" = "275-621",
  "(621,690]" = "621-690",
  "(690,765]" = "690-765",
  "(765,915]" = "765-915",
  "(915,2.44e+03]" = "915-2440"
)
```

##income and rent
```{r}
rent_income_df <- get_acs(
  geography = "county",
  variables = "B25071_001",
  survey = "acs5",
  year = 2020
)

rent_income_clean <- rent_income_df %>%
  mutate(rent_percent = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  select(region, subregion, estimate, rent_percent) %>%
  na.omit()
```

##locating the institutions
```{r}
long_lat <- jobs_primary %>%
  filter(yr == 2022) %>%
  tidygeocoder::geocode(Institution, method = "osm", lat = latitude, long = longitude)

##geocoder doesn't pick up U of TX Rio Grande Valley
# Only rows without coordinates are dropped. na.omit() would also discard any
# post with a missing value in an unrelated column.
long_lat <- long_lat %>%
  mutate(
    latitude = ifelse(str_detect(Institution, "Rio Grande"), 26.30715335, latitude),
    longitude = ifelse(str_detect(Institution, "Rio Grande"), -98.1728187, longitude)
  ) %>%
  drop_na(latitude, longitude)

# Keep points inside a bounding box around the contiguous United States.
long_lat_us <- long_lat %>%
  filter(longitude > -128 & longitude < -64 & latitude < 50 & latitude > 20) %>%
  select(Institution, latitude, longitude)
```

##getting census data into mappable format for locating institution
```{r}
job_locations <- map_data("county")

# Build one county map: join a cleaned census table onto the county polygons,
# fill by the quantile column named in `fill_col`, and overlay the geocoded
# institutions from `long_lat_us`.
#   census_df: pop_clean, income_clean, rent_clean or rent_income_clean
#   fill_col:  "population", "income", "rent" or "rent_percent"
make_job_map <- function(census_df, fill_col) {
  ## use by = c("region", "subregion") for county level
  county_df <- left_join(
    job_locations, census_df,
    by = c("region", "subregion"),
    keep = TRUE
  )

  ggplot(
    data = county_df,
    mapping = aes(x = long, y = lat, group = group, fill = .data[[fill_col]])
  ) +
    theme_minimal(base_size = 25) +
    geom_polygon(color = "grey90", linewidth = 0.1) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
    scale_fill_simpsons() +
    labs(fill = fill_col) +
    geom_point(
      data = long_lat_us,
      mapping = aes(x = longitude, y = latitude),
      size = 1, alpha = .5, na.rm = TRUE,
      inherit.aes = FALSE
    )
}
```

##making the maps
```{r}
# One object per census table, so each ggsave() call below has a plot to save.
map_jobs_pop_22 <- make_job_map(pop_clean, "population")
map_jobs_income_22 <- make_job_map(income_clean, "income")
map_jobs_rent_22 <- make_job_map(rent_clean, "rent")
map_jobs_rent_income_22 <- make_job_map(rent_income_clean, "rent_percent")
```

##saving the plots
```{r}
#population
ggsave(
  plot = map_jobs_pop_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_pop_22.png",
  width = 80, height = 65, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

#income
ggsave(
  plot = map_jobs_income_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_income_22.png",
  width = 80, height = 65, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

#rent
ggsave(
  plot = map_jobs_rent_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_rents_22.png",
  width = 80, height = 65, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)

#rent and income
ggsave(
  plot = map_jobs_rent_income_22, device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_income_and_rents_22.png",
  width = 80, height = 65, units = "in", dpi = 300,
  limitsize = FALSE, bg = "white"
)
```