---
title: "2022-2023 job cycle"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
# Default chunk option for the whole document: echo = TRUE.
knitr::opts_chunk$set(echo = TRUE)
```

# load libraries & set theme
```{r}
library(tidyverse)
library(viridis)
library(lubridate)
library(ggthemes)
library(RColorBrewer)
library(tidygeocoder)
library(tidycensus)
library(maps)
library(ggsci)
library(ggmap)
library(gghighlight)
# Register the Google API key for ggmap without embedding the secret in the
# source file. The key is read from the GGMAP_GOOGLE_API_KEY environment
# variable (for example, set in ~/.Renviron).
# NOTE(review): a key was previously hard-coded on this line; treat it as
# exposed and revoke/rotate it.
google_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (nzchar(google_key)) {
  register_google(key = google_key)
} else {
  message("GGMAP_GOOGLE_API_KEY is not set; skipping register_google().")
}

# Big base size so text stays legible in the large images saved for the blog.
theme_set(
  theme_minimal(base_size = 50)
)
```

# misc checking
```{r}
# Scratch check: which rows of jobs_clean have an Id missing from master_temp?
# NOTE(review): expects jobs_clean (still carrying a `Date posted` column) and
# master_temp to exist in the session already -- confirm before running.
temp_clean <- select(
  mutate(
    jobs_clean,
    Post_date = mdy(`Date posted`),
    mth = month(Post_date),
    yr = year(Post_date)
  ),
  -`Date posted`
)

temp_prim <- anti_join(temp_clean, master_temp, by = "Id")

write_csv(temp_prim, "temp_prim.csv")

```


# getting & cleaning latest data
```{r}
## main idea: add on to the new file
# step 0: reload master file (if needed)
# last update: 19 July 2023
master_file_philjobs <- read_csv("master_file_philjobs_2023-07-19.csv")

# step 1: import latest data & clean up format
jobs_raw <- read_csv(
  "C:\\Users\\lassiter\\Downloads\\jobs.csv",
  col_types = cols(`Date posted` = col_date(format = "%m/%d/%Y"))
)

# Drop the link / how-to-apply columns, add placeholder AOS columns ("XXXX")
# that are filled in by hand in step 3, and derive year and month of posting.
jobs_clean <- jobs_raw %>%
  rename(Post_date = `Date posted`) %>%
  select(
    -`How to apply`,
    -`Info link`,
    -`Application link`,
    -`PhilJobs page`,
    -`Start time`
  ) %>%
  mutate(
    AOS_main = "XXXX",
    AOS_detail = "XXXX",
    other_keyword = "XXXX",
    yr = year(Post_date),
    mth = month(Post_date)
  )

# step 2: get only latest data
# Match on Id only. Without `by`, anti_join() compares every shared column,
# including the AOS placeholder columns set to "XXXX" above; any post whose AOS
# fields differ in the master file would then never match and would be treated
# as new again.
jobs_latest <- jobs_clean %>%
  anti_join(master_file_philjobs, by = "Id")

# step 3: clean AOS (edit() is interactive)
jobs_latest <- edit(jobs_latest)
#jobs_latest <- jobs_secondary


```

# rejoining to main file
```{r}
# Stack the newly cleaned posts on top of the existing master data; the result
# is held in a temporary object so it can be inspected first.
master_temp <- rbind(jobs_latest, master_file_philjobs)

# Once the temporary object checks out, promote it to the master object.
master_file_philjobs <- master_temp

# Save the updated master file under a name that carries today's date.
d <- paste0("master_file_philjobs_", Sys.Date(), ".csv")
write_csv(master_file_philjobs, d)

master_file_philjobs <- mutate(
  master_file_philjobs,
  Post_date = as.Date(Post_date, format = "%m/%d/%Y")
)
```


# primary cycle hiring
## creating df for primary cycle
```{r}
# Primary cycle: rows whose posting month (mth) is July through December.
jobs_primary <- filter(master_file_philjobs, mth >= 7, mth <= 12)
```


## all job posts with historical context, primary cycle
```{r}
# Posts per job type and year in the primary cycle, one line per job type.
# The plot is stored in job_plots_history so the ggsave() call below has an
# object to save (the assignment used to be commented out).
job_plots_history <- jobs_primary %>%
  add_count(`Job type`, yr) %>%
  select(`Job type`, yr, n) %>%
  distinct() %>% # one row per job type / year, so points are not overplotted
  ggplot(mapping = aes(x = yr, y = n, color = `Job type`)) +
  geom_point(size = 3) +
  geom_line(linewidth = 3) +
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Year",
    y = "Posts",
    title = ""
  )

# Still show the plot when the chunk is run.
job_plots_history

ggsave(
  plot = job_plots_history,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\all_jobs_22_history_big.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

```

## adding labels for a handful of job types
```{r}
# Yearly post counts for four selected job types, with the count printed next
# to each point.
some_job_plots_history <- jobs_primary %>%
  filter(
    `Job type` %in% c(
      "Junior faculty",
      "Postdoc or similar",
      "Other (non-academic)",
      "Faculty (open rank)"
    )
  ) %>%
  add_count(`Job type`, yr) %>%
  select(`Job type`, yr, n) %>%
  distinct() %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Job type`)) +
  geom_point(size = 3) +
  geom_line(linewidth = 3) +
  # ggrepel is not attached with library(), so the geom must be namespaced.
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10,
    nudge_x = 0,
    max.overlaps = 100,
    size = 10
  ) +
  #geom_text(hjust = 1, vjust = -1)+
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Year",
    y = "Posts",
    title = ""
  )

ggsave(
  plot = some_job_plots_history,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\some_jobs_22_history_big.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```


## filtering for junior and postdoc
```{r}
# Junior-faculty and postdoc posts from the primary cycle.
junior <- filter(
  jobs_primary,
  `Job type` %in% c("Junior faculty", "Postdoc or similar")
)
```

## looking at junior trends
```{r}
# Junior/postdoc posts per contract type and year, labelled with the counts.
junior_plots_history <- junior %>%
  #filter(yr != 2020)%>%
  add_count(`Contract type`, yr) %>%
  select(`Contract type`, yr, n) %>%
  distinct() %>%
  ggplot(mapping = aes(x = yr, y = n, color = `Contract type`)) +
  geom_point(size = 2) +
  geom_line(linewidth = 2) +
  #geom_smooth(method = "lm", se = FALSE)+
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Year",
    y = "Posts",
    title = ""
  ) +
  # ggrepel is not attached with library(), so the geom must be namespaced.
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10,
    nudge_x = 0,
    max.overlaps = 100,
    size = 10
  )

ggsave(
  plot = junior_plots_history,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_22_history_labels_big.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```



## deadlines with historical context, primary cycle
```{r}
# For each deadline year, the share of that year's late-season deadlines
# (day-of-year >= 260) that fall on each day, drawn as one line per year.
# The plot is stored in deadlines_2022_history so the ggsave() call below has
# an object to save (the assignment used to be commented out).
deadlines_2022_history <- jobs_primary %>%
  filter(Deadline != "---") %>%
  mutate(
    # Strip parenthetical notes and normalise "/" to "-" before parsing.
    Deadline = str_remove_all(Deadline, "\\(.*\\)"),
    Deadline = str_replace_all(Deadline, "/", "-"),
    Deadline_clean = parse_date_time(Deadline, orders = c("mdY", "Ymd"))
  ) %>%
  arrange(Deadline_clean) %>%
  mutate(
    Deadline_day = yday(Deadline_clean),
    Deadline_yr = factor(year(Deadline_clean))
  ) %>%
  filter(Deadline_yr != 2015 & Deadline_yr != 2023) %>%
  filter(Deadline_day >= 260) %>% ## change to <= 50 to get beginning of year deadlines
  group_by(Deadline_yr, Deadline_clean) %>%
  add_count() %>%
  ungroup() %>%
  select(Deadline_day, Deadline_clean, Deadline_yr, n) %>%
  distinct() %>%
  group_by(Deadline_yr) %>%
  # totals is only used to obtain the per-year maximum (= the year's total).
  mutate(
    totals = cumsum(n),
    maximums = max(totals)
  ) %>%
  ungroup() %>%
  select(Deadline_day, n, maximums, Deadline_yr) %>%
  distinct() %>%
  # NOTE(review): this plots the daily share (n / yearly total), not the
  # cumulative share -- confirm that is intended.
  mutate(st_apps = n / maximums) %>%
  ggplot(mapping = aes(x = Deadline_day, y = st_apps, color = Deadline_yr, group = Deadline_yr)) +
  geom_line(linewidth = 1, alpha = .5) +
  labs(
    x = "Deadlines Sept 17 to Dec 31",
    y = ""
  ) +
  theme_minimal(base_size = 11)

# Still show the plot when the chunk is run.
deadlines_2022_history

ggsave(
  plot = deadlines_2022_history,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\deadlines_22_history.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

```


## daily posts, all and junior
```{r}
#ALL
# For every calendar day (month-day) of the primary cycle, keep the year whose
# cumulative share of that cycle's posts (d) is highest on that day, and plot
# those leading points coloured by year.
jobs_primary %>%
  mutate(DayMonth = format(as.Date(Post_date), "%m-%d")) %>%
  #filter(mth == 9 | mth == 10 | mth == 11)%>%
  group_by(yr, Post_date) %>%
  add_count() %>%
  ungroup() %>%
  select(DayMonth, yr, n) %>%
  unique() %>%
  arrange(yr, DayMonth) %>%
  group_by(yr) %>%
  mutate(
    c = cumsum(n),
    d = c / max(c)
  ) %>% ## d relativizes to total postings for the cycle, use n for daily posts and c for cumulative posts
  ungroup() %>%
  group_by(DayMonth) %>%
  # Record the leading year while yr is still numeric. Converting to a factor
  # first and adding a fixed offset to the factor codes only gives the right
  # year when the data start in 2015 and no year is missing.
  mutate(
    big = max(d),
    big_yr = ifelse(d == big, yr, NA)
  ) %>%
  ungroup() %>%
  mutate(yr = factor(yr)) %>%
  na.omit() %>% # keeps only the rows that lead on their day
  add_count(big_yr) %>%
  ggplot() +
  aes(x = DayMonth, y = d, group = yr, color = yr) +
  #geom_line(linewidth = 1.5)+
  geom_point() +
  #gghighlight(max(d))+
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_blank()) +
  labs(
    x = "July 1 to December 31",
    y = ""
  ) +
  scale_color_viridis(discrete = TRUE)

#JUNIOR

# Cumulative share of each year's posts in `junior`, by calendar day, with the
# 2022 series highlighted.
ggplot(
  data = junior %>%
    mutate(DayMonth = format(as.Date(Post_date), "%m-%d")) %>%
    #filter(mth == 9 | mth == 10 | mth == 11)%>%
    add_count(yr, Post_date) %>%
    select(DayMonth, yr, n) %>%
    distinct() %>%
    arrange(yr, DayMonth) %>%
    group_by(yr) %>%
    mutate(
      c = cumsum(n),
      d = c / max(c)
    ) %>%
    ungroup() %>%
    mutate(yr = factor(yr)),
  mapping = aes(x = DayMonth, y = d, color = yr, group = yr)
) +
  geom_point() +
  #geom_line(linewidth = 1.5)+
  #gghighlight(yr == 2022 | yr == 2015)+
  gghighlight(yr == 2022) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_blank()) +
  labs(
    x = "July 1 to December 31",
    y = ""
  )

```

# secondary cycle hiring

## creating df for secondary cycle
```{r}
# Secondary cycle: rows whose posting month (mth) is January through June.
jobs_secondary <- filter(master_file_philjobs, mth >= 1, mth <= 6)
```

## number of jobs, present year
```{r}
# Count of 2023 secondary-cycle posts per job type, drawn as horizontal bars.
js <- distinct(
  select(
    add_count(filter(jobs_secondary, yr == 2023), `Job type`, yr),
    `Job type`, yr, n
  )
)

jsp <- ggplot(js, aes(x = `Job type`, y = n))

jsp_f <- jsp +
  geom_col() +
  coord_flip() +
  labs(x = "Job type", y = "Posts", title = "")

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_secondary_2023.png",
  plot = jsp_f,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```


## number of jobs, historical context
```{r}
# Secondary-cycle posts per job type and year, with a linear trend per type.
jsh <- distinct(
  select(add_count(jobs_secondary, `Job type`, yr), `Job type`, yr, n)
)

jshp <- ggplot(jsh, aes(x = yr, y = n, color = `Job type`))

jshp_f <- jshp +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "")

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_secondary_history.png",
  plot = jshp_f,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```

## select positions labeled
```{r}
# Secondary-cycle counts for three job types with linear trends and labels.
ggplot(
  data = jobs_secondary %>%
    filter(
      `Job type` %in% c(
        "Junior faculty",
        "Postdoc or similar",
        "Faculty (open rank)"
      )
    ) %>%
    add_count(`Job type`, yr) %>%
    select(`Job type`, yr, n) %>%
    distinct(),
  mapping = aes(x = yr, y = n, color = `Job type`)
) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, size = 3) +
  ggrepel::geom_text_repel(
    aes(label = n),
    nudge_y = 10,
    nudge_x = 0,
    max.overlaps = 100,
    size = 10
  ) +
  #geom_text(hjust = 1, vjust = -1)+
  scale_color_simpsons() +
  labs(x = "Year", y = "Posts", title = "")
```

## filtering for junior and postdoc
```{r}
# Junior-faculty and postdoc posts from the secondary cycle.
# NOTE(review): this reuses the name `junior`, replacing the primary-cycle
# object of the same name -- later chunks see whichever ran last.
junior <- filter(
  jobs_secondary,
  `Job type` %in% c("Junior faculty", "Postdoc or similar")
)
```

## looking at junior trends
```{r}
# Posts in `junior` per contract type and year, with linear trends and count
# labels, saved straight to PNG.
jun_second <- junior %>%
  #filter(yr != 2020)%>%
  add_count(`Contract type`, yr) %>%
  select(`Contract type`, yr, n) %>%
  distinct()

jun_second_p <- ggplot(
  data = jun_second,
  mapping = aes(x = yr, y = n, color = `Contract type`)
)

jun_second_p1 <- jun_second_p +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5)

# No dev.new() here: ggsave() opens and closes its own PNG device, so the
# extra device that used to be opened at this point was never drawn on and was
# never closed.

jun_second_p1_f <- jun_second_p1 +
  ggrepel::geom_text_repel(
    aes(label = n),
    size = 8,
    color = "black",
    hjust = 0,
    vjust = 0,
    nudge_x = .1,
    nudge_y = .5
  ) +
  scale_color_simpsons() +
  labs(
    x = "Year",
    y = "Posts",
    title = ""
  )

ggsave(
  plot = jun_second_p1_f,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_secondary_23_history.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```

# comparing primary and secondary and across entire year
```{r}
# Posts per contract type, year and hiring cycle (2023 excluded).
# annual_posts is the number of posts in that year AND cycle; rel_posts is the
# contract type's share of it.
p_s <- master_file_philjobs %>%
  filter(yr != 2023) %>%
  # Label fixed: it previously read "seconary" and appeared in the legend.
  mutate(cycle = ifelse((mth >= 7 & mth <= 12), "primary", "secondary")) %>%
  add_count(yr, cycle, name = "annual_posts") %>%
  add_count(`Contract type`, yr, cycle) %>%
  select(`Contract type`, yr, cycle, n, annual_posts) %>%
  distinct() %>%
  mutate(rel_posts = n / annual_posts)

p_s_p <- ggplot(data = p_s, mapping = aes(x = yr, y = n, color = cycle))

p_s_p_f <- p_s_p +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(
    x = "Year",
    y = "Posts",
    title = ""
  ) +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)


ggsave(
  plot = p_s_p_f,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

#############
# Cycle comparison with 2020 left out, as the plot captions state.
# Previously only 2023 was filtered, so 2020 was still in the data. The data
# frame is built once and used for both plots below.
p_s_no_2020 <- master_file_philjobs %>%
  filter(yr != 2023, yr != 2020) %>%
  # Label fixed: it previously read "seconary" and appeared in the legend.
  mutate(cycle = ifelse((mth >= 7 & mth <= 12), "primary", "secondary")) %>%
  add_count(yr, cycle, name = "annual_posts") %>%
  add_count(`Contract type`, yr, cycle) %>%
  select(`Contract type`, yr, cycle, n, annual_posts) %>%
  distinct() %>%
  mutate(rel_posts = n / annual_posts)

# Plot 1: raw post counts.
p_s_p_n <- ggplot(data = p_s_no_2020, mapping = aes(x = yr, y = n, color = cycle))

p_s_p_n_f <-
  p_s_p_n +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(
    x = "Year",
    y = "Posts",
    title = "",
    caption = "Omitting 2020 job data"
  ) +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)


ggsave(
  plot = p_s_p_n_f,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles_no_2020.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

# Plot 2: each contract type's share of the posts in its year and cycle.
# It is saved under its own file name; it used to overwrite the counts plot
# saved just above.
p_s_p_n <- ggplot(data = p_s_no_2020, mapping = aes(x = yr, y = rel_posts, color = cycle))

p_s_p_n_f <-
  p_s_p_n +
  geom_point(size = 5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 5) +
  scale_color_simpsons() +
  labs(
    x = "Year",
    y = "Share of posts",
    title = "",
    caption = "Omitting 2020 job data"
  ) +
  scale_x_continuous(breaks = c(2013, 2015, 2017, 2019, 2021)) +
  facet_wrap(~`Contract type`)


ggsave(
  plot = p_s_p_n_f,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\junior_jobs_compare_cycles_no_2020_rel.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

  
```


# AOS analyses
```{r}
#for these, you'll need to update jobs_primary and jobs_secondary as needed
```

## AOS, all
```{r}
# Bar chart of how often each main AOS value occurs, after splitting
# multi-area entries on " or ".
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_all <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    )
  ) %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  select(AOS1, AOS2, AOS3) %>%
  pivot_longer(cols = AOS1:AOS3, names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  add_count(values) %>%
  select(values, n) %>%
  distinct() %>%
  ggplot(aes(x = reorder(values, n), y = n)) +
  geom_col() +
  labs(x = "areas", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_22_all_2.png",
  plot = aos_all,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```

## AOS, value theory
```{r}
# Bar chart of the AOS_detail values among posts whose main AOS is exactly
# "value_theory".
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_vt <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(AOS_main, "epist_meta", "meta_epist"),
    AOS_main = str_replace_all(AOS_main, "science_math_logic", "science_logic_math")
  ) %>%
  filter(AOS_main %in% "value_theory") %>%
  separate(AOS_detail, into = c("AOSd1", "AOSd2", "AOSd3"), sep = " or ") %>%
  select(`Job type`, `Contract type`, AOSd1, AOSd2, AOSd3) %>% #, other_keyword
  pivot_longer(cols = c(AOSd1, AOSd2, AOSd3), names_to = "names", values_to = "values") %>%
  na.omit() %>%
  add_count(values) %>%
  arrange(n) %>%
  select(values, n) %>%
  # The pipe must continue into ggplot(). A leftover `%>% view()` here ended
  # the pipeline, so the plot below was built without any data.
  distinct() %>%
  ggplot() +
  aes(x = reorder(values, n), y = n) +
  geom_bar(stat = "identity") +
  labs(
    x = "Value theory detail",
    y = ""
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  plot = aos_vt,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_22_vt_1.png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

## VT, other
# Tabulates the free-text keywords of value-theory posts filed as
# "value_other" and opens the result in the viewer.
# NOTE(review): this reads jobs_clean while the neighbouring AOS pipelines
# read `jobs_` -- confirm which input is intended.
jobs_clean %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    )
  ) %>%
  filter(AOS_main == "value_theory", AOS_detail == "value_other") %>%
  select(other_keyword) %>%
  add_count(other_keyword) %>%
  distinct() %>%
  view()

```

## AOS, hist/trad

```{r}
# Bar chart of the AOS_detail values among posts whose main AOS is exactly
# "hist_trad" (up to six " or "-separated details per post).
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_hist_trad_22 <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    )
  ) %>%
  filter(AOS_main == "hist_trad") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4", "A5", "A6"), sep = " or ") %>%
  pivot_longer(cols = A1:A6, names_to = "names", values_to = "values") %>%
  na.omit() %>%
  add_count(values) %>%
  select(n, values) %>%
  distinct() %>%
  ggplot(aes(x = reorder(values, n), y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(x = "History and traditions detail", y = "")

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_hist_trad_22_1.png",
  plot = aos_hist_trad_22,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```


## AOS, open
```{r}
# Number of posts per job type, one facet per main AOS value.
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
ggplot(
  data = jobs_ %>%
    mutate(
      AOS_main = str_replace_all(
        AOS_main,
        c(
          "epist_meta" = "meta_epist",
          "science_math_logic" = "science_logic_math"
        )
      )
    ) %>%
    separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
    select(AOS1, AOS2, AOS3, `Job type`) %>%
    #filter(`Job type` == "Junior" | `Job type` == "Postdoc or similar")%>%
    pivot_longer(cols = AOS1:AOS3, names_to = "aos", values_to = "values") %>%
    na.omit(),
  mapping = aes(x = `Job type`)
) +
  geom_histogram(stat = "count") +
  theme_minimal(base_size = 11) +
  facet_wrap(~values) +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))

#open, junior and postdoc
# Tenure-track junior/postdoc posts whose first-listed main AOS is "open",
# counted by job type.
# NOTE(review): this reads jobs_clean while the neighbouring AOS pipelines
# read `jobs_` -- confirm which input is intended.
jobs_clean %>%
  mutate(
    AOS_main = str_replace_all(AOS_main, "epist_meta", "meta_epist"),
    AOS_main = str_replace_all(AOS_main, "science_math_logic", "science_logic_math")
  ) %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  filter(`Job type` == "Junior faculty" | `Job type` == "Postdoc or similar") %>%
  filter(`Contract type` == "Tenure-track or similar") %>%
  filter(AOS1 == "open") %>%
  select(AOS1, Institution, `Job type`) %>%
  # Only AOS1 survives the select() above, so pivot just that column; asking
  # for AOS2/AOS3 here fails because those columns no longer exist.
  pivot_longer(cols = AOS1, names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  ggplot() +
  aes(x = `Job type`) +
  # geom_bar() counts rows per x value by default.
  geom_bar() +
  facet_wrap(~values) +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))

```

## AOS, science, logic, math
```{r}
# Bar chart of the AOS_detail values among posts whose main AOS is exactly
# "science_logic_math".
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_slm_22 <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    ),
    AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci")
  ) %>%
  filter(AOS_main == "science_logic_math") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4"), sep = " or ") %>%
  pivot_longer(cols = A1:A4, names_to = "names", values_to = "values") %>%
  na.omit() %>%
  add_count(values) %>%
  select(n, values) %>%
  distinct() %>%
  ggplot(aes(x = reorder(values, n), y = n)) +
  geom_col() +
  labs(x = "Science, logic, math detail", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_slm_22_1.png",
  plot = aos_slm_22,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

##slm_other
# Opens the free-text keywords of posts whose AOS_detail is exactly
# "slm_other" in the viewer.
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    ),
    AOS_detail = str_replace_all(AOS_detail, "phil_science", "phil_sci")
  ) %>%
  filter(AOS_detail == "slm_other") %>%
  select(other_keyword) %>%
  view()
```

## AOS, epist_meta
```{r}
# Bar chart of the AOS_detail values among posts whose main AOS is exactly
# "meta_epist", after normalising a few spelling variants.
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_me_22_2 <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    ),
    AOS_detail = str_replace_all(
      AOS_detail,
      c(
        "phil_science" = "phil_sci",
        "epis.*_meta_other" = "meta_epist_other",
        "phil_mind" = "mind"
      )
    )
  ) %>%
  filter(AOS_main == "meta_epist") %>%
  select(AOS_detail) %>%
  separate(AOS_detail, into = c("A1", "A2", "A3", "A4"), sep = " or ") %>%
  pivot_longer(cols = A1:A4, names_to = "names", values_to = "values") %>%
  na.omit() %>%
  add_count(values) %>%
  select(n, values) %>%
  distinct() %>%
  ggplot(aes(x = reorder(values, n), y = n)) +
  geom_col() +
  labs(x = "Metaphysics and epistemology detail", y = "") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_me_22_2.png",
  plot = aos_me_22_2,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

##me_other
# Opens the free-text keywords of posts whose AOS_detail is exactly
# "meta_epist_other" (after normalisation) in the viewer.
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    ),
    AOS_detail = str_replace_all(
      AOS_detail,
      c(
        "phil_science" = "phil_sci",
        "epis.*_meta_other" = "meta_epist_other",
        "phil_mind" = "mind"
      )
    )
  ) %>%
  filter(AOS_detail == "meta_epist_other") %>%
  select(other_keyword) %>%
  view()
```


## AOS, junior
```{r}
# Main-AOS counts for junior-faculty and postdoc posts, one facet per job type.
# NOTE(review): `jobs_` is not defined in this file; it looks like a
# placeholder to be completed (e.g. jobs_primary / jobs_secondary) -- confirm.
aos_junior <- jobs_ %>%
  mutate(
    AOS_main = str_replace_all(
      AOS_main,
      c(
        "epist_meta" = "meta_epist",
        "science_math_logic" = "science_logic_math"
      )
    )
  ) %>%
  separate(AOS_main, into = c("AOS1", "AOS2", "AOS3"), sep = " or ") %>%
  filter(`Job type` %in% c("Junior faculty", "Postdoc or similar")) %>%
  select(AOS1, AOS2, AOS3, `Job type`) %>%
  pivot_longer(cols = AOS1:AOS3, names_to = "aos", values_to = "values") %>%
  na.omit() %>%
  add_count(values, `Job type`) %>%
  select(-aos) %>%
  distinct() %>%
  ggplot(aes(x = values, y = n)) +
  geom_col() +
  facet_wrap(~`Job type`) +
  labs(x = "areas", y = "") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))

ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\aos_junior_22.png",
  plot = aos_junior,
  device = "png",
  width = 30,
  height = 20,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)



```
## AOS, misc
```{r}

# Yearly number of posts whose AOS text contains "AI", with a linear fit.
# NOTE(review): rows are not de-duplicated after add_count(), so each year
# contributes one point per matching post to the fit -- confirm intended.
ggplot(
  data = add_count(
    filter(master_file_philjobs, str_detect(AOS, ".*AI.*")),
    yr
  ),
  mapping = aes(x = yr, y = n)
) +
  geom_point() +
  geom_smooth(method = "lm")

```


# census analyses
```{r}
# Variable dictionary for the 2020 ACS 5-year data, used to look up codes.
v20 <- load_variables(2020, "acs5", cache = TRUE)

view(v20)

# Narrow the dictionary to *_001 variables whose concept mentions POPULATION.
view(
  filter(
    v20,
    str_detect(concept, "POPULATION"),
    str_detect(name, ".*[0-9]_001")
  )
)

# Variable codes of interest:
# B01003_001 -- population
# B25064_001 -- median gross rent
# B25071_001 -- median rent as percentage of household income
# NOTE(review): this list used to name B25070_001 for the rent/income
# variable, but the "income and rent" chunk requests B25071_001 -- confirm.
```

## population
```{r}
# County populations from the 2020 ACS 5-year data (variable B01003_001).
pop_df <- get_acs(
  geography = "county",
  variables = "B01003_001",
  survey = "acs5",
  year = 2020
)

# Distribution of county populations. The bar colour is a constant, so it is
# set on the geom; inside aes() the string "red" was treated as a data value
# (default palette colour plus a legend entry named "red").
pop_df %>%
  ggplot() +
  aes(x = estimate) +
  geom_histogram(bins = 1000, fill = "red")

# Bin the larger counties into five quantile groups and split NAME into
# lower-case region / subregion columns.
pop_df %>%
  filter(estimate >= 36700) %>%
  mutate(population = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  # Select by name rather than by position (previously 3, 2, 5, 7).
  select(region, subregion, estimate, population) -> pop_clean

levels(pop_clean$population)
# NOTE(review): these level names are hard-coded; compare them with the
# levels() output above, since recode() leaves non-matching levels unchanged.
pop_clean$population <- recode(pop_clean$population,
                                    "[117,8.94e+03]"="117-8.94k",
                                    "(8.94e+03,1.9e+04]"="8.94k-19k",
                                    "(1.9e+04,3.67e+04]" = "19k-36.7k", 
                                    "(3.67e+04,9.22e+04]" = "36.7k-92.2k", 
                                    "(9.22e+04,1e+07]" = "92.2k-10m")
  
  
```


## median income
```{r}
# County-level ACS 2020 5-year estimates for variable B19013_001.
income_df <- get_acs(
  geography = "county",
  variables = "B19013_001",
  survey = "acs5",
  year = 2020
)

# Distribution of the county estimates.
income_df %>%
  ggplot() +
  aes(x = estimate) +
  geom_histogram(bins = 50)

# Bin counties into five quantile groups and split NAME into lower-case
# region / subregion columns.
income_df %>%
  mutate(income = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = tolower(subregion),
    subregion = str_remove_all(subregion, " county"),
    subregion = str_remove_all(subregion, " parish")
  ) %>%
  # Select by name rather than by position (previously 3, 2, 5, 7).
  select(region, subregion, estimate, income) -> income_clean

# Inspect the group labels. The binned column is `income`; this line used to
# read a column `estimate_disc` that is never created.
levels(income_clean$income)

# NOTE(review): these level names are hard-coded; compare them with the
# levels() output above, since recode() leaves non-matching levels unchanged.
# The third label now reads "50k-55.5k" to agree with its 5.55e+04 upper bound
# and with the next label.
income_clean$income <- recode(income_clean$income,
                                    "[1.23e+04,4.29e+04]"="12.3k-42.9k",
                                    "(4.29e+04,5e+04]"="42.9k-50k",
                                    "(5e+04,5.55e+04]" = "50k-55.5k", 
                                    "(5.55e+04,6.39e+04]" = "55.5k-63.9k", 
                                    "(6.39e+04,1.47e+05]" = "63.9k-147k")
```

## median rent
```{r}
# County-level ACS 2020 5-year estimates for variable B25064_001.
rent_df <- get_acs(
  geography = "county",
  variables = "B25064_001",
  survey = "acs5",
  year = 2020
)

# Keep counties with an estimate of at least 765, bin them into five quantile
# groups, and split NAME into lower-case region / subregion columns.
rent_clean <- rent_df %>%
  filter(estimate >= 765) %>%
  mutate(rent = dvmisc::quant_groups(estimate, groups = 5)) %>%
  #rename(region = NAME)%>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>% #for county level
  mutate(
    region = tolower(region),
    subregion = str_remove_all(
      str_remove_all(tolower(subregion), " county"),
      " parish"
    )
  ) %>%
  select(region, subregion, rent) %>%
  na.omit()

levels(rent_clean$rent)

# NOTE(review): these level names are hard-coded; compare them with the
# levels() output above, since recode() leaves non-matching levels unchanged.
rent_clean$rent <- recode(
  rent_clean$rent,
  "[275,621]" = "275-621",
  "(621,690]" = "621-690",
  "(690,765]" = "690-765",
  "(765,915]" = "765-915",
  "(915,2.44e+03]" = "915-2440"
)


```

## income and rent
```{r}
# County-level ACS 2020 5-year estimates for variable B25071_001.
rent_income_df <- get_acs(
  geography = "county",
  variables = "B25071_001",
  survey = "acs5",
  year = 2020
)

# Bin counties into five quantile groups, split NAME into lower-case
# region / subregion columns, and keep columns 3, 2, 5 and 7 by position.
rent_income_clean <- rent_income_df %>%
  mutate(rent_percent = dvmisc::quant_groups(estimate, groups = 5)) %>%
  separate(NAME, into = c("subregion", "region"), sep = ", ") %>%
  mutate(
    region = tolower(region),
    subregion = str_remove_all(
      str_remove_all(tolower(subregion), " county"),
      " parish"
    )
  ) %>%
  select(3, 2, 5, 7) %>%
  na.omit()
```


## locating the institutions
```{r}
# Geocode the institutions behind the 2022 primary-cycle posts with OSM.
jobs_primary %>%
  filter(yr == 2022) %>%
  tidygeocoder::geocode(Institution, method = "osm", lat = latitude, long = longitude) -> long_lat

## geocoder doesn't pick up U of TX Rio Grande Valley
long_lat %>%
  mutate(
    latitude = ifelse(str_detect(Institution, "Rio Grande"), 26.30715335, latitude),
    longitude = ifelse(str_detect(Institution, "Rio Grande"), -98.1728187, longitude)
  ) %>%
  # Drop only rows that could not be geocoded. na.omit() also removed every
  # row with a missing value in any other column.
  drop_na(latitude, longitude) -> long_lat

# Keep points inside a rough bounding box (longitude -128 to -64, latitude
# 20 to 50) and only the columns the map needs. The long/lat/hit columns that
# used to be created here were dropped again by select(), so they are gone.
long_lat %>%
  filter(longitude > -128 & longitude < -64 & latitude < 50 & latitude > 20) %>%
  select(Institution, latitude, longitude) -> long_lat_us
```

## getting census data into mappable format for locating institution
```{r}

# County polygons from the maps package, as a data frame.
job_locations <- map_data("county")

## options for the 2nd arg in left_join: pop_clean, income_clean, rent_clean, rent_income_clean
## use by = c("region", "subregion") for county level
# keep = TRUE retains the key columns from both tables; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
left_join(job_locations, rent_clean, by = c("region", "subregion"), keep = TRUE) -> final_df
``` 

## making the maps
```{r}
# County map filled by the census grouping, with institutions as points.
p <- ggplot(
  data = final_df,
  mapping = aes(
    x = long, y = lat,
    group = group, fill = rent
  )
) +
  theme_minimal(base_size = 25)
## don't forget to change the "fill" value: income, rent, rent_percent

p1 <- p +
  geom_polygon(color = "grey90", linewidth = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  scale_fill_simpsons()

# Store the finished map so the saving chunk has an object to write. The
# name matches the current fill (rent); rename it (map_jobs_pop_22,
# map_jobs_income_22, map_jobs_rent_income_22) when the fill is changed.
map_jobs_rent_22 <- p1 +
  geom_point(
    data = long_lat_us,
    mapping = aes(x = longitude, y = latitude),
    size = 1,
    alpha = .5,
    na.rm = TRUE,
    inherit.aes = FALSE
  )

# Still show the map when the chunk is run.
map_jobs_rent_22


```

## saving the plots
```{r}
# Each call writes one map object to a large PNG.
# NOTE(review): the map objects named here must exist in the session; run
# only the call that matches the map that was just built.

#population
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_pop_22.png",
  plot = map_jobs_pop_22,
  device = "png",
  width = 80,
  height = 65,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

#income
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_income_22.png",
  plot = map_jobs_income_22,
  device = "png",
  width = 80,
  height = 65,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

#rent
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_and_rents_22.png",
  plot = map_jobs_rent_22,
  device = "png",
  width = 80,
  height = 65,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

#rent and income
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs outputs\\jobs_income_and_rents_22.png",
  plot = map_jobs_rent_income_22,
  device = "png",
  width = 80,
  height = 65,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

```