## Linking Workshop Exercise 1

library(ipumsr)
library(dplyr)
library(tidyr)
library(tidyverse)

# Exercise 1, Part 1 -------------------------------------------------------
# Two month link

ddi_ex1p1 <- read_ipums_ddi("cps_00049.xml")
data_ex1p1 <- read_ipums_micro(ddi_ex1p1)

# Link individuals in August and September

# Create August data frame
august_df <- data_ex1p1 %>%
  filter(MONTH == 8) %>%
  select(CPSIDP, AGE, SEX, RACE, MISH)

# Create September data frame
september_df <- data_ex1p1 %>%
  filter(MONTH == 9) %>%
  select(CPSIDP, AGE, SEX, RACE, MISH)

# Merge data frames: keep only records whose CPSIDP appears in both months.
# Columns shared by both inputs get the default ".x" (August) and ".y"
# (September) suffixes.
linked_df_p1 <- inner_join(august_df, september_df, by = "CPSIDP")

# Cross-tab August MISH (rows) and September MISH (columns).
# count() returns an ungrouped result; names_sort = TRUE keeps the September
# MISH columns in sorted order.
MISH_crosstab <- linked_df_p1 %>%
  count(MISH.x, MISH.y) %>%
  pivot_wider(names_from = MISH.y, values_from = n, names_sort = TRUE)

# Exercise 1, Part 2 -------------------------------------------------------
# Full panel link

ddi_ex1p2 <- read_ipums_ddi("cps_00050.xml")

# DATE combines MONTH and YEAR into one label; remove = FALSE keeps the
# original MONTH and YEAR columns for filtering below.
data_ex1p2 <- read_ipums_micro(ddi_ex1p2) %>%
  unite("DATE", c(MONTH, YEAR), remove = FALSE)

# Link individuals in full panel (starting in August 2015)

# Return the rows of `data` for a single survey month, reduced to the linking
# key (CPSIDP) plus AGE, SEX, RACE and DATE.
#
# data:         data frame with YEAR, MONTH, CPSIDP, AGE, SEX, RACE and DATE.
# target_year:  value of YEAR to keep.
# target_month: value of MONTH to keep.
extract_month <- function(data, target_year, target_month) {
  data %>%
    filter(YEAR == target_year & MONTH == target_month) %>%
    select(CPSIDP, AGE, SEX, RACE, DATE)
}

# Create one data frame per month, August-November of 2015 and 2016
august2015_df <- extract_month(data_ex1p2, 2015, 8)
september2015_df <- extract_month(data_ex1p2, 2015, 9)
october2015_df <- extract_month(data_ex1p2, 2015, 10)
november2015_df <- extract_month(data_ex1p2, 2015, 11)
august2016_df <- extract_month(data_ex1p2, 2016, 8)
september2016_df <- extract_month(data_ex1p2, 2016, 9)
october2016_df <- extract_month(data_ex1p2, 2016, 10)
november2016_df <- extract_month(data_ex1p2, 2016, 11)

# Merge data frames: successive inner joins keep only the CPSIDP values that
# appear in every one of the eight months. The non-key columns have the same
# names in every input, so the joins rename the clashing ones with suffixes.
df_list <- list(
  august2015_df, september2015_df, october2015_df, november2015_df,
  august2016_df, september2016_df, october2016_df, november2016_df
)

linked_df_p2 <- df_list %>%
  reduce(inner_join, by = "CPSIDP")

# How many people begin the rotation pattern in August 2015? (15,891)
data_ex1p2 %>%
  filter(YEAR == 2015 & MONTH == 8 & MISH == 1) %>%
  summarize(n = n())