## Linking Workshop Exercise 3
##
## Reads an IPUMS CPS extract, tallies records per person identifier
## (CPSIDP), and links December 2014 records to March 2015 ASEC records
## so that MISH can be compared across the two sides of the link.

# Packages ----
# Note: tidyverse attaches dplyr and tidyr itself, so the two explicit
# loads are redundant but harmless; they are kept as in the original.
library(ipumsr)
library(dplyr)
library(tidyr)
library(tidyverse)

# Load the extract ----
# The DDI (.xml) describes the data file; read_ipums_micro() uses it to
# locate and parse the microdata.
ddi <- read_ipums_ddi("cps_00052.xml")
data <- read_ipums_micro(ddi)

# Check for duplicate persons (based on CPSIDP) ----
# One row per CPSIDP with `n` = number of records carrying that
# identifier across the whole extract.
count_duplicates <- data %>%
  count(CPSIDP)

# MISH values for December records that link ----
# First link the datasets to identify MISH for each record in the
# linked dataset.

# December 2014 dataset
dec_2014 <- data %>%
  filter(YEAR == 2014, MONTH == 12) %>%
  select(CPSIDP, AGE, SEX, RACE, MISH)

# March 2015 ASEC dataset
# NOTE(review): ASECFLAG == 1 is taken to mark ASEC supplement records,
# per the IPUMS coding -- confirm against the extract's codebook.
asec_2015 <- data %>%
  filter(YEAR == 2015, MONTH == 3, ASECFLAG == 1) %>%
  select(CPSIDP, AGE, SEX, RACE, MISH)

# Merge data frames ----
# merge() defaults to an inner join, so only CPSIDP values present in
# both months are kept. The shared non-key columns (AGE, SEX, RACE, MISH)
# receive the default ".x" (December) and ".y" (ASEC) suffixes.
linked_df <- merge(dec_2014, asec_2015, by = "CPSIDP")