#delimit;
clear;
set more off;
set matsize 2000;

/* Close any log left open by an earlier run that stopped before reaching
   the log close at the end of this file (for example when a command in
   the file failed). Without this, the log using command below stops with
   an error because a log is already open. The capture prefix makes this
   a no-op when no log is open. */
capture log close;

/* Date stamp built from c(current_date), used in the log file name so
   that runs on different days write to different log files. */
local logdate = string( d(`c(current_date)'), "%dN.D.Y" );
log using logs/generate_weights_for_linked_cps_data_`logdate'.log, t replace;

/*******************************************************************************
Created: August 28, 2018

This do-file is an example of how to create weights for linked IPUMS CPS data. It
assumes that you have separate extracts for the data you are linking together. 
The example we present is for January 2012 data linked to February 2012. 

We use the ipfraking package in Stata (see step 4) written by Stanislav Kolenikov. 
This package is documented in The Stata Journal volume 14, issue 1. **ipfraking** 
must be installed for this program to run properly. 

The program assumes the following directory structure
/directory where you do work
	generate_weights_for_linked_cps_data.do
	/extracts
	/generated
	/includes
	/logs

The program creates a set of population counts for the individuals in January who 
are eligible to link to February (PRE_IPFRAKING_TOTAL) and then adjusts the 
weights for those who actually link to February so that the population count 
of the linked sample (POST_IPFRAKING_TOTAL) is equivalent to the population 
count of those who were eligible to link. 

The final weight produced in this file, FINAL_RAKED, is within one digit of the 
IPUMS CPS weight LNKFW1MWT in January of 2012.

Updated: December 16, 2025
-add "version 15" in front of svy commands to force old _b results 
********************************************************************************/

/**************************************************************
1. Run IPUMS-CPS extracts. Extract 1 contains
January 2012. Extract 2 contains February 2012.

Variables included are:
	CPSIDP
	YEAR
	MONTH
	WTFINL
	MISH
	AGE
	SEX
	RACE
	HISPAN
	STATEFIP
	LNKFW1MWT (needed for the comparison with FINAL_RAKED in step 4)
***************************************************************/

/* Step 1 driver: run each extract do-file from inside the extracts folder,
   sort the resulting data on cpsidp, and save it in generated/ under the
   extract number. */
display "...running and saving IPUMS CPS extracts";

/* folder holding the extract do-files, and the prefix of their file names */
local extracts extracts;
local name cps;

/* extract numbers, looked up in the loop below as <month>_<year> */
local jan_2012 00196;
local feb_2012 00151;

/* numeric value compared against the month variable later in this file */
local jan 1;
local feb 2;

cd `extracts';

foreach yr in 2012 {;
	foreach mo in jan feb {;
		/* resolve e.g. jan_2012 to the extract number stored above */
		local extract_id ``mo'_`yr'';
		quietly do `name'_`extract_id';
		sort cpsidp;
		save ../generated/`extract_id'.dta, replace;
	}; //end mo
}; //end yr

/* return to the working directory that contains this do-file */
cd ..;


/**************************************************************
2. Calculating population totals for three combinations of 
variables in the January 2012 data.

	hispanic, age, sex totals 	(HAS_GROUP)
	race, age, sex totals 		(RAS_GROUP)
	state, age, sex totals 		(GAS_GROUP)
***************************************************************/

/* Main loop covering steps 2 to 4. For each base month it
     - builds the three control-total matrices used for raking,
     - merges the base month with the following month on cpsidp,
     - rakes the base-month weight of the linked records to those totals,
     - saves the result as generated/raked_<year>.dta.
   Variables renamed from the base month carry the month abbreviation as a
   suffix (mis<mo>, wtfinl<mo>), which gives misjan and wtfinljan for jan.
   The following month is still hard-coded to feb in the merge step. */
foreach yr in 2012 {;
foreach mo in jan {;
	
	clear;
	use generated/``mo'_`yr'';

	svyset [pw = wtfinl];

	/* constant used so that svy: total returns weighted population counts */
	gen _one = 1;

	disp "...combining categories of age, sex, race, hispan, state from which we will generate population totals";
	include includes/create_grouped_vars.doi;

	foreach x in has_group ras_group gas_group {;
		disp "...summing weights for raking groups based on individuals who are eligible to be in the next month";
		/* records with mish 4 or 8 are left out of the control totals */
		version 15: svy: total _one if year == `yr' & month == ``mo'' & mish!=4 & mish!=8, over(`x', nolabel);
			/* keep the totals as a matrix whose row name is the grouping variable */
			matrix rake_`x' = e(b);
			matrix rowname rake_`x' = `x';
			matrix list rake_`x';
			};

	/*rename variables from the base month so we can preserve both base-month and following-month values when we merge files*/
	rename mish mis`mo';
	rename wtfinl wtfinl`mo';

	save generated/`mo'_`yr'_recode, replace;


/**************************************************************
3. Identify observations that are in January or in both January
and February.
***************************************************************/

	disp "...merge January with February using CPSIDP";
	merge 1:1 cpsidp using generated/`feb_`yr''.dta;
	/* drop records that appear only in the following month */
	keep if _merge==1 | _merge==3;

	disp "...flag cases that merge properly between January and February";
	/* merged_sample is 1 for linked, eligible records and missing otherwise */
	gen merged_sample=1 if _merge==3 & mis`mo'!=4 & mis`mo'!=8;

	save generated/merge_`yr', replace;


/**************************************************************
4. Perform raking, generate a pre and post raking population 
count, verify sum of pre and post weights are identical (or 
very close).

package must be installed:

net install ipfraking,from(http://staskolenikov.net/stata)
***************************************************************/

	disp "...generate population count of those in January who are eligible to link to February";
	svyset [pw=wtfinl`mo'];
	/* the full variable name is used here so the command does not depend on
	   variable abbreviation being switched on */
	version 15: svy: total _one if year == `yr' & month == ``mo'' & mis`mo'!=4 & mis`mo'!=8;
	gen double pre_ipfraking_total=`e(N_pop)';


	disp "...perform raking using groups defined in create_grouped_vars.doi";
	local threeway rake_ras_group rake_has_group rake_gas_group;
	ipfraking if merged_sample==1 [pw = wtfinl`mo'], ctotal(`threeway') gen(rakeweight_threeway);


	disp "...generate population count of those in January who actually link to February";
	svyset [pw=rakeweight_threeway];
	version 15: svy: total _one if merged_sample==1;
	gen double post_ipfraking_total=`e(N_pop)';



	/*check pre-raked weight sum and post-raked weight sum*/
	format pre_ipfraking_total %15.0f;
	format post_ipfraking_total %15.0f;

	tab pre_ipfraking_total;
	tab post_ipfraking_total;

	/* store the final weight as a double, like the totals above, so the
	   raked value is not truncated to float precision */
	gen double final_raked=rakeweight_threeway;
	/* records outside the linked sample get a weight of zero instead of missing */
	recode final_raked . = 0 if merged_sample!= 1;

	disp "...checking that raked weight is non-zero if wtfinl in January is non-zero and record links to February";
	assert final_raked>0 if wtfinl`mo'>0 & merged_sample==1;

	/* difference between the rounded raked weight and the rounded lnkfw1mwt */
	gen diff=round(final_raked)-round(lnkfw1mwt);
	tab diff;

	disp "...saving the raked file";
	save generated/raked_`yr'.dta, replace;

	}; //end mo
	}; //end yr


/* close the log opened at the top of this file */
log close;
