program define ppml_panel_sg, eclass *! PPML (Panel) Structural Gravity Estimation, by Tom Zylkin *! Department of Economics, University of Richmond, Richmond, Virginia USA. *! Options for multi-way clustering are thanks to Joschka Wanner and Mario Larch. *! This version: v1.11, July 2018 *! *! Suggested citation: Larch, Wanner, Yotov, & Zylkin (2017): *! "The Currency Union Effect: A PPML Re-assessment with High-dimensional Fixed Effects" *! Drexel University School of Economics Working Paper 2017-07 // requires: reghdfe (if "olsguess" option used), hdfe // v1.11 - added "dropsingletons" option // v1.1 - now uses hdfe for faster computation of standard errors // - now supports multi-way clustering (with thanks to Joschka Wanner & Mario Larch) // - fixed issue with stored pair FEs varying over time in unbalanced panels // - industry-level regressions now default to clustering on "pair", not "pair-industry" // - New / updated options: // * "multiway": automatically cluster by // * "cluster_id" now allows multiple cluster vars for multi-way clustering // * "nocheck": skip "check for existence" step // * "strict": use stricter condition when determining which variables to drop when checking for existence; mimics "strict" from ppml. // * "keep": mimic ppml option "keep". // * "noaccel": turn off acceleration during iteration loops // v1.08 - fixed scaling issue with S, M, and D values // - added "predict" option // v1.07 - fixed selectidx13() bug; fixed year bug // - added checks for if hdfe, reghdfe are installed // - added support for e(sample), e(r2) // v1.06 - fixed selectindex() backwards compatibility // v1.05 - added check for existence and check to make sure user-provided IDs uniquely describe data. // v1.04: - switched to IRLS, made year optional // v1.03: - fixed display of #obs, loglikelihood // v1.02: - updated collinearity warning to address cases where standard errors are computed to be missing. // - also fixed "variable trade not found" bug. // v1.01: - fixed 3 typos and addressed bug with industry-by-pair time trends tempvar exclude X_ij Y_w y_i e_j D tt phi fta_effect OMR IMR /// ind_id yr_id exp_id imp_id S M panel_id /// X_ij_hat temp time lhs exp_time imp_time esample tempname beta v1 v1_help n_iter N_obs N_dropped center which id_flag vers ok_flag genfes scalar `vers' = c(version) version 11 syntax varlist [if] [in], /// EXporter(varname) /// IMporter(varname) /// [ /// Year(varname numeric) /// INDustry(varname) /// guessD(varname numeric) /// These are useful for nested loops, bootstraps, etc. guessO(varname numeric) /// Defaults: D = O = I = 1; TT = 0. guessI(varname numeric) /// guessS(varname numeric) /// guessM(varname numeric) /// guessTT(varname numeric) /// guessB(string) /// genD(name) /// genTT(name) /// genO(name) /// genI(name) /// genS(name) /// genM(name) /// PREDict(name) /// NOsterr /// Do not compute standard errors RObust /// Options for robust SEs and clustered SEs; default is cluster(pair) MULTiway /// automatic multi-way clustering, default is cluster(exporter importer year) CLUSter_id(varlist) /// supports multi-way clustering NOPAIR /// These options will change the fixed effects structure. "Nopair" = no pair fixed effects SYMmetric /// Symmetric pair fixed effects (ignored if nopair) TREND /// use pair-specific linear time trends (ignored if nopair) NOCHeck /// will skip the check for existence DROPSINGletons /// Will drop singletons beforehand STRICT /// will mimic ppml "strict" option KEEP /// will mimic ppml "keep" option OLSguess /// use reghfe to guess betas upfront OFFset(name) /// a user-specified offset, for if, e.g., you wish to impose constraints on coefficients NOACCel /// Turn off acceleration VERBose(int 0) /// TOLerance(real 1e-12) /// MAXiter(int 10000) /// ] ** Ex: ppml_panel_sg trade fta1 fta2 ..., exp(exp_id) imp(imp_id) y(year_id) ind(industry_id) ** guessO() guessO() guessD() guessB() ** genD() genO() genI() genS(ex by yr FEs) genM(im by yr FEs) /** 0. parse syntax, create temp vars **/ cap which hdfe if _rc == 111 { di in red "You will need to install -hdfe- in order to use this command." di di in red "To install, type -ssc install hdfe-". exit 111 } tokenize `varlist' local trade `1' macro shift local policyvars `*' unab policyvars: `*' if "`industry'" == "" { tempvar industry gen `industry' = 1 } else { local dash_ind = "-`industry'" } if "`year'" == "" { if "`nopair'" == "" { di in red "Error: Year ID required if -nopair- option not specified" exit 111 } else { tempvar year gen `year' =1 } } local nvars : word count `policyvars' *di `nvars' local rest = "`policyvars'" forvalues n = 1(1)`nvars' { gettoken policyvar_`n' rest: rest } /** I. Initialization: flags missing values, checks user inputs for errors, and sets up group IDs, sets initial values for OMR, IMR, D, beta **/ di "Initializing..." * exclusion catcher qui gen `exclude' = missing(`trade') qui sum `trade' if `r(min)' < 0 { di as error "Error: Dependent variable cannot contain negative values" exit 111 } qui forvalues n = 1(1)`nvars' { qui replace `exclude' = 1 if missing(`policyvar_`n'') } qui marksample touse qui replace `touse' = 0 if `exclude' *marks those specified by [if] or [in] as 1, zero otherwise if "`offset'" == "" { //user-defined offset to be passed as an option to sg_ppml_iter algorithm. tempvar offset cap gen `offset' = 0 if `touse' } else { qui replace `touse' = 0 if missing(`offset') } qui gen `X_ij' = `trade' * (1-`exclude') * (`touse') // set up IDs qui egen `ind_id' = group(`industry') if `touse' qui egen `yr_id' = group(`year') if `touse' qui egen `exp_id' = group(`exporter') if `touse' qui egen `imp_id' = group(`importer') if `touse' qui sum `year' if `touse' qui gen `time' = `year' - `r(min)' if `touse' qui replace `time' = 0 if "`trend'" == "" if (r(sd) == 0) { if "`nopair'" == "" { di in red "Error: More than 1 time period required if -nopair- option not specified" exit 111 } local one_yr_only = "one year only" } else { local dash_year = "-`year'" } local id_flag = 0 local check_ids = "`exp_id' `imp_id' `ind_id' `yr_id'" // Check to make sure each obs. is uniquely ID'd by a single mata: id_check("`check_ids'", "`touse'") // origin, destination, industry, and time if `id_flag' != 0 { di in red "Error: the set of origin, destination, industry, and time IDs do not uniquely describe the data" di in red "If this is not a mistake, try collapsing the data first using collapse (sum)" exit 111 } mata: country_ids("`exporter'", "`importer'", "`exp_id'", "`imp_id'", "`vers'","`touse'") //creates unique country IDs, for cases where the set of exporters is not the //same as the set of importers // II. Set up fixed effects structure and check for collinearity and possible non-existence of estimates if "`nocheck'" == "" { di "Checking for possible non-existence issues..." } qui egen `exp_time' = group(`yr_id' `ind_id' `exp_id') if `touse' qui egen `imp_time' = group(`yr_id' `ind_id' `imp_id') if `touse' if "`nopair'" != "" { scalar `which' = 2 if "`symmetric'" != "" { di in red "Option -symmetric- ignored, since -nopair- enabled" } if "`trend'" != "" { di in red "Option -trend' ignored, since -nopair- enabled" } qui gen `panel_id' = 1 if `touse' local _fes = "`exp_time' `imp_time'" } else if "`symmetric'" != "" { tempvar pair_id symm_id gen `pair_id' = `exporter' + `importer' qui replace `pair_id' = `importer' + `exporter' if (`importer' < `exporter') qui egen `panel_id' = group(`industry' `exporter' `importer') if `touse' qui egen `symm_id' = group(`industry' `pair_id') if `touse' if "`trend'" != "" { scalar `which' = 4 local _fes = "`exp_time' `imp_time' `symm_id'##c.`time'" } else { scalar `which' = 1 local _fes = "`exp_time' `imp_time' `symm_id'" } } else { qui egen `panel_id' = group(`industry' `exporter' `importer') if `touse' if "`trend'" != "" { scalar `which' = 3 local _fes = "`exp_time' `imp_time' `panel_id'##c.`time'" } else { scalar `which' = 0 local _fes = "`exp_time' `imp_time' `panel_id'" } } if "`dropsingletons'" != "" { * drop singletons tempvar ln_LHS qui gen `ln_LHS' = ln(`X_ij'+1) - `offset' qui reghdfe `ln_LHS' `policyvars' if `touse', absorb(`_fes') // See Correia (2015): Singletons, Cluster-robust Standard Errors & Fixed Effects: A Bad Mix qui sum `X_ij' if e(sample)==0 &`touse' di "Number of singleton observations dropped = " as result r(N) qui replace `exclude'=1 if e(sample)==0 qui replace `touse' = 0 if `exclude' qui replace `X_ij' = `trade' * (1-`exclude') * (`touse') } //EnsureExist borrows concepts from "RemoveCollinear" by Sergio Correia, originally from -reghdfe- if "`nocheck'" == "" { EnsureExist if `touse', dep(`X_ij') indep(`policyvars') off(`offset') fes(`_fes') flag(`ok_flag') `strict' `keep' local policyvars `r(okvars)' local nvars : word count `policyvars' local rest = "`policyvars'" if "`policyvars'" == "" { di di in red "Error: all main covariates appear to be collinear with the implied set of fixed effects" exit 111 } if ("`keep'"== "") { qui cap replace `touse' = `ok_flag' } forvalues n = 1(1)`nvars' { gettoken policyvar_`n' rest: rest } } qui sum `X_ij' scalar `center' = `r(mean)' qui replace `X_ij' = `X_ij' / `center' //otherwise, the algorithm for computing s.e.s will be sensitive to the scale of the dep. var. cap gen `Y_w' = . cap gen `y_i' = . cap gen `e_j' = . // The default is that initial `trade' is "frictionless": D=1, ln_phi = 0 ==> IMR = OMR = 1 // The option "olsguess" uses -reghdfe- to initialize betas based on OLS. if "`olsguess'"!="" { cap which reghdfe if _rc == 111 { di in red "You will need to install -reghdfe- in order to use the -olsguess- option." di di in red "To install, type -ssc install reghdfe-". exit 111 } tempvar ln_LHS qui gen `ln_LHS' = ln(`X_ij') - `offset' qui reghdfe `ln_LHS' `policyvars' if `touse', absorb(`_fes') qui gen `OMR' = 1 if `touse' qui gen `IMR' = 1 if `touse' qui gen `D' = 1 if `touse' qui gen `tt' = 0 if `touse' matrix `beta' = J(1, `nvars',0) forvalues n = 1(1)`nvars' { matrix `beta'[1, `n'] = _b[`policyvar_`n''] } matrix colnames `beta' = `policyvars' } else { //else: either use user-specified guesses //or: default guess is "frictionless trade: D=1, ln_phi = 0 ==> IMR = OMR = 1 qui gen `D' = 1 if `touse' qui gen `tt' = 0 if `touse' qui gen `OMR' = 1 if `touse' qui gen `IMR' = 1 if `touse' if `which' != 2 { qui cap replace `D' = `guessD' if "`guessD'" != "" & `touse' if (`which' == 3 | `which' == 4) { qui cap replace `tt' = `guessTT' if "`guessTT'" != "" & `touse' } } if "`guessO'" != "" { qui cap replace `OMR' = `guessO' if `touse' } else if "`guessS'" != "" { qui cap replace `OMR' = -`guessS' if `touse' // use negative sign to flag as S, not OMR } if "`guessI'" != "" { qui cap replace `IMR' = `guessI' if `touse' } else if "`guessM'" != "" { qui cap replace `IMR' = -`guessM' if `touse' // use negative sign to flag as M, not IMR } matrix `beta' = J(1, `nvars',0) if "`guessB'" != "" { local nguesses = min(`nvars',colsof(`guessB')) qui forvalues n = 1(1)`nguesses' { cap matrix `beta'[1, `n'] = `guessB'[1,`n'] * (rowsof(`guessB') != .) } } matrix colnames `beta' = `policyvars' } scalar `genfes' = 1 if "`genD'`genTT'`genO'`genO'`genI'`genS'`genM'" != "" { scalar `genfes' = 1 // if user wants to store fes } /** III. Iterate on fixed effects using structural gravity and compute Poisson estimates **/ di "Iterating..." local mata_varlist_iter = "`ind_id' `yr_id' `exp_id' `imp_id' `X_ij' `D' `OMR' `IMR' `tt' `time' `offset'" //"offset" is the user-specified offset // compute beta iteratively: mata: sg_ppml_iter("`policyvars'", "`mata_varlist_iter'", "`beta'", /// "`Y_w'", "`y_i'", "`e_j'", "`which'", /// "`n_iter'", "`N_obs'", "`N_dropped'", "`tolerance'", "`maxiter'", /// "`genfes'", "`noaccel'", "`verbose'", "`vers'", "`touse'" ) qui gen `temp' = log((`y_i' * `e_j' * `Y_w') * (`D' / (`OMR' * `IMR')))+(`tt'*`time')+`offset'+log(`center') matrix colnames `beta' = `policyvars' qui gen `lhs' = `X_ij' * `center' qui poisson `lhs' `policyvars' if `touse', offset(`temp') from(`beta') noconst // computes e() statistics qui drop `temp' `lhs' matrix colnames `beta' = `policyvars' matrix rownames `beta' = coeff estimates table, keep(`policyvars') //note that the constant is absorbed by `temp' di "iterations: " `n_iter' di "tolerance: " `tolerance' if `n_iter' == `maxiter' { di in red "Max number of iterations reached before estimates converged. Consider adjusting the maxiter() option" di } qui gen `esample' = e(sample) local ll =e(ll) /** IV. Compute standard errors (if option enabled) and set up for posting. **/ if "`nosterr'"=="" { di as text "Computing standard errors" if "`multiway'" != "" & `which' != 2 { local display_ses = "Multi-way clustered SEs, clustered by `exporter', `importer', `year'" } else if "`multiway'" != "" { if "`one_yr_only'"=="" { local display_ses = "Multi-way clustered SEs, clustered by `exporter', `importer', `year'" } else { local display_ses = "Multi-way clustered SEs, clustered by `exporter', `importer'" } } else if "`cluster_id'" != "" { // check cluster vars input by user for errors local ncvars : word count `cluster_id' if (`ncvars' > 4) { local ncvars = 4 tokenize `cluster_id' local cluster_id `1' `2' `3' `4' di di in red "Max allowed cluster vars is 4. Only <`cluster_id'> will be used." } local ncvars : word count `cluster_id' tokenize `cluster_id' local cluster_id = "" forvalues i = 1(1)`ncvars'{ qui sum ``i'' if `touse' if r(sd) != 0 { local cluster_id `cluster_id' ``i'' } else { di in red "Cluster var ``i'' does not vary over the sample and will be ignored" } } if "`cluster_id'" != "" { local display_ses = "Clustered standard errors, clustered by `cluster_id' (user-specified)" } else if `which'==2 { local display_ses = "Robust standard errors (default)" } else { local display_ses = "Clustered standard errors, clustered by `exporter'-`importer' (default)" } } else if "`robust'" != ""{ local display_ses = "Robust standard errors" } else if `which'==2 { local display_ses = "Robust standard errors (default)" } else { local display_ses = "Clustered standard errors, clustered by `exporter'-`importer' (default)" } * update "phi" one last time qui gen `fta_effect' = 0 forvalues n = 1(1)`nvars' { matrix `beta'[1, `n'] = _b[`policyvar_`n''] qui replace `fta_effect' = `fta_effect' + `beta'[1, `n'] * `policyvar_`n'' } qui gen `phi' = `D' * exp(`tt'*`time') * exp(`fta_effect') * exp(`offset') qui replace `phi' = 0 if missing(`phi') | `exclude' | !`touse' *** 1. fit X_ij directly using structural gravity qui gen `X_ij_hat' = (`y_i' * `e_j' * `Y_w') * (`phi' / (`OMR' * `IMR')) qui gen `temp'=sqrt(`X_ij_hat') *** 2. "expurgate" the fixed effects from each policyvar: cap drop _s_* cap drop _r_* qui hdfe `policyvars' if `touse' [pw=`X_ij_hat'], absorb(`_fes') gen(_r_) keepsingletons foreach var of varlist `policyvars' { qui replace _r_`var' = _r_`var' * sqrt(`X_ij_hat') qui replace `touse' = 0 if missing(_r_`var') // What you are getting out of this is the residual associated with each policy var after netting out the FEs via weighted linear regression. // You weight by each residual by the square root of the conditional mean because this is the weight that is implicitly // used by the PPML estimator. See: Figuereido, Guimaraes, and Woodward JUE 2015. } *** 3. Regress the original dep var on resulting weighted residuals of the policy vars after "expurgating" the influence of the fixed effects. qui _regress `X_ij' _r_* if `touse', nocons // The VCV of the betas from this regression is proportional to the VCV you would have obtained from // a Poisson regression with the fixed effects included. // This is an application of the Frisch-Waugh-Lovell theorem. See: Figueiredo, Guimaeraes, & Woodward JUE 2015 local s2=e(rmse)*e(rmse) matrix `v1'=e(V)/`s2' local N=e(N) *** 4. Adjust standard errors for clustering using _robust. Default is clustered SEs, clustered by country-pair. *** 4a. Compute standard errors // I would like to thank Joschka Wanner and Mario Larch for contributing the majority of the code used for the // case of "multi-way" clustering. For a reference on multi-way clustering, see Cameron, Gelbach, and Miller JBES 2011. qui replace `temp'=(`X_ij'-`temp'*`temp')/`temp' // multiple use of "temp" may be confusing. "temp" here initially is sqrt(Xij_hat), // then becomes the reidual divided by sqrt(Xij_hat). if "`multiway'" != "" { // apply default multi-way clustering (i.e., exporter, importer, year) tempname v_exp v_imp v_yr v_pair v_expyr v_impyr v_expimpyr expimpyr if ("`dash_ind'" != "") { drop `exp_time' drop `imp_time' drop `panel_id' qui egen `exp_time' = group(`yr_id' `exp_id') if `touse' qui egen `imp_time' = group(`yr_id' `imp_id') if `touse' qui egen `panel_id' = group(`exp_id' `imp_id') if `touse' } if ("`one_yr_only'" == "") { matrix `v_exp' = `v1' _robust `temp' if `touse', v(`v_exp') cluster(`exp_id') matrix `v_imp' =`v1' _robust `temp' if `touse', v(`v_imp') cluster(`imp_id') matrix `v_yr'=`v1' _robust `temp' if `touse', v(`v_yr') cluster(`yr_id') matrix `v_expyr'=`v1' _robust `temp' if `touse', v(`v_expyr') cluster(`exp_time') matrix `v_impyr'=`v1' _robust `temp' if `touse', v(`v_impyr') cluster(`imp_time') matrix `v_pair'=`v1' _robust `temp' if `touse', v(`v_pair') cluster(`panel_id') matrix `v_expimpyr'=`v1' qui egen `expimpyr'=group(`exp_id' `imp_id' `yr_id') if `touse' _robust `temp' if `touse', v(`v_expimpyr') cluster(`expimpyr') matrix `v1'=`v_exp'+`v_imp'+`v_yr'-`v_expyr'-`v_impyr'-`v_pair'+`v_expimpyr' } else { matrix `v_exp' = `v1' _robust `temp' if `touse', v(`v_exp') cluster(`exp_id') matrix `v_imp' =`v1' _robust `temp' if `touse', v(`v_imp') cluster(`imp_id') matrix `v_pair'=`v1' _robust `temp' if `touse', v(`v_pair') cluster(`panel_id') matrix `v1'=`v_exp'+`v_imp'-`v_pair' } } else if "`cluster_id'" != "" { // "cluster_id" is a varname or varlist input by user. THe max # of vars allowed for multi-way clustering is 4. local ncvars : word count `cluster_id' local rest = "`cluster_id'" forvalues n = 1(1)`ncvars' { gettoken clusvar_`n' rest: rest } if (`ncvars' == 1) { _robust `temp' if `touse', v(`v1') cluster(`clusvar_1') } else if (`ncvars' == 2) { forvalues i = 1(1)`ncvars'{ tempname v1_`i' id`i' qui egen `id`i'' = group(`clusvar_`i'') if `touse' matrix `v1_`i'' = `v1' _robust `temp' if `touse', v(`v1_`i'') cluster(`id`i'') } tempname v1_12 id12 qui egen `id12' = group(`clusvar_1' `clusvar_2') if `touse' matrix `v1_12' = `v1' _robust `temp' if `touse', v(`v1_12') cluster(`id12') matrix `v1' = `v1_1' + `v1_2' - `v1_12' } else if (`ncvars' == 3) { forvalues i = 1(1)`ncvars'{ tempname v1_`i' id`i' qui egen `id`i'' = group(`clusvar_`i'') if `touse' matrix `v1_`i'' = `v1' _robust `temp' if `touse', v(`v1_`i'') cluster(`id`i'') local ip1 = `i'+1 forvalues j = `ip1'(1)`ncvars'{ tempname v1_`i'`j' id`i'`j' qui egen `id`i'`j'' = group(`clusvar_`i'' `clusvar_`j'') if `touse' matrix `v1_`i'`j'' = `v1' _robust `temp' if `touse', v(`v1_`i'`j'') cluster(`id`i'`j'') } } tempname v1_123 id123 qui egen `id123' = group(`clusvar_1' `clusvar_2' `clusvar_3') if `touse' matrix `v1_123' = `v1' _robust `temp' if `touse', v(`v1_123') cluster(`id123') matrix `v1' = `v1_1' + `v1_2' + `v1_3' - `v1_12' - `v1_13' - `v1_23' + `v1_123' } else if (`ncvars' == 4) { forvalues i = 1(1)`ncvars'{ tempname v1_`i' id`i' qui egen `id`i'' = group(`clusvar_`i'') if `touse' matrix `v1_`i'' = `v1' _robust `temp' if `touse', v(`v1_`i'') cluster(`id`i'') local ip1 = `i'+1 forvalues j = `ip1'(1)`ncvars'{ tempname v1_`i'`j' id`i'`j' qui egen `id`i'`j'' = group(`clusvar_`i'' `clusvar_`j'') if `touse' matrix `v1_`i'`j'' = `v1' _robust `temp' if `touse', v(`v1_`i'`j'') cluster(`id`i'`j'') local jp1 = `j'+1 forvalues k = `jp1'(1)`ncvars'{ tempname v1_`i'`j'`k' id`i'`j'`k' qui egen `id`i'`j'`k'' = group(`clusvar_`i'' `clusvar_`j'' `clusvar_`k'') if `touse' matrix `v1_`i'`j'`k'' = `v1' _robust `temp' if `touse', v(`v1_`i'`j'`k'') cluster(`id`i'`j'`k'') } } } matrix `v1' = `v1_1'+`v1_2'+`v1_3'+`v1_4'-`v1_12'-`v1_13'-`v1_14'-`v1_23'-`v1_24'-`v1_34'+`v1_123'+`v1_124'+`v1_234' } } else if "`robust'"!="" { _robust `temp' if `touse', v(`v1') } else if `which'==2 { // these are the defaults if user does not specify any particular options. _robust `temp' if `touse', v(`v1') } else { if ("`dash_ind'" != "") { drop `panel_id' egen `panel_id' = group(`exp_id' `imp_id') } _robust `temp' if `touse', v(`v1') cluster(`panel_id') } *** 4b. Replace eigenvalues with zeros for cases where multiple clustering leads to negative variances // References: Politis, Econometric Theory 2011; Cameron, Gelbach and Miller JBES 2011 if "`cluster_id'" != "" | "`multiway'" != "" { tempname Eigenvec lambda lambdaN lambdap lambdapdiag v1N detv1 local v1N = colsof(`v1') scalar Indnegvar = 0 forvalues l=1(1)`v1N' { if `v1'[`l',`l']<0 { scalar Indnegvar = 1 } } matrix `detv1'=det(`v1') scalar Inddet = 0 if `detv1'[1,1]<1e-10 { scalar Inddet = 1 } if (Indnegvar==1 | Inddet==1) { matrix symeigen `Eigenvec' `lambda' = `v1' local lambdaN = colsof(`lambda') matrix `lambdap' = `lambda' forvalues l=1(1)`lambdaN' { if `lambda'[1,`l']>0 { matrix `lambdap'[1,`l'] = `lambda'[1,`l'] } else { matrix `lambdap'[1,`l'] = 0 } } matrix `lambdapdiag' = diag(`lambdap') matrix `v1'=`Eigenvec'*`lambdapdiag'*(`Eigenvec')' } } *** 5. Post estimation results to Stata ereturn clear matrix colnames `beta' = `policyvars' matrix colnames `beta' = `trade': matrix rownames `v1' = `policyvars' matrix rownames `v1' = `trade': matrix colnames `v1' = `policyvars' matrix colnames `v1' = `trade': //qui sum(`X_ij') //local N = r(N) ereturn post `beta' `v1', depname(`trade') obs(`N') esample(`esample') qui corr `X_ij_hat' `X_ij' if `touse' ereturn scalar r2 = r(rho)^2 cap drop _s_* cap drop _r_* //make these temp vars } else { di "You have opted not to compute standard errors." if "`predict'" != "" { qui gen `fta_effect' = 0 forvalues n = 1(1)`nvars' { matrix `beta'[1, `n'] = _b[`policyvar_`n''] qui replace `fta_effect' = `fta_effect' + `beta'[1, `n'] * `policyvar_`n'' } qui gen `phi' = `D' * exp(`tt'*`time') * exp(`fta_effect') * exp(`offset') qui replace `phi' = 0 if missing(`phi') | `exclude' | !`touse' qui gen `X_ij_hat' = (`y_i' * `e_j' * `Y_w') * (`phi' / (`OMR' * `IMR')) } ereturn clear local N=`N_obs' //qui sum(`X_ij') //local N = r(N) eret post `beta', depname(`trade') obs(`N') esample(`esample') } // store fixed effects in memory if requested by user cap drop `genO' cap drop `genI' cap drop `genD' cap drop `genTT' cap drop `genS' cap drop `genM' cap drop `predict' cap gen `genS' = `y_i' * sqrt(`Y_w'*`center') / `OMR' cap gen `genM' = `e_j' * sqrt(`Y_w'*`center') / `IMR' cap gen `predict' = `X_ij_hat' * `center' cap rename `OMR' `genO' cap rename `IMR' `genI' cap rename `D' `genD' cap rename `tt' `genTT' ereturn scalar ll=`ll' ereturn local cmdline "ppml_panel_sg `0'" ereturn local cmd "ppml_panel_sg" ereturn local crittype "log likelihood" // V. Display final regression table and notes Display if (`which' == 0 | `which' == 3){ local pair = "`exporter'-`importer'`dash_ind'" di "Fixed Effects included: `exporter'`dash_ind'`dash_year', `importer'`dash_ind'`dash_year', `pair'" if `which' == 3 { di "Also includes `pair' time trends" } } else if (`which' == 1 | `which' == 4){ local pair = "`exporter'-`importer'`dash_ind'" di "Fixed Effects included: `exporter'`dash_ind'`dash_year', `importer'`dash_ind'`dash_year', `pair' (symmetric)" if `which' == 4 { di "Also includes (symmetric) `pair' time trends" } } else if (`which' == 2) { di "Fixed Effects included: `exporter'`dash_ind'`dash_year', `importer'`dash_ind'`dash_year'" } if "`nosterr'"=="" { di "`display_ses'" if (`N_dropped' > 0) { local `N_dropped' = `N_dropped' di "``N_dropped'' obs. dropped because they belong to groups with all zeros or missing values" } di foreach var of varlist `policyvars' { if (abs(_b[`var']) / _se[`var'] < .001) | (abs(_b[`var']) / _se[`var'] == .) { di in red "`var' appears to be collinear with your set of fixed effects" di as text } } } else { if (`N_dropped' > 0) { local `N_dropped' = `N_dropped' di "``N_dropped'' obs. dropped because they belong to groups with all zeros or missing values" } di } end /***************************************************************************************/ /* ENSUREEXIST (adapted from "RemoveCollinear", by Sergio Correia. see: -reghdfe-) */ /***************************************************************************************/ // - In general, a covariate "x" should be dropped if, after netting out FEs and the other covariates, // there is no residual variation in x within the subsample where lhs > 0. // - Also serves as generalized check for collinearity of covariates // - Requires using -hdfe- first to partial out FEs from each covariate // - Reference: Santos Silva & Tenreyro EL 2010 "On the Existence of MLE Estimates for Poisson Regression" *EnsureExist if `touse', dep(`X_ij') indep(`policyvars') off(`offset') resid(`resid_') program define EnsureExist, rclass syntax [if] [in], /// DEPvar(varname numeric) /// INDEPvars(varlist numeric) /// OFFset_weight(string) /// FEs(string) /// flag(name) /// [ STRICT /// KEEP ] /// qui marksample touse tempname resid1 tempvar zeros qui hdfe `indepvars' if `touse'&`depvar'>0, absorb(`fes') gen(`resid1') qui _rmcoll `offset_weight' `resid1'* if `touse' & `depvar'>0, forcedrop // check simple collinearity across `policyvars' local okvars = r(varlist) if ("`okvars'"==".") local okvars local df_m : list sizeof okvars foreach var of local indepvars { local resid_var = "`resid1'`var'" local ok1 : list resid_var in okvars qui sum `resid1'`var' if `touse' & `depvar'>0 local _mean_r = r(mean) local ok2 = (`r(sd)'>1e-9) // residuals are net of FEs; ~0 residual variation implies // collinearity with one or more of the FEs. local ok = (`ok1'&`ok2') if (`ok' == 0) { qui sum `var' if `touse'&`depvar'==0 local min = r(min) local max = r(max) if (`min'<`_mean_r')&(`max'>`_mean_r'&"`strict'"=="") { local ok = 1 // this is an analogue of the "reasonable overlap" condition used in -ppml- which determines whether } // it is still possible to include x else { qui sum `var' if `touse' & `depvar'>0 local _mean = r(mean) if ("`keep'" == "" & (`min'>=`_mean')|(`max'<=`_mean')) { local drop = "drop" cap gen `zeros'=1 qui sum `var' if `touse', d local _mad=r(p50) qui inspect `var' if `touse' qui replace `zeros'=0 if (`var'!=`_mad')&(r(N_unique)==2)&(`touse') // Mark observations to drop (if "keep" not enabled) } } } local prefix = cond(`ok', "", "o.") local label : char `var'[name] if (!`ok') di in red "note: `var' omitted because of collinearity over lhs>0 (creates possible existence issue)" local varlist `varlist' `prefix'`var' if (`ok') local okvarlist `okvarlist' `var' } if ("`keep'" == "" & "`drop'" != "") { qui su `touse' if `touse', mean local _enne=r(sum) qui replace `touse'=0 if (`zeros'==0)&(`depvar'==0)&(`touse') qui su `touse' if `touse', mean local _enne = `_enne' - r(sum) if (`_enne' > 0) { di in red "note: `_enne' observations dropped because they are perfectly predicted by excluded regressors" di in red " (you may use the -keep- option if you would prefer to keep these observations)" } qui gen `flag' = `touse' } mata: st_local("vars", strtrim(stritrim( "`varlist'" )) ) mata: st_local("okvars", strtrim(stritrim( "`okvarlist'" )) ) return local vars "`vars'" return local okvars "`okvars'" return scalar df_m = `df_m' end /*************************************************************************/ /* DISPLAY (adopted from "Display", by Paulo Guimaraes. see: -poi2hdfe-) */ /*************************************************************************/ program define Display _coef_table_header, title( ******* PPML Panel Structural Gravity Estimation ********** ) _coef_table, level(95) end /*** mata code starts here ***/ /*************************************************************/ /* CHECK_ID (checks whether ID vars uniquely describe data) */ /*************************************************************/ // Sometimes users may make a mistake in providing duplicate observations for the same trade flow. // This will generate a error letting them know. If this is not done by accident, // an equivalent specification can be performed by collapsing the data first. mata: void id_check(string scalar idvars,| string scalar touse) { st_view(id_vars,.,tokens(idvars), touse) uniq_ids = uniqrows(id_vars) if (rows(id_vars) != rows(uniq_ids)) { st_local("id_flag", "1") } } /*******************************************************************/ /* COUNTRY_IDS (sets up unique numerical exporter and importer IDs) */ /*******************************************************************/ // This code only comes into play if the set of exporters differs from the set of // importers. mata: void country_ids(string scalar exp_name, string scalar imp_name, string scalar exp_id_var, string scalar imp_id_var, string scalar vers,| string scalar touse) { EXP_NAMES = st_sdata(.,tokens(exp_name), touse) IMP_NAMES = st_sdata(.,tokens(imp_name), touse) st_view(exp_id,.,tokens(exp_id_var), touse) st_view(imp_id,.,tokens(imp_id_var), touse) vers = st_numscalar(tokens(vers)) exp_uniq = uniqrows(EXP_NAMES) imp_uniq = uniqrows(IMP_NAMES) NN_ex = rows(exp_uniq) NN_im = rows(imp_uniq) NN_c = min((NN_ex, NN_im)) exp_id_uniq = (1..NN_ex)' imp_id_uniq = (1..NN_im)' //Constructing country ids requires tracking countries which only appear as //exporters, but not as importers, and vice versa. for (c=1; c<=NN_c-1; c++) { if (select(exp_uniq,exp_id_uniq:==c) != select(imp_uniq,imp_id_uniq:==c)) { if (select(exp_uniq,exp_id_uniq:==c) < select(imp_uniq,imp_id_uniq:==c)) { imp_id_uniq[selectidx(imp_id_uniq:>=c,vers), 1]=imp_id_uniq[selectidx(imp_id_uniq:>=c,vers), 1] :+1 imp_id[selectidx(imp_id :>= c,vers),.] = imp_id[selectidx(imp_id :>= c,vers),.] :+ 1 NN_im = NN_im + 1 } else { exp_id_uniq[selectidx(exp_id_uniq:>=c,vers), 1]=exp_id_uniq[selectidx(exp_id_uniq:>=c,vers), 1] :+1 exp_id[selectidx(exp_id :>= c,vers),1] = exp_id[selectidx(exp_id :>= c,vers),1] :+1 NN_ex = NN_ex + 1 } NN_c = min((NN_ex, NN_im)) } } /* uniq_names = uniqrows(EXP_NAMES \ IMP_NAMES) NN_c = rows(uniq_names) for (c=1; c<=NN_c; c++) { exp_id = (EXP_NAMES:!=uniq_names[c]) :* exp_id + (EXP_NAMES:==uniq_names[c]) :* c imp_id = (IMP_NAMES:!=uniq_names[c]) :* exp_id + (IMP_NAMES:==uniq_names[c]) :* c } */ // ^ a more concise way to do the same thing, but slower. } end /***************************************************/ /* SG_PPML_ITER (computes PPML estimates for beta) */ /***************************************************/ // This algorithm works by iterating on the system of multilateral resistances in each period, as well as the // pair fixed effects and time trend terms which apply across periods, computing a new estimate for // beta each time through the loop, until betas converge. mata: void sg_ppml_iter(string scalar pvars, string scalar mata_varlist_iter, string scalar b, string scalar Y_w, string scalar y_i, string scalar e_j, string scalar branch, string scalar n_iter, string scalar N_obs, string scalar N_dropped, string scalar tolerance, string scalar maxiter, string scalar keepfes, string scalar noacc, string scalar verbose,string scalar vers,| string scalar touse) { real matrix M, pv, X, phi, sumXhat, sumX real scalar NN_i, NN_y, NN_n real colvector check, P, Pi, y, e, Yw, Pi0, P0, Pi1, P1, D, tt which = st_numscalar(branch) // governs which specification to be used (e.g. symmetric vs. asymmetric pair FEs, time trends vs. not, etc) beta = st_matrix(b)' // guesses for betas (if specified) st_view(pv, ., tokens(pvars),touse) st_view(M, ., tokens(mata_varlist_iter),touse) ind_id = M[.,1] // industry id, year id, exporter and importer ids. yr_id = M[.,2] exp_id = M[.,3] imp_id = M[.,4] lhs = M[.,5] // dependent variable NN_i = max(ind_id) // total #s of included industries, years, and countries, for use in loops NN_y = max(yr_id) NN_n = max((max(exp_id), max(imp_id))) tol = strtoreal(tolerance) max = strtoreal(maxiter) verb = strtoreal(verbose) storefes = st_numscalar(keepfes) accel_ok = (noacc!="noaccel") vers = st_numscalar(tokens(vers)) //"X_index" used for passing (unsorted) data to and from (sorted) matrix form. X_index = ( (yr_id:-1) :* NN_i :* NN_n^2 :+ (ind_id:-1) * NN_n^2 :+ (exp_id:-1):* NN_n :+ imp_id) BIG_N = NN_y*NN_i*NN_n*NN_n X=D=tt=timevar=user_offset = J(BIG_N,1,0) //Note: imposing phi=X=0 is consistent with treating an observation as missing. X[X_index,1] = M[.,5] // M[.,5] is trade D[X_index,1] = M[.,6] // M[.,6] is "D" tt[X_index,1] = M[.,9] // M[.,9] is the set of linear trend coefficients (if trend specified) timevar[X_index,1] = M[.,10] // time intervals for use with trends user_offset[X_index,1] = M[.,11] // user-specified offset, for constraints, etc. X = colshape(X,NN_n) // matricizes X as a (sorted) (NN_t * NN_i * NN_n) x (NN_n) matrix //set up initial, sorted MR terms Pi0 = P0 = P1 = Pi1 = J(NN_i * NN_y * NN_n, 1, 0) P_index1 = (yr_id:-1) :* NN_i :* NN_n :+ (ind_id:-1) :* NN_n :+ imp_id Pi_index1 = (yr_id:-1) :* NN_i :* NN_n :+ (ind_id:-1) :* NN_n :+ exp_id P_index2 = uniqrows(P_index1) Pi_index2 = uniqrows(Pi_index1) Pi0[Pi_index2,1] = Pi1[Pi_index2,1] = uniqrows( (Pi_index1, M[.,7]) )[.,2] P0[P_index2,1] = P1[P_index2,1] = uniqrows( (P_index1, M[.,8]) )[.,2] // simple time vector, with one entry per year, sorted from first to last time = uniqrows(timevar) //set up matrices for output and expenditure shares (y and e), sum of trade over time (sumX) y = e = Yw = J(NN_i * NN_y * NN_n, 1, 0) if (which == 2) { sumX = J(NN_i * NN_n , NN_n, 1) //which = 2: no pair fixed effects } else { sumX = J(NN_i * NN_n , NN_n, 0) } if (which == 3 | which == 4) { Xtrend = countX = J(NN_i * NN_n , NN_n, 0) //which = 3 or 4: include time trends } else { Xtrend = countX = J(NN_i * NN_n , NN_n, 1) } // construct Y, E, and Yw, as well as sum of X over time. for (t=1; t<=NN_y; t++) { for (i=1; i <= NN_i; i++) { long_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i* NN_n + (i-1)*NN_n + NN_n, 1) wide_index = ((t-1) * NN_i * NN_n +(i-1)*NN_n + 1, 1 \ (t-1) * NN_i * NN_n +(i-1)*NN_n +NN_n,NN_n) Yw[|long_index|] = J(NN_n, 1, 1) # sum(X[|wide_index|]) y[|long_index|] = rowsum(X[|wide_index |]) :/ Yw[|long_index|] e[|long_index|] = rowsum(X[|wide_index|]') :/ Yw[|long_index|] //sum actual trade values within pairs over time //also sum actual trade * t over within pairs over time (for time trends) if (which == 0 | which == 3) { sumX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = sumX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + X[|wide_index|] } else if (which == 1 | which == 4) { //which = 1 or 4: pair fixed effects are symmetric sumX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = sumX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + X[|wide_index|]+ X[|wide_index|]' } if (which == 3) { //which = 3 or 4: include linear time trends Xtrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = Xtrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]*X[|wide_index|] countX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = countX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + (X[|wide_index|] :> 0) } if (which == 4) { Xtrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = Xtrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]*X[|wide_index|]+ time[t]*X[|wide_index|]' countX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = countX[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + (X[|wide_index|]+X[|wide_index|]' :> 0) } } } D = D :* colshape( J(NN_y,1,1)#(editmissing(sumX:!=0,0)) ,1) if (which == 3 | which == 4){ free_trend = colshape( J(NN_y,1,1)#editmissing(countX :>= 2,0) ,1) // time trends cannot be ID'd for pairs that do not trade at least twice tt = tt :* free_trend } if (min(Pi0)<0) { Pi0 = Pi1 = -y:/Pi0 // I set things up so Pi0 < 0 implies user specified "guessS" option, rather than "guessO". } if (min(P0)<0) { P0 = P1 = -e:/P0 // Likewise for P0 < 0 if user specified "guessM" rather than "guessI". } YE_Yw = (y[Pi_index1,.] :* e[P_index1,.] :* Yw[Pi_index1,.]) // = (Y_i * E_j / Y_w), sorted to match the data (which need not be sorted) offset = log((YE_Yw) :/ (Pi1[Pi_index1,.] :* P1[P_index1,.]) :* D[X_index,.] ) + (timevar[X_index,.]:* tt[X_index,.]) + user_offset[X_index,.] // set up iteration loop last_change = change1 = 1 store_iters = J(rows(beta), rows(beta)+2,.) accel_iter1 = accel_iter2 = iter1 = hits = 0 even_iter = 1 if (which == 3 | which == 4) { accel_interval = 250 } else if (which == 2) { accel_interval = 50 } else { accel_interval = 100 } do { phi = J(BIG_N,1,0) phi[X_index,.] = D[X_index,.] :* exp(timevar[X_index,.]:* tt[X_index,.]) :* exp(pv*beta) :* exp(user_offset[X_index,.]) // "phi" can be thought of as "t^(1-sigma)" from Anderson and van Wincoop (2003) phi = colshape(phi,NN_n) Pi0 = Pi1 P0 = P1 if (which == 2) { sumXhat = J(NN_i * NN_n , NN_n, 1) } else { sumXhat = J(NN_i * NN_n , NN_n, 0) } if (which == 3 | which == 4) { Xhattrend = J(NN_i * NN_n , NN_n, 0) Xhattrend2 = J(NN_i * NN_n , NN_n, 0) } else { Xhattrend = J(NN_i * NN_n , NN_n, 1) Xhattrend2 = J(NN_i * NN_n , NN_n, 1) } for (t=1; t<=NN_y; t++) { for (i=1; i<=NN_i; i++) { long_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i* NN_n + (i-1)*NN_n + NN_n, 1) wide_index = ((t-1) * NN_i * NN_n +(i-1)*NN_n + 1, 1 \ (t-1) * NN_i * NN_n +(i-1)*NN_n +NN_n,NN_n) // iterate P0 -> P1 , Pi0 -> Pi1, using P = sum(y*phi/Pi); Pi = sum(e*phi/P) P1[|long_index|] = cross(phi[|wide_index|], (y[|long_index|] :/ Pi0[|long_index|])) Pi1[|long_index|] = cross(phi[|wide_index|]', (e[|long_index |] :/ P1[|long_index|])) // I normalize MRs by imposing them to be the same magnitude. adj = sqrt(sum(Pi1[|long_index|]) :/ sum(P1[|long_index|])) Pi1[|long_index|] = Pi1[|long_index|] :/ adj P1[|long_index|] = P1[|long_index|] :* adj // Sum fitted trade values ("Xhat") over time, using structural gravity: Xhat = (Y*E/Yw) *(phi/(P*Pi)) Xhat = editmissing( phi[|wide_index|] :* y[|long_index|] :* e[|long_index|]' :* Yw[|long_index|] :/ Pi1[|long_index|] :/ (P1[|long_index|]'# J(NN_n,1,1)),0) if (which == 0 | which == 3) { sumXhat[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = sumXhat[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + Xhat } else if (which == 1 | which == 4) { //which = 1: symmetric pair fixed effects sumXhat[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = sumXhat[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + Xhat + Xhat' } // For each ij time trend coeff, the Poisson FOC implies: sum_t{t*X_ij}=sum_t{t*X_ij_hat} if (which == 3) { Xhattrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = Xhattrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]*Xhat Xhattrend2[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = // the sum_t{t^2*X_ij_hat} term comes from the Taylor Series approx of the FOC (used below) Xhattrend2[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]^2*Xhat } if (which == 4) { Xhattrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = Xhattrend[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]*Xhat + time[t]*Xhat' Xhattrend2[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] = Xhattrend2[|(i-1)* NN_n + 1, 1 \ (i-1) * NN_n +NN_n,NN_n|] + time[t]^2*Xhat + time[t]^2*Xhat' } } } // update pair fixed effect (D) and time trends (tt) if (which == 0 | which == 1){ D = D :* colshape( J(NN_y,1,1)#(editmissing(sumX :/ sumXhat,0)) ,1) offset = log((YE_Yw) :/ (Pi1[Pi_index1,.] :* P1[P_index1,.]) :* D[X_index,.]) + user_offset[X_index,.] } if (which == 3 | which == 4){ D = D :* colshape( J(NN_y,1,1)#(editmissing(sumX :/ sumXhat,0)) ,1) dtt = colshape( J(NN_y,1,1)#(editmissing( (Xtrend-Xhattrend) :/ Xhattrend2,0)) ,1) :* free_trend // this comes from 1st-order Taylor Series approx. around dtt = 0 tt = tt + min((iter1/50,.99))*dtt offset = log((YE_Yw) :/ (Pi1[Pi_index1,.] :* P1[P_index1,.]) :* D[X_index,.]) +(timevar[X_index,.]:* tt[X_index,.]) + user_offset[X_index,.] } if (which == 2) { offset = log((YE_Yw) :/ (Pi1[Pi_index1,.] :* P1[P_index1,.])) + user_offset[X_index,.] } if (max(exp(timevar:*tt)) >= 1000) { temp = max(exp(timevar:*tt)) temp = (ln(100)-ln(temp)):/max(timevar) tt = (tt :+ temp) //:* free_trend P1 = P1 :* sqrt(exp(temp:* (time#J(NN_n*NN_i,1,1)) )) Pi1 = Pi1 :* sqrt(exp(temp:* (time#J(NN_n*NN_i,1,1)) )) } if (max(D) >= 1000) { temp = max(D) D = D :* (100 / temp) P1 = P1 :* sqrt(100 / temp) Pi1 = Pi1 :* sqrt(100 / temp) } // Iteratively re-weighted least squares change2 = 1 iter2 = 0 if (which == 3 | which == 4){ even_iter = (mod(iter1,4)==0) } beta_old1=beta notmiss = selectidx(offset :!= .,vers) // ensures loop will handle missing values without blowing up. //IRLS loop: eta = pv[notmiss,.]*beta mu = exp(eta + offset[notmiss,.]) z = eta + ((lhs[notmiss,.]-mu) :/ mu) XWX = cross( pv[notmiss,.], mu, pv[notmiss,.]) XWz = cross( pv[notmiss,.], mu, z) beta = invsym(XWX)*XWz // beta = invsym(pv[notmiss,.]'diag(mu)*pv[notmiss,.])*pv[notmiss,.]'diag(mu)'z /* // newton-raphson loop (old); see: http://cameron.econ.ucdavis.edu/stata/cameronwcsug2008.pdf do { mu = exp(pv[notmiss,.]*beta + offset[notmiss,.]) grad = (pv[notmiss,.])'(lhs[notmiss,.]-mu) // k x 1 gradient vector hes = makesymmetric( (pv[notmiss,.] :* mu)' pv[notmiss,.]) beta_old2 = beta beta = beta_old2 + cholinv(hes)*(grad) // for future: add an irls option change2 = (beta_old2 - beta)'(beta_old2-beta) / (beta' beta) iter2 = iter2+1 } while (change2 > 1e-16 & iter2 < 25) */ //This uses a Steffenson acceleration technique to accelerate guesses after a certain # of iterations //Reference: http://link.springer.com/article/10.1007%2FBF01385782 accel_iter2 = accel_iter2+1 if ((accel_iter2 > accel_interval | change1 < tol*5000) & even_iter & accel_ok ){ if (change1 <= last_change) { accel_iter1 = accel_iter1 +1 store_iters[.,accel_iter1] = beta if (accel_iter1 == rows(beta)+2) { if (verb != 0) { printf("acceleration\n") } step1 = store_iters[|.,2 \.,rows(beta)+1|] - store_iters[|.,1 \.,rows(beta)|] step2 = store_iters[|.,3 \.,rows(beta)+2|] - store_iters[|.,2 \.,rows(beta)+1|] Lambda = step2 * luinv(step1) beta_old1 = store_iters[.,1] beta = store_iters[.,1] + luinv(I(rows(beta)) - Lambda) * step1[.,1] // protect against overshooting the solution. if (change1 < tol*5000) { if (max(abs(beta - store_iters[.,rows(beta)+2])) > .01) { beta = store_iters[.,rows(beta)+2] } else if (change1 < tol*1000) { if (max(abs(beta - store_iters[.,rows(beta)+2])) > .005) { beta = store_iters[.,rows(beta)+2] } else if (change1 < tol*100) { if (max(abs(beta - store_iters[.,rows(beta)+2])) > .0025) { beta = store_iters[.,rows(beta)+2] } else if (change1 < 10*tol) { if (max(abs(beta - store_iters[.,rows(beta)+2])) > .001) { beta = store_iters[.,rows(beta)+2] } else if (change1 < 5*tol) { if (max(abs(beta - store_iters[.,rows(beta)+2])) > .0005) { beta = store_iters[.,rows(beta)+2] } } } } } } if (missing(beta) != 0) { beta = store_iters[.,accel_iter1] if (verb != 0) { printf("acceleration failed. switching to more conservative method.\n") } accel_ok=0 } // reset acceleration loop store_iters = J(rows(beta), rows(beta)+2,.) accel_iter1 = accel_iter2 = 0 } } else { store_iters = J(rows(beta), rows(beta)+2,.) accel_iter1 = accel_iter2 = 0 } } last_change = change1 change1 = editmissing( (beta_old1 - beta)'(beta_old1-beta) / (beta'beta), 0) iter1 = iter1 + 1 if ((change1 > .5) & (iter1 > accel_interval)) { beta = beta_old1 change1 = last_change } //display results if (verb > 0) { kk = min((rows(beta),6)) if ((mod(iter1,verb)==0) | iter1 == 1) { printf("iteration %f: diff = %12.0g ", iter1, change1) printf("coeffs =") for (k = 1; k<=kk; k++) { printf(" %12.0g", beta[k,.]) } printf("\n") } } // require 2 consecutive "hits" for convergence if (change1 > tol) { hits = 0 } else { hits = hits + 1 } //(beta', change1, iter1, max(sumX :/ sumXhat), max(Xtrend :/ Xhattrend)) } while ((hits < 2) & (iter1 < max)) st_numscalar(n_iter, iter1) st_numscalar(N_obs, rows(lhs[notmiss,.])) st_numscalar(N_dropped, rows(lhs) - rows(lhs[notmiss,.])) // apply normalizations - each country's largest D (lowest trade cost) normalized to 1, for both exports and imports if (storefes) { if (which != 2) { D = colshape(D, NN_n) D_temp = J(NN_n * NN_i, NN_n, 0) for (t=1; t<=NN_y; t++) { for (i=1; i<=NN_i; i++) { long_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i* NN_n + (i-1)*NN_n + NN_n, 1) wide_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i* NN_n +(i-1)*NN_n +NN_n,NN_n) temp_index = ((i-1)*NN_n + 1, 1 \ (i-1)*NN_n +NN_n,NN_n) missing = (D[|wide_index|]:==0) D_temp[|temp_index|] = missing:*D_temp[|temp_index|] + !missing:*D[|wide_index|] } } //unbalanced_warning = 0 //years_affected =. for (t=1; t<=NN_y; t++) { test = 0 for (i=1; i<=NN_i; i++) { long_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i* NN_n + (i-1)*NN_n + NN_n, 1) wide_index = ((t-1) * NN_i * NN_n + (i-1)*NN_n + 1, 1 \ (t-1) * NN_i * NN_n +(i-1)*NN_n +NN_n,NN_n) temp_index = ((i-1)*NN_n + 1, 1 \ (i-1)*NN_n +NN_n,NN_n) Pi1[|long_index|] = Pi1[|long_index|] :/ rowmax(D_temp[|temp_index|]) D[|wide_index|] = D[|wide_index|] :/ rowmax(D_temp[|temp_index|]) P1[|long_index|] = P1[|long_index|] :/ colmax(D_temp[|temp_index|]:/ rowmax(D_temp[|temp_index|]))' D[|wide_index|] = D[|wide_index|] :/ colmax(D_temp[|temp_index|]:/ rowmax(D_temp[|temp_index|])) // Normalize MRs by imposing them to be the same magnitude. adj = sqrt(sum(Pi1[|long_index|]) :/ sum(P1[|long_index|])) Pi1[|long_index|] = Pi1[|long_index|] :/ adj P1[|long_index|] = P1[|long_index|] :* adj //test = test + (rowmax(D[|wide_index|]) != rowmax(D_temp[|temp_index|])) //test = test + (colmax(D[|wide_index|]) != colmax(D_temp[|temp_index|])) } /* if (test > 0) { unbalanced_warning = 1 if (cols(years_affected)==1) { years_affected = time[t] } else { years_affected = years_affected, time[t] } } */ } D = colshape(D,1) } } // Future: allow user to specify a numeraire country to normalize P's and Pi's // post results to Stata M[.,6] = D[X_index,.] M[.,7] = Pi1[Pi_index1,.] M[.,8] = P1[P_index1,.] M[.,9] = tt[X_index,.] st_store(., Y_w, touse, Yw[Pi_index1,.]) st_store(., y_i, touse, y[Pi_index1,.]) st_store(., e_j, touse, e[P_index1,.]) st_matrix(b, beta') } end mata: real vector selectidx(real vector x, real scalar vers) { if (vers >= 13) { return(selectidx13(x)) } else { return(selectidx11(x)) } } end mata: real vector selectidx13(real vector x) { return(selectindex(x)) } end // This workaround for selectindex originally created by Hua Peng // http://www.statalist.org/forums/forum/general-stata-discussion/general/1305770-outreg-error mata: real vector selectidx11(real vector x) { real scalar row, cnt, i vector res row = rows(x) cnt = 1 res = J(1, row, 0) for(i=1; i<=row; i++) { if(x[i] != 0) { res[cnt] = i ; cnt++ ; } } if(cnt>1) { res = res[1, 1..cnt-1] } else { res = J(1, 0, 0) } if(row>1) { res = res' } return(res) } end