## Produces figure "Old and New Households in the SOEP" for DTC ##
## Author: Jan Goebel, Selin Kara, Stefan Zimmermann           ##
##################################################################

library(readstata13)

# ---- Configuration ----

# Number of SOEP waves; the survey-year range and the wave prefixes are
# derived from it so that bumping the version keeps them in sync.
soep.version <- 38
soep.waves <- c(letters, paste0("b", letters))[seq_len(soep.version)]
soep.years <- 1984:(1983 + soep.version)
last.year <- 1983 + soep.version

# Column labels for the figure, one per sample.
sample.shortnames <- c(
  "A (1984)", "B (1984)", "C (1990)", "D (1994/95)", "E (1998)*",
  "F (2000)", "G (2002)", "H (2006)", "I (2009)*", "J (2011)",
  "K (2012)", "L1 (2010)", "L2 (2010)", "L3 (2011)", "M1 (2013)",
  "M2 (2015)", "M3 (2016)", "M4 (2016)", "M5 (2017)", "N (2017)",
  "O (2018)", "P (2018)", "Q (2019)", "M6 (2020)", "M7 (2020)",
  "M8 (2020)"
)

# Value labels of `hnetto` that count as a realised household interview
# (accepted under either the English or the German label).
hh.interview.labels <- c(
  "[1] Successful HH Interview",
  "[1] Realisiertes Haushaltsinterview"
)

# Names of the output columns.
first.wave.col <- "first wave hh"
new.hh.col <- paste("new hh", last.year)
share.col <- "share of new hh"

# ---- Read and reshape hpfad ----

# hpfad needs to be stata version 12
hpfad.path <- "H:/daten/hpfad.dta"
hpfad <- droplevels(foreign::read.dta(hpfad.path))
# hpfad <- droplevels(read.dta(hpfad))

hid.vars <- paste0("hid_", soep.years)
hnetto.vars <- paste0(soep.waves, "hnetto")
hpfad <- hpfad[, c("cid", "hsample", hid.vars, hnetto.vars)]

# One row per original household (cid) and survey year.
hpfadl <- reshape(
  hpfad,
  direction = "long",
  varying = list(hhnr = hid.vars, hnetto = hnetto.vars),
  v.names = c("hid", "hnetto"),
  timevar = "syear",
  times = soep.years
)

# Keep only rows with a valid household id. The explicit is.na() guard
# matters: a bare `hid > 0` mask would turn NA ids into all-NA rows.
has.hid <- !is.na(hpfadl$hid) & hpfadl$hid > 0
hpfadl <- droplevels(hpfadl[has.hid, ])

# all survey years
syears <- soep.years

# ---- Count first-wave and new households per sample ----

# first survey year of every sample
new.samples <- tapply(hpfadl$syear, hpfadl$hsample, min)
sample.names <- names(new.samples)

# define output object
old.new.hh <- as.data.frame(matrix(NA, ncol = 3, nrow = length(new.samples)))
names(old.new.hh) <- c(first.wave.col, new.hh.col, share.col)
rownames(old.new.hh) <- sample.names

# First row of each hid; the long data are stacked by survey year, so
# this is the year in which the household id first appears.
tmp <- hpfadl[!duplicated(hpfadl$hid), ]

interviewed <- hpfadl$hnetto %in% hh.interview.labels

# fill tables by samples
for (i in seq_along(new.samples)) {
  sample.name <- sample.names[i]
  start.year <- new.samples[[i]]

  in.sample <- hpfadl$hsample == sample.name
  in.start.year <- in.sample & hpfadl$syear == start.year

  # Households interviewed in the sample's first survey year.
  old.new.hh[sample.name, first.wave.col] <- sum(in.start.year & interviewed)

  # Household ids first seen after the start year that are not among the
  # original household ids (cid) present in the start year.
  later.hids <- tmp$hid[tmp$hsample == sample.name & tmp$syear > start.year]
  start.cids <- hpfadl$cid[in.start.year]
  old.new.hh[sample.name, new.hh.col] <- sum(!(later.hids %in% start.cids))
}

################################################
## FIX for Sample D starting in 1994 AND 1995 ##
################################################
# The label of sample D differs between data releases; accept any of them.
sample.d.labels <- c(
  "[4] D 1994/5 Migration (1984-92/94, West)",
  "[4] D 1994/5 Migration (1984-92/94 West)",
  "[4] D Zuwanderer 1984-93",
  "[4] D Immigrant 1984-1993",
  "[4] D 84-93 Immigrant (West)",
  "[4] D 1994/5 Migration (1984-1994, West)"
)
is.sample.d <- sample.names %in% sample.d.labels
stopifnot(sum(is.sample.d) == 1)

# Count interviews of the first two survey years for sample D.
d.start <- new.samples[[which(is.sample.d)]]
old.new.hh[is.sample.d, first.wave.col] <- sum(
  hpfadl$syear %in% c(d.start, d.start + 1) &
    hpfadl$hsample == sample.names[is.sample.d] &
    interviewed
)

# ---- Share of new hh in last year ----

ok <- hpfadl$syear == last.year & interviewed
# A household is "new" when its id differs from the original household id.
is.new <- !(hpfadl$cid == hpfadl$hid)
new.by.sample <- table(hpfadl$hsample[ok], is.new[ok])
old.new.hh[, share.col] <- round(
  100 * prop.table(new.by.sample, margin = 1)[, "TRUE"],
  1
)

old.new <- t(as.matrix(old.new.hh))
if (length(sample.shortnames) != ncol(old.new)) {
  stop(
    "sample.shortnames has ", length(sample.shortnames),
    " entries but the data contain ", ncol(old.new), " samples",
    call. = FALSE
  )
}
dimnames(old.new)[[2]] <- sample.shortnames

###################
## Create figure ##
###################
# postscript("H:/old-new-hh.png", horizontal = TRUE)
#postscript("H:/old-new-hh.eps", horizontal = TRUE)

# Remember the previous graphics settings so they can be restored below.
old.par <- par(mar = c(7, 4, 2, 4), las = 2)

barplot(
  old.new[1:2, ],
  beside = TRUE,
  horiz = FALSE,
  ylim = c(0, 7000),
  legend.text = c(
    "First Wave Households",
    paste0("New Households in ", last.year, " since start of sample")
  )
)

# Overlay the share of new households on a second (right-hand) scale.
# NOTE(review): the overlay uses its own x scale, so the points line up
# with the bar groups only approximately; using the midpoints returned by
# barplot() would align them exactly but also shifts the legend position.
par(new = TRUE)
plot(
  seq(from = 1.5, to = 0.5 + ncol(old.new) * 2, by = 2),
  old.new[3, ],
  type = "p", pch = 19, col = 2,
  axes = FALSE, xlab = "", ylab = ""
)
box()
axis(4, col = 2)
legend(
  27, 40,
  paste0("Share of \"new\" Households\n in ", last.year,
         " (right scale in %)"),
  pch = 19, col = 2, cex = 0.75
)

par(old.par)

#dev.off()
#system("H:/; convert -rotate 90 old-new-hh.eps old-new-hh.png")