--- title: "Analysis Code - The Contribution of Age Structure to the International Homicide Decline" author: "Mateus Renno Santos" editor_options: chunk_output_type: console --- This file contains some example code to assist in the replication and in the manipulation of the analysis of our study. This code is an adaptation of the code used through the entire analysis. Though we have tested it, it is possible that it contains errors - if you find any, please write msantos8@outlook.com. For simplicity, most sensitivity analyses were not included in this file, but are avaiable upon request. Please write msantos8@outlook.com if you have any questions. ```{r Setup, include=FALSE} #Clears the workspace rm(list = ls()) #usePackage() - Library a package, and installs the package if absent usePackage <- function(x) { if (!is.element(x, installed.packages()[,1])) install.packages(x, dep = TRUE) require(x, character.only = TRUE) } #Packages usePackage("readxl") usePackage("tidyr") usePackage("dplyr") usePackage("ggplot2") usePackage('scales') usePackage('gridExtra') usePackage('plm') usePackage("quantreg") usePackage("margins") usePackage('stargazer') usePackage('lmtest') usePackage('tseries') usePackage('corrplot') usePackage('car') usePackage("writexl") usePackage('imputeTS') #Plot theme ptheme <- theme( text = element_text(family = "serif", size = 12), panel.grid.major.x = element_blank(), panel.grid.major.y = element_line(colour = "light gray"), panel.background = element_rect(fill = "white"), legend.key = element_rect(colour = "transparent", fill = "white"), legend.position="bottom", legend.title=element_blank()) ``` ```{r Read, include=FALSE} #Read the S1 Dataset (data on Country and Year level) df <- as.data.frame(read_xlsx("S1 Dataset.xlsx")) ``` #Data Processing ```{r Generate, include=FALSE} #Generate variables df$UNODC_HomRate <- df$UNODC_Hom / df$Pop * 100000 df$WHO_HomRate <- df$WHO_Hom / df$Pop * 100000 df$PerPop_15_29 <- (df$Pop_15_29 / df$Pop) * 100 df$PerMale <- df$PopM / df$Pop * 100 df$GDP_PerCap <- (df$GDP / df$Pop) / 1000 df$PerUrban <- df$Urban / df$Pop * 100 df$UNODC_HomRate[is.infinite(df$UNODC_HomRate)] <- NA df$WHO_HomRate[is.infinite(df$WHO_HomRate)] <- NA df$PerPop_15_29[is.infinite(df$PerPop_15_29)] <- NA df$PerMale[is.infinite(df$PerMale)] <- NA df$GDP_PerCap[is.infinite(df$GDP_PerCap)] <- NA df$PerUrban[is.infinite(df$PerUrban)] <- NA ``` ##Combined Homicide Series Generates the combined homicide series using WHO data between 1960 to 1989, and UNODC data from 1990 to 2016. The combination was only executed for a set of countries that met a specific set of criteria: 1. If average ratio is between 0.7 and 1.3 (1990 to 1993) 2. If there is a continuation from WHO in late 1980's (1987 to 1989) and UNODC in early 1990s (1990 to 1991; Valid) 3. If population is above 1,000,000 in 2016 4. If the correlation between the UNODc and the WHO series is above 0.6 ```{r Hom Gen, include=FALSE} #Start from UNODC homicide df$HomRate <- df$UNODC_HomRate #Ratio UNODC to WHO df$RatioHom <- df$UNODC_HomRate / df$WHO_HomRate #Average ratio per country (1990 to 1993) for (c in unique(df$Country)) { df$RatioMean[df$Country==c] <- mean(df$RatioHom[df$Country==c & df$Year >= 1990 & df$Year <= 1993], na.rm=T) df$Cor[df$Country==c] <- cor(df$UNODC_Hom[df$Country==c], df$WHO_Hom[df$Country==c], use = "pair") df$Valid[df$Country==c] <- sum(!is.na(df$WHO_Hom[df$Country==c & df$Year<=1989 & df$Year>= 1987])) > 0 & sum(!is.na(df$UNODC_Hom[df$Country==c & df$Year<=1991 & df$Year>= 1990])) > 0 & df$Pop[df$Country==c & df$Year==2016] >= 1000000 & df$Cor[df$Country==c] >= 0.6 } df$RatioHom[is.infinite(df$RatioHom)] <- NA df$RatioHom[is.nan(df$RatioHom)] <- NA df$RatioMean[is.infinite(df$RatioMean)] <- NA df$RatioMean[is.nan(df$RatioMean)] <- NA unique(df[,c("Country", "RatioMean", "Cor")]) #Restriction #If average ratio is between 0.7 and 1.3 (1990 to 1993) #If there is a continuation from WHO in late 1980's (1987 to 1989) and UNODC in early 1990s (1990 to 1991; Valid) #If population is above 1,000,000 in 2016 #If the correlation between the UNODc and the WHO series is above 0.6 Restriction <- unique(df$Country[df$RatioMean >= 0.7 & df$RatioMean <= 1.3 & df$Valid==TRUE]) Restriction <- Restriction[!is.na(Restriction)] df$Restriction <- 0 df$Restriction[df$Country %in% Restriction] <- 1 #Replace UNODC missing with ratio adjusted WHO #Replace data only before 1990 - all WHO data after 1990 was revised by UNODC df$HomRate[is.na(df$HomRate) & df$Year < 1990 & df$Country %in% Restriction] <- df$WHO_HomRate[is.na(df$HomRate) & df$Year < 1990 & df$Country %in% Restriction] * df$RatioMean[is.na(df$HomRate) & df$Year < 1990 & df$Country %in% Restriction] #Generate the homicide count from rate df$Hom <- df$HomRate * df$Pop / 100000 ``` ```{r Select, include=FALSE} #Remove UK and Iraq separate components (totals remain in the data) df <- df[-which(df$Country=="England & Wales (UK)"),] df <- df[-which(df$Country=="Northern Ireland (UK)"),] df <- df[-which(df$Country=="Scotland (UK)"),] df <- df[-which(df$Country=="Iraq (Central)"),] df <- df[-which(df$Country=="Iraq (Kurdistan)"),] Restriction <- Restriction[-which(Restriction=="Scotland (UK)")] ``` #Regional Aggregates (1990 to 2015) ##Imputation Use an exponentially weigthed moving average to impute other years of data for countries with at least one year of data ```{r Imputation Gen, include=FALSE} #Aggregate some indicators at the country level Country <- summarise(group_by(df, Country), Years = sum(!is.na(Hom) & !is.na(Pop)), Earliest = min(Year[!is.na(Hom) & !is.na(Pop)]), Latest = max(Year[!is.na(Hom) & !is.na(Pop)]), Years_Age = sum(!is.na(Pop_15_29) & !is.na(Pop))) Country$Earliest[is.infinite(Country$Earliest)] <- NA Country$Latest[is.infinite(Country$Latest)] <- NA #Merge country with data df <- merge(df, Country, all.x=TRUE) #Moving Average (Replacing missing values) df$HomRate_Est <- NA for (i in unique(df$Country)) { if (df$Years[df$Country==i & df$Year>=1990][1]<=1) { #If the country only one year of data, have that as the moving average df$HomRate_Est[df$Country==i & df$Year>=1990] <- sum(df$HomRate[df$Country==i & df$Year>=1990], na.rm=T) } else { #Calculate the moving average of all other values df$HomRate_Est[df$Country==i & df$Year>=1990] <- na.ma(df$HomRate[df$Country==i & df$Year>=1990], k = 100, weighting = "exponential") } } #Replace countries with no data to missing df$HomRate_Est[df$Years==0] <- NA #Estimated Count df$Hom_Est <- (df$HomRate_Est*df$Pop)/100000 #Flag for Estimate df$Estimated <- ifelse(is.na(df$HomRate),1,0) df$Estimated[is.na(df$HomRate_Est)] <- NA #Clean rm(Country, i) ``` Plot the data for each country which met the restriction in a separate PDF device ("Combined Homicide Series.pdf") ```{r Combined Homicide PDF, echo = FALSE, eval = FALSE} #Gather the data in consolidated format (to facilitate the inclusion of legends) pcountry <- df[, c("Country", "Year", "Subregion", "RatioMean", "Cor", "WHO_HomRate", "UNODC_HomRate", "HomRate")] pcountry <- gather(pcountry, key = "Type", value = "Value", -Country, -Year, -Subregion, -RatioMean, -Cor) pcountry$Type[pcountry$Type=="WHO_HomRate"] <- "WHO Rate" pcountry$Type[pcountry$Type=="UNODC_HomRate"] <- "UN Rate" pcountry$Type[pcountry$Type=="HomRate"] <- "Combined Rate" pcountry$Type <- factor(pcountry$Type, levels = c("WHO Rate", "UN Rate", "Combined Rate")) #Filter pcountry <- pcountry[!is.na(pcountry$Value),] pcountry <- pcountry[order(pcountry$Country),] pcountry <- pcountry[!is.na(pcountry$RatioMean),] #Graph Per Country pdf("Combined Homicide Series.pdf") for (i in Restriction) { plot <- ggplot(subset(pcountry, Country==i), aes(x = Year, y = Value, group = Type, colour = Type, linetype = Type)) + ggtitle(paste(i, "-", "Ratio=",round(unique(pcountry$RatioMean[pcountry$Country==i]), 3), "Cor=", round(unique(pcountry$Cor[pcountry$Country==i]), 3))) + ylab("Homicide Rate") + xlab("Year") + geom_line() + geom_point(shape=21, fill="White") + scale_colour_manual(values = c("WHO Rate"="Red", "UN Rate"="Blue", "Combined Rate"="Black")) + scale_linetype_manual(values = c("WHO Rate"=1, "UN Rate"=2, "Combined Rate"=3)) + scale_y_continuous(labels = comma) + scale_x_continuous(breaks = seq(min(df$Year), max(df$Year), 10), limits = c(min(df$Year), max(df$Year))) + ptheme print(plot) print(i) } dev.off() rm(pcountry, plot, i) ``` ##Aggregate Aggregate the imputed data to the level of the regions ```{r Regions, include = FALSE} #Data on region & year level (Including World's total) subregion <- summarise(group_by(df, Region, Subregion, Year), Countries = n(), Sample = sum(!is.na(Hom) & !is.na(Pop)), Hom = sum(Hom, na.rm = TRUE), Covered = sum(!is.na(Hom_Est) & !is.na(Pop)), Hom_Est = sum(Hom_Est, na.rm=T), StD = sd(Hom_Est, na.rm=T), Pop_Covered = sum(Pop*(!is.na(Estimated)), na.rm=T), Pop_Sample = sum(Pop*(Estimated==0), na.rm=T), Pop_Estimated = sum(Pop*Estimated, na.rm=T), Pop = sum(Pop, na.rm = TRUE), PopM = sum(PopM, na.rm = TRUE), PopF = sum(PopF, na.rm = TRUE), Pop_0_14 = sum(Pop_0_14, na.rm = TRUE), Pop_15_29 = sum(Pop_15_29, na.rm = TRUE), Pop_30_44 = sum(Pop_30_44, na.rm = TRUE), Pop_45_59 = sum(Pop_45_59, na.rm = TRUE), Pop_60_ = sum(Pop_60_, na.rm = TRUE)) region <- summarise(group_by(subregion, Region, Year), Subregion=unique(Region), Countries = sum(Countries), Sample = sum(Sample), Hom = sum(Hom), Covered = sum(Covered), Hom_Est = sum(Hom_Est), StD = sd(StD), Pop_Covered = sum(Pop_Covered), Pop_Sample = sum(Pop_Sample), Pop_Estimated = sum(Pop_Estimated), Pop = sum(Pop), PopM = sum(PopM), PopF = sum(PopF), Pop_0_14 = sum(Pop_0_14), Pop_15_29 = sum(Pop_15_29), Pop_30_44 = sum(Pop_30_44), Pop_45_59 = sum(Pop_45_59), Pop_60_ = sum(Pop_60_)) world <- summarise(group_by(region, Year), Region="World", Subregion="World", Countries = sum(Countries), Sample = sum(Sample), Hom = sum(Hom), Covered = sum(Covered), Hom_Est = sum(Hom_Est), StD = sd(StD), Pop_Covered = sum(Pop_Covered), Pop_Sample = sum(Pop_Sample), Pop_Estimated = sum(Pop_Estimated), Pop = sum(Pop), PopM = sum(PopM), PopF = sum(PopF), Pop_0_14 = sum(Pop_0_14), Pop_15_29 = sum(Pop_15_29), Pop_30_44 = sum(Pop_30_44), Pop_45_59 = sum(Pop_45_59), Pop_60_ = sum(Pop_60_)) #Merge the Regions with the World Regions <- rbind(as.data.frame(world), as.data.frame(region)) #Replace invalid to missing Regions$Pop[Regions$Pop==0] <- NA Regions$PopM[Regions$PopM==0] <- NA Regions$PopF[Regions$PopF==0] <- NA Regions$Pop_0_14[Regions$Pop_0_14==0] <- NA Regions$Pop_15_29[Regions$Pop_15_29==0] <- NA Regions$Pop_30_44[Regions$Pop_30_44==0] <- NA Regions$Pop_45_59[Regions$Pop_45_59==0] <- NA Regions$Pop_60_[Regions$Pop_60_==0] <- NA Regions$Hom_Est[Regions$Hom_Est==0] <- NA Regions$Pop_Sample[Regions$Year<=1989] <- NA Regions$Pop_Estimated[Regions$Year<=1989] <- NA Regions$Covered[Regions$Year<=1989] <- NA Regions$Pop_Covered[Regions$Year<=1989] <- NA #Adjust Disaggregated Populations to Total Population #Due to different samples and slight differences between sums and totals in the WPP v <- which(!is.na(Regions$PopM) & !is.na(Regions$PopF) & !is.na(Regions$Pop)) while (sum(Regions$PopM[v] + Regions$PopF[v]) != sum(Regions$Pop[v])) { Regions$PopM <- Regions$Pop * Regions$PopM / (Regions$PopM + Regions$PopF) Regions$PopF <- Regions$Pop * Regions$PopF / (Regions$PopM + Regions$PopF) } v <- which(!is.na(Regions$Pop_0_14) & !is.na(Regions$Pop_15_29) & !is.na(Regions$Pop_30_44) & !is.na(Regions$Pop_45_59) & !is.na(Regions$Pop_60_) & !is.na(Regions$Pop)) while (sum(Regions$Pop_0_14[v] + Regions$Pop_15_29[v] + Regions$Pop_30_44[v] + Regions$Pop_45_59[v] + Regions$Pop_60_[v]) != sum(Regions$Pop[v])) { Regions$Pop_0_14 <- Regions$Pop * Regions$Pop_0_14 / (Regions$Pop_0_14 + Regions$Pop_15_29 + Regions$Pop_30_44 + Regions$Pop_45_59 + Regions$Pop_60_) Regions$Pop_15_29 <- Regions$Pop * Regions$Pop_15_29 / (Regions$Pop_0_14 + Regions$Pop_15_29 + Regions$Pop_30_44 + Regions$Pop_45_59 + Regions$Pop_60_) Regions$Pop_30_44 <- Regions$Pop * Regions$Pop_30_44 / (Regions$Pop_0_14 + Regions$Pop_15_29 + Regions$Pop_30_44 + Regions$Pop_45_59 + Regions$Pop_60_) Regions$Pop_45_59 <- Regions$Pop * Regions$Pop_45_59 / (Regions$Pop_0_14 + Regions$Pop_15_29 + Regions$Pop_30_44 + Regions$Pop_45_59 + Regions$Pop_60_) Regions$Pop_60_ <- Regions$Pop * Regions$Pop_60_ / (Regions$Pop_0_14 + Regions$Pop_15_29 + Regions$Pop_30_44 + Regions$Pop_45_59 + Regions$Pop_60_) } #Calculate the Homicide Rate Regions$HomRate_Est <- (Regions$Hom_Est/Regions$Pop_Covered)*100000 Regions$HomRate_Est[is.infinite(Regions$HomRate_Est)] <- NA #Generate the Percents Regions$PerPop_15_29 <- (Regions$Pop_15_29 / Regions$Pop) * 100 #Readjust the estimate count for the total population Regions$Hom_Est <- (Regions$HomRate_Est*Regions$Pop)/100000 #Calculate the Percent of the Population that is from Covered Countries by Subregion & Year Regions$Per_Covered <- Regions$Pop_Covered/Regions$Pop Regions$Per_Covered[is.infinite(Regions$Per_Covered)] <- NA #Clean rm(world, region, subregion) ``` Plot the data for each region in a separate PDF device ("Regions Age.pdf") ```{r Plot Regions, echo = FALSE, eval= FALSE} pdf("Regions Age.pdf") for (i in unique(Regions$Region)) { a <- Regions$HomRate_Est[Regions$Region==i & !is.na(Regions$HomRate_Est)] b <- Regions$PerPop_15_29[Regions$Region==i & !is.na(Regions$PerPop_15_29)] cor <- cor(Regions$HomRate_Est[Regions$Region==i], Regions$PerPop_15_29[Regions$Region==i], use = "pair") plot <- ggplot(subset(Regions, Region==i & Year>=1990), aes(x = Year)) + ggtitle(paste0(i,"; r = ", round(cor,2))) + ylab("Homicide Rate") + geom_line(aes(y = HomRate_Est, colour = "Homicide Rate")) + geom_line(aes(y = rescale(PerPop_15_29, to = c(min(Regions$HomRate_Est[Regions$Region==i & !is.na(Regions$HomRate_Est)]), max(Regions$HomRate_Est[Regions$Region==i & !is.na(Regions$HomRate_Est)]))), colour = "Percent 15 to 29")) + geom_point(aes(y = HomRate_Est, colour = "Homicide Rate")) + geom_point(aes(y = rescale(PerPop_15_29, to = c(min(Regions$HomRate_Est[Regions$Region==i & !is.na(Regions$HomRate_Est)]), max(Regions$HomRate_Est[Regions$Region==i & !is.na(Regions$HomRate_Est)]))), colour = "Percent 15 to 29")) + scale_colour_manual(values = c("dark red", " dark blue")) + scale_y_continuous(sec.axis = sec_axis(~./1, name = "Percent 15 to 29", breaks = round(seq(min(a), max(a), length.out=4),2), labels = paste0(round(seq(min(b), max(b), length.out=4),1),"%")), breaks = round(seq(min(a), max(a), length.out=4),2)) + scale_x_continuous(breaks = seq(1990, 2015, 5), limits = c(1990, 2015)) + ptheme print(plot) print(i) } dev.off() ``` ##Save Save both data at the Country and Year (Country & Year.xlsx) and at the Region and Year (Region & Year.xlsx) levels as excel files for easy visualization ```{r Save, include=FALSE} write_xlsx(df, "Country & Year.xlsx") write_xlsx(Regions[,c("Year", "Region", "Countries", "Sample", "Pop_Sample", "Hom", "Covered", "Pop_Covered", "Hom_Est", "Pop", "PopM", "PopF", "Pop_0_14", "Pop_15_29", "Pop_30_44", "Pop_45_59", "Pop_60_", "HomRate_Est", "PerPop_15_29", "Per_Covered")], "Region & Year.xlsx") ``` #Data Analysis ##Sample Selection ```{r Sample, include=FALSE} #Remove countries with less than 1 million population in 2016 df <- df[df$Country %in% unique(df$Country[df$Pop >= 1000000 & df$Year == 2016]),] #Remove notable terrorist killings (GTD) cases <- c(which(df$Country=="Norway" & df$Year==2011), which(df$Country=="France" & df$Year==2015), which(df$Country=="France" & df$Year==2016), which(df$Country=="United States of America" & df$Year==2001), which(df$Country=="Germany" & df$Year==2016)) df$HomAdj <- df$Hom df$HomAdj[cases] <- df$HomAdj[cases] - df$Terror[cases] #Calculate adjusted rate df$HomAdjRate <- df$HomAdj / df$Pop * 100000 #Generate Logs df$LnHomAdjRate <- log(df$HomAdjRate) df$LnHomAdjRate[is.infinite(df$LnHomAdjRate)] <- NA #Countries with homicide data since each decade Quality50 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year==1950]) Quality60 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=1960]) Quality70 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=1970]) Quality80 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=1980]) Quality90 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=1990]) Quality00 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=2000]) Quality10 <- unique(df$Country[!is.na(df$HomAdjRate) & df$Year<=2010]) QualityAny <- unique(df$Country[!is.na(df$HomAdjRate)]) #Flag for countries in the Long Series Sample df$Quality60 <- 0 df$Quality60[df$Country %in% Quality60] <- 1 #Fully observed cases (Analytical Sample) df$Sample <- !is.na(rowSums(df[,c("LnHomAdjRate","PerPop_15_29","PerMale","SWIID_Gini","GDP_PerCap","PerUrban")], na.rm=FALSE)) ``` ##Statisitcs ```{r Statistics, include=FALSE} #Data Availability #Population unique(df$Country[!is.na(df$Pop)]) unique(df$Country[!is.na(df$PerPop_15_29)]) unique(df$Country[!is.na(df$Pop)])[!unique(df$Country[!is.na(df$Pop)]) %in% unique(df$Country[!is.na(df$PerPop_15_29)])] #Summary by Country Country <- summarise(group_by(df, Region, Country), Population = Pop[Year == 2015], Years = sum(!is.na(HomAdjRate)), First = min(Year[!is.na(HomAdjRate)]), Last = max(Year[!is.na(HomAdjRate)]), Mean = mean(HomAdjRate, na.rm=T), SD = sd(HomAdjRate, na.rm=T), Min = min(HomAdjRate, na.rm=T), Max = max(HomAdjRate, na.rm=T)) Country[] <- lapply(Country, function(x) replace(x, is.infinite(x), NA)) Country[] <- lapply(Country, function(x) replace(x, is.nan(x), NA)) #Correlations countries cor(df$HomAdjRate[df$Country=="United States"], df$PerPop_15_29[df$Country=="United States"], use = "pair") cor(df$HomAdjRate[df$Country=="Canada"], df$PerPop_15_29[df$Country=="Canada"], use = "pair") cor(df$HomAdjRate[df$Country=="Austria"], df$PerPop_15_29[df$Country=="Austria"], use = "pair") cor(df$HomAdjRate[df$Country=="Japan"], df$PerPop_15_29[df$Country=="Japan"], use = "pair") cor(df$HomAdjRate[df$Country=="Australia"], df$PerPop_15_29[df$Country=="Australia"], use = "pair") cor(df$HomAdjRate[df$Country=="Italy"], df$PerPop_15_29[df$Country=="Italy"], use = "pair") cor(df$HomAdjRate[df$Country=="Brazil"], df$PerPop_15_29[df$Country=="Brazil"], use = "pair") cor(df$HomAdjRate[df$Country=="Mexico"], df$PerPop_15_29[df$Country=="Mexico"], use = "pair") cor(df$HomAdjRate[df$Country=="Venezuela"], df$PerPop_15_29[df$Country=="Venezuela"], use = "pair") cor(df$HomAdjRate[df$Country=="Costa Rica"], df$PerPop_15_29[df$Country=="Costa Rica"], use = "pair") #Ratio US c <- "United States" df$UNODC_HomRate[df$Country == c] / df$WHO_HomRate[df$Country==c] df$WHO_HomRate[df$Country == c] / df$UNODC_HomRate[df$Country==c] min(df$WHO_HomRate[df$Country == c] / df$UNODC_HomRate[df$Country==c], na.rm=T) max(df$WHO_HomRate[df$Country == c] / df$UNODC_HomRate[df$Country==c], na.rm=T) df$RatioMean[df$Country==c] df$WHO_HomRate[df$Country == c] - df$UNODC_HomRate[df$Country==c] min(df$WHO_HomRate[df$Country == c] - df$UNODC_HomRate[df$Country==c], na.rm=T) max(df$WHO_HomRate[df$Country == c] - df$UNODC_HomRate[df$Country==c], na.rm=T) #Correlation between UNODC & WHO cor(df$UNODC_HomRate[df$Country %in% Quality60], df$WHO_HomRate[df$Country %in% Quality60], use="pairwise") #Correlation mean(df$UNODC_HomRate[df$Country %in% Quality60] - df$WHO_HomRate[df$Country %in% Quality60], na.rm=T) #Average difference mean(df$UNODC_HomRate[df$Country %in% Quality60] / df$WHO_HomRate[df$Country %in% Quality60], na.rm=T) #Average ratio ``` ```{r Descritpives, echo = FALSE} #Descriptive Statistics by Analytical Sample stargazer(df[df$Year<=2015 & df$Year>=1990 & df$Sample, c("HomAdjRate","PerPop_15_29","PerMale","SWIID_Gini", "GDP_PerCap", "PerUrban")], title = "High Coverage Sample", digits = 3, covariate.labels=c("Homicide Rate", "Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (USD 1k)", "Percent Urban"), type="html", out="DescriptivesSample_1990.htm") stargazer(df[df$Year<=2015 & df$Country %in% Quality60 & df$Sample, c("HomAdjRate","PerPop_15_29","PerMale","SWIID_Gini", "GDP_PerCap", "PerUrban")], title = "Long Series Sample", digits = 3, covariate.labels=c("Homicide Rate", "Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (USD 1k)", "Percent Urban"), type="html", out="DescriptivesSample_1960.htm") #Correlation Matrix a <- cor(df[df$Year>=1990, c("HomAdjRate","PerPop_15_29","PerMale","SWIID_Gini", "GDP_PerCap", "PerUrban")], use = "pairwise") b <- cor(df[df$Country %in% Quality60, c("HomAdjRate","PerPop_15_29","PerMale","SWIID_Gini", "GDP_PerCap", "PerUrban")], use = "pairwise") cor.test(df[df$Year>=1990, c("PerMale")], df[df$Year>=1990, c("PerUrban")]) cor.test(df[df$Country %in% Quality60, c("HomAdjRate")], df[df$Country %in% Quality60, c("PerUrban")]) #Variance Inflation Factor modelC90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="pooling", data=df[df$Year>=1990,]) modelC60 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="pooling", data=df[df$Country %in% Quality60,]) c <- t(t(vif(modelC90))) d <- t(t(vif(modelC60))) #Export table with coefficients and VIF a <- as.data.frame(a) b <- as.data.frame(b) c <- as.data.frame(c) d <- as.data.frame(d) a$Var <- rownames(a) b$Var <- rownames(b) c$Var <- rownames(c) d$Var <- rownames(d) a <- merge(a, c, all.x=T) b <- merge(b, d, all.x=T) a <- a[c(2,4,3,6,1,5),] b <- b[c(2,4,3,6,1,5),] write.csv(rbind(a, b), "Correlation.csv") #Clean rm(a, b, c, d, modelC90, modelC60) ``` ##Fixed Effects Models Models are exported to an htm device using the stargazer package ("OneWay Fixed Effects Model.htm" , "TwoWay Fixed Effects Model.htm") ```{r OneWay Full Model, echo = FALSE, eval = FALSE} #Full Sample Models (OneWay Fixed Effects) model90_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, index=c("Country", "Year"), model="within", data=df[df$Year>=1990 & df$Sample,]) modelC90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="within", data=df[df$Year>=1990,]) model60_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60 & df$Sample,]) modelC60 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60,]) model60_90_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60 & df$Year>=1990 & df$Sample,]) modelC60_90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60 & df$Year>=1990,]) coefficient <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,1], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,1], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,1]) clust_error <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,2], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,2], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,2]) clust_pvalue <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,4], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,4], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,4]) stargazer(model90_Sample, modelC90, model60_Sample, modelC60, model60_90_Sample, modelC60_90, coef = coefficient, se = clust_error, p = clust_pvalue, apply.coef = exp, t.auto = FALSE, p.auto = FALSE, covariate.labels=c("Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (1k)", "Percent Urban"), omit.stat = "adj.rsq", star.cutoffs = c(0.05, 0.01, 0.001), df = FALSE, dep.var.caption ="", dep.var.labels = c(""), column.labels = c("Since 1990", "Since 1990", "Since 1960", "Since 1960", "Since 1990", "Since 1990"), type="html", model.numbers=FALSE, no.space=TRUE, out="OneWay Fixed Effects Model.htm") ``` ```{r TwoWay Full Model, echo = FALSE, eval = FALSE} #Full Sample Models (TwoWay fixed effects) model90_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, effect = "twoways", index=c("Country", "Year"), model="within", data=df[df$Year>=1990 & df$Sample,]) modelC90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, effect = "twoways", index=c("Country", "Year"), model="within", data=df[df$Year>=1990,]) model60_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, effect = "twoways", index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60 & df$Sample,]) modelC60 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, effect = "twoways", index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60,]) model60_90_Sample <- plm(LnHomAdjRate ~ PerPop_15_29, effect = "twoways", index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60 & df$Year>=1990 & df$Sample,]) modelC60_90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, effect = "twoways", index=c("Country", "Year"), model="within", data=df[(df$Country %in% Quality60) & df$Year>=1990,]) coefficient <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,1], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,1], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,1], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,1]) clust_error <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,2], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,2], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,2], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,2]) clust_pvalue <- list(coeftest(model90_Sample, vcov=vcovHC(model90_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,4], coeftest(model60_Sample, vcov=vcovHC(model60_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,4], coeftest(model60_90_Sample, vcov=vcovHC(model60_90_Sample,type="HC0",cluster="group"))[,4], coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group"))[,4]) coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group")) coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group")) coeftest(modelC60_90, vcov=vcovHC(modelC60_90,type="HC0",cluster="group")) stargazer(model90_Sample, modelC90, model60_Sample, modelC60, model60_90_Sample, modelC60_90, coef = coefficient, se = clust_error, p = clust_pvalue, apply.coef = exp, t.auto = FALSE, p.auto = FALSE, covariate.labels=c("Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (1k)", "Percent Urban"), omit.stat = "adj.rsq", star.cutoffs = c(0.05, 0.01, 0.001), df = FALSE, dep.var.caption ="", dep.var.labels = c(""), column.labels = c("Since 1990", "Since 1990", "Since 1960", "Since 1960", "Since 1990", "Since 1990"), type="html", model.numbers=FALSE, no.space=TRUE, out="TwoWay Fixed Effects Model.htm") ``` ##Quantile Regression ```{r Quantile Models, echo = FALSE, eval = FALSE} #Quatile model for the High Coverage Sample (Since 1990) modelC90 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="within", data=df[df$Year>=1990,]) a <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.1), data=df[df$Year >= 1990,]) b <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.25), data=df[df$Year >= 1990,]) c <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.5), data=df[df$Year >= 1990,]) d <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.75), data=df[df$Year >= 1990,]) e <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.90), data=df[df$Year >= 1990,]) t10_1990 <- summary.rq(a, se="boot", cluster=c("Country")) t25_1990 <- summary.rq(b, se="boot", cluster=c("Country")) t50_1990 <- summary.rq(c, se="boot", cluster=c("Country")) t75_1990 <- summary.rq(d, se="boot", cluster=c("Country")) t90_1990 <- summary.rq(e, se="boot", cluster=c("Country")) coefficient <- list(coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,1], t10_1990$coefficients[,1], t25_1990$coefficients[,1], t50_1990$coefficients[,1], t75_1990$coefficients[,1], t90_1990$coefficients[,1]) clust_error <- list(coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,2], t10_1990$coefficients[,2], t25_1990$coefficients[,2], t50_1990$coefficients[,2], t75_1990$coefficients[,2], t90_1990$coefficients[,2]) clust_pvalue <- list(coeftest(modelC90, vcov=vcovHC(modelC90,type="HC0",cluster="group"))[,4], t10_1990$coefficients[,4], t25_1990$coefficients[,4], t50_1990$coefficients[,4], t75_1990$coefficients[,4], t90_1990$coefficients[,4]) stargazer(modelC90, modelC90, modelC90, modelC90, modelC90, modelC90, coef = coefficient, se = clust_error, p = clust_pvalue, apply.coef = exp, t.auto = FALSE, p.auto = FALSE, covariate.labels=c("Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (1k)", "Percent Urban"), omit.stat = "adj.rsq", star.cutoffs = c(0.05, 0.01, 0.001), omit = c(7:500), df = FALSE, dep.var.caption ="", dep.var.labels = c(""), column.labels = c("Fixed Effects", "t = 0.1", "t = 0.25", "t = 0.5", "t = 0.75", "t = 0.9"), type="html", model.numbers=FALSE, no.space=TRUE, out="Quantile Fixed Effects_1990.htm") #Quatile Model for the Long Series Sample (Since 1960) modelC60 <- plm(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban, index=c("Country", "Year"), model="within", data=df[df$Country %in% Quality60,]) a <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.1), data=df[df$Country %in% Quality60,]) b <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.25), data=df[df$Country %in% Quality60,]) c <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.5), data=df[df$Country %in% Quality60,]) d <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.75), data=df[df$Country %in% Quality60,]) e <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=c(0.90), data=df[df$Country %in% Quality60,]) t10_1960 <- summary.rq(a, se="boot", cluster=c("Country")) t25_1960 <- summary.rq(b, se="boot", cluster=c("Country")) t50_1960 <- summary.rq(c, se="boot", cluster=c("Country")) t75_1960 <- summary.rq(d, se="boot", cluster=c("Country")) t90_1960 <- summary.rq(e, se="boot", cluster=c("Country")) coefficient <- list(coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,1], t10_1960$coefficients[,1], t25_1960$coefficients[,1], t50_1960$coefficients[,1], t75_1960$coefficients[,1], t90_1960$coefficients[,1]) clust_error <- list(coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,2], t10_1960$coefficients[,2], t25_1960$coefficients[,2], t50_1960$coefficients[,2], t75_1960$coefficients[,2], t90_1960$coefficients[,2]) clust_pvalue <- list(coeftest(modelC60, vcov=vcovHC(modelC60,type="HC0",cluster="group"))[,4], t10_1960$coefficients[,4], t25_1960$coefficients[,4], t50_1960$coefficients[,4], t75_1960$coefficients[,4], t90_1960$coefficients[,4]) stargazer(modelC60, modelC60, modelC60, modelC60, modelC60, modelC60, coef = coefficient, se = clust_error, p = clust_pvalue, apply.coef = exp, t.auto = FALSE, p.auto = FALSE, covariate.labels=c("Percent 15 to 29", "Percent Male", "Gini Index", "GDP per Cap (1k)", "Percent Urban"), omit.stat = "adj.rsq", star.cutoffs = c(0.05, 0.01, 0.001), omit = c(7:500), df = FALSE, dep.var.caption ="", dep.var.labels = c(""), column.labels = c("Fixed Effects", "t = 0.1", "t = 0.25", "t = 0.5", "t = 0.75", "t = 0.9"), type="html", model.numbers=FALSE, no.space=TRUE, out="Quantile Fixed Effects_1960.htm") ``` The quantile figure was ultimatly produced in excel using the data calculated in the following chunk. The files "Quantile_1990.csv" and "Quantile_1960.csv" contain the results using the High Coverage and Long Series Samples respectively. ```{r Quantile Figure, echo = FALSE, eval = FALSE} #Since 1990 taus <- seq(0.01, 0.99, 0.01) coefficients <- c() errors <- c() for (t in taus) { model <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=t, data=df[df$Year >= 1990,]) model <- summary.rq(model, se="boot", cluster=c("Country")) coefficients <- c(coefficients, model$coefficients[2,1]) errors <- c(errors, model$coefficients[2,2]) print(t) } q1990 <- as.data.frame(cbind(taus, coefficients, errors)) q1990$low <- q1990$coefficients - 1.96*q1990$errors q1990$high <- q1990$coefficients + 1.96*q1990$errors q1990$lowN <- ifelse(q1990$low<0, q1990$low, NA) #Since 1960 taus <- seq(0.01, 0.99, 0.01) coefficients <- c() errors <- c() for (t in taus) { model <- rq(LnHomAdjRate ~ PerPop_15_29 + PerMale + SWIID_Gini + GDP_PerCap + PerUrban + factor(Country), tau=t, data=df[df$Country %in% Quality60,]) model <- summary.rq(model, se="boot", cluster=c("Country")) coefficients <- c(coefficients, model$coefficients[2,1]) errors <- c(errors, model$coefficients[2,2]) print(t) } q1960 <- as.data.frame(cbind(taus, coefficients, errors)) q1960$low <- q1960$coefficients - 1.96*q1960$errors q1960$high <- q1960$coefficients + 1.96*q1960$errors q1960$lowN <- ifelse(q1960$low<0, q1960$low, NA) #Save write.csv(q1990, "Quantile_1990.csv") write.csv(q1960, "Quantile_1960.csv") ```