################################################################################
# Supporting Information S2 (code) to article:
# "pvsR: An Open Source Interface to Big Data on the American Political Sphere"
# by Ulrich Matter and Alois Stutzer
#
# REPRODUCTION OF THE REPLICATION SECTION
#
# Please cite the original article (as well as additional data sources
# if indicated in the article) when referring to any material in this
# Supporting Information.
################################################################################

# Overview: the script queries Project Vote Smart (via pvsR) for a US Congress
# bill from the year 2000 whose title contains "Liability", collects the
# Yea/Nay votes on its passage together with the legislators' biographical
# data, and then relates the vote to professional background (attorney or
# not), party and gender with a mosaic plot and three logit models.

#------------------------
# 1) Session Preparation
#------------------------

# install.packages("pvsR") # if not installed yet
# install.packages("effects") # if not installed yet
# install.packages("vcd") # if not installed yet

library(pvsR)
library(effects)
library(vcd)

# Replace the placeholder with a personal Project Vote Smart API key.
pvs.key <- "YOUR-PVS-KEY"

#--------------------------------------
# 2) Query data from Project Vote Smart
#--------------------------------------

# get bills from US Congress in year 2000
bills <- Votes.getBillsByYearState(year = 2000, stateId = "NA")

# string matching of the term "Liability" in the title column
bills <- bills[grep("Liability", bills$title), ]

# get details of the bill, extract data on bill actions separately
bill <- Votes.getBill(bills$billId, separate = "actions")

# get the actionId related to the passage of the bill
aId <- bill[["actions"]]$actionId[bill[["actions"]]$stage == "Passage"]
votes <- Votes.getBillActionVotes(actionId = aId)

# remove absentees and delegates; %in% (unlike ==) never yields NA, so rows
# with a missing action are dropped instead of turning into all-NA rows
votes <- votes[votes$action %in% c("Nay", "Yea"), ]

# fetch biographical data on all legislators participating in the vote
# (this step might take a minute)
bio <- CandidateBio.getBio(candidateId = votes$candidateId)

#--------------------------------------
# 3) Prepare data for analysis
#--------------------------------------

# combine the biographical data with the votes:
leg_vote <- merge(votes, bio, by = "candidateId")

# code additional variables for statistical analysis:
# yes = 1 for a "Yea" vote, 0 otherwise
leg_vote$yes <- as.numeric(leg_vote$action %in% "Yea")

# attorney = 1 if the profession mentions "Attorney" or "Lawyer"
# (grepl() returns FALSE for missing professions, so those are coded 0)
leg_vote$attorney <- as.factor(
  as.numeric(grepl("Attorney|Lawyer", leg_vote$candidate.profession))
)

# republican = 1 if the office party is exactly "Republican"
leg_vote$republican <- as.factor(
  as.numeric(leg_vote$officeParties %in% "Republican")
)

#--------------------------------------
# 4) Data analysis
#--------------------------------------

# mosaic plot preparation
# tab: rows = attorney (0/1), columns = yes (0/1)
tab <- xtabs(~ attorney + yes, data = leg_vote)
p.tab <- prop.table(tab, 1) # row-wise vote shares (not used further below)

# pick the vote columns by level name rather than by position
no <- unname(tab[, "0"])
yes <- unname(tab[, "1"])

# 2 x 2 table of counts: rows = vote, columns = professional background;
# the attorney factor levels are ordered 0 ("Other"), 1 ("Attorney")
tab2 <- rbind(Yes = yes, No = no)
colnames(tab2) <- c("Other", "Attorney")
tab2 <- as.table(tab2[, c("Attorney", "Other")])
names(dimnames(tab2)) <- c("Vote", "Professional Background")
tab2 <- t(tab2) # rows = professional background, columns = vote

par(mai = c(0.8, 1, 0.2, 0.1))
mosaic(
  tab2,
  split_vertical = TRUE,
  shade = TRUE,
  gp = shading_max,
  gp_args = list(h = c(260, 44.4), c = c(100, 105), l = c(90, 74)),
  labeling = TRUE,
  pop = FALSE
)
# write the cell counts into the tiles of the mosaic plot
labeling_cells(text = tab2)(tab2)

# prepare data for regression
leg_vote2 <- leg_vote[, c("yes", "attorney", "republican", "candidate.gender")]
names(leg_vote2) <- c("yes", "Attorney", "Republican", "Gender")

# estimate logit model:
model1 <- yes ~ Attorney
model2 <- yes ~ Attorney + Republican
model3 <- yes ~ Attorney + Republican + Gender

# fit every specification once and derive all summaries from the same fit
glm_fits <- lapply(
  list(model1, model2, model3),
  function(model) {
    glm(model, family = binomial(link = "logit"), data = leg_vote2)
  }
)

# coefficient tables (estimate, std. error, z value, p value) per model
fits <- lapply(glm_fits, function(fit) summary(fit)$coefficients)

# BIC per model; stats::BIC is called explicitly because the result is stored
# under the name `BIC` as well
BIC <- vapply(glm_fits, stats::BIC, numeric(1))
BICs <- c("BIC:", round(BIC, 3))

# number of observations actually used by each fit; this can differ between
# models when a covariate that only enters later models has missing values
N <- vapply(glm_fits, nobs, numeric(1))
Ns <- c("N:", N)

# combine estimates and additional info

# effects plot
# fitted separately at top level (as in the original script) rather than
# reusing glm_fits[[3]], whose stored call refers to the function-local
# `model` argument instead of `model3`
fit3 <- glm(model3, family = binomial(link = "logit"), data = leg_vote2)
plot(allEffects(fit3), cols = 3, rows = 1, ylab = "P(y = yes | x)")