POLI 277 MEASUREMENT THEORY
Sixth Assignment
Due 28 May 2008
# # # keith2.r -- This Program was written by Kevin Quinn and modfied by # Keith Poole to run on WINDOZE machines # rm(list=ls(all=TRUE)) # library(foreign) Be sure to load this library # ## does the 104th senate ## (assumes the relevant .dta files are in the current directory) # ## write out the filehead (only need to do that here) filehead <- "c:/ucsd_homework_5/sen104kh" Change Path Statements as Appropriate filehead2 <- "sen104kh" This line needs to be changed as well # ## SHOULDN'T NEED TO CHANGE ANYTHING BELOW THIS LINE ## ####################################################### # ## read data in filestring <- paste(filehead, ".dta", sep="") Creates "c:/ucsd_homework_5/sen104kh.dta" mydata <- read.dta(filestring) read.dta is in the foreign library # ## add rownames corresponding to legislators' names rownames(mydata) <- gsub(" ", "", mydata$name) Pattern Match and Replacement -- takes the "name" column # in the Stata file and deletes all spaces ## recode votes to 0/1/NA rownames uses the "name" variable as labels for the rows of the matrix ## 1 yea, 0 nay, NA missing ncdata <- ncol(mydata) ncol gets the number of columns of the matrix submat <- mydata[,10:ncdata] Note that the first roll call is the 10th variable in Stata submat[submat==0] <- -999 submat[submat==7] <- -999 submat[submat==8] <- -999 submat[submat==9] <- -999 submat[submat==4] <- 0 submat[submat==5] <- 0 submat[submat==6] <- 0 submat[submat==2] <- 1 submat[submat==3] <- 1 submat[submat==-999] <- NA mydata[,10:ncdata] <- submat # ## change name of mydata to whatever filehead is parse creates the character string "sen104kh <- mydata" expr <- parse(text=paste(filehead2, " <- mydata")) using the paste command to concatenate the strings eval(expr) expr (expression) creates an unevaluated call and eval(expression) # evaluates the unevaluated expression. Note that this is a clever way ## save as an .rda file of passing R a command that would normally be entered from the command line expr <- parse(text=paste("save(", filehead2, ", file=\"", filehead, ".rda\", ascii=TRUE)", sep="")) This produces eval(expr) expression(save(sen104kh, file = "c:/ucsd_homework_5/sen104kh.rda", ascii = TRUE)) and eval(expr) executes the save(...) command Note that the \" says use " as part of this stringWhen you run keith2.r in R it creates an ASCII roll call file -- sen104kh.rda -- that MCMCPack reads.
# # # keithMCMC.r -- Program to analyze Congressional Roll Calls using MCMCPack. # Program written by Kevin Quinn and modified by Keith Poole # library(MCMCpack) # load("c:/ucsd_homework_5/sen104kh.rda") # Sen.rollcalls <- sen104kh[,10:ncol(sen104kh)] Pull in the roll calls # posterior <- MCMCirt1d(Sen.rollcalls, Here is the call to the One-Dimensional IRT model in MCMCPack theta.constraints=list("KENNEDY,ED"=-1, "HELMS"=1), Note the Kennedy-Helms Constraint burnin=2000, mcmc=100000, thin=20, verbose=500) The Verbose switch writes to the screen every 500 iterations geweke.diag(posterior) geweke.diag is a convergence diagnostic for Markov Chains pdf(file="c:/ucsd_homework_5/keithplots3.pdf") This opens a PDF file for writing plots plot(posterior) Note that posterior is 100000/20=5000 rows and 104 columns. This does a simple two panel plot of the Chain for every Senator summary(posterior) summary produces two tables -- a table of means and standard deviations, and a table of quantiles for the chain sss <- summary(posterior) write.table(sss$statistics,"c:/ucsd_homework_5/tab1.txt") This just writes the statistics to a file write.table(sss$quantiles,"c:/ucsd_homework_5/tab2.txt") This just writes the quantiles to a file dev.off() This turns off the PDF output file plot(posterior[,1]) This produces a two panel plot of President Clinton -- the first column of the Markov Chain MatrixRun keithMCMC.r and you should see something like this:
"2.5%" "25%" "50%" "75%" "97.5%" "theta.CLINTON" -1.46835666800996 -1.24925180016504 -1.13248618507518 -1.02369748307711 -0.825832766091595 "theta.HEFLIN" -0.305185169022063 -0.260266452156847 -0.236646341912 -0.21152345773924 -0.164788688955976 "theta.SHELBY" 0.817557346249403 0.901230444522503 0.94627890007946 0.989243450561128 1.07933998304934 "theta.MURKOWSKI" 1.09335890421539 1.19824882094799 1.25670093580734 1.31958989471798 1.43254505157744 "theta.STEVENS" 0.691198365174401 0.766577019286392 0.807760941082107 0.846435626627692 0.926813217812819 "theta.KYL" 1.59567832431164 1.74781133917839 1.83463521392058 1.92321709927887 2.12148303178479 "theta.MCCAIN" 0.9127781161526 1.00154539701113 1.04775177337770 1.09681659841682 1.19553831441862 "theta.BUMPERS" -1.15477388790377 -1.06988333786436 -1.02767252758339 -0.98457201662332 -0.906895534402263 "theta.PRYOR" -1.06545271575199 -0.983862584998664 -0.942830287374412 -0.903234960754862 -0.82976514739809 etc. etc. etc. "theta.ROBB" -0.612833067384907 -0.559694564930013 -0.531285687868471 -0.5030526631965 -0.449925319999447 "theta.WARNER" 0.85347410942553 0.939429787416401 0.986236270543455 1.03259160531925 1.12731808501585 "theta.GORTON" 0.71110607989555 0.785047171712733 0.823816515457433 0.86391338271389 0.943106201637842 "theta.MURRAY" -1.31180278699478 -1.21378171795985 -1.16451605149819 -1.11829916326719 -1.03314854643286 "theta.BYRD,ROBER" -0.643928175363656 -0.587217717342924 -0.559534909846884 -0.531827878128873 -0.478858968136141 "theta.ROCKEFELLER" -1.00577257569149 -0.93597359420403 -0.896528257802742 -0.859268091350322 -0.789490457501235 "theta.FEINGOLD" -0.937835231763546 -0.867360887051455 -0.83268419061364 -0.79829756571121 -0.738794173410312 "theta.KOHL" -0.738059251852728 -0.68291897389013 -0.65229443805164 -0.62302183015615 -0.567739852475296 "theta.SIMPSON" 0.595825157941938 0.665454931923443 0.701114911207732 0.738425192411877 0.807921291233901 "theta.THOMAS" 1.19960136679431 1.31320626032423 1.37605953108054 1.43891743965873 1.56019745658643The means and standard deviations -- tab1.txt -- should look something like this:
"Mean" "SD" "Naive SE" "Time-series SE" "theta.CLINTON" -1.13752866573994 0.163361089950778 0.00231027468972442 0.00844161180468047 "theta.HEFLIN" -0.235901092036368 0.0356544557029748 0.000504230148141777 0.00187933550056510 "theta.SHELBY" 0.94584308060994 0.0654691187943003 0.000925873157155148 0.0032655146431644 "theta.MURKOWSKI" 1.25917246194279 0.08755797655415 0.00123825677936824 0.00491303696846677 "theta.STEVENS" 0.807367687377732 0.0594534201434932 0.00084079833096394 0.00326313573660544 "theta.KYL" 1.83982777758233 0.131527890645338 0.00186008526780962 0.00667155039594659 "theta.MCCAIN" 1.05000610380888 0.0722173423210104 0.00102130744948913 0.00380440125010865 "theta.BUMPERS" -1.02792332578180 0.063516501736212 0.000898258981898452 0.00242323003369357 "theta.PRYOR" -0.94363939318942 0.059463963112542 0.000840947431062103 0.00207222197704451 etc. etc. etc. "theta.ROBB" -0.531199445605816 0.0416921346732881 0.000589615822992496 0.00192788263257559 "theta.WARNER" 0.986778912384638 0.0684910928470496 0.000968610324060525 0.00379677755671381 "theta.GORTON" 0.82511325197929 0.0590672758199786 0.000835337425570461 0.00319387251751855 "theta.MURRAY" -1.16717201653990 0.0713172857955998 0.00100857872803775 0.00262299332023583 "theta.BYRD,ROBER" -0.559794239719452 0.0415658308289515 0.000587829616896089 0.00194456442318159 "theta.ROCKEFELLER" -0.897478145286493 0.0565602890945102 0.000799883279291993 0.00208050182976086 "theta.FEINGOLD" -0.834027386936342 0.0515200269079733 0.000728603207870827 0.00205291199886881 "theta.KOHL" -0.652630326803942 0.0438530082329383 0.000620175189938803 0.00188901250096265 "theta.SIMPSON" 0.701892542115515 0.0541614249937179 0.00076595821783569 0.0029933233548159 "theta.THOMAS" 1.37650108453736 0.0918834264310356 0.00129942787816081 0.00492147921862764
# rm(list=ls(all=TRUE)) # library(foreign) # ## does the 102nd senate ## (assumes the relevant .dta files are in the current directory) # ## write out the filehead (only need to do that here) filehead <- "c:/ucsd_homework_6/sen108kh" filehead2 <- "sen108kh" # ## SHOULDN'T NEED TO CHANGE ANYTHING BELOW THIS LINE ## ####################################################### # ## read data in filestring <- paste(filehead, ".dta", sep="") mydata <- read.dta(filestring) # ## add rownames corresponding to legislators' names rownames(mydata) <- gsub(" ", "", mydata$name) # ## recode votes to 0/1/NA ## 1 yea, 0 nay, NA missing ncdata <- ncol(mydata) submat <- mydata[,10:ncdata] submat[submat==0] <- -999 submat[submat==7] <- -999 submat[submat==8] <- -999 submat[submat==9] <- -999 submat[submat==4] <- 0 submat[submat==5] <- 0 submat[submat==6] <- 0 submat[submat==2] <- 1 submat[submat==3] <- 1 submat[submat==-999] <- NA mydata[,10:ncdata] <- submat ## change name of mydata to whatever filehead is expr <- parse(text=paste(filehead2, " <- mydata")) eval(expr) ## save as an .rda file expr <- parse(text=paste("save(", filehead2, ", file=\"", filehead, ".rda\", ascii=TRUE)", sep="")) eval(expr)This will create the SEN108KH.RDA file.
# library(MCMCpack) load("c:/ucsd_homework_6/sen108kh.rda") Sen.rollcalls <- sen108kh[,10:ncol(sen108kh)] posterior <- MCMCirt1d(Sen.rollcalls, theta.constraints=list("KENNEDYED"=-1, "KYL"=1), burnin=2000, mcmc=100000, thin=20, verbose=500) geweke.diag(posterior) pdf(file="c:/ucsd_homework_6/keithplots3.pdf") plot(posterior) summary(posterior) sss <- summary(posterior) write.table(sss$statistics,"c:/ucsd_homework_6/tab1.txt") write.table(sss$quantiles,"c:/ucsd_homework_6/tab2.txt") dev.off() plot(posterior[,1])
11049990999 0USA 100 CLINTON 55 17 6 99 0.733 -0.587 0.065 21041509041 1ALABAMA 20000CALLAHAN 563 58 15 503 0.853 0.729 0.043 31042930041 2ALABAMA 20000EVERETT 571 70 21 502 0.825 0.746 0.041 41041563241 3ALABAMA 10000BROWDER 459 89 120 462 0.664 -0.037 0.015 51041100041 4ALABAMA 10000BEVILL 438 121 108 462 0.660 -0.081 0.015 61042910041 5ALABAMA 10000CRAMER 470 97 116 480 0.662 -0.023 0.014 71042930141 6ALABAMA 20000BACHUS 602 32 34 488 0.856 0.689 0.036 81042930241 7ALABAMA 10000HILLIARD 457 121 34 504 0.726 -0.643 0.023 91041406681 1ALASKA 20000YOUNG, DON 519 49 31 466 0.814 0.592 0.031 101042950061 1ARIZONA 20000SALMON 615 37 45 468 0.840 0.816 0.045 etc etc etc etc etc etcThe legislator coordinates are in the next to the last column (shown in red). For example, former President Clinton's coordinate is -0.587. Use R to make a smoothed histogram of the Republicans and Democrats in the 104th House using the estimated coordinates above (the party code is shown in blue). Use arrows to show the locations of former President Clinton and former Speaker of the House Newt Gingrich.
HOU104KH.ORD Name of Data File NOMINAL MULTIDIMENSIONAL UNFOLDING Title of Run 1321 1 5 Number RCs, Left on 1st, Up on 2nd 1 36 Number Dimensions, Number Characters to Read From Header 15.0000 0.5000 Starting Values for BETA and WEIGHT 0.0250 20 RC Min. Cutoff, Number RCs for Legislator (36A1,3600I1) Format for Roll Call File (1x,I4,36A1,1X,4i4,51f7.3) Format for Legislator Coordinate File -- NOM31.DAT (1x,I4,36A1,1X,51f7.3) Format for H-S Coordinate File -- FORT.34The red "1" in the fourth line is the number of dimensions. This is the number you should change to "2". Leave everything else in the file the same! Run W-NOMINATE on the 104th House in two dimensions. Turn in the NOM21.DAT output file.
11049990999 0USA 100 CLINTON 54 17 7 99 0.729 -0.594 -0.116 0.066 0.129 21041509041 1ALABAMA 20000CALLAHAN 561 36 17 525 0.880 0.758 0.653 0.042 0.132 31042930041 2ALABAMA 20000EVERETT 569 50 23 522 0.852 0.774 0.634 0.040 0.132 41041563241 3ALABAMA 10000BROWDER 473 35 106 516 0.733 -0.034 0.842 0.015 0.083 51041100041 4ALABAMA 10000BEVILL 448 57 98 526 0.723 -0.079 0.759 0.015 0.075 61042910041 5ALABAMA 10000CRAMER 474 46 112 531 0.710 -0.018 0.688 0.015 0.064 71042930141 6ALABAMA 20000BACHUS 597 25 39 495 0.861 0.695 0.232 0.035 0.078 81042930241 7ALABAMA 10000HILLIARD 445 117 46 508 0.748 -0.642 0.417 0.023 0.046 91041406681 1ALASKA 20000YOUNG, DON 517 34 33 481 0.839 0.600 0.583 0.031 0.122 101042950061 1ARIZONA 20000SALMON 623 35 37 470 0.842 0.826 -0.119 0.045 0.075 etc etc etc etc etc etcThe legislator coordinates are are the third and fourth columns from the end (shown in red). For example, former President Clinton's coordinates are -0.594 and -0.116. Use R to plot the legislators in two dimensions. Use "D" for Non-Southern Democrats, "S" for Southern Democrats, "R" for Republicans, and "P" for President Clinton. This graph should be in the same format as the one you did for question 1.b of Homework 5 (see the plot_oc_output_X_and_Z.r program for how to do this).
SEN104KH.ORD NOMINAL MULTIDIMENSIONAL UNFOLDING OF 104TH SENATE 919 1 22 2 36 11 Number Dimensions, Number Characters to Read from Header, Number of Bootstrap Trials 15.0000 0.5000 0.0250 20 (36A1,15000I1) (1x,I4,36A1,1X,4i4,51f7.3) (I4,1X,36A1,80F10.4)It is identical to the NOMSTART.DAT used in question 1 above except for the "11" (colored red) in the fourth line of the file. This is the number of bootstrap trials. Normally it is set to 1001 but we will use 101 in this problem.
1 1049990999 0USA 10000CLINTON -0.9160 -0.3336 -0.8668 -0.2853 0.1039 0.2095 1.0000 0.1436 0.1436 1.0000 2 1041470541 0ALABAMA 10000HEFLIN -0.3923 -0.2917 -0.3546 -0.4203 0.0510 0.1876 1.0000 0.7794 0.7794 1.0000 3 1049465941 0ALABAMA 20000SHELBY 0.5858 -0.3131 0.6871 -0.3176 0.1162 0.0603 1.0000 0.2238 0.2238 1.0000 4 1041490781 0ALASKA 20000MURKOWSKI 0.6600 -0.1009 0.7623 -0.0338 0.1138 0.1090 1.0000 0.0790 0.0790 1.0000 5 1041210981 0ALASKA 20000STEVENS 0.4199 -0.3162 0.5695 -0.3110 0.1645 0.1222 1.0000 0.1487 0.1487 1.0000 etc etc etc 98 1044930873 0WASHING 10000MURRAY -0.9235 -0.1667 -0.8936 -0.1832 0.0555 0.0916 1.0000 -0.1436 -0.1436 1.0000 99 104 136656 0WEST VI 10000BYRD, ROBER -0.6504 -0.5292 -0.6249 -0.6165 0.0465 0.1304 1.0000 0.1703 0.1703 1.0000 100 1041492256 0WEST VI 10000ROCKEFELLER -0.8080 -0.0714 -0.7819 -0.0967 0.0416 0.1585 1.0000 0.6202 0.6202 1.0000 101 1044930925 0WISCONS 10000FEINGOLD -0.8300 0.5577 -0.7844 0.6024 0.0546 0.0564 1.0000 0.6076 0.6076 1.0000 102 1041570325 0WISCONS 10000KOHL -0.6772 0.5656 -0.6449 0.7452 0.0395 0.1934 1.0000 0.5894 0.5894 1.0000 103 1041471068 0WYOMING 20000SIMPSON 0.3605 -0.4309 0.5124 -0.4402 0.1725 0.1322 1.0000 0.4797 0.4797 1.0000 104 1041563368 0WYOMING 20000THOMAS 0.7165 0.3480 0.7850 0.3535 0.0824 0.1281 1.0000 0.5629 0.5629 1.0000The legislator coordinates are are the first two columns after the name of the legislator (shown in red). For example, former President Clinton's coordinates are -0.9160 and -0.3336.
# # Plot_Bootstrap_New.r -- Program reads parametric bootstrap files posted # at http://voteview.org/Lewis_and_Poole.htm # and plots the legislator ideal points and the # standard errors # # Remove all objects just to be safe # rm(list=ls(all=TRUE)) # library(MASS) library(stats) library(ellipse) You Will Need to Download and Install this Library # # Set up to read Parametric Bootstrap File # rc.file <- "c:/ucsd_homework_7/fort.26" # # The variable fields and their widths # rc.fields <- c("counter","cong","id","state","dist","lstate","party", "eh1","eh2","name","wnom1","wnom2","wnom1bs","wnom2bs", "se1","se2","r11","r12","r21","r22") # # Note -- For some files the field widths will be (e.g., H108_BS_1000_2.DAT): # (5,3,5,2,2,7,4,1,1,11,8,7,7,7,7,7,7) # rc.fieldWidths <- c(4,4,5,2,2,7,4,1,1,11,10,10,10,10,10,10,10,10,10,10) # # Read the vote data from fwf (FIXED WIDTH FORMAT -- FWF) # TT <- read.fwf(file=rc.file,widths=rc.fieldWidths,as.is=TRUE,col.names=rc.fields) party <- TT[,7] state <- TT[,4] wnom1 <- TT[,11] wnom2 <- TT[,12] std1 <- TT[,15] std2 <- TT[,16] corr12 <- TT[,18] # nrow <- length(TT[,1]) ncol <- length(TT[1,]) # plot(TT[,11],TT[,12],type="n",asp=1, main="", xlab="", ylab="", xlim=c(-1.0,1.0),ylim=c(-1.0,1.0),font=2) points(wnom1[party == 100 & state >= 40 & state <= 51],wnom2[party == 100 & state >= 40 & state <= 51],pch='S',col="red") points(wnom1[party == 100 & state == 53],wnom2[party == 100 & state == 53],pch='S',col="red") points(wnom1[party == 100 & state == 54],wnom2[party == 100 & state == 54],pch='S',col="red") points(wnom1[party == 100 & (state < 40 | state > 54)],wnom2[party == 100 & (state < 40 | state > 54)],pch='D',col="red") points(wnom1[party == 100 & state == 52],wnom2[party == 100 & state == 52],pch='D',col="red") points(wnom1[party == 200],wnom2[party == 200],pch='R',col="blue") # Main title mtext("104th Senate From W-NOMINATE\nWith Bootstrapped Standard Errors",side=3,line=1.50,cex=1.2,font=2) # x-axis title mtext("Liberal - Conservative",side=1,line=2.75,cex=1.2) # y-axis title mtext("Social/Lifestyle Issues",side=2,line=2.5,cex=1.2) # # # This code does the cross-hairs for the standard errors. If the # correlation is greater than .15 between the two dimensions, # the 95% confidence ellipse is shown # for (i in 1:nrow) { # # These two statements do the cross-hairs # lines(c(wnom1[i],wnom1[i]),c(wnom2[i]-1.96*std2[i],wnom2[i]+1.96*std2[i]),col="gray") lines(c(wnom1[i]-1.96*std1[i],wnom1[i]+1.96*std1[i]),c(wnom2[i],wnom2[i]),col="gray") # # This if statement does the ellipse # if (abs(corr12[i]) > .15){ lines(ellipse(x=corr12[i],scale=c(std1[i],std2[i]), centre=c(wnom1[i],wnom2[i])), col="gray") } } #Run this program and turn in the plot. It should look similar to this:
1 1049990999 0USA 10000CLINTON -0.3160 -0.7979 -0.8467 -0.2906 0.5711 0.5775 0.1089 0.2070 1.0000 -0.8322 -0.8322 1.0000 2 1041470541 0ALABAMA 10000HEFLIN -0.2219 0.0278 0.0011 -0.2066 0.3139 0.3403 0.1975 0.2220 1.0000 -0.9512 -0.9512 1.0000 3 1049465941 0ALABAMA 20000SHELBY 0.6878 -0.0150 0.7162 -0.1751 0.0633 0.1796 0.0529 0.0582 1.0000 -0.3841 -0.3841 1.0000 4 1041490781 0ALASKA 20000MURKOWSKI 0.7285 -0.0996 0.7283 -0.0236 0.0781 0.0929 0.0741 0.0445 1.0000 -0.5790 -0.5790 1.0000 5 1041210981 0ALASKA 20000STEVENS 0.6308 -0.1836 0.6209 -0.2080 0.0682 0.0844 0.0639 0.0763 1.0000 -0.4706 -0.4706 1.0000 etc etc etc 98 1044930873 0WASHING 10000MURRAY -0.8772 -0.0015 -0.9068 -0.1126 0.0506 0.1520 0.0378 0.0919 1.0000 0.0408 0.0408 1.0000 99 104 136656 0WEST VI 10000BYRD, ROBER -0.6919 -0.5972 -0.6128 -0.3697 0.0959 0.2762 0.0448 0.1300 1.0000 -0.7018 -0.7018 1.0000 100 1041492256 0WEST VI 10000ROCKEFELLER -0.7673 -0.0990 -0.7713 -0.1318 0.0585 0.0952 0.0554 0.0842 1.0000 0.2288 0.2288 1.0000 101 1044930925 0WISCONS 10000FEINGOLD -0.9161 0.3985 -0.8439 0.3498 0.0901 0.1056 0.0457 0.0875 1.0000 0.7287 0.7287 1.0000 102 1041570325 0WISCONS 10000KOHL -0.6915 0.2468 -0.7519 0.4653 0.1107 0.2343 0.0859 0.0404 1.0000 -0.2221 -0.2221 1.0000 103 1041471068 0WYOMING 20000SIMPSON 0.5253 -0.1192 0.5113 -0.0559 0.0463 0.1029 0.0417 0.0743 1.0000 -0.3008 -0.3008 1.0000 104 1041563368 0WYOMING 20000THOMAS 0.7193 0.1231 0.7215 0.1730 0.0458 0.1302 0.0434 0.1130 1.0000 -0.4295 -0.4295 1.0000The average legislator coordinates are are the third and fourth columns after the name of the legislator (shown in red). For example, former President Clinton's coordinates are -0.8467 and -0.2906.
# # wnominate_in_R.r # # Program to Run R Version of W-NOMINATE # # # Remove all objects just to be safe # rm(list=ls(all=TRUE)) # library(pscl) library(wnominate) # # sen88 <- readKH("https://legacy.voteview.com/k7ftp/sen88kh.ord", dtl=NULL, yea=c(1,2,3), nay=c(4,5,6), missing=c(7,8,9), notInLegis=0, desc="88th U.S. Senate", debug=FALSE) # result <- wnominate(sen88, dims=2, polarity=c(7,3)) # # ---- Useful Commands To See What is in an Object # # > length(result) # [1] 7 # > class(result) # [1] "nomObject" # > names(result) # [1] "legislators" "rollcalls" "dimensions" "eigenvalues" "beta" # [6] "weights" "fits" # write.table(result$legislators,"c:/ucsd_homework_6/sen88_x.txt") #