# file: similarities_missing.R # # # (Poole added this: Remove all objects just to be safe) # rm(list=ls(all=TRUE)) library(foreign) # # Purpose: Defines function read.ord which reads in roll call data in the # format of the .ord files found on Keith Poole's webpages. # # Jeff Lewis (June 2005) # read.ord <- function(rc.file) { # Standard fields and their widths rc.fields <- c("cong","id","state","dist","lstate","party", "eh1","eh2","name") rc.fieldWidths <- c(3,5,2,2,7,4,1,1,11) # figure out how many vote columns are in the file by reading in the first line # (seems like read.fwf should handle this, but I don't see how...) read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1) recl <- nchar( read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1)[1,] ) votes <- recl - sum(rc.fieldWidths) # Append format and labels for votes rc.fields <- append(rc.fields,paste("V",1:votes,sep="")) rc.fieldWidths <- append(rc.fieldWidths,rep(1,votes)) # Read the vote data from fwf read.fwf(file=rc.file,widths=rc.fieldWidths,as.is=TRUE,col.names=rc.fields) } # # A function to calculate vote totals # # Inputs: # rcdta: A dataset read by readord() # minminority: The min minority size to use in analysis # Returns: # totals: a matrix with columns indicating: # (1) number voting yes # (2) number voting no # (3) 1/0 column set to 1 if minority side larger # than minminority # use: A vector of rcdta column names associated with votes # to use in subsequent analysis # votetotals <- function(rcdta,minminority=0.025) { voteColumns <- grep("V",colnames(rcdta)) voteTotals <- t(apply(rcdta[,voteColumns],2, function(v) c(sum(is.element(v,c(1,2,3))), sum(is.element(v,c(4,5,6))) ) ) ) use <- abs(0.50-voteTotals[,1]/(voteTotals[,1]+voteTotals[,2])) < (0.5-minminority) voteTotals <- cbind(voteTotals,use) colnames(voteTotals) <- c("Yea","Nay","Use") list(totals=voteTotals,use=rownames(voteTotals)[voteTotals[,3]==1]) } # # A function to perform Double Centering on a Matrix with missing # elements # doubleCenterMissing <- function(x){ p <- dim(x)[1] n <- dim(x)[2] -(x-matrix(apply(x,1,mean, na.rm=TRUE),nrow=p,ncol=n) - t(matrix(apply(x,2,mean, na.rm=TRUE),nrow=n,ncol=p)) + mean(x, na.rm=TRUE))/2 } # # Example -- Here is where the file is read # rcdta <- read.ord("ftp://voteview.com/sen90kh.ord") dim(rcdta) # party <- rcdta[,6] state <- rcdta[,3] # t <- votetotals(rcdta) #print( t$totals ) #print( t$use ) # rcdtaUse <- rcdta[,t$use] # # Number of Roll Scalable Roll Calls # nq <- length(rcdtaUse[1,]) np <- length(rcdtaUse[,1]) kdenom <- rep(0,np*np) dim(kdenom) <- c(np,np) knumeryea <- rep(0,np*np) dim(knumeryea) <- c(np,np) knumernay <- rep(0,np*np) dim(knumernay) <- c(np,np) Agree <- rep(0,np*np) dim(Agree) <- c(np,np) # # Compute Agreement Scores # # t() -- transpose the matrix # # apply(matrix,1=row/2=col,function,function arguments) -- compute # functions from an array # # is.element(x,y) -- sets - are the elements in x also in y # # This creates a 519 by 102 True/False matrix for Voting # # whattheheck <- apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6))))) # # This creates a 102 by 519 True/False matrix for Voting # # whattheheck <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6)))))) # # This sequence computes the number of times a pair of Members # voted together # voted <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6)))))) voteyea <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3)))))) votenay <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(4,5,6)))))) j <- 0 i <- 0 kmiss <- 0 while (j < np){ j <- j + 1 while (i < np){ i <- i + 1 # both voted kholy <- voted[j,] & voted[i,] kdenom[j,i] <- sum(is.element(kholy,c(1))) kdenom[i,j] <- kdenom[j,i] # both voted Yea kholy <- voteyea[j,] & voteyea[i,] knumeryea[j,i] <- sum(is.element(kholy,c(1))) knumeryea[i,j] <- knumeryea[j,i] # both voted Nay kholy <- votenay[j,] & votenay[i,] knumernay[j,i] <- sum(is.element(kholy,c(1))) knumernay[i,j] <- knumernay[j,i] # # Agreement Score Matrix # # For the Lack of anything better -- Use -0.50 for missing data # Agree[i,j] <- -0.50 # # I should require at least 10 for the denominator -- but I set it # to 1 for test purposes # if (kdenom[i,j] >= 10){ Agree[i,j] <- (knumeryea[i,j]+knumernay[i,j])/kdenom[i,j] } if(Agree[i,j] == -0.50) kmiss=kmiss+1 if(Agree[i,j] == -0.50) Agree[i,j] <- NA Agree[j,i] <- Agree[i,j] } i <- j } # # Set up for Double-Centering # TT <- rep(0,np*np) dim(TT) <- c(np,np) # # # Transform the Matrix # i <- 0 while (i < np) { i <- i + 1 j <- 0 while (j < np) { j <- j + 1 # # This is the Normal Transformation # TT[i,j] <- ((1 - Agree[i,j]))**2 # } } # # Create a Copy Here # T <- TT # T <- doubleCenterMissing(TT) TMEAN <- mean(T, na.rm=TRUE) T[is.na(T)] <- mean(T, na.rm=TRUE) # TT <- T # #Usage # #eigen(x, symmetric, only.values = FALSE, EISPACK = FALSE) # # Arguments # x -- a numeric or complex matrix whose spectral decomposition is to be computed. Logical matrices are coerced to numeric. # symmetric -- if TRUE, the matrix is assumed to be symmetric (or Hermitian if complex) and only its lower triangle (diagonal included) is used. If symmetric is not specified, the matrix is inspected for symmetry. # only.values -- if TRUE, only the eigenvalues are computed and returned, otherwise both eigenvalues and eigenvectors are returned. # EISPACK -- logical. Defunct and ignored. # ev <- eigen(TT, symmetric=TRUE) # TX <- sqrt(max((abs(ev$vec[,1]))^2 + (abs(ev$vec[,2]))^2)) torgerson1 <- ev$vec[,1]*(1/TX)*sqrt(ev$val[1]) torgerson2 <- ev$vec[,2]*(1/TX)*sqrt(ev$val[2]) # torgerson1 <- -1 * torgerson1 torgerson2 <- -1 * torgerson2 plot(torgerson1,torgerson2, main="", xlab="", ylab="", xlim=c(-2,2), ylim=c(-2,2), asp=1, type="n",font=2) # # Southern Democrats points(torgerson1[party == 100 & state >= 40 & state <= 51],torgerson2[party == 100 & state >= 40 & state <= 51],pch='S',col="red",font=2) points(torgerson1[party == 100 & state == 53],torgerson2[party == 100 & state == 53],pch='S',col="red",font=2) points(torgerson1[party == 100 & state == 54],torgerson2[party == 100 & state == 54],pch='S',col="red",font=2) # Northern Democrats points(torgerson1[party == 100 & state != 99 & (state < 40 | state > 54)],torgerson2[party == 100 & state != 99 & (state < 40 | state > 54)],pch='D',col="red",font=2) points(torgerson1[party == 100 & state == 52],torgerson2[party == 100 & state == 52],pch='D',col="red",font=2) # Republicans points(torgerson1[party == 200 & state != 99],torgerson2[party == 200 & state != 99],pch='R',col="blue",font=2) # Presidents points(torgerson1[state == 99],torgerson2[state == 99],pch='P',col="black",font=2) # # # Main title mtext("90th (1967-68) U.S. Senate\nConfiguration From Double-Centering",side=3,line=0.75,cex=1.5,font=2) # x-axis title mtext("Liberal --- Conservative",side=1,line=2.75,cex=1.2,font=2) # y-axis title mtext("North --- South",side=2,line=2.5,cex=1.2,font=2) #