# file: similarities_missing.R
#
#
#  (Poole added this: Remove all objects just to be safe)
#
rm(list=ls(all=TRUE))
library(foreign)
#
# Purpose: Defines function read.ord which reads in roll call data in the 
#          format of the .ord files found on Keith Poole's webpages.
#
# Jeff Lewis (June 2005)
#
read.ord <- function(rc.file) {

    # Standard fields and their widths
    rc.fields <- c("cong","id","state","dist","lstate","party",
                   "eh1","eh2","name") 
    rc.fieldWidths <- c(3,5,2,2,7,4,1,1,11)

    # figure out how many vote columns are in the file by reading in the first line 
    # (seems like read.fwf should handle this, but I don't see how...)
    read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1)    
    recl  <- nchar( read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1)[1,] )
    votes <- recl - sum(rc.fieldWidths)

    # Append format and labels for votes
    rc.fields      <- append(rc.fields,paste("V",1:votes,sep="")) 
    rc.fieldWidths <- append(rc.fieldWidths,rep(1,votes))
    
    # Read the vote data from fwf
    read.fwf(file=rc.file,widths=rc.fieldWidths,as.is=TRUE,col.names=rc.fields)
}

#
# A function to calculate vote totals
#
# Inputs: 
#          rcdta: A dataset read by readord()
#          minminority: The min minority size to use in analysis
# Returns:
#         totals: a matrix with columns indicating: 
#               (1) number voting yes
#               (2) number voting no
#               (3) 1/0 column set to 1 if minority side larger 
#                   than minminority
#         use: A vector of rcdta column names associated with votes
#              to use in subsequent analysis
#

votetotals <- function(rcdta,minminority=0.025) {
    voteColumns <- grep("V",colnames(rcdta))
    voteTotals  <- t(apply(rcdta[,voteColumns],2,
                    function(v) c(sum(is.element(v,c(1,2,3))),
                                  sum(is.element(v,c(4,5,6))) ) ) )
    use <- abs(0.50-voteTotals[,1]/(voteTotals[,1]+voteTotals[,2])) < (0.5-minminority)                                  
    voteTotals <- cbind(voteTotals,use)
    colnames(voteTotals) <- c("Yea","Nay","Use")
    list(totals=voteTotals,use=rownames(voteTotals)[voteTotals[,3]==1])
}
#
#  A function to perform Double Centering on a Matrix with missing
#  elements
#
doubleCenterMissing <- function(x){
  p <- dim(x)[1]
  n <- dim(x)[2]
  -(x-matrix(apply(x,1,mean, na.rm=TRUE),nrow=p,ncol=n) -
    t(matrix(apply(x,2,mean, na.rm=TRUE),nrow=n,ncol=p)) + mean(x, na.rm=TRUE))/2
}

#
# Example -- Here is where the file is read
#  
rcdta <- read.ord("ftp://voteview.com/sen90kh.ord")
dim(rcdta)
#
party <- rcdta[,6]
state <- rcdta[,3]
#
t <- votetotals(rcdta)
#print( t$totals )
#print( t$use )
#
rcdtaUse <- rcdta[,t$use]
#
# Number of Roll Scalable Roll Calls
#
nq <- length(rcdtaUse[1,])
np <- length(rcdtaUse[,1])
kdenom <- rep(0,np*np)
dim(kdenom) <- c(np,np)
knumeryea <- rep(0,np*np)
dim(knumeryea) <- c(np,np)
knumernay <- rep(0,np*np)
dim(knumernay) <- c(np,np)
Agree <- rep(0,np*np)
dim(Agree) <- c(np,np)
#
#  Compute Agreement Scores
#
#  t() -- transpose the matrix
#
#  apply(matrix,1=row/2=col,function,function arguments) -- compute
#                    functions from an array
#
#  is.element(x,y) -- sets - are the elements in x also in y
#
#  This creates a 519 by 102 True/False matrix for Voting 
#
# whattheheck <- apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6)))))
#
#  This creates a 102 by 519 True/False matrix for Voting
#
# whattheheck <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6))))))
#
#  This sequence computes the number of times a pair of Members
#    voted together
#
 voted <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6))))))
 voteyea <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3))))))
 votenay <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(4,5,6))))))
j <- 0
i <- 0
kmiss <- 0
while (j < np){
   j <- j + 1
   while (i < np){
      i <- i + 1
#  both voted
      kholy <- voted[j,] & voted[i,]
      kdenom[j,i] <- sum(is.element(kholy,c(1)))
      kdenom[i,j] <- kdenom[j,i]
#  both voted Yea
      kholy <- voteyea[j,] & voteyea[i,]
      knumeryea[j,i] <- sum(is.element(kholy,c(1)))
      knumeryea[i,j] <- knumeryea[j,i]
#  both voted Nay
      kholy <- votenay[j,] & votenay[i,]
      knumernay[j,i] <- sum(is.element(kholy,c(1)))
      knumernay[i,j] <- knumernay[j,i]
#
#  Agreement Score Matrix
#
#   For the Lack of anything better -- Use -0.50 for missing data
#
      Agree[i,j] <- -0.50
#
#  I should require at least 10 for the denominator -- but I set it
#                to 1 for test purposes
#
      if (kdenom[i,j] >= 10){
         Agree[i,j] <- (knumeryea[i,j]+knumernay[i,j])/kdenom[i,j]
      }
      if(Agree[i,j] == -0.50) kmiss=kmiss+1
      if(Agree[i,j] == -0.50) Agree[i,j] <- NA
      Agree[j,i] <- Agree[i,j]
   }
   i <- j
}
#
# Set up for Double-Centering
#
TT <- rep(0,np*np)
dim(TT) <- c(np,np)
#
#
# Transform the Matrix
#
i <- 0
while (i < np) {
  i <- i + 1
  j <- 0
  while (j < np) {
     j <- j + 1
#
#  This is the Normal Transformation
#
     TT[i,j] <- ((1 - Agree[i,j]))**2
#
  }
}
#
#  Create a Copy Here
#
T <- TT
#
T <- doubleCenterMissing(TT)
TMEAN <- mean(T, na.rm=TRUE)
T[is.na(T)] <- mean(T, na.rm=TRUE)
#
TT <- T
#
#Usage
#
#eigen(x, symmetric, only.values = FALSE, EISPACK = FALSE)
#
# Arguments
# x -- a numeric or complex matrix whose spectral decomposition is to be computed. Logical matrices are coerced to numeric.
# symmetric -- if TRUE, the matrix is assumed to be symmetric (or Hermitian if complex) and only its lower triangle (diagonal included) is used. If symmetric is not specified, the matrix is inspected for symmetry.
# only.values -- if TRUE, only the eigenvalues are computed and returned, otherwise both eigenvalues and eigenvectors are returned.
# EISPACK -- logical. Defunct and ignored.
#
ev <- eigen(TT, symmetric=TRUE)
#
TX <- sqrt(max((abs(ev$vec[,1]))^2 + (abs(ev$vec[,2]))^2))
torgerson1 <- ev$vec[,1]*(1/TX)*sqrt(ev$val[1])
torgerson2 <- ev$vec[,2]*(1/TX)*sqrt(ev$val[2])
#
torgerson1 <- -1 * torgerson1
torgerson2 <- -1 * torgerson2
plot(torgerson1,torgerson2,
       main="", xlab="", ylab="",
       xlim=c(-2,2), ylim=c(-2,2), asp=1, type="n",font=2)
#
#  Southern Democrats
points(torgerson1[party == 100 & state >= 40 & state <= 51],torgerson2[party == 100 & state >= 40 & state <= 51],pch='S',col="red",font=2)
points(torgerson1[party == 100 & state == 53],torgerson2[party == 100 & state == 53],pch='S',col="red",font=2)
points(torgerson1[party == 100 & state == 54],torgerson2[party == 100 & state == 54],pch='S',col="red",font=2)
#  Northern Democrats
points(torgerson1[party == 100 & state != 99 & (state < 40 | state > 54)],torgerson2[party == 100 & state != 99 & (state < 40 | state > 54)],pch='D',col="red",font=2)
points(torgerson1[party == 100 & state == 52],torgerson2[party == 100 & state == 52],pch='D',col="red",font=2)
#  Republicans
points(torgerson1[party == 200 & state != 99],torgerson2[party == 200 & state != 99],pch='R',col="blue",font=2)
#  Presidents
points(torgerson1[state == 99],torgerson2[state == 99],pch='P',col="black",font=2)
#
#
# Main title
mtext("90th (1967-68) U.S. Senate\nConfiguration From Double-Centering",side=3,line=0.75,cex=1.5,font=2)
# x-axis title
mtext("Liberal    ---    Conservative",side=1,line=2.75,cex=1.2,font=2)
# y-axis title
mtext("North    ---    South",side=2,line=2.5,cex=1.2,font=2)
#