# file: double_center_scaling.R
#
# Purpose: Defines function read.ord which reads in roll call data in the 
#          format of the .ord files found on Keith Poole's webpages.
#
# Jeff Lewis (June 2005)
#
#  (Poole added this: Remove all objects just to be safe)
#
rm(list=ls(all=TRUE))
#
#
read.ord <- function(rc.file) {

    # Standard fields and their widths
    rc.fields <- c("cong","id","state","dist","lstate","party",
                   "eh1","eh2","name") 
    rc.fieldWidths <- c(3,5,2,2,7,4,1,1,11)

    # figure out how many vote columns are in the file by reading in the first line 
    # (seems like read.fwf should handle this, but I don't see how...)
    read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1)    
    recl  <- nchar( read.table(file=rc.file,sep="|",as.is=TRUE, nrows=1)[1,] )
    votes <- recl - sum(rc.fieldWidths)

    # Append format and labels for votes
    rc.fields      <- append(rc.fields,paste("V",1:votes,sep="")) 
    rc.fieldWidths <- append(rc.fieldWidths,rep(1,votes))
    
    # Read the vote data from fwf
    read.fwf(file=rc.file,widths=rc.fieldWidths,as.is=TRUE,col.names=rc.fields)
}

#
# A function to calculate vote totals
#
# Inputs: 
#          rcdta: A dataset read by readord()
#          minminority: The min minority size to use in analysis
# Returns:
#         totals: a matrix with columns indicating: 
#               (1) number voting yes
#               (2) number voting no
#               (3) 1/0 column set to 1 if minority side larger 
#                   than minminority
#         use: A vector of rcdta column names associated with votes
#              to use in subsequent analysis
#

votetotals <- function(rcdta,minminority=0.025) {
    voteColumns <- grep("V",colnames(rcdta))
    voteTotals  <- t(apply(rcdta[,voteColumns],2,
                    function(v) c(sum(is.element(v,c(1,2,3))),
                                  sum(is.element(v,c(4,5,6))) ) ) )
    use <- abs(0.50-voteTotals[,1]/(voteTotals[,1]+voteTotals[,2])) < (0.5-minminority)                                  
    voteTotals <- cbind(voteTotals,use)
    colnames(voteTotals) <- c("Yea","Nay","Use")
    list(totals=voteTotals,use=rownames(voteTotals)[voteTotals[,3]==1])
}
#
# Example -- Here is where the file is read
#  
rcdta <- read.ord("c:/dtaord/sen90kh.ord")
dim(rcdta)
t <- votetotals(rcdta)
#print( t$totals )
#print( t$use )
#
rcdtaUse <- rcdta[,t$use]
#
# *****************************************************
#  The clunky code below was written by Poole!!!  Jeff
#    Lewis is not guilty of the crime of bad code
#    aesthetics!
# *****************************************************
#
# Number of Roll Scalable Roll Calls
#
nq <- length(rcdtaUse[1,])
np <- length(rcdtaUse[,1])
kdenom <- rep(0,np*np)
dim(kdenom) <- c(np,np)
knumeryea <- rep(0,np*np)
dim(knumeryea) <- c(np,np)
knumernay <- rep(0,np*np)
dim(knumernay) <- c(np,np)
Agree <- rep(0,np*np)
dim(Agree) <- c(np,np)
#
#  Compute Agreement Scores
#
#  t() -- transpose the matrix
#
#  apply(matrix,1=row/2=col,function,function arguments) -- compute
#                    functions from an array
#
#  is.element(x,y) -- sets - are the elements in x also in y
#
#  This creates a 519 by 102 True/False matrix for Voting 
#
# whattheheck <- apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6)))))
#
#  This creates a 102 by 519 True/False matrix for Voting
#
# whattheheck <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6))))))
#
#  This sequence computes the number of times a pair of Members
#    voted together
#
 voted <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3,4,5,6))))))
 voteyea <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(1,2,3))))))
 votenay <- t(apply(rcdtaUse,1,function(v) c((is.element(v,c(4,5,6))))))
j <- 0
i <- 0
kmiss <- 0
while (j < np){
   j <- j + 1
   while (i < np){
      i <- i + 1
#  both voted
      kholy <- voted[j,] & voted[i,]
      kdenom[j,i] <- sum(is.element(kholy,c(1)))
      kdenom[i,j] <- kdenom[j,i]
#  both voted Yea
      kholy <- voteyea[j,] & voteyea[i,]
      knumeryea[j,i] <- sum(is.element(kholy,c(1)))
      knumeryea[i,j] <- knumeryea[j,i]
#  both voted Nay
      kholy <- votenay[j,] & votenay[i,]
      knumernay[j,i] <- sum(is.element(kholy,c(1)))
      knumernay[i,j] <- knumernay[j,i]
#
#  Agreement Score Matrix
#
#   For the Lack of anything better -- Use 0.50 for missing data
#
      Agree[i,j] <- .50
#
#  I should require at least 10 for the denominator -- but I set it
#                to 1 for test purposes
#
      if (kdenom[i,j] >= 1){
         Agree[i,j] <- (knumeryea[i,j]+knumernay[i,j])/kdenom[i,j]
      }
      Agree[j,i] <- Agree[i,j]
      if(Agree[i,j] == 0.50) kmiss=kmiss+1
   }
   i <- j
}
#
# Set up for Double-Centering
#
TT <- rep(0,np*np)
dim(TT) <- c(np,np)
TTT <- rep(0,np*np)
dim(TTT) <- c(np,np)
#
xrow <- NULL
xcol <- NULL
xcount <- NULL
matrixmean <- 0
#
# Transform the Matrix
#
i <- 0
while (i < np) {
  i <- i + 1
  xcount[i] <- i
  j <- 0
  while (j < np) {
     j <- j + 1
#
#  This is the Normal Transformation
#
     TT[i,j] <- ((1 - Agree[i,j]))**2
#
#  But the R subroutine wants simple Distances!
#
     TTT[i,j] <- (1 - Agree[i,j])
#
  }
}
T <- TT
#
#  Call Double Center Routine From R Program 
#     cmdscale(....) in stats library
#     The Input data are DISTANCES!!!  Not Squared Distances!!
#     Note that the R routine does not divide
#     by -2.0
#
ndim <- 2
#
dcenter <- cmdscale(TTT,ndim, eig=FALSE,add=FALSE,x.ret=TRUE)
#
#  returns double-center matrix as dcenter$x if x.ret=TRUE
#
#  Do the Division by -2.0 Here
#
TTT <- (dcenter$x)/(-2.0)
#
#  Perform Eigenvalue-Eigenvector Decomposition of Double-Centered Matrix
#
ev <- eigen(TTT)
#
#  Find Point furthest from Center of Space
#
aaa <- sqrt(max(ev$vec[,1]**2 + ev$vec[,2]**2))
#
#  Weight the Eigenvectors to Scale Space to Unit Circle
#
#  NOTE -- Depending upon the sign of the eigenvectors you
#          may have to change the signs here
#
#torgerson1 <- ev$vec[,1]*(1/aaa)
#torgerson2 <- ev$vec[,2]*(1/aaa)
#
torgerson1 <- -ev$vec[,1]*(1/aaa)
torgerson2 <- -ev$vec[,2]*(1/aaa)
#
#  Change the Titles if You do another Congress
#
plot(torgerson1,torgerson2,type="n",asp=1,
       main="90th Senate From Double-Centered\nAgreement Score Matrix (Torgerson)",
       xlab="Liberal-Conservative",
       ylab="Civil Rights/Social Issues",
       xlim=c(-1.0,1.0),ylim=c(-1.0,1.0))
state <- rcdta[,3]
party <- rcdta[,6]
#
#  Change the D, S, R, and P tokens to lower case
#     if you analyze a House
#
points(torgerson1[party == 100 & state >= 40 & state <= 51],torgerson2[party == 100 & state >= 40 & state <= 51],pch='S',col="red")
points(torgerson1[party == 100 & state == 53],torgerson2[party == 100 & state == 53],pch='S',col="red")
points(torgerson1[party == 100 & state == 54],torgerson2[party == 100 & state == 54],pch='S',col="red")
points(torgerson1[party == 100 & (state < 40 | state > 54)],torgerson2[party == 100 & (state < 40 | state > 54)],pch='D',col="red")
points(torgerson1[party == 100 & state == 52],torgerson2[party == 100 & state == 52],pch='D',col="red")
points(torgerson1[party == 200],torgerson2[party == 200],pch='R',col="blue")
#
#  Change this if you analyze a Different Congress
#
text(-.4842,-.2077,"LBJ",col="black",pos=1)