# Double-Center Function for a rectangular squared distance matrix
doubleCenterRect2 <- function(T){
p <- dim(T)[1]
n <- dim(T)[2]
-(T-matrix(apply(T,1,mean),nrow=p,ncol=n)-
t(matrix(apply(T,2,mean),nrow=n,ncol=p))+ mean(T))/2
}
#
etc etc etc
TTSQDC <- doubleCenterRect2(TTSQ) TTSQ is TT squared with the squared mean in the
# Missing entries
# Perform Singular Value Decomposition
#
xsvd <- svd(TTSQDC)
#
# The Two Lines Below Put the Singular Values in a
# Diagonal Matrix -- The first one creates an
# identity matrix and the second command puts
# the singular values on the diagonal
#
Lambda <- diag(nq)
diag(Lambda) <- xsvd$d
#
# Compute U*LAMBDA*V' for check below
#
XREPRODUCE <- xsvd$u %*% Lambda %*% t(xsvd$v)
ssecheck <- sum(abs(TTSQDC-XREPRODUCE)) Check the Decomposition
#
# Starting Coordinates for MLSMU6 Algorithm
#
zz <- xsvd$v[,1:2] Use the Left Singular Vectors as the Stimuli Coordinate Starts
#
xx <- rep(0,np*ndim)
dim(xx) <- c(np,ndim) Use the Right Singular Vectors as the Stimuli Coordinate Starts
xx[,1] <- xsvd$u[,1]*sqrt(xsvd$d[1]) Multiplied by the Square Root of the Singular Values
xx[,2] <- xsvd$u[,2]*sqrt(xsvd$d[2])
###################################################
### chunk number 15:
###################################################
##Here is suma over the NQ dimensions This Function calculates the Sum of Squared Error
sumaj <- function(i){ for the ith Row
jjj <- 1:nq
j <- jjj[!is.na(T[i,])] Here is the trick to have j only range over the non-missing values
s <- (xx[i,1]-zz[j,1])^2+(xx[i,2]-zz[j,2])^2 Note that "j" is a vector so this is a vector calculation
s=sqrt(s)
sx=TEIGHT[i,j]
sum((s-sx)^2) Here is the Sum of Squared Error
}
## now get SUMA over NP dimension
#xx <- xmetric # Used as a check on the MLSMU6 Algorithm
#zz <- zmetric
sumvector <- sapply(1:np,sumaj)
suma <- sum(sumvector) Here is the Total Sum of Squared Error
xxx <- xx
zzz <- zz
kp=0
ktp=0
sumalast <- 100000.00
SAVEsumalast <- sumalast
for(loop in 1:50){
xxx[,1:2] <- 0
zzz[,1:2] <- 0
kp=kp+1
ktp=ktp+1
for(i in 1:np){ This For Loop is doing the sum of the line equations for the Stimuli
for(j in 1:nq){
if(!is.na(T[i,j])){
s=0
for(k in 1:ndim)
s <- s+(xx[i,k]-zz[j,k])^2
xc <- ifelse(s==0, 1.0, TEIGHT[i,j]/sqrt(s))
for(k in 1:ndim)
zzz[j,k]=zzz[j,k]+(xx[i,k]-xc*(xx[i,k]-zz[j,k]))/xcol[j]
}
}
}
for(k in 1:ndim){ This For Loop is doing the sum of the line equations for the Respondents
for(i in 1:np){
sw <- 0.0
for(j in 1:nq){
if(!is.na(T[i,j])){
s=0
for(kk in 1:ndim)
s <- s+(xx[i,kk]-zzz[j,kk])^2
xc <- ifelse(s==0, 1.0, TEIGHT[i,j]/sqrt(s))
xxx[i,k]=xxx[i,k]+(zzz[j,k]-xc*(zzz[j,k]-xx[i,k]))
}
}
xxx[i,k]=xxx[i,k]/xrow[i]
}
}
xx <- xxx
zz <- zzz
sumvector <- sapply(1:np,sumaj)
suma <- sum(sumvector) Calculate Current SSE
#
print(c(loop,suma))
done=((sumalast-suma)/suma)<0.0005 Compare the Current with the Previous SSE
sumalast=suma
if(done) break If Convergence Break out of the Loop
}
# THIS IS THE ONE DIMENSION SECTION OF THE PROGRAM
# ONE DIMENSIONAL RESULTS
#
zmetric1 <- result1$conf.col
zmetric1 <- as.numeric(zmetric1)
#dim(zmetric) <- c(nq,ndim) We do not need this because we have a one-dimensional vector
xmetric1 <- result1$conf.row
xmetric1 <- as.numeric(xmetric1)
#dim(xmetric) <- c(np,ndim)
sse11 <- 0.0
sse22 <- 0.0
for (i in 1:np) {
for(j in 1:nq) {
dist_i_j <- 0.0
#
# Calculate distance between points
#
dist_i_j <- dist_i_j+ (xmetric1[i]-zmetric1[j])*(xmetric1[i]-zmetric1[j])
sse11 <- sse11 + ((TEIGHT[i,j]) - sqrt(dist_i_j))*((TEIGHT[i,j]) - sqrt(dist_i_j))
sse22 <- sse22 + ((TEIGHT[i,j]) - sqrt(dist_i_j))*((TEIGHT[i,j]) - sqrt(dist_i_j))*weightmat[i,j]
}
}
obama.voters <- xmetric1[zz[,3]==1] zz[,3] has the who the respondent voted for
mccain.voters <- xmetric1[zz[,3]==3]
non.voters <- xmetric1[NOTVOTE==1 | NOTVOTE==2 | NOTVOTE==3]
# Note the logic here
obamaShare <- length(obama.voters)/(length(obama.voters)+length(mccain.voters)+length(non.voters))
mccainShare <- length(mccain.voters)/(length(obama.voters)+length(mccain.voters)+length(non.voters))
nonShare <- length(non.voters)/(length(obama.voters)+length(mccain.voters)+length(non.voters))
#
gvdens <- density(obama.voters) Get a smoothed histogram of the voters
gvdens$y <- gvdens$y*obamaShare Adjust the heigth of the density by the share of the vote
# This makes the three densities add up to 1
bvdens <- density(mccain.voters)
bvdens$y <- bvdens$y*mccainShare
#
nvdens <- density(non.voters)
nvdens$y <- nvdens$y*nonShare
# Simple trick to make the 3 densities fit nicely in the graph
maxheight <- 1.1*(max(gvdens$y,bvdens$y,nvdens$y))
windows()
plot(gvdens,xlab="",ylab="",
main="",
xlim=c(-1.5,1.5),ylim=c(0,maxheight),type="l",lwd=3,col="red",font=2)
lines(bvdens,lwd=3,col="blue")
lines(nvdens,lwd=3,col="black")
# Main title
mtext("2008 Thermometer Scaling\nObama and McCain Voters and Non-Voters",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
mtext("Liberal - Conservative",side=1,line=2.75,cex=1.2)
# y-axis title
mtext("Density",side=2,line=2.5,cex=1.2)
# This puts the proportions each candidate gets onto the graph
text(-1.0,0.53,paste("Obama Voters ",
100.0*round(obamaShare, 3)),col="red",font=2)
text(-1.0,0.5,paste("McCain Voters ",
100.0*round(mccainShare, 3)),col="blue",font=2)
text(-1.0,0.47,paste("Non Voters ",
100.0*round(nonShare, 3)),col="black",font=2)
#
#
rm(list=ls(all=TRUE))
#
library(MASS)
library(foreign)
library(basicspace) BasicSpace Package by James Lo, Jeff Lewis, Royce Carroll, and myself
setwd("C:/uga_course_homework_12/")
data <- read.dta("cces_2008.dta",convert.factors = FALSE)
attach(data,warn.conflicts = FALSE)
#
T <- cbind(CC317a,CC317b,CC317c,CC317d,CC317h,CC317g) 0 - 100 Rating of Self and Candidates
TT <- T
TT <- ifelse(is.na(T),999,TT)
mode(TT) <- "double"
colnames(TT) <- c("self","D-Party","R-Party","Bush","Obama","McCain")
#
# Note that polarity=2 selects D-Party -- The Respondent Self Placement
# is in the first column -- respondent=1
# The Call to Aldrich and McKelvey Scaling
result <- aldmck(TT,polarity=2,respondent=1,missing=c(999),verbose=TRUE)
#
#
# ---- Useful Commands To See What is in an Object
#
# > length(result)
# [1] 8
# > class(result)
# [1] "aldmck"
# > names(result)
# [1] "stimuli" "respondents" "eigenvalues" "AMfit" "R2"
# [6] "N" "N.neg" "N.pos"
#
# > names(result$respondents)
# [1] "intercept" "weight" "idealpt" "R2" "selfplace" "polinfo"
#
# > summary(result) -- shows everything
# > plot(result) -- shows basic plots of results
#
windows()
plot(result) The Default Plot in the Package
VOTE <- CC327
VOTED <- ifelse(is.na(VOTE),999,VOTE) Get Who They Voted for
idealpoints1 <- ifelse(is.na(result$respondents$weight),99,result$respondents$weight) The Weight Parameter
idealpoints2 <- ifelse(is.na(result$respondents$idealpt),99,result$respondents$idealpt) The scaled ideal point
obama.voters <- result$respondents$idealpt[idealpoints1>0 & idealpoints2!=99 & VOTED==2] Obama voters
mccain.voters <- result$respondents$idealpt[idealpoints1>0 & idealpoints2!=99 & VOTED==1] McCain voters
#
#crazy.voters <- idealpoints[(idealpoints[,2]>0 & idealpoints[,3]!=99 & (VOTED > 2 & VOTED <=9)),3]
#crazy2.voters <- idealpoints[(idealpoints[,2]>0 & idealpoints[,3]!=99 & VOTED==999),3]
# 10,364
# 10,940
# 2,207
# 2,238
obamaShare <- length(obama.voters)/(length(obama.voters)+length(mccain.voters))
mccainShare <- length(mccain.voters)/(length(obama.voters)+length(mccain.voters))
obamadens <- density(obama.voters) Set up so that the Obama and McCain Densities sum to 1
obamadens$y <- obamadens$y*obamaShare
#
mccaindens <- density(mccain.voters)
mccaindens$y <- mccaindens$y*mccainShare
#
ymax1 <- max(obamadens$y)
ymax2 <- max(mccaindens$y)
ymax <- 1.1*max(ymax1,ymax2)
#
windows()
#
plot(obamadens,xlab="",ylab="",
main="",
xlim=c(-2.0,2.0),ylim=c(0,ymax),type="l",lwd=3,col="red",font=2)
lines(obamadens,lwd=3,col="blue")
lines(mccaindens,lwd=3,col="red")
#
# Main title
mtext("Aldrich-McKelvey Scaling 2008 CCES L-C Scale\nMcCain (red), Obama (blue)",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
mtext("Liberal - Conservative",side=1,line=2.75,cex=1.2)
# y-axis title
mtext("Density",side=2,line=2.5,cex=1.2)
#
text(-1.5,ymax,paste("Obama Voters ",
100.0*round(obamaShare, 3),"%"),col="blue",font=2)
text(-1.5,0.9*ymax,paste("N = ",
1.0*round(length(obama.voters), 7)),col="blue",font=2)
text( 1.5,ymax,paste("McCain Voters ",
100.0*round(mccainShare, 3),"%"),col="red",font=2)
text( 1.5,0.9*ymax,paste("N = ",
1.0*round(length(mccain.voters), 7)),col="red",font=2)
#
#
#
arrows(result$stimuli[4], 0.05,result$stimuli[4],0.0,length=0.1,lwd=3,col="blue4") This is a nice way
text(result$stimuli[4],.075,"Obama",col="blue4",font=2) to show where the stimuli are
arrows(result$stimuli[5], 0.05,result$stimuli[5],0.0,length=0.1,lwd=3,col="red4")
text(result$stimuli[5],.075,"McCain",col="red4",font=2)
arrows(result$stimuli[3], 0.05,result$stimuli[3],0.0,length=0.1,lwd=3,col="red4")
text(result$stimuli[3],.10,"Bush",col="red4",font=2)
#
#
detach(data)