In this problem we are going to try out the metric unfolding version of
SMACOF
("Scaling by Maximizing a Convex Function" or "Scaling by Majorizing a Complicated Function") which is discussed in
Chapter 8 of Borg and Groenen. We are going to apply it to the 1968 NES Candidate Feeling Thermometers.
Download the R program:
Here is the key section of code:
#np <- length(T[,1])
nq <- length(T[1,])
#
kkk <- rep(1,nq) Here is how to remove those respondents who have fewer than 5 Thermometer responses
xrow <- sapply(1:np,function(i) length(kkk[!is.na(T[i,])]))
# the kkk[] vector is used as a "trick" to get a count of the number of "NA"s in a row
#> !is.na(T[10,])
# Wallace Humphrey Nixon McCarthy Reagan Rockefeller
# TRUE TRUE TRUE FALSE FALSE FALSE
# Johnson Romney Kennedy Muskie Agnew LeMay
# TRUE FALSE TRUE FALSE FALSE FALSE
#
#> kkk
# [1] 1 1 1 1 1 1 1 1 1 1 1 1
#> length(kkk[!is.na(T[10,])])
#[1] 5
#> kkk[!is.na(T[10,])]
#[1] 1 1 1 1 1
#> length(kkk)
#[1] 12
#> sum(kkk[!is.na(T[10,])])
#[1] 5
#
# DELETE ROWS WITH LESS THAN 5 THERMOMETER RESPONSES
#
NA5 <- (xrow<5) Simple TRUE/FALSE vector -- TRUE if fewer than 5 responses
T <- T[!NA5,] This drops the rows
NOTVOTE <- NOTVOTE[!NA5] VOTE and NOTVOTE are taken from the STATA file
VOTE <- VOTE[!NA5]
np <- length(VOTE) Reset the Number of Respondents
ndim <- 2
#
T <- (100-T)/50 Transform the data to distances
TT <- T
TT[is.na(TT)] <- mean(T,na.rm=TRUE) Stick the matrix mean in the "NA" entries
weightmat <- rep(1,np*nq) Create a weight matrix = 0 if missing, = 1 if not missing
dim(weightmat) <- c(np,nq) This ensures that smacofRect does not use that entry
weightmat[is.na(T)] <- 0
result <- smacofRect(TT, ndim=2, weightmat, itmax=10000) Here is the call
#
# class(result)
# [1] "smacofR"
# length(result)
# [1] 14
# names(result)
# [1] "obsdiss" "confdiss" "conf.row" "conf.col" "stress" "spp.row"
# [7] "spp.col" "ndim" "model" "niter" "nind" "nobj"
# [13] "metric" "call"
#
TEIGHT <- as.numeric(TT)
dim(TEIGHT) <- c(np,nq)
zmetric2 <- result$conf.col Extract the Respondent and Candidate Coordinates
zmetric <- as.numeric(zmetric2)
dim(zmetric) <- c(nq,ndim)
xmetric2 <- result$conf.row
xmetric <- as.numeric(xmetric2)
dim(xmetric) <- c(np,ndim)
sse1 <- 0.0
sse2 <- 0.0
for (i in 1:np) {
for(j in 1:nq) {
dist_i_j <- 0.0
for( k in 1:ndim) {
#
# Calculate distance between points
#
dist_i_j <- dist_i_j+ (xmetric[i,k]-zmetric[j,k])*(xmetric[i,k]-zmetric[j,k])
}
sse1 <- sse1 + ((TEIGHT[i,j]) - sqrt(dist_i_j))*((TEIGHT[i,j]) - sqrt(dist_i_j))
sse2 <- sse2 + ((TEIGHT[i,j]) - sqrt(dist_i_j))*((TEIGHT[i,j]) - sqrt(dist_i_j))*weightmat[i,j]
} Note the Difference between sse1 and sse2
}
#
# SETUP FOR TWO DIMENSIONAL PLOT
#
xmax <- max(abs(xmetric))
zz <- c(xmetric[,1],xmetric[,2],VOTE,NOTVOTE)
dim(zz) <- c(np,4)
#windows()
plot(zz[,1],zz[,2],type="n",asp=1,
main="",
xlab="",
ylab="",
xlim=c(-xmax,xmax),ylim=c(-xmax,xmax),cex=1.2,font=2)
#
# Main title
mtext("Metric MDS of 1968 Thermometers\nNixon (blue), Humphrey (red), Wallace (black)",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
mtext("Liberal - Conservative",side=1,line=2.75,cex=1.2)
# y-axis title
mtext("Social/Lifestyle Issues",side=2,line=2.5,cex=1.2)
#
points(zz[zz[,3] == 23,1],zz[zz[,3] == 23,2],pch='N',col="blue",font=2)
points(zz[zz[,3] == 11,1],zz[zz[,3] == 11,2],pch='H',col="red",font=2)
points(zz[zz[,3] == 34,1],zz[zz[,3] == 34,2],pch='W',col="black",font=2)
#
points(zmetric[,1],zmetric[,2],pch=16,col="red")
text(zmetric[,1],zmetric[,2], junk, pos=namepos, col="purple",font=2)
#
- Run the program and turn in the plot.
- Report result$stress, sse1, and sse2.
- Change the program so that 8 thermometers are required for a respondent to be included in the analysis. Report
np, result$stress, sse1, and sse2, and turn in the plot.