In this problem we are going to try out SMACOF
("Scaling by Maximizing a Convex Function" or "Scaling by Majorizing a Complicated Function") which is discussed in
Chapter 8 of Borg and Groenen.
Download the R program:
#
#
rm(list=ls(all=TRUE))
#
library(stats)
library(smacof)
setwd("C:/uga_course_homework_10/")
#
#
# Read Nations Similarities Data
#
T <- matrix(scan("C:/uga_course_homework_10/nations.txt",0),ncol=12,byrow=TRUE)
TT <- 9-T
dim(TT)
#
nrow <- length(TT[,1])
np <- nrow
#
#
#
names <- NULL
names[1] <- "Brazil"
names[2] <- "Congo"
names[3] <- "Cuba"
names[4] <- "Egypt"
names[5] <- "France"
names[6] <- "India"
names[7] <- "Israel"
names[8] <- "Japan"
names[9] <- "China"
names[10] <- "USSR"
names[11] <- "USA"
names[12] <- "Yugo"
#
#
# pos -- a position specifier for the text. Values of 1, 2, 3 and 4,
# respectively indicate positions below, to the left of, above and
# to the right of the specified coordinates
#
namepos <- rep(4,np)
#namepos[1] <- 4 # Brazil
#namepos[2] <- 4 # Congo
#namepos[3] <- 4 # Cuba
#namepos[4] <- 4 # Egypt
#namepos[5] <- 4 # France
#namepos[6] <- 4 # India
#namepos[7] <- 4 # Israel
#namepos[8] <- 4 # Japan
namepos[9] <- 2 # China
#namepos[10] <- 4 # USSR
#namepos[11] <- 4 # USA
#namepos[12] <- 4 # Yugoslavia
#
# SMACOF Package in R:
#
result <- smacofSym(TT,ndim=2) You must pass it Distances (Dissimilarities)
#
# delta is the input dissimilarities; obsdiss is the normalized dissimilarities
# names(result)confdiss is reproduced dissimilarities; conf is the configuration
# [1] "delta" "obsdiss" "confdiss" "conf" "stress.m" "stress.nm" stress.m=sse/n
# [7] "spp" "ndim" "model" "niter" "nobj" "metric" spp is stress per point
#[13] "call" niter is the number of iterations; nobj=12 here; metric=TRUE
# model="Symmetric SMACOF"; call=smacofSym(delta = TT, ndim = 2)
xmetric <- result$conf Put the Estimated Configuration in xmetric[12,ndim]
#
xmax <- max(xmetric)
xmin <- min(xmetric)
#
plot(xmetric[,1],xmetric[,2],type="n",asp=1,
main="",
xlab="",
ylab="",
xlim=c(xmin,xmax),ylim=c(xmin,xmax),font=2)
points(xmetric[,1],xmetric[,2],pch=16,col="red")
axis(1,font=2)
axis(2,font=2,cex=1.2)
#
# Main title
mtext("12 Nations Data (SMACOF)",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
#mtext("Liberal-Conservative",side=1,line=2.75,cex=1.2)
# y-axis title
#mtext("Region (Civil Rights)",side=2,line=2.5,cex=1.2)
#
text(xmetric[,1],xmetric[,2],names,pos=namepos,offset=00.40,col="blue",font=2)
#
ndim <- 2
#
holycow <- glm(result$obsdiss ~ result$delta) Here is where we recover the normalization constant
#
# > names(holycow)
# [1] "coefficients" "residuals" "fitted.values"
# [4] "effects" "R" "rank"
# [7] "qr" "family" "linear.predictors"
# [10] "deviance" "aic" "null.deviance"
# [13] "iter" "weights" "prior.weights"
# [16] "df.residual" "df.null" "y"
# [19] "converged" "boundary" "model"
# [22] "call" "formula" "terms"
# [25] "data" "offset" "control"
# [28] "method" "contrasts" "xlevels"
#
TT <- (holycow$coefficients[2])*TT Note that the intercept term = 0.0 and the coefficient on the input data
# is used to shrink TT for testing purposes below
sse <- 0.0
for (i in 2:np) {
for(ii in 1:(i-1)) {
dist_i_ii <- 0.0
for( j in 1:ndim) {
#
# Calculate distance between points
#
dist_i_ii <- dist_i_ii + (xmetric[i,j]-xmetric[ii,j])*(xmetric[i,j]-xmetric[ii,j])
}
sse <- sse + ((TT[i,ii]) - sqrt(dist_i_ii))*((TT[i,ii]) - sqrt(dist_i_ii))
}
}
heck <- (sum((result$obsdiss-result$confdiss)**2))/(np*(np-1)/2)
sse <- sse/(np*(np-1)/2)
ryandist <- dist(xmetric) Create a Distance Object like obsdiss and confdiss
heck2 <- (sum((result$obsdiss-ryandist)**2))/(np*(np-1)/2)
- Run smacof_nations.r and turn in the resulting plot (NEATLY FORMATTED).
- Report result$stress.m, heck, sse, and heck2
- Let A be the matrix of Nation coordinates from this question and
B be the matrix of Mation coordinates from question (2) of Homework 4. Using the
same method as Question 4 of
Homework 5 solve for the orthogonal procrustes rotation
matrix, T, for B. NOTE THAT YOU NEED TO EXPAND THE SMACOF CONFIGURATION
USING GLM BEFORE YOU DO THE ROTATION!. Solve for T and turn in a neatly formatted listing
of T. Compute the Pearson
r-squares between the corresponding columns of A and B before and
after rotating B.
- Do a two panel graph with the two plots side-by-side -- A
on the left and B on the right.
In this problem we are going to compare the configurations produced by Double-Centering and Metric MDS using
smacofSym function.
Download the R program:
# Essentially the same code as in Homework 8 Question (1)
Cool Function Written by a Retired Physicist in California
###################################################
### chunk number 20: Written by R. R. Burns of Oceanside, California
###################################################
#
doubleCenterRect2 <- function(T){
p <- dim(T)[1]
n <- dim(T)[2]
-(T-matrix(apply(T,1,mean),nrow=p,ncol=n)-
t(matrix(apply(T,2,mean),nrow=n,ncol=p))+ mean(T))/2
}
#
# etc., etc., etc.
#
ndim <- 2
#
dcenter <- cmdscale(TTT,ndim, eig=FALSE,add=FALSE,x.ret=TRUE)
#
# returns double-center matrix as dcenter$x if x.ret=TRUE
#
# Do the Division by -2.0 Here
#
TTT <- (dcenter$x)/(-2.0)
#
TCHECK <- TT
TCC <- doubleCenterRect2(TCHECK) Here is the call to the function
#
#
# Run some checks to make sure everything is correct
#
xcheck <- sum(abs(TCC-TTT))
#
# From Here on Down the Two Configurations are Plotted
- Run Double_Center_SMACOF_Color_Circle.r and turn in the two plots.
- Report xcheck, result$stress.m, heck, sse, and heck2.
- Repeat 1c) and 1d) only now A = Double Center Configuation and B = SMACOF configuration.