Setting one country at the origin eliminates two of the zero eigenvalues of the Hessian matrix.
Again, because we are working with relational data -- that is, data that can be interpreted as distances between
points -- one point can be fixed at the origin without affecting the estimation of the other points, because all we
are trying to reproduce is the distances between them. It is also possible to fix one
coordinate of a second point at 0.0, which removes the rotational indeterminacy and with it a third zero eigenvalue.
Any set of distances produced by moving all the points vis-à-vis one another
can also be generated by fixing one point at the origin, fixing one coordinate
of another point, and simply moving the remaining points vis-à-vis the
fixed point and the second point, which is free to move back and forth
along one of the dimensions. In this
problem we will fix the U.S.S.R. at the origin as we have done before,
but we will also fix the U.S.A. second-dimension coordinate at 0.0. Download the R
program:
metric_mds_nations_ussr_usa.r -- Program to
run Metric MDS on a matrix of Similarities/Dissimilarities with one point fixed at the origin and one coordinate of a second point fixed at 0.0
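As a quick illustration of this point (separate from the program below, and using hypothetical points), pairwise distances are unchanged when every point is rotated and translated by the same amount, so anchoring one point at the origin and one coordinate of a second point at 0.0 does not restrict the distances the configuration can reproduce:

X <- matrix(rnorm(24), ncol=2)                  # 12 hypothetical points in 2 dimensions
theta <- 0.7                                    # an arbitrary rotation angle
rot <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), 2, 2)
Xmoved <- sweep(X %*% rot, 2, c(3,-1), "+")     # rotate, then translate by (3,-1)
max(abs(dist(X) - dist(Xmoved)))                # effectively zero -- all distances identical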
#
# metric_mds_nations_ussr_usa.r -- Does Metric MDS
#
# Wish 1971 nations similarities data
# Title: Perceived similarity of nations
#
# Description: Wish (1971) asked 18 students to rate the global
# similarity of different pairs of nations such as 'France and China' on
# a 9-point rating scale ranging from `1=very different' to `9=very
# similar'. The table shows the mean similarity ratings.
#
# Brazil 9.00 4.83 5.28 3.44 4.72 4.50 3.83 3.50 2.39 3.06 5.39 3.17
# Congo 4.83 9.00 4.56 5.00 4.00 4.83 3.33 3.39 4.00 3.39 2.39 3.50
# Cuba 5.28 4.56 9.00 5.17 4.11 4.00 3.61 2.94 5.50 5.44 3.17 5.11
# Egypt 3.44 5.00 5.17 9.00 4.78 5.83 4.67 3.83 4.39 4.39 3.33 4.28
# France 4.72 4.00 4.11 4.78 9.00 3.44 4.00 4.22 3.67 5.06 5.94 4.72
# India 4.50 4.83 4.00 5.83 3.44 9.00 4.11 4.50 4.11 4.50 4.28 4.00
# Israel 3.83 3.33 3.61 4.67 4.00 4.11 9.00 4.83 3.00 4.17 5.94 4.44
# Japan 3.50 3.39 2.94 3.83 4.22 4.50 4.83 9.00 4.17 4.61 6.06 4.28
# China 2.39 4.00 5.50 4.39 3.67 4.11 3.00 4.17 9.00 5.72 2.56 5.06
# USSR 3.06 3.39 5.44 4.39 5.06 4.50 4.17 4.61 5.72 9.00 5.00 6.67
# USA 5.39 2.39 3.17 3.33 5.94 4.28 5.94 6.06 2.56 5.00 9.00 3.56
# Yugoslavia 3.17 3.50 5.11 4.28 4.72 4.00 4.44 4.28 5.06 6.67 3.56 9.00
#
rm(list=ls(all=TRUE))
library(MASS)
library(stats)
#
# A function to perform Double Centering on a Matrix with missing
# elements
#
doubleCenterMissing <- function(x){
p <- dim(x)[1]
n <- dim(x)[2]
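# Classical double centering of a squared-distance matrix:
# B = -(D2 - row means - column means + grand mean)/2;
# na.rm=TRUE lets the means be computed even when entries are missing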
-(x-matrix(apply(x,1,mean, na.rm=TRUE),nrow=p,ncol=n) -
t(matrix(apply(x,2,mean, na.rm=TRUE),nrow=n,ncol=p)) + mean(x, na.rm=TRUE))/2
}
#
# &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
# FUNCTION CALLED BY OPTIM(...) Below
#
# *** Calculate Log-Likelihood -- In this case simple SSE ***
# **************************************************************
fr <- function(zzz){
#
xx[1:18] <- zzz[1:18]
xx[21] <- zzz[19]
xx[19:20] <- 0.0               # USSR Set to Origin
xx[22] <- 0.0                  # USA 2nd Dimension Coordinate Set to 0.0
xx[23:24] <- zzz[20:21]
#
i <- 1
sse <- 0.0
while (i <= np) {
ii <- 1
while (ii <= i) {
j <- 1
dist_i_ii <- 0.0
while (j <= ndim) {
#
# Calculate distance between points
#
dist_i_ii <- dist_i_ii + (xx[ndim*(i-1)+j]-xx[ndim*(ii-1)+j])*(xx[ndim*(i-1)+j]-xx[ndim*(ii-1)+j])
j <- j + 1
}
sse <- sse + (sqrt(TT[i,ii]) - sqrt(dist_i_ii))*(sqrt(TT[i,ii]) - sqrt(dist_i_ii))
ii <- ii + 1
}
i <- i + 1
}
return(sse)
}
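#
# For comparison only (an aside, not part of the original program): the same
# sum of squared errors can be computed in vectorized form with dist(). This
# sketch assumes the same xx, TT, np, and ndim objects defined below and
# should return the same value as fr() for any parameter vector zzz.
#
fr_vectorized <- function(zzz){
  xx[1:18]  <- zzz[1:18]
  xx[21]    <- zzz[19]
  xx[19:20] <- 0.0                                   # USSR fixed at the origin
  xx[22]    <- 0.0                                   # USA 2nd dimension fixed at 0.0
  xx[23:24] <- zzz[20:21]
  X <- matrix(xx, nrow=np, ncol=ndim, byrow=TRUE)    # one row of coordinates per nation
  D <- as.matrix(dist(X))                            # reproduced distances
  lower <- lower.tri(TT)
  sum((sqrt(TT[lower]) - D[lower])^2)                # same SSE as the while loops in fr()
}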
#
# &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#
# Read Nations Similarities Data
#
T <- matrix(scan("C:/uga_course_homework_8_2015/nations.txt",0),ncol=12,byrow=TRUE)
#
#
np <- length(T[,1])
ndim <- 2
nparam <- np*ndim - 3          # The number of parameters is 24 - 3 = 21
#
names <- NULL
names[1] <- "Brazil"
names[2] <- "Congo"
names[3] <- "Cuba"
names[4] <- "Egypt"
names[5] <- "France"
names[6] <- "India"
names[7] <- "Israel"
names[8] <- "Japan"
names[9] <- "China"
names[10] <- "USSR"
names[11] <- "USA"
names[12] <- "Yugo"
#
#
# pos -- a position specifier for the text. Values of 1, 2, 3 and 4,
# respectively indicate positions below, to the left of, above and
# to the right of the specified coordinates
#
namepos <- rep(4,np)
#namepos[1] <- 4 # Brazil
#namepos[2] <- 4 # Congo
#namepos[3] <- 4 # Cuba
#namepos[4] <- 4 # Egypt
#namepos[5] <- 4 # France
#namepos[6] <- 4 # India
#namepos[7] <- 4 # Israel
#namepos[8] <- 4 # Japan
#namepos[9] <- 4 # China
#namepos[10] <- 4 # USSR
#namepos[11] <- 4 # USA
#namepos[12] <- 4 # Yugoslavia
#
# Set Parameter Values
#
zz <- rep(0,np*ndim)
dim(zz) <- c(np*ndim,1)
xx <- rep(0,np*ndim)
dim(xx) <- c(np*ndim,1)
#
# Set up for Double-Centering
#
TT <- rep(0,np*np)
dim(TT) <- c(np,np)
TTT <- rep(0,np*np)
dim(TTT) <- c(np,np)
#
xrow <- NULL
xcol <- NULL
xcount <- NULL
matrixmean <- 0
#
# Transform the Matrix
#
i <- 0
while (i < np) {
i <- i + 1
xcount[i] <- i
j <- 0
while (j < np) {
j <- j + 1
#
# This is the Normal Transformation -- similarities (9 = most similar)
# become squared distances
#
TT[i,j] <- ((9 - T[i,j]))**2
#
# But the R subroutine cmdscale(...) wants simple Distances, not squared distances!
#
TTT[i,j] <- (9 - T[i,j])
#
}
}
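#
# Aside (not part of the original program): the double loop above is
# equivalent to the vectorized assignments TT <- (9 - T)^2 and TTT <- 9 - T;
# the check below should pass silently
stopifnot(isTRUE(all.equal(TT, (9 - T)^2)), isTRUE(all.equal(TTT, 9 - T)))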
#
# Call Double Center Routine From R Program
# cmdscale(....) in stats library
# The Input data are DISTANCES!!! Not Squared Distances!!
# Note that the R routine does not divide
# by -2.0
#
#
dcenter <- cmdscale(TTT,ndim, eig=FALSE,add=FALSE,x.ret=TRUE)
#
# returns double-center matrix as dcenter$x if x.ret=TRUE
#
# Do the Division by -2.0 Here
#
TTT <- (dcenter$x)/(-2.0)
#
# Do Our Double-Centering Function
#
TCHECK <- doubleCenterMissing(TT)
TMEAN <- mean(TCHECK, na.rm=TRUE)
TCHECK[is.na(TCHECK)] <- mean(TCHECK, na.rm=TRUE)
#
# Compare the Two Methods of Double-Centering
#
errordc <- sum((TTT-TCHECK)^2)
#
# Perform Eigenvalue-Eigenvector Decomposition of Double-Centered Matrix
#
# ev$values are the eigenvalues
# ev$vectors are the eigenvectors
#
ev <- eigen(TTT)
# The Two Lines Below Put the Eigenvalues in a
# Diagonal Matrix -- The first one creates an
# identity matrix and the second command puts
# the eigenvalues on the diagonal
#
Lambda <- diag(np)
diag(Lambda) <- ev$val
#
# Compute U*LAMBDA*U' for check below
#
#
XX <- ev$vec %*% Lambda %*% t(ev$vec)
#
#
# Compute Fit of decomposition -- This is just the sum of the absolute
# errors -- Note that xfit should be essentially zero!
#
xfit <- sum(abs(TTT-XX))       # Note that I have simplified the code here
#
# Use Eigenvectors for Starts
#
i <- 1
while (i <= np)
{
j <- 1
while (j <= ndim)
{
zz[ndim*(i-1)+j] <- ev$vectors[i,j]
j <- j +1
}
i <- i + 1
}
#
# CONSTRAIN USSR TO THE ORIGIN -- entries 19 and 20
#
zzz <- rep(0,nparam)
dim(zzz) <- c(nparam,1)
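#
# zzz holds the 21 free parameters: the coordinates of nations 1-9
# (entries 1-18), the USA first-dimension coordinate (entry 19), and
# Yugoslavia's two coordinates (entries 20-21); the USSR coordinates
# (zz[19:20]) and the USA second dimension (zz[22]) are dropped because
# they are fixed at 0.0 inside fr()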
zzz[1:18] <- zz[1:18]
zzz[19] <- zz[21]
#
# CONSTRAIN 2nd DIMENSION USA TO 0.0 entry 22
#
zzz[20:21] <- zz[23:24]
#
# *******************************************
# DO MAXIMUM LIKELIHOOD MAXIMIZATION HERE
# *******************************************
#
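# Three passes over the same objective: Nelder-Mead first, then simulated
# annealing ("SANN") started from the Nelder-Mead solution to guard against
# local minima, and finally BFGS to polish the estimates and return the
# Hessian evaluated at the solution
#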
model_nd <- optim(zzz,fr,hessian=TRUE,control=list(maxit=50000))
model_sann <- optim(model_nd$par,fr,method="SANN",hessian=TRUE,control=list(maxit=50000, temp=20))
model <- optim(model_sann$par,fr,method="BFGS",hessian=TRUE)
#
# Log-Likelihood (inverse -- optim minimizes!!)
#
logLmax <- model$value
#
# Parameter Estimates
#
rhomax <- model$par
#
# convergence an integer code.
# 0 indicates successful convergence.
# Error codes are
# 1 indicates that the iteration limit maxit had been reached.
# 10 indicates degeneracy of the Nelder–Mead simplex.
# 51 indicates a warning from the "L-BFGS-B" method; see component message for further details.
# 52 indicates an error from the "L-BFGS-B" method; see component message for further details.
#
nconverge <- model$convergence
#
# counts
# A two-element integer vector giving the number of calls to
# fn and gr respectively.
#
ncounts <- model$counts
#
xhessian <- model$hessian
#
# Check Rank of Hessian Matrix
#
evv <- eigen(xhessian)
#
# evv$values -- shows the eigenvalues
#
LambdaInv <- diag(nparam)
diag(LambdaInv) <- 1/evv$val
#
# Compute U*[(LAMBDA)-1]*U' for check below
#
#
XXInv <- evv$vec %*% LambdaInv %*% t(evv$vec)     # Note that this is the Inverse of the Hessian
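#
# Optional check (not part of the original program): the eigen-based inverse
# should agree with R's direct solver when the Hessian has full rank
checkinv <- max(abs(XXInv - solve(xhessian)))     # should be very close to zero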
#
#
results <- rep(0,nparam*4)
dim(results) <- c(nparam,4)
#
results[,1] <- rhomax
results[,2] <- sqrt(diag(XXInv))                                   # Standard Error
results[,3] <- rhomax/sqrt(diag(XXInv))                            # T Value
results[,4] <- pt(-abs(results[,3]),((np*(np-1))/2)-nparam-1)*2    # Two-tail p-value
#
#
# Put Coordinates into matrix for plot purposes
#
xmetric <- rep(0,np*ndim)
dim(xmetric) <- c(np,ndim)
#
i <- 1
while (i <= np)
{
j <- 1
while (j <= ndim)
{
if(i < 10){
xmetric[i,j] <- rhomax[ndim*(i-1)+j]
}
j <- j +1
}
if(i==10){
xmetric[i,1] <- 0.0            # Note the Kludge Here
xmetric[i,2] <- 0.0            # This is very inelegant
}
if(i==11){
xmetric[i,1] <- rhomax[19]
xmetric[i,2] <- 0.0
}
if(i==12){
xmetric[i,1] <- rhomax[20]
xmetric[i,2] <- rhomax[21]
}
i <- i + 1
}
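#
# An equivalent, tidier alternative (a sketch, not part of the original
# program): rebuild the full 24-element coordinate vector first, then
# reshape it one row per nation; xmetric_alt should equal xmetric
xxfull <- rep(0, np*ndim)
xxfull[1:18]  <- rhomax[1:18]
xxfull[19:20] <- 0.0                # USSR fixed at the origin
xxfull[21]    <- rhomax[19]         # USA first dimension
xxfull[22]    <- 0.0                # USA second dimension fixed at 0.0
xxfull[23:24] <- rhomax[20:21]      # Yugoslavia
xmetric_alt <- matrix(xxfull, nrow=np, ncol=ndim, byrow=TRUE)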
#
# Compute the Correlation (r) between the True and Reproduced Distances
# (square rfit to obtain the R-Square)
#
aaa <- NULL
bbb <- NULL
i <- 0
j <- 0
kk <- 0
while (i < np) {
i <- i + 1
j <- i
while (j < np) {
j <- j + 1
kk <- kk + 1
aaa[kk] <- sqrt(TT[i,j])
sum <- 0.0
k <- 0
while (k < ndim){
k <- k + 1
sum <- sum + (xmetric[i,k]-xmetric[j,k])*(xmetric[i,k]-xmetric[j,k])
}
bbb[kk] <- sqrt(sum)
}
}
rfit <- cor(aaa,bbb)
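#
# Equivalent check (not part of the original program): the same correlation
# can be computed directly from the upper triangles of the two distance matrices
rfit_check <- cor(sqrt(TT[upper.tri(TT)]), as.matrix(dist(xmetric))[upper.tri(TT)])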
xmax <- max(xmetric)
xmin <- min(xmetric)
#
plot(xmetric[,1],xmetric[,2],type="n",asp=1,
main="",
xlab="",
ylab="",
xlim=c(xmin,xmax),ylim=c(xmin,xmax),font=2)
points(xmetric[,1],xmetric[,2],pch=16,col="red")
axis(1,font=2)
axis(2,font=2,cex=1.2)
#
# Main title
mtext("12 Nations Data",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
#mtext("Liberal-Conservative",side=1,line=2.75,cex=1.2)
# y-axis title
#mtext("Region (Civil Rights)",side=2,line=2.5,cex=1.2)
#
text(xmetric[,1],xmetric[,2],names,pos=namepos,offset=00.00,col="blue",font=2)
- Run metric_mds_nations_ussr_usa.r and turn in a
NEATLY FORMATTED plot of the nations.
- Report errordc, xfit, rfit, and a NEATLY
FORMATTED listing of results.
- Add the code to produce a graph of the eigenvalues of the
Hessian matrix. Turn in a NEATLY FORMATTED
version of this plot with appropriate labeling.