MEASUREMENT THEORY
Third Assignment
Due 10 December 2004
# The cross-hatch is used as a comment marker -- R ignores the line # double_center_drive_data.r -- Double-Center Program Always put the name of the program at the top # # Data Must Be Transformed to Squared Distances Below # # ATLANTA 0000 2340 1084 715 481 826 1519 2252 662 641 2450 # BOISE 2340 0000 2797 1789 2018 1661 891 908 2974 2480 680 # BOSTON 1084 2797 0000 976 853 1868 2008 3130 1547 443 3160 # CHICAGO 715 1789 976 0000 301 936 1017 2189 1386 696 2200 I embed the data for # CINCINNATI 481 2018 853 301 0000 988 1245 2292 1143 498 2330 convenience # DALLAS 826 1661 1868 936 988 0000 797 1431 1394 1414 1720 # DENVER 1519 891 2008 1017 1245 797 0000 1189 2126 1707 1290 # LOS ANGELES 2252 908 3130 2189 2292 1431 1189 0000 2885 2754 370 # MIAMI 662 2974 1547 1386 1143 1394 2126 2885 0000 1096 3110 # WASHINGTON 641 2480 443 696 498 1414 1707 2754 1096 0000 2870 # CASBS 2450 680 3160 2200 2330 1720 1290 370 3110 2870 0000 # library(MASS) These are the three libraries library(pcurve) These commands tell R to load them library(stats) # The command below reads the driving distances into the matrix T # Note that R uses the syntax "<-" for "=" T <- matrix(scan("C:/ucsd_course/drive2.txt",0),ncol=11,byrow=TRUE) # The c() command creates a vector names <- c("Atlanta ","Boise ","Boston ","Chicago ","Cincinnati ","Dallas ","Denver ", "Los Angeles ","Miami ","Washington ","CASBS ") # nrow <- length(T[,1]) The length command tells you the length of a vector ncol <- length(T[1,]) The "," acts as a wildcard TT <- rep(0,nrow*ncol) rep stands for repeat -- this creates a vector of zeroes dim(TT) <- c(nrow,ncol) dim stands for dimension -- this creates an nrow by ncol matrix of zeroes TTT <- rep(0,nrow*ncol) dim(TTT) <- c(nrow,ncol) # xrow <- NULL The NULL is a convenient way to initialize a vector xcol <- NULL xcount <- NULL matrixmean <- 0 Simple initialization to zero matrixmean2 <- 0 # # Transform the Matrix This is a simple double DO-Loop that divides the Distances # by 1000 and squares them so that we have a set of squared i <- 0 distances roughly in the range of 0 to 9 while (i < nrow) { i <- i + 1 The programming language is essentially the same as C/C++ and xcount[i] <- i Visual Basic j <- 0 while (j < ncol) { j <- j + 1 # # Square the Driving Distances # TT[i,j] <- (T[i,j]/1000)**2 # } } # # Put it Back in T # T <- TT # # # Below is the old Long Method of Double-Centering # # Compute Row and Column Means # i <- 0 while (i < nrow) { i <- i + 1 xrow[i] <- mean(T[i,]) Row Means of the Squared Distance Matrix } i <- 0 while (i < ncol) { i <- i + 1 xcol[i] <- mean(T[,i]) Column Means of the Squared Distance Matrix } matrixmean <- mean(xcol) Matrix Mean matrixmean2 <- mean(xrow) This is a safety check # # Double-Center the Matrix Using old Long Method # Compute comparison as safety check # i <- 0 while (i < nrow) { i <- i + 1 j <- 0 while (j < ncol) { j <- j + 1 TT[i,j] <- (T[i,j]-xrow[i]-xcol[j]+matrixmean)/(-2) Basic Formula for Double-Centering TTT[i,j] <- (T[i,j]-xrow[i]-xcol[j]+matrixmean)/(-2) a Squared Distance Matrix } } # # Perform Eigenvalue-Eigenvector Decomposition of Double-Centered Matrix # eigen performs an eigenvalue-eigenvector decomposition of the input matrix ev <- eigen(TT) The n by n matrix of eigenvectors is returned in ev$vec[*,*] # The n length vector of eigenvalues is returned in ev$val[*] # Find Point furthest from Center of Space # The max(x) command finds the maximum value in the vector x aaa <- sqrt(max((abs(ev$vec[,1]))**2 + (abs(ev$vec[,2]))**2)) Note that aaa and bbb should be exactly the same! bbb <- sqrt(max(((ev$vec[,1]))**2 + ((ev$vec[,2]))**2)) This is overkill, but it is a handy check on your math! # # Weight the Eigenvectors to Scale Space to Unit Circle # torgerson1 <- ev$vec[,1]*(1/aaa)*sqrt(ev$val[1]) This is the classical Torgerson Solution #torgerson2 <- ev$vec[,2]*(1/aaa) # #torgerson1 <- -ev$vec[,1]*(1/aaa) torgerson2 <- -ev$vec[,2]*(1/aaa)*sqrt(ev$val[2]) This is the classical Torgerson Solution # The basic plot command. The first two arguments are the x-axis and y-axis coordinates. plot(torgerson1,torgerson2,type="n",asp=1, The type="n" suppresses all plotting of points; asp=1 maintains the aspect ratio main="Double-Centered Driving Distance Matrix \n Torgerson Coordinates", Title -- the \n is a "new-line" command xlab="West East", Label for the x-axis ylab="South North", Label for the y-axis xlim=c(-3.0,3.0),ylim=c(-3.0,3.0)) Sets range for the axes -- these must be the same if asp=1 points(torgerson1,torgerson2,pch=16,col="red") Places points in the plot -- pch=16 is a solid circle text(torgerson1,torgerson2,names,col="blue",adj=1) Places the City names -- adj=1 puts names on left #Go back to the R prompt by clicking on the R Console Window to bring it to the front. You can now see the values for any of the variables in the program by simply typing the variable name and hitting Enter. For example, to check to see if matrixmean and matrixmean2 produce the same number, simply type each in turn:
#text(torgerson1,torgerson2,names,col="blue",adj=1) # # pos -- a position specifier for the text. Values of 1, 2, 3 and 4, # respectively indicate positions below, to the left of, above and # to the right of the specified coordinates # namepos <- NULL namepos[1] <- 2 # Atlanta namepos[2] <- 2 # Boise namepos[3] <- 2 # Boston namepos[4] <- 2 # Chicago namepos[5] <- 2 # Cininnati namepos[6] <- 2 # Dallas namepos[7] <- 2 # Denver namepos[8] <- 2 # Los Angeles namepos[9] <- 2 # Miami namepos[10] <- 2 # Washington namepos[11] <- 2 # CASBS # text(torgerson1,torgerson2,names,pos=namepos,offset=0.0,col="blue") #The vector namepos controls where the city names in the names vector are placed vis a vis the points. Run double_center_drive_data_2.R in R and you should get:
21 NOVEMBER 2004 14.17.13.10. RANDOM NUMBER SEED 33700 SEN90KH.ORD NON-PARAMETRIC MULTIDIMENSIONAL UNFOLDING OF 90TH SENATE 2 596 20 36 1 2 10 0.005 (36A1,3900I1) (I5,1X,36A1,2I5,50F8.3) ****************************************************************************** 1 ROLL CALLS 2 6244 48878 0.12775 0.87225 0.55333 0.00000 LEGISLATORS 2 6210 48878 0.12705 0.87295 0.55576 0.00000 2 ROLL CALLS 2 6143 48878 0.12568 0.87432 0.56056 0.99832 LEGISLATORS 2 6129 48878 0.12539 0.87461 0.56156 0.99434 3 ROLL CALLS 2 6079 48878 0.12437 0.87563 0.56513 0.99853 LEGISLATORS 2 6072 48878 0.12423 0.87577 0.56563 0.99982 4 ROLL CALLS 2 6067 48878 0.12413 0.87587 0.56599 0.99935 LEGISLATORS 2 6063 48878 0.12404 0.87596 0.56628 0.99922 5 ROLL CALLS 2 6053 48878 0.12384 0.87616 0.56699 0.99761 LEGISLATORS 2 6053 48878 0.12384 0.87616 0.56699 0.99999 6 ROLL CALLS 2 6050 48878 0.12378 0.87622 0.56721 0.99981 LEGISLATORS 2 6049 48878 0.12376 0.87624 0.56728 0.99982 7 ROLL CALLS 2 6048 48878 0.12374 0.87626 0.56735 0.99976 LEGISLATORS 2 6048 48878 0.12374 0.87626 0.56735 1.00000 8 ROLL CALLS 2 6048 48878 0.12374 0.87626 0.56735 0.99983 LEGISLATORS 2 6047 48878 0.12372 0.87628 0.56742 0.99999 9 ROLL CALLS 2 6045 48878 0.12368 0.87632 0.56757 0.99987 LEGISLATORS 2 6045 48878 0.12368 0.87632 0.56757 1.00000 10 ROLL CALLS 2 6045 48878 0.12368 0.87632 0.56757 0.99986 LEGISLATORS 2 6044 48878 0.12365 0.87635 0.56764 0.99999 11 ROLL CALLS 2 6042 48878 0.12361 0.87639 0.56778 0.99986 LEGISLATORS 2 6041 48878 0.12359 0.87641 0.56785 0.99996 12 ROLL CALLS 2 6039 48878 0.12355 0.87645 0.56799 0.99985 LEGISLATORS 2 6035 48878 0.12347 0.87653 0.56828 0.99986 13 ROLL CALLS 2 6034 48878 0.12345 0.87655 0.56835 0.99974 LEGISLATORS 2 6034 48878 0.12345 0.87655 0.56835 1.00000 14 ROLL CALLS 2 6033 48878 0.12343 0.87657 0.56842 0.99986 LEGISLATORS 2 6033 48878 0.12343 0.87657 0.56842 1.00000 15 ROLL CALLS 2 6029 48878 0.12335 0.87665 0.56871 0.99991 LEGISLATORS 2 6029 48878 0.12335 0.87665 0.56871 1.00000 16 ROLL CALLS 2 6028 48878 0.12333 0.87667 0.56878 0.99987 LEGISLATORS 2 6028 48878 0.12333 0.87667 0.56878 1.00000 17 ROLL CALLS 2 6027 48878 0.12331 0.87669 0.56885 0.99986 LEGISLATORS 2 6027 48878 0.12331 0.87669 0.56885 1.00000 18 ROLL CALLS 2 6027 48878 0.12331 0.87669 0.56885 0.99980 LEGISLATORS 2 6027 48878 0.12331 0.87669 0.56885 1.00000 19 ROLL CALLS 2 6026 48878 0.12329 0.87671 0.56892 0.99988 LEGISLATORS 2 6026 48878 0.12329 0.87671 0.56892 1.00000 20 ROLL CALLS 2 5882 48878 0.12034 0.87966 0.57923 0.99358 LEGISLATORS 2 5882 48878 0.12034 0.87966 0.57923 0.99998 MEAN VOLUME LEG. 0.0057 0.0163 MACHINE PREC. 2 5882 48878 0.12034 0.87966 0.57923 MACHINE PREC. 2 5882 48878 0.12034 0.87966 0.57923 14.17.13.10. ELAPSED TIME OF JOB 14.18.07.24.You should reproduce the above almost exactly subject to very small variations between CPU types and the random number draw that is used to do a final search on the legislator polytopes. Note that the correct classification is 87.966 percent with an APRE of 0.57923 (the output files are explained in detail on the Optimal Classification (OC) Page).