In this problem we are going to compare probit and logit coefficients
using the analysis in my book on pages 37-40 (also, work through
Notes on the Geometry of Logit and Probit).
Download the R program:
#
# bush_v_gore_example_h106.r -- GLM Examples
#
rm(list=ls(all=TRUE))
#
library(MASS)
library(foreign)
#
setwd("C:/uga_course_homework_9/")
data <- read.dta("hdmg106_2009_fixed.dta") Read in the Stata Data set
attach(data,warn.conflicts = FALSE)
# Grab all the variables
T <- cbind(bush00,gore00,black00,south,hispanic00,income,owner00,dwnom1n,dwnom2n,ybush,ygore)
TT <- T
mode(TT) <- "double" Name the variables -- handy below
colnames(TT) <- c("Bushvote","Gorevote","blackpct","southdum","hispandum","income","ownerhome",
"dwnom1n","dwnom2n","ybush","ygore")
#
nrow <- length(TT[,1])
ncol <- length(TT[1,])
#
#
# STATA OUTPUT FOR REFERENCE
# &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#. summ
#
# Variable | Obs Mean Std. Dev. Min Max
#-------------+--------------------------------------------------------
# bush00 | 432 46.81019 14.36873 6 80
# gore00 | 432 49.71296 14.005 20 93
# black00 | 432 12.68333 16.25699 .3 96
# south | 432 .3101852 .4631056 0 1
# hispanic00 | 432 12.31597 16.25281 .6 86
#-------------+--------------------------------------------------------
# income | 432 30.75224 8.319547 15.06 57.219
# owner00 | 432 65.86875 11.92404 8 84.3
# dwnom1n | 432 .0586875 .4671698 -.815 1.269
# dwnom2n | 432 -.0381065 .4706545 -1.123 1.27
# ybush | 432 .4861111 .5003865 0 1 1 if Bush > 50% in a CD, 0 otherwise
#-------------+--------------------------------------------------------
# ygore | 432 .4375 .4966535 0 1
#. probit ybush black00 south hispanic00 income owner00 dwnom1n dwnom2n
#
#Probit regression Number of obs = 432
# LR chi2(7) = 338.29
# Prob > chi2 = 0.0000
#Log likelihood = -130.12789 Pseudo R2 = 0.5652
#
#------------------------------------------------------------------------------
# ybush | Coef. Std. Err. z P>|z| [95% Conf. Interval]
#-------------+----------------------------------------------------------------
# black00 | -.0285973 .0097913 -2.92 0.003 -.0477878 -.0094067
# south | .7695862 .2545783 3.02 0.003 .2706219 1.268551
# hispanic00 | -.0089458 .0069163 -1.29 0.196 -.0225015 .0046099
# income | -.0241489 .0126394 -1.91 0.056 -.0489217 .0006239
# owner00 | .0235461 .0143687 1.64 0.101 -.0046161 .0517083
# dwnom1n | 2.761974 .2619813 10.54 0.000 2.2485 3.275448
# dwnom2n | 1.136417 .2339829 4.86 0.000 .6778186 1.595015
# _cons | -.9697998 1.077738 -0.90 0.368 -3.082127 1.142528
#------------------------------------------------------------------------------
# How to run a Probit in R
probitbush <-glm(ybush ~ black00+south+hispanic00+income+owner00+dwnom1n+dwnom2n,family=binomial(link=probit))
sumprobitbush <- summary(probitbush) Get the Summary
#
# ---- Useful Commands To See What is in an Object
#
# > length(sumprobitbush)
# [1] 17
# > class(sumprobitbush)
# [1] "summary.glm"
# > names(sumprobitbush) The coefficients are in sumprobitbush$coefficients[1:8]
# [1] "call" "terms" "family" "deviance"
# [5] "aic" "contrasts" "df.residual" "null.deviance"
# [9] "df.null" "iter" "deviance.resid" "coefficients"
# [13] "aliased" "dispersion" "df" "cov.unscaled"
# [17] "cov.scaled"
#
# How to run a Logit in R
logitbush <-glm(ybush ~ black00+south+hispanic00+income+owner00+dwnom1n+dwnom2n,family=binomial(link=logit))
sumlogitbush <- summary(logitbush) Get the Summary
- Run the program and report all the output NEATLY FORMATTED produced by
sumprobitbush and sumlogitbush.
- Calculate Cosineq (Cosine Theta if you use a handicapped browser) between the
probit and logit coefficients (see page 40 of my book).
- The probit and logit summaries report
Null Deviance, Residual Deviance, and AIC.
Null Deviance is the deviation of a model that contains only the intercept term, that is, a
fixed probability for all observations. Residual Deviance is simply -2*Log-Likelihood.
In this instance, N1=210, the number of "1"s, N0=222, the number of "0"s, K=8, the number of coefficients, and the
formulas are:
Null Deviance = -2*N1*[log(N1/(N1+N2))] -2*N2*[log(N2/(N1+N2))]
Residual Deviance = -2*[LOG LIKELIHOOD]
Akaike Information Criterion (AIC) = -2*[LOG LIKELIHOOD] + 2*K
Calculate these three statistics for the probit output. Show all of your calculations.
In this problem we are going to compare ordered probit and
ordered logit coefficients
using the analysis in my book on pages 37-40 (also, work through
Notes on the Geometry of Logit and Probit).
Download the R program:
#
rm(list=ls(all=TRUE))
library(foreign)
library(MASS)
library(stats)
setwd("C:/uga_course_homework_9")
#
# READ STATA FILE HERE
data <- read.dta("nes1968_first.dta") Read in the Stata Data set
attach(data,warn.conflicts = FALSE)
# Grab only the variables we need
T <- cbind(VAR00120,VAR00156,VAR00261,VAR00263,VAR00264,VAR00478,VAR00479,VAR00480,VAR00533)
TT <- T
mode(TT) <- "double" We do not use these variable names, we just need to keep track
colnames(TT) <- c("partyid2","education5","income2","sex2","black2","wallace2","humphrey2","nixon2","age2")
#
# partyid -- 0,1,2,3,4,5,6 The Codes we need
# education -- 00 - 22=8th grade or less, 31-61=9th-12th grades, 71=some college, 81=BS/BA, 82-87, advanced
# income -- 10 - 35
# sex -- 1=male, 2=female
# black -- 1=white, 2=black
# wallace -- 0 - 97
# humphrey -- 0 - 97
# nixon -- 0 - 97
# age -- 20 - 93
#
partyidx <- VAR00120 Recode the variables and insert NA's for missing data
partyid2 <- ifelse(partyidx > 6,NA,partyidx)
#
educationx <- VAR00156 Note the Logic here
education1 <- ifelse(educationx < 23,1,educationx)
education2 <- ifelse(((23 < educationx) & (educationx < 62)),2,education1)
education3 <- ifelse(educationx==71,3,education2)
education4 <- ifelse(((72 < educationx) & (educationx <= 87)),4,education3)
education5 <- ifelse(educationx > 87,NA,education4)
#
incomex <- VAR00261
income2 <- ifelse(incomex > 35,NA,incomex)
#
sex <- VAR00263 - 1 0=Male, 1=Female
#
racex <- VAR00264
race1 <- ifelse(racex==1,0,racex) White = 0
race2 <- ifelse(racex==2,1,race1) Black = 1
race3 <- ifelse(racex>2,NA,race2)
#
wallacex <- VAR00478 Feeling Thermometers, max=97
wallace1 <- ifelse(wallacex>97,NA,wallacex)
#
humphreyx <- VAR00479
humphrey1 <- ifelse(humphreyx>97,NA,humphreyx)
#
nixonx <- VAR00480
nixon1 <- ifelse(nixonx>97,NA,nixonx)
#
agex <- VAR00533 Maximum age in the 1968 data was 93
age1 <- ifelse(agex>93,NA,agex)
#
# VARIABLES
# partyid2, education5, income2, sex, reace3, wallace1, humphrey1, nixon1, age1
# Stick Variables in a Matrix
XDATA <- cbind(partyid2,education5,income2,sex,race3,wallace1,humphrey1,nixon1,age1)
YDATA <- na.omit(XDATA) List-Wise Deletion -- NEAT COMMAND
#
# ORDERED PROBIT
#
scarecrow <- polr(as.ordered(YDATA[,1]) ~ YDATA[,2]+YDATA[,3]+YDATA[,4]+YDATA[,5]+YDATA[,6]+YDATA[,7]+YDATA[,8]+YDATA[,9],
method="probit")
#
# ORDERED LOGIT
#
blackcat <- polr(as.ordered(YDATA[,1]) ~ YDATA[,2]+YDATA[,3]+YDATA[,4]+YDATA[,5]+YDATA[,6]+YDATA[,7]+YDATA[,8]+YDATA[,9],
method="logistic")
- Run the program and report all the output NEATLY FORMATTED produced by
summary(scarecrow) and summary(blackcat).
- Calculate Cosineq (Cosine Theta if you use a handicapped browser) between the
ordered probit and ordered logit coefficients
(see page 40 of my book).
- The ordered probit and ordered logit summaries report
Residual Deviance, and AIC.
Calculate these two statistics for both the ordered probit and
ordered logit outputs. Show all of your calculations.