rm(list=ls(all=TRUE))  # clear all variables
graphics.off()  # clear all graphics

# Greg Francis
# PSY 626
# 04 November 2025

# Set up contingency table
Sex<- c(rep("Male", 210), rep("Female", 240))
SmokeStatus<- c(rep("Smoker", 84), rep("Nonsmoker", 210-84), rep("Smoker", 72), rep("Nonsmoker", 240-72) )

data <- data.frame(Sex=Sex, SmokeStatus=SmokeStatus)

crosstab <- xtabs(~ Sex + SmokeStatus, data)

# Bayes Factor 

library(BayesFactor)

bf = contingencyTableBF( crosstab, sampleType = "jointMulti" )

print(bf)

# Stan/Ulam with rethinking 
library(rethinking)	

# Set up data frame to work with ulam
SexIndex <- c(rep(0, 210), rep(1, 240))
SmokeStatusIndex<- c(rep(1, 84), rep(0, 210-84), rep(1, 72), rep(0, 240-72) )
data <- data.frame(Sex=SexIndex, SmokeStatus=SmokeStatusIndex)

# Note logit(0.5)=0, and logistic(0)=0.5
# Model comparison of logit models (converts proportions to regression)
# Null model 
NullModel <- ulam(
			alist( SmokeStatus ~ dbinom(1, p), 
			logit(p) <- a,
			a ~ dnorm(logit(0.5), 10)
			), data= data, log_lik=TRUE, chains=4  ) 
			

# Alt model (possible change for females, relative to males)
AltModel <- ulam(
			alist( SmokeStatus ~ dbinom(1, p), 
			logit(p) <- a+ b*Sex,
			a ~ dnorm(logit(0.5), 10),
			b~dnorm(0, 5)
			), data= data, log_lik=TRUE, chains=4  ) 
			
print(compare(NullModel, AltModel) )


# Analyzing posterior of alt model	

#plot posterior of b (change due to sex)
numSamples=2000
post<-extract.samples(AltModel, n= numSamples)

# check that numbers make sense
cat("Mean of posterior of a=", mean(post$a), " which corresponds to proporton of ", logistic(mean(post$a)))
cat("Sample proportion of male smokers = ", 84/210)

cat("Mean of posterior of a+b=", mean(post$a+post$b), " which corresponds to proporton of ", logistic(mean(post$a+post$b)))
cat("Sample proportion of female smokers = ", 72/240)

dev.new()
dens(post$b, col=rangi2, lwd=2, xlab="posterior for difference of logit proportion ")

# Probability that a > 0.5 ?
cat("Probability that b greater than 0")
length(post$b[post$b >0])/length(post$b)
cat("Probability that b less than 0")
length(post$b[post$b <0])/length(post$b)