install.packages("MASS")
library("MASS")
attach(bacteria)
nrow(bacteria)
install.packages('BAS')
library(BAS)
set.seed(12345)
glimpse(bacteria)
help(bacteria)
cor = glm(y~ap+hilo+trt+week, family= "binomial", data = bacteria)
summary(cor)
plot(cor)
resid(cor)
pairs(bacteria)
plot(y~week, data = bacteria)
beta = coef(cor)
resid = residuals(cor)
n = length(resid)
mse = 1/(n-2)*sum((resid^2))
mse
output = summary(cor)$coef[,1:4]
output #OLS estimates
par(mfrow = c(2,2))
corred = glm(y~week, family = "binomial", data = bacteria)#Using only statistically significant variables from cor.
summary(red)
# A function to evaluate the log of the posterior density
logP=function(y,X,b,b0,varB){
Xb=X%*%b
theta=exp(Xb)/(1+exp(Xb))
logLik=sum( dbinom(x=y,p=theta,size=1,log=T) )
logPrior=sum( dnorm(x=b,sd=sqrt(varB),mean=b0,log=T))
return(logLik+logPrior)
}
logisticRegressionBayes=function(y,X,nIter=100000,V=.02,varB=rep(10000,ncol(X)),b0=rep(0,ncol(X))){
####### Arguments #######################
# y a vector with 0/1 values
# X incidence matrix of effects
# b0,varB, the prior mean and prior variance bj~N(b0[j],varB[j])
# V the variance of the normal distribution used to generate candidates~N(b[i-1],V)
# nIter: number of iterations of the sampler
# Details: generates samples from the posterior distribution of a logistic regression using a Metropolis algorithm
#########################################
# A matrix to store samples
p=ncol(X)
B=matrix(nrow=nIter,ncol=p)
colnames(B)=colnames(X)
# A vector to trace acceptance
accept=matrix(nrow=nIter,ncol=p,NA)
accept[1,]=TRUE
# Initialize
B[1,]=0
B[1,1]=log(mean(y)/(1-mean(y)))
b=B[1,]
for(i in 2:nIter){
for(j in 1:p){
candidate=b
candidate[j]=rnorm(mean=b[j],sd=sqrt(V),n=1)
logP_current=logP(y,X,b0=b0,varB=varB,b=b)
logP_candidate=logP(y,X,b0=b0,varB=varB,b=candidate)
r=min(1,exp(logP_candidate-logP_current))
delta=rbinom(n=1,size=1,p=r)
accept[i,j]=delta
if(delta==1){ b[j]=candidate[j] }
}
B[i,]=b
if(i%%1000==0){
message(" Iteration ",i)
}
}
return(list(B=B,accept=accept))
}
Z=as.matrix(model.matrix(y~ap+hilo+trt+week, data=bacteria))
samples=logisticRegressionBayes(bacteria$y,X=model.matrix(y~ap+hilo+trt+week, data=bacteria),nIter=55000)
I'm trying to find the posterior and priors for my data set. I am using the bacteria data set;
This data frame has 220 rows and the following columns:
y
presence or absence: a factor with levels n and y.
ap
active/placebo: a factor with levels a and p.
hilo
hi/low compliance: a factor with levels hi amd lo.
week
numeric: week of test.
ID
subject ID: a factor.
trt
a factor with levels placebo, drug and drug+, a re-coding of ap and hilo.
When I try to run the last line I get an error:
Error in dbinom(x = y, p = theta, size = 1, log = T) :
Non-numeric argument to mathematical function
In addition: Warning messages:
1: In mean.default(y) : argument is not numeric or logical: returning NA
2: In mean.default(y) : argument is not numeric or logical: returning NA
3: In rnorm(mean = b[j], sd = sqrt(V), n = 1) :
Show Traceback
Rerun with Debug
Error in dbinom(x = y, p = theta, size = 1, log = T) :
Non-numeric argument to mathematical function
Here's a glimpse of my data: Various factors, some integers.
Observations: 220
Variables: 6
$ y <fct> y, y, y, y, y, y, n, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, y, n, n, n, y, n, y, y, y, y, y, y, y, ...
$ ap <fct> p, p, p, p, a, a, a, a, a, a, a, a, a, p, p, p, p, p, p, p, p, p, p, a, a, a, a, a, a, a, a, a, p, p, p, p, p, p, p, p, ...
$ hilo <fct> hi, hi, hi, hi, hi, hi, hi, hi, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, lo, hi, hi, hi, ...
$ week <int> 0, 2, 4, 11, 0, 2, 6, 11, 0, 2, 4, 6, 11, 0, 2, 4, 6, 11, 0, 2, 4, 6, 11, 0, 2, 4, 11, 0, 2, 4, 6, 11, 0, 2, 4, 6, 11, 0...
$ ID <fct> X01, X01, X01, X01, X02, X02, X02, X02, X03, X03, X03, X03, X03, X04, X04, X04, X04, X04, X05, X05, X05, X05, X05, X06, ...
$ trt <fct> placebo, placebo, placebo, placebo, drug+, drug+, drug+, drug+, drug, drug, drug, drug, drug, placebo, placebo, placebo,...
I'm trying to find the priors and posteriors for this data set. When I use the given function it will not work for me. I have provided error messages, my original code and the what the data consists of. I hope you can help me.
Thanks in advance.