# Tuesday, June 26, 2012

# Code for Preparing Data in R for Scan Statistics

# Work from the folder that holds the input CSV; every relative path used for
# reading and writing below resolves against this directory.
setwd("D:/R")


# Attach the 'foreign' package. read.csv() itself is provided by base R's
# utils package, so nothing visible in this script calls into 'foreign'.
library(foreign)


# Import the input file and inspect its structure
atl <- read.csv(file = "atlnhoodwresid.csv")
str(atl)

# Keep only the variables used downstream, in this column order
ds <- atl[c(
  "id", "finwgt", "inccont", "age", "gender", "income",
  "educa", "hhsize", "ethn", "bmi", "xhome", "yhome"
)]
str(ds)

# Drop every record that has a missing value in any retained variable
ds1 <- na.omit(ds)
str(ds1)

# Derive the analysis variables on ds1.
# Columns are read explicitly through ds1$... instead of attach(ds1):
# attach() places a snapshot of the data frame on the search path that is
# never detached and does not see columns added to ds1 afterwards, so bare
# column names would silently refer to a stale copy.

# Recoding ethnicity into black (1 = ethn code 1, 0 = any code above 1)
var <- rep(NA, nrow(ds1))
var[ds1$ethn == 1] <- 1
var[ds1$ethn > 1] <- 0
summary(var)
ds1$ethnblack <- as.factor(var)
str(ds1)

# Creating the dichotomous obesity status: 1 when bmi > 30, otherwise 0.
# NOTE(review): a bmi of exactly 30 is coded 0 here; confirm this matches the
# intended obesity cut-off.
obese <- rep(NA, nrow(ds1))
obese[ds1$bmi <= 30] <- 0
obese[ds1$bmi > 30] <- 1
ds1$obese <- obese


# Recoding education: 1 when educa <= 4, 0 when educa > 4
educa1 <- rep(NA, nrow(ds1))
educa1[ds1$educa <= 4] <- 1
educa1[ds1$educa > 4] <- 0
table(educa1)
ds1$educa1 <- educa1

# Recoding income into three categories (printed first against obesity).
# The last two conditions overlap for values strictly between 5 and 6; the
# final assignment wins, so such values end up in category 1.
table(income = ds1$income, obese = obese)
incomecat <- rep(NA, nrow(ds1))
incomecat[ds1$income <= 3] <- 3
incomecat[ds1$income > 3 & ds1$income < 6] <- 2
incomecat[ds1$income > 5] <- 1
table(incomecat)
ds1$incomecat_fac <- as.factor(incomecat)
str(ds1)

# Case-count column for the Bernoulli model: every record counts as 1
ds1$number <- rep(1, nrow(ds1))

# Build the coordinate table (record id plus x/y home location) and export it.
# write.csv() is called with its defaults, so the data frame's row names are
# written as an extra, unnamed first column in each output file.
coordinate <- ds1[c("id", "xhome", "yhome")]
str(coordinate)
write.csv(coordinate, file = "coordresid.csv")

# Persist the full recoded dataset, then drop the in-memory data frames
write.csv(ds1, file = "atl.csv")
rm(list = c("atl", "ds", "ds1"))


# BERNOULLI MODEL
# Reload the recoded dataset from disk and split it by obesity status:
# cases are rows with obese == 1, controls are rows with obese == 0.
# Each subset is inspected with str() and written to its own CSV file.
atl <- read.csv(file = "atl.csv")

case_atl <- atl[atl[["obese"]] == 1, ]
str(case_atl)
write.csv(case_atl, file = "case_atl.csv")

cont_atl <- atl[atl[["obese"]] == 0, ]
str(cont_atl)
write.csv(cont_atl, file = "cont_atl.csv")

# The subsets now live on disk; remove them from the workspace
rm(list = c("case_atl", "cont_atl"))