Skip to content. | Skip to navigation

Personal tools

Sections

R diversity

### 16s - diversity
# applied to taxotofreq_script output
# paper version



rm(list=ls())


setwd()
freq=read.table("filename_taxotofreq.txt", header=T, sep="\t")
colnames(freq)[4]="exp"
for (j in 5:ncol(freq)){
set(freq,which(freq[[j]]<0),j,0)
}

# removal of unclassified + Wolbachia sequences
freq$unclassified=NULL
freq$Rickettsiales = NULL
freq$Anaplasmataceae = NULL

library(data.table, quietly = T)


#####################
## alpha-diversity ##
#####################
# nb of taxa in each group

test=freq[,c(5:ncol(freq))]
summary(test)
for(i in 1:nrow(test)){
test[i,]=round(100*test[i,]/sum(test[i,]),2)
}

prop=cbind(freq[,c(1:4)], test)

# threshold
s=1
library(data.table, quietly = T)
for (j in 5:ncol(prop)){
set(prop,which(prop[[j]]<s),j,0)
}

ALPHA=prop[,1:4]
alpha=apply(prop[,5:ncol(prop)],1,function(x){return(sum(x!=0))})
ALPHA$alpha=alpha
summary(ALPHA)


mod=lm(alpha~exp*sample.type*level, ALPHA)
anova(mod)
ANOVA <- drop1(mod, .~., test="F") # type III ANOVA

write.table(ANOVA, "filename_ANOVA.txt", sep="\t",dec="," ,row.names=T, quote=F)




################################################
## all samples comparaisons - classical stats ##
################################################

par(mar=c(10,4,4,4))

fly=prop[-c(29:32),]
summary(fly)

# for each taxa ANOVA & pairewise.t.test pvalues listed in the dataframe FlyStat
FlyStat=data.frame(Family=0, anova=0, wildfounders_ywparents=0,wildfounders_ywprogeny=0,ywparents_ywprogeny=0)
for(i in 5:ncol(fly)){
mod=lm(fly[,i]~fly$level)
panov=anova(mod)$`Pr(>F)`[1]
test=pairwise.t.test(fly[,i],fly$level)
fpa=test$p.value[1,1]
fpr=test$p.value[2,1]
papr=test$p.value[2,2]
FlyStat=rbind(FlyStat, c(colnames(fly)[i], panov, fpa, fpr, papr))
}

FlyStat=FlyStat[-1,]
FlyStat$anova=as.numeric(FlyStat$anova)
FlyStat$wildfounders_ywparents=as.numeric(FlyStat$wildfounders_ywparents)
FlyStat$wildfounders_ywprogeny=as.numeric(FlyStat$wildfounders_ywprogeny)
FlyStat$ywparents_ywprogeny=as.numeric(FlyStat$ywparents_ywprogeny)

# which taxa are stat sign differentially represented across levels?
summary(FlyStat)
FlySign=na.omit(FlyStat[FlyStat$anova<0.05,])
FlySign
write.table(FlySign, "RUN201507_signtaxa_Order.txt", sep="\t",dec="," ,row.names=F, quote=F)


# boxplot
flysign=fly[,c(1:4,which(colnames(fly)%in%FlySign$Family))]
for(i in 5:ncol(flysign)){
boxplot(flysign[,i]~flysign$sample.type+flysign$level,las=2, col=c("grey", "white"))
title(colnames(flysign)[i])
stripchart(flysign[,i]~flysign$sample.type+flysign$level,
vertical = TRUE, method = "stack",
pch = 21, cex=1.2, col = "black", bg="darkgoldenrod1",
add = TRUE)
}