R taxotofreq
### 16s - taxonomy to frequencies
# applied to Mothur-generated taxonomic tables
# paper version
rm(list=ls())
setwd()
library(data.table)
taxo=fread("filename.taxonomy", sep="\t", header=F, data.table=F)
taxo[,2]=gsub("\\d","",taxo[,2])
taxo[,2]=gsub("\\()","",taxo[,2])
require(reshape2)
taxo2=colsplit(taxo[,2], ";", c("Domain","Phylum","Class", "Order","Family","Genus","end"))
taxo=data.frame(Sequence_name=taxo[,1], taxo2)
groups=read.table("filename.group", sep="\t", header=F)
colnames(groups) <- c("Sequence_name", "echantillon")
#creates a table with taxonomic level name + associated frequency in each sample
ech=levels(groups$echantillon)
ColName="Order"
col=which(colnames(taxo)==ColName)
DATA_TABLE=data.frame(levels(as.factor(taxo[,col])))
colnames(DATA_TABLE)=ColName
for(i in 1:length(ech)){
# Select sequence
data_seq=groups[groups$echantillon%in%ech[i],]$Sequence_name
data_taxo=taxo[taxo$Sequence_name%in%data_seq,]
data_table=data.frame(table(data_taxo[,col]))
colnames(data_table)=c(ColName,ech[i])
DATA_TABLE=merge(DATA_TABLE,data_table, by=ColName, all=T)
}
for (j in seq_len(ncol(DATA_TABLE))){
set(DATA_TABLE,which(is.na(DATA_TABLE[[j]])),j,0)
}
#import metadata
metadata <- read.table("filename.txt", header=T, sep="\t")
n <- DATA_TABLE$Order
# transpose all but the first column (name)
DATA_TABLEt <- as.data.frame(t(DATA_TABLE[,-1]))
colnames(DATA_TABLEt) <- n
DATA_TABLE=merge(metadata, DATA_TABLEt, by.x="sample.ID", by.y="row.names", all.x=F, all.y=T)
write.table(DATA_TABLE, "filename_taxotofreq.txt", sep="\t", row.names=F, quote=F)
################## Excel step ###################
#### subtraction of frequency in ctl samples ####
#################################################