Skip to content. | Skip to navigation

Personal tools

Sections

R taxotofreq

### 16s - taxonomy to frequencies
# applied to Mothur-generated taxonomic tables
# paper version


rm(list=ls())


setwd()

library(data.table)
taxo=fread("filename.taxonomy", sep="\t", header=F, data.table=F)
taxo[,2]=gsub("\\d","",taxo[,2])
taxo[,2]=gsub("\\()","",taxo[,2])

require(reshape2)
taxo2=colsplit(taxo[,2], ";", c("Domain","Phylum","Class", "Order","Family","Genus","end"))

taxo=data.frame(Sequence_name=taxo[,1], taxo2)

groups=read.table("filename.group", sep="\t", header=F)

colnames(groups) <- c("Sequence_name", "echantillon")


#creates a table with taxonomic level name + associated frequency in each sample
ech=levels(groups$echantillon)
ColName="Order"

col=which(colnames(taxo)==ColName)
DATA_TABLE=data.frame(levels(as.factor(taxo[,col])))
colnames(DATA_TABLE)=ColName

for(i in 1:length(ech)){
# Select sequence
data_seq=groups[groups$echantillon%in%ech[i],]$Sequence_name
data_taxo=taxo[taxo$Sequence_name%in%data_seq,]
data_table=data.frame(table(data_taxo[,col]))
colnames(data_table)=c(ColName,ech[i])
DATA_TABLE=merge(DATA_TABLE,data_table, by=ColName, all=T)
}


for (j in seq_len(ncol(DATA_TABLE))){
set(DATA_TABLE,which(is.na(DATA_TABLE[[j]])),j,0)
}


#import metadata
metadata <- read.table("filename.txt", header=T, sep="\t")

n <- DATA_TABLE$Order

# transpose all but the first column (name)
DATA_TABLEt <- as.data.frame(t(DATA_TABLE[,-1]))
colnames(DATA_TABLEt) <- n

DATA_TABLE=merge(metadata, DATA_TABLEt, by.x="sample.ID", by.y="row.names", all.x=F, all.y=T)
write.table(DATA_TABLE, "filename_taxotofreq.txt", sep="\t", row.names=F, quote=F)



################## Excel step ###################

#### subtraction of frequency in ctl samples ####

#################################################