Aller au contenu. | Aller à la navigation

Outils personnels

Navigation

data preprocessing fig1

#! /bin/bash

## Sequencing data analysis pipeline
##
## Every step below reads and writes files through relative paths, so the
## script must run from inside the directory that holds the input data.


#Set up working directory
#Abort if the directory cannot be entered: otherwise the remaining steps would
#silently run (and create files) in whatever directory the script was started from.
cd /Users/directoryname || {
  printf 'error: cannot change to working directory %s\n' "/Users/directoryname" >&2
  exit 1
}

###################
## FastX toolkit ##
###################

#Convert fastq file to fasta
fastq_to_fasta -i filename.fastq -o filename.fasta

#Reverse-complement all reads and add these "new" sequences to the original file
#(filename_ALL.fasta therefore holds every read twice, once per orientation)
fastx_reverse_complement -i filename.fasta -o filename_revcompl.fasta
cat filename.fasta filename_revcompl.fasta > filename_ALL.fasta

#Convert fasta file to tab-delimited txt file (name <TAB> sequence)
fasta_formatter -t -i filename_ALL.fasta -o filename_ALL_tab.txt

#######################################
# Swap the two columns of a tab-delimited table.
# Arguments: $1 - path to a table with two tab-separated columns
# Outputs:   one line per input line on stdout: second column, a single
#            space, then first column
#######################################
swap_tab_columns() {
  # Splitting on tabs only keeps a read name that contains spaces in one
  # piece. The previous "join" of the file with itself split on any blank
  # (so such a name pushed the sequence out of the output) and relied on a
  # tool that expects sorted input.
  awk -F'\t' '{ print $2 " " $1 }' "$1"
}

#Swap columns (sequences/names) so that each line starts with the sequence
swap_tab_columns filename_ALL_tab.txt > filename_ALL_tab_swaped.txt


#Select reads matching FWD and REV primer (+barcode) sequences - V3 marker
#Every search pattern is a sample-specific 8-nt part combined with a part that
#is identical for all samples; the two parts are kept separate here.
v3_fwd_common="TCACTCCTACGGGAGGCAGCAGT"
v3_rev_common="GTGTAG(C|A)GGTGAAAT(G|T)CGTG"

#One row per sample:
#  forward tag | forward 8-nt part | reverse tag + sample number | reverse 8-nt part
v3_samples=(
  "FV309 CAAGGATG R09_7 CATCCTTG"
  "FV310 CTCAACAG R10_8 CTGTTGAG"
  "FV311 CGTAGCTA R11_9 TAGCTACG"
  "FV312 CATGAGCT R12_10 AGCTCATG"
  "FV313 CAGATCTG R13_11 CAGATCTG"
  "FV314 CCTACCAT R14_12 ATGGTAGG"
  "FV315 CCGCAATA R15_14 TATTGCGG"
  "FV316 CTCACACT R16_16 AGTGTGAG"
)

#Pass 1: keep the lines that START with the forward pattern
#(the sequence is the first column of filename_ALL_tab_swaped.txt)
for v3_row in "${v3_samples[@]}"; do
  read -r fwd_tag fwd_part rev_tag rev_part <<< "$v3_row"
  grep -E "^${fwd_part}${v3_fwd_common}" filename_ALL_tab_swaped.txt > "filename_${fwd_tag}.txt"
done

#Pass 2: among those, keep the lines that also contain the reverse pattern
#(not anchored, so it may occur anywhere on the line)
for v3_row in "${v3_samples[@]}"; do
  read -r fwd_tag fwd_part rev_tag rev_part <<< "$v3_row"
  grep -E "${v3_rev_common}${rev_part}" "filename_${fwd_tag}.txt" > "filename_${fwd_tag}${rev_tag}.txt"
done

#Convert tabular files to fasta

#######################################
# Convert a "sequence name" table into FASTA records.
# Arguments: $1 - input table; column 1 = sequence, column 2 = read name
#                 (columns separated by blanks)
#            $2 - FASTA file to write
# Outputs:   writes $2; prints a warning on stderr when $1 does not exist
# Returns:   0 (a missing input is skipped, not treated as fatal)
#######################################
tab_to_fasta() {
  local table=$1
  local fasta=$2

  # Without this check awk only prints an error and the redirection still
  # leaves an empty FASTA file behind, which looks like a sample with no reads.
  if [[ ! -f "$table" ]]; then
    printf 'warning: %s not found, skipping FASTA conversion\n' "$table" >&2
    return 0
  fi

  awk '{print ">"$2"\n"$1}' "$table" > "$fasta"
}

#NOTE(review): the FV201_1 ... FV208_15 tables are not written by any step
#visible in this script; presumably they come from a separate selection step
#for another marker - confirm. They are skipped with a warning when absent.
for sample in \
  FV201_1 FV202_2 FV203_3 FV204_4 FV205_5 FV206_6 FV207_13 FV208_15 \
  FV309R09_7 FV310R10_8 FV311R11_9 FV312R12_10 \
  FV313R13_11 FV314R14_12 FV315R15_14 FV316R16_16; do
  tab_to_fasta "filename_${sample}.txt" "filename_${sample}.fasta"
done


##############
## Cutadapt ##
##############

#remove 5'adapters
#Each adapter is a sample-specific 8-nt part followed by a part shared by all
#samples. cutadapt is run with a maximum error rate of 0.04 and a minimum read
#length of 200. The trimmed reads go to filename_<sample>_.fasta and cutadapt's
#stderr goes to cutadapt-report_<sample number>_.txt.
five_prime_common="TCACTCCTACGGGAGGCAGCAGT"

for pair in \
  CAAGGATG:FV309R09_7 \
  CTCAACAG:FV310R10_8 \
  CGTAGCTA:FV311R11_9 \
  CATGAGCT:FV312R12_10 \
  CAGATCTG:FV313R13_11 \
  CCTACCAT:FV314R14_12 \
  CCGCAATA:FV315R15_14 \
  CTCACACT:FV316R16_16
do
  five_prime_part=${pair%%:*}
  sample=${pair#*:}
  #the sample number is whatever follows the last underscore of the sample name
  sample_number=${sample##*_}

  cutadapt -g "${five_prime_part}${five_prime_common}" -e 0.04 --minimum-length=200 \
    "filename_${sample}.fasta" \
    > "filename_${sample}_.fasta" \
    2> "cutadapt-report_${sample_number}_.txt"
done


#remove 3'adapters
#Input is the 5'-trimmed filename_<sample>_.fasta; the result is written to
#filename_<sample>.fasta, i.e. it REPLACES the untrimmed FASTA file of the same
#name produced by the tabular-to-fasta conversion. cutadapt's stderr goes to
#cutadapt-report_<sample number>.txt.
#NOTE(review): the read-selection step matches this region as
#GTGTAG(C|A)GGTGAAAT(G|T)CGTG (one base at each ambiguous position), whereas
#the adapter below spells out both alternatives (CA and GT) and is therefore
#two bases longer. Presumably the 0.08 error rate is meant to absorb that
#difference - confirm before changing either value.
three_prime_common="GTGTAGCAGGTGAAATGTCGTG"

trim_samples=(FV309R09_7 FV310R10_8 FV311R11_9 FV312R12_10 FV313R13_11 FV314R14_12 FV315R15_14 FV316R16_16)
three_prime_parts=(CATCCTTG CTGTTGAG TAGCTACG AGCTCATG CAGATCTG ATGGTAGG TATTGCGG AGTGTGAG)

for idx in "${!trim_samples[@]}"; do
  current=${trim_samples[$idx]}
  adapter="${three_prime_common}${three_prime_parts[$idx]}"
  report="cutadapt-report_${current##*_}.txt"

  cutadapt -a "$adapter" -e 0.08 --minimum-length=200 "filename_${current}_.fasta" > "filename_${current}.fasta" 2> "$report"
done