#!/usr/bin/env bash
#
# Demultiplex a pooled paired-end run, trim every demultiplexed R1 file down to
# its 20 bp protospacer, and build a MAGeCK counts matrix from the result.
#
# Requires: cutadapt, plus mageck (you will need to be in a conda environment
# with mageck and its dependencies installed). Run from the directory that
# holds the input FASTQ, barcode FASTA and reference CSV files named below.

# Abort on the first failing step so that later steps never run on missing or
# stale intermediate files.
# NOTE(review): -u is deliberately left off; some `module` shell-function
# implementations are reported to reference unset variables - confirm before
# enabling it.
set -eo pipefail

readonly POOL_R1="CROP_VIP_Pool_R1.fastq"
readonly POOL_R2="CROP_VIP_Pool_R2.fastq"
readonly FW_BARCODES="fw_barcode.fasta"
readonly U6_PRIMING_SITE="TCTTGTGGAAAGGACGAAACACCG"
readonly PROTOSPACER_LEN=20

# Load cutadapt through environment modules when the `module` command exists;
# otherwise rely on whatever cutadapt is already on PATH.
if command -v module >/dev/null 2>&1; then
  module load cutadapt
fi

## Step 1: demultiplex the pooled FASTQs on the basis of the forward indexing
## barcode encoded in the first 8 bp of R1. The barcodes are read from
## FW_BARCODES and anchored to the 5' end (^). One "<name>.R1.fastq" /
## "<name>.R2.fastq" pair is written per barcode name.
# NOTE(review): whether "-e 1" means an error rate of 1 or one absolute error
# depends on the cutadapt version - confirm against the installed release.
cutadapt -e 1 -g "^file:${FW_BARCODES}" \
  -o "{name}.R1.fastq" -p "{name}.R2.fastq" \
  "${POOL_R1}" "${POOL_R2}"

## Step 2: loop through all the demultiplexed R1 FASTQs, remove the U6 priming
## site from the 5' end (-g) and then shorten each read to PROTOSPACER_LEN
## bases (-l), so that only the 20 bp protospacer sequence is left; anything
## after it (the scaffold) is cut off by the length limit.
## The glob is "*.R1.fastq" (with the dot) so that the pooled input file
## CROP_VIP_Pool_R1.fastq is not picked up as if it were a demultiplexed sample.
for fastq in *.R1.fastq; do
  # If nothing matched, the pattern is left as a literal string; skip it.
  [[ -e "${fastq}" ]] || continue

  # "Sample1.R1.fastq" -> "Sample1.R1_cut.fastq": only the trailing ".fastq"
  # is replaced, even when "fastq" also occurs earlier in the file name.
  out="${fastq%.fastq}_cut.fastq"
  printf '%s\n' "${out}"

  cutadapt -g "${U6_PRIMING_SITE}" -l "${PROTOSPACER_LEN}" -o "${out}" "${fastq}"

  # NOTE(review): the purpose of this short pause is not evident from the
  # script; it is kept from the original.
  sleep 0.3
done

## Step 3: use mageck to generate a counts matrix from the trimmed protospacer
## FASTQs, using the full list of protospacers in GuEST_List as a reference.
mageck count \
  -l GuEST_List_Protospacer_Reference.csv \
  -n pDNA_Diversity_Check \
  --sample-label Sample1,Sample2 \
  --fastq Sample1.R1_cut.fastq Sample2.R1_cut.fastq