Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.0.1 #8

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ If you don't have permission for global system installation, try the following _
enterocolitica [-o /path/to/output/directory/] [-j N]
[--trueCoverage] [--noCheckPoint] [--minGeneCoverage N]
[--minGeneIdentity N] [--minGeneDepth N]
[--bowtieAlgo="--very-sensitive-local"]
[--doNotRemoveConsensus] [--debug]

In silico pathogenic typing directly from raw Illumina reads
Expand All @@ -99,7 +100,7 @@ If you don't have permission for global system installation, try the following _
--version Version information

Required options:
-f /path/to/input/file.fq.gz [/path/to/input/file.fq.gz ...], --fastq /path/to/input/file.fq.gz [/path/to/input/file.fq.gz ...]
-f --fastq /path/to/input/file.fq.gz [/path/to/input/file.fq.gz ...]
Path to single OR paired-end fastq files. If two files
are passed, they will be assumed as being the paired
fastq files (default: None)
Expand All @@ -126,6 +127,16 @@ If you don't have permission for global system installation, try the following _
positions to consider a gene to be present (default
15, or 1/3 of average sample coverage assessed by true
coverage analysis) (default: None)
--bowtieAlgo="--very-sensitive-local"
Bowtie2 alignment mode. It can be an end-to-end alignment
(unclipped alignment) or local alignment (soft clipped
alignment). You can also choose between fast or sensitive
alignments. Please check Bowtie2 manual for extra information:
http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .
This option should be provided between quotes and start
with a leading space (like --bowtieAlgo " --very-fast") or
be passed using an equals sign (like --bowtieAlgo="--very-fast")
(default: "--very-sensitive-local")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You state that the option should be provided between quotes and starting with an empty space, but the default value isn't like this. Is this correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default only displays the default value that will be used

--doNotRemoveConsensus
Do not remove ReMatCh consensus sequences (default:
False)
Expand Down
2 changes: 1 addition & 1 deletion pathotyping/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.0'
__version__ = '1.0.1'
22 changes: 17 additions & 5 deletions pathotyping/modules/run_rematch.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def clean_rematch_folder(consensus_files, bam_file, reference_file, outdir, doNo
remove_reference_stuff(outdir, reference_file)


def sequence_data(sample, reference_file, bam_file, outdir, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, rematch):
def sequence_data(sample, reference_file, bam_file, outdir, threads, length_extra_seq, minimum_depth_presence,
minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, rematch):
sequence_data_outdir = os.path.join(outdir, 'sequence_data', '')
utils.removeDirectory(sequence_data_outdir)
os.mkdir(sequence_data_outdir)
Expand All @@ -52,7 +53,10 @@ def sequence_data(sample, reference_file, bam_file, outdir, threads, length_extr
sequence_dir = os.path.join(sequence_data_outdir, str(sequence_counter), '')
utils.removeDirectory(sequence_dir)
os.makedirs(sequence_dir)
pool.apply_async(rematch.analyse_sequence_data, args=(bam_file, sequences[sequence_counter], sequence_dir, sequence_counter, reference_file, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele,))
pool.apply_async(rematch.analyse_sequence_data, args=(bam_file, sequences[sequence_counter], sequence_dir,
sequence_counter, reference_file, length_extra_seq,
minimum_depth_presence, minimum_depth_call,
minimum_depth_frequency_dominant_allele,))
pool.close()
pool.join()

Expand Down Expand Up @@ -91,7 +95,9 @@ def determine_general_statistics(sample_data, minimum_gene_coverage, minimum_gen


@module_timer
def run_rematch(rematch, outdir, reference_file, bam_file, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage, minimum_gene_identity, debug_mode_true, doNotRemoveConsensus):
def run_rematch(rematch, outdir, reference_file, bam_file, threads, length_extra_seq, minimum_depth_presence,
minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage,
minimum_gene_identity, debug_mode_true, doNotRemoveConsensus):
module_dir = os.path.join(outdir, 'rematch', '')
utils.removeDirectory(module_dir)
os.makedirs(module_dir)
Expand All @@ -100,7 +106,9 @@ def run_rematch(rematch, outdir, reference_file, bam_file, threads, length_extra
import rematch_module as rematch

print('Analysing alignment data')
run_successfully, sample_data, consensus_files, consensus_sequences = sequence_data('sample', reference_file, bam_file, module_dir, threads, length_extra_seq, minimum_depth_presence, minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, rematch)
run_successfully, sample_data, consensus_files, consensus_sequences = \
sequence_data('sample', reference_file, bam_file, module_dir, threads, length_extra_seq, minimum_depth_presence,
minimum_depth_call, minimum_depth_frequency_dominant_allele, debug_mode_true, rematch)

if run_successfully:
number_absent_genes, number_genes_multiple_alleles, mean_sample_coverage = \
Expand All @@ -112,4 +120,8 @@ def run_rematch(rematch, outdir, reference_file, bam_file, threads, length_extra

clean_rematch_folder(consensus_files, bam_file, reference_file, outdir, doNotRemoveConsensus, debug_mode_true)

return run_successfully, {'number_absent_genes': number_absent_genes if 'number_absent_genes' in locals() else None, 'number_genes_multiple_alleles': number_genes_multiple_alleles if 'number_genes_multiple_alleles' in locals() else None, 'mean_sample_coverage': round(mean_sample_coverage, 2) if 'mean_sample_coverage' in locals() else None}, sample_data if 'sample_data' in locals() else None
return run_successfully, \
{'number_absent_genes': number_absent_genes if 'number_absent_genes' in locals() else None,
'number_genes_multiple_alleles': number_genes_multiple_alleles if 'number_genes_multiple_alleles' in locals() else None,
'mean_sample_coverage': round(mean_sample_coverage, 2) if 'mean_sample_coverage' in locals() else None}, \
sample_data if 'sample_data' in locals() else None
41 changes: 23 additions & 18 deletions pathotyping/patho_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
Illumina reads
<https://github.com/B-UMMI/patho_typing/>

Copyright (C) 2018 Miguel Machado <[email protected]>
Copyright (C) 2019 Miguel Machado <[email protected]>

Last modified: October 15, 2018
Last modified: January 10, 2019

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -144,13 +144,13 @@ def indexSequenceBowtie2(referenceFile, threads):
return run_successfully


def run_bowtie(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc):
def run_bowtie(fastq_files, referenceFile, threads, outdir, numMapLoc, bowtie_algorithm):
sam_file = os.path.join(outdir, str('alignment.sam'))

run_successfully = indexSequenceBowtie2(referenceFile, threads)
if run_successfully:
command = ['bowtie2', '-k', str(numMapLoc), '-q', '', '--threads', str(threads), '-x', referenceFile, '',
'--no-unal', '-S', sam_file]
command = ['bowtie2', '-k', str(numMapLoc), '-q', bowtie_algorithm, '--threads', str(threads), '-x',
referenceFile, '', '--no-unal', '-S', sam_file]

if len(fastq_files) == 1:
command[9] = '-U ' + fastq_files[0]
Expand All @@ -159,11 +159,6 @@ def run_bowtie(fastq_files, referenceFile, threads, outdir, conserved_True, numM
else:
return False, None

if conserved_True:
command[4] = '--sensitive'
else:
command[4] = '--very-sensitive-local'

run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)

if not run_successfully:
Expand All @@ -189,9 +184,9 @@ def indexAlignment(alignment_file):
return run_successfully


def mapping_reads(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc):
def mapping_reads(fastq_files, referenceFile, threads, outdir, numMapLoc, bowtie_algorithm):
print('\n' + 'Mapping the reads' + '\n')
run_successfully, sam_file = run_bowtie(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc)
run_successfully, sam_file = run_bowtie(fastq_files, referenceFile, threads, outdir, numMapLoc, bowtie_algorithm)
bam_file = None
if run_successfully:
run_successfully, bam_file = sortAlignment(sam_file, str(os.path.splitext(sam_file)[0] + '.bam'), False,
Expand Down Expand Up @@ -331,6 +326,16 @@ def main():
help='Minimum typing gene average coverage depth of present positions to'
' consider a gene to be present (default is 1/3 of average sample'
' coverage or 15x)', required=False)
parser_optional_general.add_argument('--bowtieAlgo', type=str, metavar='"--very-sensitive-local"',
help='Bowtie2 alignment mode. It can be an end-to-end alignment'
' (unclipped alignment) or local alignment (soft clipped'
' alignment). Also, can choose between fast or sensitive'
' alignments. Please check Bowtie2 manual for extra information:'
' http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .'
' This option should be provided between quotes and starting with'
' an empty space (like --bowtieAlgo " --very-fast") or using equal'
' sign (like --bowtieAlgo="--very-fast")',
required=False, default=['--very-sensitive-local'])
parser_optional_general.add_argument('--doNotRemoveConsensus', action='store_true',
help='Do not remove ReMatCh consensus sequences')
parser_optional_general.add_argument('--debug', action='store_true',
Expand Down Expand Up @@ -366,7 +371,8 @@ def main():

confirm_genes_fasta_rules(typing_headers, typing_rules)

run_successfully, bam_file = mapping_reads(args.fastq, reference_file, args.threads, args.outdir, False, 1)
run_successfully, bam_file = mapping_reads(args.fastq, reference_file, args.threads, args.outdir, 1,
args.bowtieAlgo)
if run_successfully:
rematch_dir = os.path.join(args.outdir, 'rematch', '')
if not os.path.isdir(rematch_dir):
Expand Down Expand Up @@ -401,8 +407,8 @@ def main():
sample_data_general['number_genes_multiple_alleles'] is not None:
if args.minGeneDepth is None:
args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \
sample_data_general['mean_sample_coverage'] / 3 > 15 else \
15
sample_data_general['mean_sample_coverage'] / 3 > 15 else \
15

exit_info = []
if sample_data_general['mean_sample_coverage'] < config['minimum_read_coverage']:
Expand Down Expand Up @@ -476,9 +482,8 @@ def main():
args.debug, args.doNotRemoveConsensus)
if run_successfully and data_by_gene is not None:
if args.minGeneDepth is None:
args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \
sample_data_general['mean_sample_coverage'] / 3 > 15 else \
15
args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 \
if sample_data_general['mean_sample_coverage'] / 3 > 15 else 15

_, _, _ = typing.typing(data_by_gene, typing_rules, config['minimum_gene_coverage'],
config['minimum_gene_identity'], args.minGeneDepth, args.outdir)
Expand Down