Configuration File

To download a configuration file template, run the pipeline with the --get_config parameter. Using a config file makes your command line much cleaner and more concise.

# get config
nextflow run fmalmeida/ngs-preprocess --get_config

# run with config
nextflow run fmalmeida/ngs-preprocess -c [path-to-config]

Default configuration

/*

                                      fmalmeida/ngs-preprocess pipeline configuration file

                    Maintained by Felipe Marques de Almeida
                    Contact: almeidafmarques@outlook.com

                    This file contains all the parameters that are required by the pipeline.
                    Some of them can be left blank, while others need a proper configuration.

                    Please do not change anything before the = sign. Fields must be configured 
                    and set with values after the = sign.

                    Since the input file parameters always work as pattern matches,
                    they MUST ALWAYS be double quoted, as in the example below.

                    The pipeline will process each file that matches the pattern separately.

                                E.g. param1 = "my_reads/input*.fastq"

 */

// Pipeline parameters. Only edit the values after the = sign.
params {

  // Sets output directory
  output  = "output"

  // Inputs from SRA. A file containing one SRA ID per line.
  sra_ids = null

  /*


                    Parameters for short reads preprocessing
                              with fastp


   */

  shortreads = null                  // Path to shortreads. Examples: "SRR6307304_{1,2}.fastq" | "SRR7128258*".
                                     // Must be double quoted, and paired end reads must have the pattern {1,2}.
                                     // The pipeline will process each file that matches the pattern separately.

  shortreads_type = null             // Type of shortreads. Values: paired | single

  fastp_average_quality = 20         // Filter reads by their average quality (it sets the --average_qual fastp param). Default: 20.

  fastp_merge_pairs = false          // If true, fastp will try to merge paired end reads.

  fastp_correct_pairs = false        // If true, fastp will try to correct small base errors in paired end reads.

  fastp_additional_parameters = null // Pass any other additional fastp parameters that you want.
                                     // See https://github.com/OpenGene/fastp.


  /*

                      Optional parameters for filtering longreads from pacbio or nanopore.
                      This step uses nanofilt for both technologies. If you prefer to
                      filter reads from any technology with a different program, you can
                      leave them blank and this step will not be executed.

   */
  lreads_min_quality = 5             // If blank, long reads (lreads) will not be filtered.

  lreads_min_length  = 500           // If blank, long reads (lreads) will not be filtered.

  /*


                      Parameters for nanopore ONT longreads preprocessing


   */
  nanopore_fastq = null             // Path to nanopore ONT basecalled reads in fastq

  nanopore_is_barcoded = false      // Tells whether or not nanopore reads are barcoded.
                                    // If so, it will split barcodes into separate files.

  nanopore_sequencing_summary = null // Path to nanopore 'sequencing_summary.txt'.
                                     // Using this will make the pipeline render a
                                     // sequencing statistics report using pycoQC.
                                     // pycoQC reports will be saved using the
                                     // file's basename, so please use meaningful basenames,
                                     // such as: sample1.txt, sample2.txt, etc.

  /*


                      Parameters for PacBio longreads preprocessing

                      Use pacbio_bam or pacbio_h5, not both.


   */

  pacbio_bam  = null               // Path to PacBio subreads in bam format

  pacbio_h5   = null               // Path to directory containing legacy *.bas.h5 data (1 per directory)

  pacbio_barcodes = null           // Path to xml/fasta file containing barcode information.
                                   // It will split barcodes into separate files.

  pacbio_barcode_design = "same"   // By default, only reads with "same" barcodes are given.
                                   // Options: same, different, any

  pacbio_get_hifi = false          // Whether or not to try to compute CCS reads

  // Max resource options
  // These are defaults only and are expected to be overwritten
  max_memory                 = '6.GB'
  max_cpus                   = 4
  max_time                   = '40.h'

}