From 7f90d9fa7aac07090e42d09db448784d41476f3b Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 26 Jun 2023 10:40:14 +0200 Subject: [PATCH 01/21] Add threads numbers in bwa and samtools commands Ref :#61 --- conf/prod.config | 9 ++++++++- conf/test.config | 7 +++++++ modules/local/module_dna.nf | 6 +++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/conf/prod.config b/conf/prod.config index 3dee02a..117ab99 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -19,12 +19,19 @@ process { ] } - withLabel: samtools { + withLabel: samtools_view { module = ['bioinfo/samtools-1.14'] cpus = { 6 * task.attempt } memory = { 8.GB * task.attempt } time = { 3.h * task.attempt } } + + withLabel: samtools { + module = ['bioinfo/samtools-1.14'] + cpus = 6 + memory = { 8.GB * task.attempt } + time = { 3.h * task.attempt } + } withLabel: qualimap { module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20'] diff --git a/conf/test.config b/conf/test.config index 7e37ac0..cfcba99 100644 --- a/conf/test.config +++ b/conf/test.config @@ -19,6 +19,13 @@ process { ] } + withLabel: samtools_view { + module = ['bioinfo/samtools-1.14'] + cpus = { 6 * task.attempt } + memory = { 8.GB * task.attempt } + time = { 3.h * task.attempt } + } + withLabel: samtools { module = ['bioinfo/samtools-1.14'] cpus = { 1 * task.attempt } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 8dc0709..3f7f6fc 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -19,7 +19,7 @@ process BWA_ALIGNMENT { def reference = params.reference_genome ?: params.reference_transcriptome def referenceName=file(reference).toString().split('/')[6] """ - bwa mem ${reference} ${reads} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log + bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log """ } @@ -28,7 +28,7 @@ process SAMTOOLS_VIEW { tag "$sample" - label 'samtools' 
+ label 'samtools_view' input: tuple val(sample), path(sam) @@ -38,7 +38,7 @@ process SAMTOOLS_VIEW { script: """ - samtools view -bS ${sam} > ${sample}.bam + samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam """ } -- GitLab From db413556e9e7efdcc4eb925bb41300d4aeafdc7f Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 26 Jun 2023 10:41:27 +0200 Subject: [PATCH 02/21] Remove wordir if pipeline succeeds Ref : #62 --- workflow/illumina_qc.nf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 04c0e66..e48b5c3 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -145,6 +145,13 @@ workflow ILLUMINA_QC { def end_mail_sent = false workflow.onComplete { end_mail_sent = sendFinalMail(format.format(new Date()), params.summary) + + // remove work directory if pipeline is successful + if (workflow.success) { + println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir" + exec: + workflow.workDir.deleteDir() + } } workflow.onError { } \ No newline at end of file -- GitLab From f0bcf5ffc0370006e9c92adce90b0a8a0e1bdd4e Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 21 Jul 2023 16:14:14 +0200 Subject: [PATCH 03/21] Increase methode for index counting - for 10X - for sample has name build by subset of another one Ref : #57 --- bin/demuxStatsFromXML.R | 46 +++++++++++++++------------- bin/extractInfoForDemuxStats.pl | 18 +++++------ modules/local/module_core.nf | 4 +-- sub-workflows/local/core_illumina.nf | 1 + 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R index 78a40fc..247e498 100755 --- a/bin/demuxStatsFromXML.R +++ b/bin/demuxStatsFromXML.R @@ -110,7 +110,7 @@ cat("Rassemblement des statistiques par échantillons.\n") for (line in 1:dim(indexNumber)[1]){ mySample<-indexNumber[line, "Sample"] mySampleNumber<-indexNumber[line, "NumberOfIndex"] - 
cat("\nEtude de l'échantillon : " , mySample, "\n") + cat("\nEtude de l'échantillon : " , mySample, "(" , mySampleNumber, "index )\n") # Single Index Case if (mySampleNumber == 1) { df.singleLine<-df[which(df$Sample == mySample),] @@ -126,9 +126,10 @@ for (line in 1:dim(indexNumber)[1]){ #print(sub.df) if (nrow(sub.df) == 0) { cat("Aucun échantillon trouvé !\n") - cat("La recherche de l'échantillon",mySample, "dans le data.table suivant à échouée :\n") + cat("La recherche de l'échantillon", paste0(mySample, sampleName.suffixe), "dans le data.table suivant à échouée :\n") print(df) } else { + countBarcodesDone = 1 # Parcours du sous-data.frame for (l in 1:dim(sub.df)[1]) { sub.df.project<-sub.df[l, "Project"] @@ -138,17 +139,20 @@ for (line in 1:dim(indexNumber)[1]){ sub.df.oneMismatch<-as.numeric(sub.df[l, "bcOneMismatch"]) # bcOneMismatch # Première iteration - if (l == 1 ) { - sub.df.project.toAdd<-sub.df.project - sub.df.barcode.toAdd<-sub.df.barcode - sub.df.bcCount.toAdd<-sub.df.bcCount - sub.df.bcPerfect.toAdd<-sub.df.bcPerfect - sub.df.oneMismatch.toAdd<-sub.df.oneMismatch - } else { - sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode) - sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount - sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect - sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch + countBarcodesDone = countBarcodesDone + str_count(sub.df.barcode, "\\+") + if (countBarcodesDone <= mySampleNumber) { + if (l == 1 ) { + sub.df.project.toAdd<-sub.df.project + sub.df.barcode.toAdd<-sub.df.barcode + sub.df.bcCount.toAdd<-sub.df.bcCount + sub.df.bcPerfect.toAdd<-sub.df.bcPerfect + sub.df.oneMismatch.toAdd<-sub.df.oneMismatch + } else { + sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode) + sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount + sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect + 
sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch + } } } # Add to data.frame @@ -180,14 +184,14 @@ if(nrow(tabUndetermined) > 0) { head(tabUndetermined) } # Construction du dataFrame pour intégration à df2 -df2.Projects<-unique(df2$Project) -myProject<-df2.Projects[which(df2.Projects != "default")] +#df2.Projects<-unique(df2$Project) +#myProject<-df2.Projects[which(df2.Projects != "default")] ### Pour chaque ligne de tabUndertermined, on ajoute une ligne à df2 : if (dim(tabUndetermined)[1] != 0) { df.tabUndetermined<-data.frame() for (i in 1:dim(tabUndetermined)[1]) { - df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-") + df.tabUndetermined.tmp<-data.frame("default", "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-") df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names) } @@ -198,11 +202,11 @@ if (dim(tabUndetermined)[1] != 0) { } ## Soustraction des undertermined aux allOthers -# recuperer les Count de tabUndetermined et soustraire la somme à df2[which(df2$Project == "default"), "bcCount"] +# recuperer les Count de tabUndetermined et soustraire la somme à df2[which(df2$Barcode == "unknown"), "bcCount"] cat("\nQuelques calculs sur les données avant de les exporter.\n") cat("\tActualisation du nombre d'index 'AllOthers'.\n") undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"])) -df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count +df2[which(df2$Barcode == "unknown"), "bcCount"]<-as.numeric(df2[which(df2$Barcode == "unknown"), "bcCount"])-undertermined.count # Calcul pourcentages de chaque barcode cat("\tCalcul du pourcentage sur le nombre de fragments total.\n") @@ -216,9 +220,9 @@ df2<-cbind(df2, percentOfFragment) # Export du data.frame cat("\nSauvegarde du data.frame.\n") -myProject<-"DEBUG" +#myProject<-"DEBUG" # 
mettre des 0 à la place des NA dans df2 -write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv")) +write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats.tsv")) # Ecrire un fichier par valeur de myProject ! Cas ou il y a plusieurs projets sur la même lane. -cat(paste0("\tLe fichier suivant à été créé :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n")) +cat(paste0("\tLe fichier suivant à été créé :\t", launchDir, "/DemultiplexStats.tsv\n")) cat("\nFin normale du script, on sort.\n") diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl index eddd760..f3a51a0 100755 --- a/bin/extractInfoForDemuxStats.pl +++ b/bin/extractInfoForDemuxStats.pl @@ -96,11 +96,6 @@ foreach my $line (@lines) { $machineName = $machineName =~ /^NOVASEQ/ ? 'NOVASEQ' : $machineName; } - # Recherche du nom du projet - if ($line =~ /^Infos/) { - $projectName = $cur_line[1]; - } - # Recherche des positions des Sample_ID et des Index_ID elsif ($line =~ m/${regexForDataHeader{$machineName}}/) { while ( my ( $indice, $valeur ) = each @cur_line ) { @@ -109,13 +104,19 @@ foreach my $line (@lines) { } } - # Association Sample_ID avec sont nombre d'index + # Association Sample_ID avec son nombre d'index elsif ($line =~ m/${regexForSampleLine{$machineName}}/) { my $sample_ID = $cur_line[$sample_ID_position]; my $index_number=0; my @cur_index_ID = (); foreach my $pos (@index_ID_position) { - if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; } + if ($cur_line[$pos] =~ /^SI-T|NT-\w{2}$/) { + $index_number = 2; + } elsif ($cur_line[$pos] =~ /^\w{2}-\w{2}-\w{2}$/) { + $index_number = 4; + } else { + $index_number += 1; + } } $sample_info{$sample_ID} = $index_number; } @@ -128,8 +129,7 @@ foreach my $k (keys(%sample_info)) { $content.="$k\t$sample_info{$k}\n"; } -$projectName = $projectName eq "" ? 
'noName' : $projectName; -my $file2write = "$projectName.indexNumber"; +my $file2write = "indexNumber.tsv"; open(my $fh, '>', $file2write) or exit 1; print $fh $content; diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 9b7f25c..f6c26ee 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -9,7 +9,7 @@ process PREP_DEMUXSTAT { path SampleSheet output: - path "*.indexNumber" + path "indexNumber.tsv" script: """ @@ -30,7 +30,7 @@ process DEMUX_STATS { output: path 'demultiplexStats.log', emit: log - path "DemultiplexStats_*", emit: demultiplexStatsCSV + path "DemultiplexStats.tsv", emit: demultiplexStatsTSV script: """ diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index 6a79fdf..a03cd6f 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -47,5 +47,6 @@ workflow CORE_ILLUMINA { emit: fastq = fastq_good + demuxStat = DEMUX_STATS.out.demultiplexStatsTSV } -- GitLab From 778bbfe6f8408c6049419cd0f9c633deebc27975 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 24 Jul 2023 09:28:45 +0200 Subject: [PATCH 04/21] 5 hours to run fastqc Ref: #23 --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index b99f889..b77a503 100644 --- a/conf/base.config +++ b/conf/base.config @@ -155,7 +155,7 @@ process { maxRetries = 4 module = ['bioinfo/FastQC_v0.11.7'] - time = { 2.h * task.attempt * params.resource_factor } + time = { 5.h * task.attempt * params.resource_factor } } withName: FASTQSCREEN { -- GitLab From 40661eb565d4ffc6aa78b3726aa996517bcad18c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 24 Jul 2023 15:10:28 +0200 Subject: [PATCH 05/21] Improvement of subsampling step - simplification of 
calculations for subsampling - increase memory for subsampling process - add params to skip subsampling step Ref: #63 & #40 --- conf/base.config | 5 +---- nextflow.config | 13 ++++++------- sub-workflows/local/core_pipeline.nf | 29 +++++++++++++--------------- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/conf/base.config b/conf/base.config index b77a503..70333a1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -82,17 +82,13 @@ params.n_samples = n_read_files / 2 params.resource_factor = 0.1 * params.n_samples params { - bytes_subset_seq = miseq_subset_byte subset_seq = miseq_subset_seq if ( sequencer =~ /NovaSeq.*/ ) { if ( n_samples >= large_sampling_threshold ) { - nova_subset_byte = large_indexing_nova_subset_byte nova_subset_seq = large_indexing_nova_subset_seq } - bytes_subset_seq = nova_subset_byte subset_seq = nova_subset_seq } - System.out.println "Seuil de taille de fichier pour subset : " + bytes_subset_seq + " bytes." System.out.println "Nombre de reads pour subset : " + subset_seq + "." 
} @@ -247,6 +243,7 @@ process { ext.args = '-s100' ext.args2 = params.subset_seq + memory = { 5.GB * task.attempt } module = 'bioinfo/seqtk-1.3' publishDir = [ diff --git a/nextflow.config b/nextflow.config index 157085f..6e44f5d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,18 +1,17 @@ // ======================================== // PARAMS // ========================================= + // Global params params { // PARAMETRE POUR OUTILS // Subset fastq files params - large_sampling_threshold = 200 // 200 samples run is high multiplexed - miseq_subset_byte = 20000000 // in byte <=> 20 000 reads - miseq_subset_seq = 20000 // in reads - nova_subset_byte = 700000000 // in byte <=> 1 000 000 reads - nova_subset_seq = 1000000 // in reads - large_indexing_nova_subset_byte = 350000000 // in byte <=> 500 000 reads - large_indexing_nova_subset_seq = 500000 // in reads + no_subset = false // to skip subset step -> use every reads to align + large_sampling_threshold = 200 // 200 samples run is high multiplexed + miseq_subset_seq = "50000" // in reads must be a string + nova_subset_seq = "50000000" // in reads + large_indexing_nova_subset_seq = "500000" // in reads // RNA QC sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases' diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index edbb825..272b1f2 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -43,28 +43,25 @@ workflow CORE { // ----------- Recherche Duplicats GUNZIP(ch_read) - GUNZIP.out.branch{ - large : it[1].size() >= params.bytes_subset_seq - small : it[1].size() < params.bytes_subset_seq - }.set{unzip_reads_split} + // ----------- Sous-échantillonnage + if (params.no_subset) { + unzipped_fastq = GUNZIP.out + } else { + SEQTK_SAMPLE(GUNZIP.out) + unzipped_fastq = SEQTK_SAMPLE.out + } - unzip_reads_split.large.count().map{it}.subscribe onNext: { println it + " large fastq (more than 
${params.subset_seq} reads)" } - unzip_reads_split.small.count().map{it}.subscribe onNext: { println it + " small fastq" } - - // Do subset only on large fastq files - SEQTK_SAMPLE(unzip_reads_split.large) - DUPLICATED_READS(unzip_reads_split.small - .mix(SEQTK_SAMPLE.out) - .collect{it[1]} - .flatten() - .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } - .groupTuple() + DUPLICATED_READS(unzipped_fastq + .collect{it[1]} + .flatten() + .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } + .groupTuple() ) // need fastq paired !!! emit: fastqc_report = FASTQC.out.zip ?: Channel.empty() fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty() fastp_report = DUPLICATED_READS.out.json - subset_fastq = unzip_reads_split.small.mix(SEQTK_SAMPLE.out) + subset_fastq = unzipped_fastq fastq_md5 = MD5SUM.out } -- GitLab From f8a07b3e6780cb7038cd24901796ba877e5d045c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:08:19 +0200 Subject: [PATCH 06/21] BWA runs on R1 and R2 for DNA data Ref: #66 --- workflow/illumina_qc.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index e48b5c3..9c75eca 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -94,7 +94,12 @@ workflow ILLUMINA_QC { CORE(fastq) if (params.data_nature == 'DNA') { - DNA_QC(CORE.out.subset_fastq) + DNA_QC(CORE.out.subset_fastq + .collect{it[1]} + .flatten() + .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } + .groupTuple() + ) ch_mqc = ch_mqc.mix( DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([]) -- GitLab From c4a4794e00e933430367ee86594f03ec12e09483 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:11:00 +0200 Subject: [PATCH 07/21] Adaptative memory for md5sum generation Ref: #65 --- conf/base.config | 1 + 1 file changed, 1 
insertion(+) diff --git a/conf/base.config b/conf/base.config index 70333a1..f193e7f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -286,6 +286,7 @@ process { } withName: MD5SUM { + time = { 3.h * task.attempt * params.resource_factor } publishDir = [ path: { "${params.outdir}/fastq" }, mode: 'copy', -- GitLab From 50ca307e51ef50c855602601c4b0a26aba2ba771 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:13:16 +0200 Subject: [PATCH 08/21] Work dir is not remove when using dev profile Ref: #62 --- conf/prod.config | 1 + conf/test.config | 1 + workflow/illumina_qc.nf | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/prod.config b/conf/prod.config index 117ab99..4b0873f 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -4,6 +4,7 @@ params { ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current' shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' + is_dev_mode = false } // ======================================== diff --git a/conf/test.config b/conf/test.config index cfcba99..9c48729 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,6 +4,7 @@ params { ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/' shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/' + is_dev_mode = true } // ======================================== diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 9c75eca..9beae69 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -152,7 +152,7 @@ workflow.onComplete { end_mail_sent = sendFinalMail(format.format(new Date()), params.summary) // remove work directory if pipeline is successful - if (workflow.success) { + if (workflow.success && !params.is_dev_mode) { println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir" exec: workflow.workDir.deleteDir() -- GitLab From 8d3c91482ce534ad872b24edff1218cffeebc8f5 Mon Sep 
17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:33:53 +0200 Subject: [PATCH 09/21] Fastp subsampling set in base.config file Ref: #68 --- conf/base.config | 8 ++++++++ modules/local/module_core.nf | 2 ++ 2 files changed, 10 insertions(+) diff --git a/conf/base.config b/conf/base.config index f193e7f..ae6461b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -129,6 +129,14 @@ process { mode: 'copy', pattern: "*.log" ] + publishDir = [ + path: "${params.outdir}/Duplicats", + mode: 'copy', + pattern: "*.json" + ] + + ext.args = "--reads_to_process ${params.fastp_n_reads}" + module = ['bioinfo/fastp-0.23.2'] time = { 5.h * task.attempt } memory = { 3.GB * task.attempt } diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index f6c26ee..2127340 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -110,6 +110,7 @@ process DUPLICATED_READS { shell: R1_name=file(fastq[0]).simpleName R2_name=file(fastq[1]).simpleName + def args = task.ext.args ?: '' ''' fastp \ -i !{fastq[0]} \ @@ -120,6 +121,7 @@ process DUPLICATED_READS { --disable_quality_filtering \ --disable_length_filtering \ --json !{R1_name}_fastp.json \ + $args \ 2> !{R1_name}.log ''' } -- GitLab From f42ba02a5adc3e757a1500c0fda1e0f3033fd27c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:36:03 +0200 Subject: [PATCH 10/21] params for fastp subsampling Ref : #68 --- nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow.config b/nextflow.config index 6e44f5d..5042b13 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,6 +22,9 @@ params { sortmerna_euk_18s = sortmerna_db_path + '/silva-euk-18s-id95.fasta' sortmerna_euk_28s = sortmerna_db_path + '/silva-euk-28s-id98.fasta' + // FASTP + fastp_n_reads = 100000000 + // OTHERS email="" email_dev="jules.sabban@inrae.fr" -- GitLab From 44daa011f63cfd4b2effc199990614f49bfca34c Mon Sep 17 00:00:00 2001 From: jsabban 
<jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:40:31 +0200 Subject: [PATCH 11/21] Run/ReadSet demuxStat treatment insertion Ref: #60 --- modules/local/module_NGL-Bi.nf | 52 +++++++++++++++------------------- workflow/illumina_qc.nf | 34 ++++++++++++++-------- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index 96f29d5..6dfe3de 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -1,4 +1,7 @@ -params.outdir='' +/* + * Ensemble de process pour l'interraction avec NGL-Bi + * Process pour la création de traitement SAV + */ process prepareReadSetCreation { @@ -17,38 +20,29 @@ process prepareReadSetCreation { """ } -process readsetNGLBiCreation { - publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created' +process TREATMENT_DEMUXSTAT { + publishDir path: params.outdir + "/ngl/", pattern: '*.log', mode: 'copy' - executor = 'local' - beforeScript = "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'" - errorStrategy = { 'ignore' } - - input : - path infoFile - - output : - path 'ReadsetsNGL-Bi.created', emit: readSetFile - path 'ReadsetsNGL-BiCreation.log', emit: readSetLog - - script : - """ - createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created - - """ -} + label 'ngl' -process checkErrorFromNGLBi { - publishDir path: "${params.outdir}/NGLBi" , mode: 'copy' - input: - path logFile - + val nglCode + path csvFile + val lane + output: - path 'ReadsetsNGL-BiCreation.log' - + path("*.log") + val 1, emit: ready + script: + laneOption = lane ? "--lane $lane" : '' + forceOption = workflow.resume ? 
"--force" : '' """ - checkErrorNGLScripts.pl --file $logFile + perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\ + --code $nglCode \\ + --stat $csvFile \\ + ${laneOption} \\ + ${forceOption} \\ + 1> treatment_demux_${lane}.log """ -} \ No newline at end of file +} diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 9beae69..ccb7fd7 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -57,15 +57,19 @@ createDir = file(params.outdir).mkdir() // ------------------------------------------------- // INCLUDES // ------------------------------------------------- -include { NGLBI } from "$baseDir/sub-workflows/local/begin_nglbi.nf" -include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf" -include { CORE } from "$baseDir/sub-workflows/local/core_pipeline.nf" -include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" -include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" -include { MULTIQC } from "${params.shared_modules}/multiqc.nf" -include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf" -include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf" -include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') +include { NGLBI } from "$baseDir/sub-workflows/local/begin_nglbi.nf" +include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf" +include { CORE } from "$baseDir/sub-workflows/local/core_pipeline.nf" +include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" +include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" +include { PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf" +include { TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN; + TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS + } from "$baseDir/modules/local/module_NGL-Bi.nf" +include { MULTIQC } from 
"${params.shared_modules}/multiqc.nf" +include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf" +include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf" +include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') // ------------------------------------------------- // EMAIL ON START // ------------------------------------------------- @@ -84,11 +88,17 @@ workflow ILLUMINA_QC { NGLBI() } - if ( ! params.skip_core_illumina ) { + if ( params.skip_core_illumina ) { + fastq = ch_read + } else { CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read) fastq = CORE_ILLUMINA.out.fastq - } else { - fastq = ch_read + + if (params.insert_to_ngl){ + // Add demultiplexStat treatments + TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat, params.lane) + TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat, '') + } } CORE(fastq) -- GitLab From 12eae4ff09cce1ad1be563cff6282f50e11a0399 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 11:42:25 +0200 Subject: [PATCH 12/21] Use NF variable for fastp args Ref : #68 --- modules/local/module_core.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 2127340..94f9b76 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -121,7 +121,7 @@ process DUPLICATED_READS { --disable_quality_filtering \ --disable_length_filtering \ --json !{R1_name}_fastp.json \ - $args \ + !{args} \ 2> !{R1_name}.log ''' } -- GitLab From feb43679add83a621b46307e3109b2db8adcb929 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 13:28:59 +0200 Subject: [PATCH 13/21] Move Qualimap process from here to shared_modules Ref: #67 --- conf/base.config | 20 ++++++++++++++++++++ conf/prod.config | 11 
+---------- modules/local/module_dna.nf | 23 ----------------------- sub-workflows/local/dna_qc.nf | 3 +-- 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/conf/base.config b/conf/base.config index ae6461b..7a80be6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -301,4 +301,24 @@ process { pattern: "*.md5sum" ] } + + withName: QUALIMAP { + module = 'bioinfo/qualimap-31-08-20' + cpus = { 8 * task.attempt } + memory = { 2.GB * task.attempt } + time = { 3.h * task.attempt } + + publishDir = [ + path: "${params.outdir}/alignmentStats/qualimap", + mode: 'copy', + pattern: "*/*.html" + ] + + publishDir = [ + path: "${params.outdir}/alignmentStats/qualimap", + mode: 'copy', + pattern: "*/*.txt" + ] + + } } \ No newline at end of file diff --git a/conf/prod.config b/conf/prod.config index 4b0873f..5bf85b8 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -32,16 +32,7 @@ process { cpus = 6 memory = { 8.GB * task.attempt } time = { 3.h * task.attempt } - } - - withLabel: qualimap { - module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20'] - beforeScript='unset DISPLAY' - cpus = { 8 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 3.h * task.attempt } - } - + } withName: BWA_ALIGNMENT { cpus = { 6 * task.attempt } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 3f7f6fc..756d4c5 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -83,29 +83,6 @@ process SAMTOOLS_FLAGSTATS { """ } -process QUALIMAP { - publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.html" - publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.txt" - - tag "$sample" - - label 'qualimap' - - errorStrategy = { 'ignore' } - - input: - tuple val(sample), path(bam) - - output: - tuple val(sample), path("*.log"), emit: log - tuple val(sample), path("*/*"), emit: all // ${sample}_stats/* - tuple val(sample), path("${sample}"), emit: report - - script: 
- """ - qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log - """ -} diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf index 57e1a08..f41af2e 100644 --- a/sub-workflows/local/dna_qc.nf +++ b/sub-workflows/local/dna_qc.nf @@ -14,9 +14,8 @@ include { BWA_ALIGNMENT; SAMTOOLS_VIEW; SAMTOOLS_SORT; SAMTOOLS_FLAGSTATS; - QUALIMAP; } from "$baseDir/modules/local/module_dna.nf" - +include { QUALIMAP } from "${params.shared_modules}/qualimap.nf" // ------------------------------------------------- // WORKFLOW -- GitLab From 1f92bda250d7407fbdeeec7d858e0e48a76d4411 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 13:29:57 +0200 Subject: [PATCH 14/21] Add filter on output qualimap file name Ref: #67 --- assets/multiqc_config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 47abb58..593ca97 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -35,6 +35,7 @@ table_columns_visible: ## Sample name formatting extra_fn_clean_exts: + - "_qualimap_results" - "_filtered" - "_unmerged" - "_flagstat" -- GitLab From 28d4e18120fa09aa430b920a0e5eb77f819e136c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 13:43:45 +0200 Subject: [PATCH 15/21] New ressources for DTM Read and extract metrics from qualimap and fastp reports Create CSV file Ref : #69 --- bin/parse_reports.sh | 28 ++++++++++++++++++++++++++++ conf/test.config | 6 ++++++ modules/local/module_DTM.nf | 22 ++++++++++++++++++++++ workflow/illumina_qc.nf | 6 ++++++ 4 files changed, 62 insertions(+) create mode 100755 bin/parse_reports.sh create mode 100644 modules/local/module_DTM.nf diff --git a/bin/parse_reports.sh b/bin/parse_reports.sh new file mode 100755 index 0000000..a7d46ac --- /dev/null +++ b/bin/parse_reports.sh @@ -0,0 +1,28 @@ +TAG=$1 +FASTP_REPORT=$2 +QUALIMAP_REPORT=$3/genome_results.txt + +O_STAT="./${TAG}.stat" 
+O_CSV="./${TAG}.csv" + +## Get values +DUPLI=$(jq '.duplication.rate' $FASTP_REPORT) +TOT_SEQ=$(( $(sed -n 's/number of reads = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g' | sed 's/,//g') / 2 )) +INSERT=$(sed -n 's/median insert size = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +GC_PERCENT=$(sed -n 's/GC percentage = \(.*%\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +GEN_COV=$(grep ">= 1X" $QUALIMAP_REPORT | sed -n 's/There is a \(.*%\) of.*/\1/p' | sed 's/ //g') +MEAN_COV=$(sed -n 's/mean coverageData.*= \(.*X\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +ALIGN=$(sed -n 's/number of mapped reads =.*(\(.*%\))/\1/p' $QUALIMAP_REPORT | sed 's/ //g') + +## Write stat file +echo "duplication_rate: $DUPLI" >> $O_STAT +echo "total_sequences: $TOT_SEQ" >> $O_STAT +echo "mean_insert_size: $INSERT" >> $O_STAT +echo "GC_percent: $GC_PERCENT" >> $O_STAT +echo "genome_cov_percent: $GEN_COV" >> $O_STAT +echo "mean_cov: $MEAN_COV" >> $O_STAT +echo "align_percent: $ALIGN" >> $O_STAT + +## Write export file +echo "Sample;Tot_seq;Duplication_rate;Mean_insert_size;%GC;%Genome_cov;Mean_cov;%Align" > $O_CSV +echo "$TAG;$TOT_SEQ;$DUPLI;$INSERT;$GC_PERCENT;$GEN_COV;$MEAN_COV;$ALIGN" >> $O_CSV \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 9c48729..0424a4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -47,6 +47,12 @@ process { memory = { 8.GB * task.attempt } time = { 3.d * task.attempt } } + + withName: PARSE_REPORTS { + executor = 'local' + memory = { 500.MB * task.attempt } + time = { 5.m * task.attempt } + } } diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf new file mode 100644 index 0000000..6685386 --- /dev/null +++ b/modules/local/module_DTM.nf @@ -0,0 +1,22 @@ +/* + * Module pour la gestion des analyses particulières dans le cadre d'un DTM +*/ + +process PARSE_REPORTS { + publishDir path: "${params.outdir}/DTM" , mode: 'copy' + + tag "$sample" + + input: + tuple 
val(sample), path(qualimap_folder) + + output: + tuple val(sample), path("*.csv"), emit: csv + + script: + """ + bash parse_reports.sh $sample $fastp_json_report $qualimap_folder + """ +} + diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index ccb7fd7..beb02dc 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -114,6 +114,12 @@ workflow ILLUMINA_QC { DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([]) ) + + // DTM process + if ("$params.is_dev_mode") { + PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report) + } + } else if (params.data_nature =~ 'RNA') { RNA_QC(CORE.out.subset_fastq, ch_sortmerna_db) ch_mqc = ch_mqc.mix( -- GitLab From 48f10fccb4cddc5a97ceb7e8275b8e9c1fc0d6c7 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Fri, 28 Jul 2023 14:29:49 +0200 Subject: [PATCH 16/21] Remove readset creation script We now use the script in NGL-Bi_Client project --- bin/createNGLBiReadSets.pl | 127 ------------------------------------- 1 file changed, 127 deletions(-) delete mode 100755 bin/createNGLBiReadSets.pl diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl deleted file mode 100755 index e5cdf2e..0000000 --- a/bin/createNGLBiReadSets.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/perl -w -binmode STDIN, ':encoding(UTF-8)'; -binmode STDOUT, ':encoding(UTF-8)'; -binmode STDERR, ':encoding(UTF-8)'; - -=head1 NAME - - createNGLBiReadSets.pl - -=head1 DESCRIPTION - - Performe readSets creation on NGL-Bi - -=head1 SYNOPSIS - - createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV> - -=head1 OPTIONS - - --infoFile=s : path to the info file - --env_ngl_bi=s : environment varible of ngl-bi - -=head1 EXEMPLES - - perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV> - -=head1 AUTHOR - - Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr) - -=cut - 
-################################################################### -# -# LIBRAIRIES -# -################################################################### -use strict; -use Getopt::Long; -use Log::Log4perl qw(:easy);; - -################################################################## -# -# INITIALISATION -# -################################################################## -Log::Log4perl -> easy_init( { level => $TRACE, - utf8 => 1, - layout => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' } ); - -my $logger = Log::Log4perl -> get_logger(); - -my $infoFile=""; -my $env_ngl_bi = ""; - -GetOptions ('infoFile=s' => \$infoFile, - "env_ngl_bi=s" => \$env_ngl_bi, # environnement path of NGL-Bi -); - -if ($env_ngl_bi eq "" || $infoFile eq "" ) { - $logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n"); -} - -my $experimentName=""; -my $runName=""; -my $laneNumber=""; -my $script_path="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl"; # Répertoire des scripts de l'API NGL - -################################################################## -# -# NGL-Bi ENVIRONMENT -# -################################################################## - -$ENV{APIPERL}=$env_ngl_bi; -$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf"; -$logger = Log::Log4perl -> get_logger('loadConfFile'); -unless ($ENV{CONFFILE}) { - $logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment"); -} -my $dbconf_file = $ENV{CONFFILE}; -unless (-f $dbconf_file) { - $logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. 
It's necessary for continue."); -} -open my $handle, '<', $dbconf_file; -chomp ( my @lines = <$handle> ); -close $handle; -foreach my $line (@lines) { - $line =~ s/#.*//o; - unless ($line) {next;} - if ($line =~ /(.*)=(.*)/o) { - my $key = $1; - my $value = $2; - $key =~ s/^\s*//o; - $key =~ s/\s*$//o; - $value =~ s/^\s*//o; - $value =~ s/^\s*//o; - $ENV{$key} = $value; - } else { - $logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$_'"); - } -} - -unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib"; -unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib"; - -require illumina; -require json; -$logger -> info("\tVariables d'environnement pour NGL-Bi charées."); - -################################################################## -# -# INFO FILE READING -# -################################################################## -$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!"); -$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!"); -$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!"); - -chomp($experimentName); -chomp($runName); -chomp($laneNumber); - - -my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber"; -$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets); -my $result_commandNGLBiReadSets = `$commandNGLBiReadSets 2>&1`; $? 
and $logger -> logdie("[Erreur]Lancement de createNGL-BiReadSets.pl\n".$result_commandNGLBiReadSets); \ No newline at end of file -- GitLab From f561c94153dbbc0a2d83b45d4bf8b14a1f2f4b91 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 31 Jul 2023 15:24:27 +0200 Subject: [PATCH 17/21] Reorganization of config files and process Ref: #64 --- conf/base.config | 195 +++++++++++++---------------- conf/dependencies_genologin.config | 24 ++++ conf/functions.config | 4 +- conf/genomes.config | 29 ----- conf/prod.config | 47 ------- conf/test.config | 54 -------- modules/local/module_DTM.nf | 4 +- modules/local/module_NGL-Bi.nf | 2 - modules/local/module_core.nf | 16 +-- modules/local/module_dna.nf | 14 +-- nextflow.config | 133 +++++++++++++++----- workflow/illumina_qc.nf | 2 +- 12 files changed, 222 insertions(+), 302 deletions(-) create mode 100644 conf/dependencies_genologin.config delete mode 100644 conf/genomes.config delete mode 100644 conf/prod.config diff --git a/conf/base.config b/conf/base.config index 7a80be6..9683cd8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,62 +1,8 @@ // ======================================== -// PARAMS +// BASE CONFIGURATION //========================================= -System.out.println "Chargement des paramètres de base" -// Fixed params +// Print of analysis parameters params { - // EMPTY INITIALISATION OF INPUT PARAMS - // General params - outdir = "./" // base output directory for all analysis - inputdir = "" - project = "" - sequencer = "" - machine_id = "" - fc_id = "" - fc_type = "" - lane = "" - demux_uniqueness = "" - - data_nature = "" - species = "" - is_multiplex = false - - run_name = "" - run_date = "" - description = "" - split_reads = false - - // DNA / RNA params - reference_genome = "" - make_star_index = false - reference_transcriptome = "" - - // Amplicon / 16S params - min_overlap = "" - max_overlap = "" - - // 10X params - - - // MethylSeq params - puc19 = "" - lambda = "" - - 
// NGL - insert_to_ngl = true - bi_run_code = '' - sq_xp_code = '' -} - -params.samplesheet = params.inputdir.toString() + "/SampleSheet.csv" -params.data_location = params.inputdir.toString() + "/" + params.project.toString() - -// Dynamic params -import java.text.SimpleDateFormat -SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss") -params { - nf_uniqueness = uniqueness_format.format(new Date()) - outdir= params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness - System.out.println "" System.out.println "run_name : "+run_name System.out.println "data : "+data_nature @@ -70,28 +16,6 @@ params { System.out.println "" } -// Dynamic params depending on samples number -import java.nio.file.Files -import java.nio.file.Paths -def n_read_files = Files.walk(Paths.get(params.data_location)) - .filter(Files::isRegularFile) - .filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz")) - .count() - -params.n_samples = n_read_files / 2 -params.resource_factor = 0.1 * params.n_samples - -params { - subset_seq = miseq_subset_seq - if ( sequencer =~ /NovaSeq.*/ ) { - if ( n_samples >= large_sampling_threshold ) { - nova_subset_seq = large_indexing_nova_subset_seq - } - subset_seq = nova_subset_seq - } - System.out.println "Nombre de reads pour subset : " + subset_seq + "." 
-} - // ======================================== // PROCESS //========================================= @@ -108,8 +32,26 @@ process { maxRetries = 2 maxErrors = '-1' - // ----- WithName + // ----- DTM + withName: PARSE_REPORTS { + executor = 'local' + memory = { 500.MB * task.attempt } + time = { 5.m * task.attempt } + + publishDir = [ + path: "${params.outdir}/DTM", + mode: 'copy' + ] + } + // ----- CORE ----- // + withLabel: demux { + publishDir = [ + path: "${params.outdir}/Demux", + mode: 'copy' + ] + } + withName: ILLUMINA_FILTER { publishDir = [ path: "${params.outdir}/IlluminaFilter", @@ -117,8 +59,8 @@ process { pattern: '*.gz'/*, saveAs: { filename -> "${name}.fastq.gz" }*/ ] - - module = ['bioinfo/fastq_illumina_filter-0.1'] + + module = toolsModuleHash['ILLUMINA_FILTER'] cpus = { 3 * task.attempt } time = { 4.h * task.attempt } } @@ -127,17 +69,12 @@ process { publishDir = [ path: "${params.outdir}/Duplicats", mode: 'copy', - pattern: "*.log" - ] - publishDir = [ - path: "${params.outdir}/Duplicats", - mode: 'copy', - pattern: "*.json" + pattern: "*.{log,json}" ] ext.args = "--reads_to_process ${params.fastp_n_reads}" - module = ['bioinfo/fastp-0.23.2'] + module = toolsModuleHash['FASTP'] time = { 5.h * task.attempt } memory = { 3.GB * task.attempt } cpus = { 3 * task.attempt } @@ -157,27 +94,46 @@ process { saveAs: { filename -> "${name}.html" } ] + module = toolsModuleHash['FASTQC'] maxRetries = 4 - module = ['bioinfo/FastQC_v0.11.7'] time = { 5.h * task.attempt * params.resource_factor } } withName: FASTQSCREEN { - ext.args = [ - "--conf ${params.inputdir}/fastq_screen.conf" - ].join(' ') + time = { 1.h * task.attempt } + module = toolsModuleHash['FASTQSCREEN'] + + ext.args = "--conf ${params.inputdir}/fastq_screen.conf" + + publishDir = [ + path: "${params.outdir}/ContaminationSearch/FastQ-Screen", + mode: 'copy' + ] + } + + // ----- DNA ----- // + withLabel: bwa { + module = toolsModuleHash['BWA'] + cpus = { 6 * task.attempt } + memory = { 8.GB * 
task.attempt } + time = { 3.d * task.attempt } + + publishDir = [ + path: "${params.outdir}/alignment/bwa", + mode: 'copy' + ] } // ----- RNA ----- // withName: SALMON_INDEX { - module = ['bioinfo/salmon-1.9.0'] + module = toolsModuleHash['SALMON'] time = { 1.h * task.attempt } memory = { 3.GB * task.attempt } cpus = 8 } withName: SALMON_QUANT { - module = ['bioinfo/salmon-1.9.0'] + module = toolsModuleHash['SALMON'] time = { 1.h * task.attempt } memory = { 3.GB * task.attempt } cpus = 8 @@ -189,12 +145,14 @@ process { ] } - withName: STAR_INDEX { + withName: STAR_INDEX { + module = toolsModuleHash['STAR'] memory = { 50.GB * task.attempt } cpus = 8 } withName: STAR_ALIGN { + module = toolsModuleHash['STAR'] memory = { 20.GB * task.attempt } cpus = 2 } @@ -203,21 +161,37 @@ process { withLabel: littleJob { executor = 'local' } - - withLabel: cigar { - module = ['system/Python-3.7.4:bioinfo/samtools-1.14'] + + withLabel: ngl { + beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf" + publishDir = [ + path: { "${params.outdir}/ngl" }, + mode: 'copy', + pattern: "*.{log,created}" + ] } - // ----- DNA ----- // - withLabel: bwa { - module = ['/tools/share/Modules/bioinfo/bwa-0.7.17'] - beforeScript = "module list" + withLabel: samtools { + module = toolsModuleHash['SAMTOOLS'] + cpus = { 6 * task.attempt } + memory = { 8.GB * task.attempt } + time = { 3.h * task.attempt } } - // ----- RNA ----- // - withLabel: star { - module = ['bioinfo/STAR-2.7.10a_alpha_220314'] + withLabel: alignment { + publishDir = [ + path: "${params.outdir}/alignment/samtools", + mode: 'copy' + ] + } + + withLabel: alignmentStats { + publishDir = [ + path: "${params.outdir}/alignmentStats/samtools", + mode: 'copy' + ] } + } // ======================================== @@ -225,8 +199,8 @@ process { //========================================= process { withName: SAMTOOLS_FAIDX { + module = 
toolsModuleHash['SAMTOOLS'] beforeScript = "module purge" - module = ['bioinfo/samtools-1.16.1'] } withName: GZIP { @@ -252,7 +226,7 @@ process { ext.args2 = params.subset_seq memory = { 5.GB * task.attempt } - module = 'bioinfo/seqtk-1.3' + module = toolsModuleHash['SEQTK_SAMPLE'] publishDir = [ path: { "${params.outdir}/subset" }, @@ -269,7 +243,7 @@ process { ].join(' ') beforeScript = "module purge" - module = 'bioinfo/MultiQC-1.14' + module = toolsModuleHash['MULTIQC'] memory = { 10.GB * task.attempt * params.resource_factor } publishDir = [ @@ -281,7 +255,7 @@ process { } withName: SORTMERNA { - module = 'bioinfo/sortmerna-4.3.2' + module = toolsModuleHash['SORTMERNA'] memory = { 2.GB * task.attempt } time = { 10.h * task.attempt } cpus = { 1 * task.attempt } @@ -303,7 +277,7 @@ process { } withName: QUALIMAP { - module = 'bioinfo/qualimap-31-08-20' + module = toolsModuleHash['QUALIMAP'] cpus = { 8 * task.attempt } memory = { 2.GB * task.attempt } time = { 3.h * task.attempt } @@ -319,6 +293,5 @@ process { mode: 'copy', pattern: "*/*.txt" ] - } } \ No newline at end of file diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config new file mode 100644 index 0000000..4dd63eb --- /dev/null +++ b/conf/dependencies_genologin.config @@ -0,0 +1,24 @@ +// ======================================== +// GENOLOGIN MODULES +//========================================= +// ----- CORE ----- // +toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter-0.1'] +toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2'] +toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7'] +toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2'] + +// ----- RNA ----- // +toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0'] +toolsModuleHash['STAR'] = ['bioinfo/STAR-2.7.10a_alpha_220314'] + +// ----- DNA ----- // +toolsModuleHash['BWA'] = ['/tools/share/Modules/bioinfo/bwa-0.7.17'] +toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools-1.16.1'] + +// 
======================================== +// SHARED MODULES +//========================================= +toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/seqtk-1.3'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC-1.14'] +toolsModuleHash['SORTMERNA'] = ['bioinfo/sortmerna-4.3.2'] +toolsModuleHash['QUALIMAP'] = ['bioinfo/qualimap-31-08-20'] diff --git a/conf/functions.config b/conf/functions.config index f16fa59..2099a7a 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -67,7 +67,7 @@ def customMailSend(body, subject, email_address) { if (email_address == null) { email_address = params.email_bioinfo } - if (workflow.profile == 'dev') { + if (params.is_dev_mode) { email_address = params.email_dev try { def sending = ['echo', '-e' , body ].execute() | [ 'mail', '-s', subject, email_address ].execute() @@ -177,7 +177,7 @@ def sendFinalMail(formatted_date, summary) { if (!params.email && params.email_on_fail && !workflow.success) { email_address = params.email_on_fail } - if (workflow.profile == 'dev') { + if (params.is_dev_mode) { email_address = params.email_dev } // Render the TXT template diff --git a/conf/genomes.config b/conf/genomes.config deleted file mode 100644 index b8ef761..0000000 --- a/conf/genomes.config +++ /dev/null @@ -1,29 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for Genomes paths and indexes - * ------------------------------------------------- - * Defines reference genomes, using Genome paths - * Can be used by any config that customises the base - */ - -params { - genomes { - 'GRCh37' { - bed12 = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bowtie2 = 
"${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/" - } - 'GRCm38' { - bed12 = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bowtie2 = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/BWAIndex/" - } - - } -} diff --git a/conf/prod.config b/conf/prod.config deleted file mode 100644 index 5bf85b8..0000000 --- a/conf/prod.config +++ /dev/null @@ -1,47 +0,0 @@ -// ======================================== -// PARAMS -//========================================= -params { - ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current' - shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' - is_dev_mode = false -} - -// ======================================== -// PROCESSES -//========================================= -process { - withLabel: ngl { - beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf" - publishDir = [ - path: { "${params.outdir}/ngl" }, - mode: 'copy', - pattern: "*.{log,created}" - ] - } - - withLabel: samtools_view { - module = ['bioinfo/samtools-1.14'] - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.h * task.attempt } - } - - withLabel: samtools { - module = ['bioinfo/samtools-1.14'] - cpus = 6 - memory = { 8.GB * task.attempt } - time = { 3.h * task.attempt } - } - - withName: BWA_ALIGNMENT { - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.d * task.attempt } - } -} - -// 
======================================== -// CONFIG FILES -//========================================= -includeConfig "$baseDir/conf/report.config" \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 0424a4d..23c2af3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -6,57 +6,3 @@ params { shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/' is_dev_mode = true } - -// ======================================== -// PROCESSES -//========================================= -process { - withLabel: ngl { - beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/dev_illumina_qc.conf" - publishDir = [ - path: { "${params.outdir}/ngl" }, - mode: 'copy', - pattern: "*.{log,created}" - ] - } - - withLabel: samtools_view { - module = ['bioinfo/samtools-1.14'] - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.h * task.attempt } - } - - withLabel: samtools { - module = ['bioinfo/samtools-1.14'] - cpus = { 1 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 10.m * task.attempt } - } - - withLabel: qualimap { - module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20'] - beforeScript='unset DISPLAY' - cpus = { 1 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 10.m * task.attempt } - } - - withName: BWA_ALIGNMENT { - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.d * task.attempt } - } - - withName: PARSE_REPORTS { - executor = 'local' - memory = { 500.MB * task.attempt } - time = { 5.m * task.attempt } - } -} - - -// ======================================== -// CONFIG FILES -//========================================= -includeConfig "$baseDir/conf/report.config" \ No newline at end of file diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf index 6685386..451b3cd 100644 --- a/modules/local/module_DTM.nf +++ b/modules/local/module_DTM.nf @@ -2,9 +2,7 
@@ * Module pour la gestion des analyses particulières dans le cadre d'un DTM */ -process PARSE_REPORTS { - publishDir path: "${params.outdir}/DTM" , mode: 'copy' - +process PARSE_REPORTS { tag "$sample" input: diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index 6dfe3de..e243b2e 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -21,8 +21,6 @@ process prepareReadSetCreation { } process TREATMENT_DEMUXSTAT { - publishDir path: params.outdir + "/ngl/", pattern: '*.log', mode: 'copy' - label 'ngl' input: diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 94f9b76..9f904ed 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -3,8 +3,8 @@ */ process PREP_DEMUXSTAT { - publishDir path: "${params.outdir}/Demux" , mode: 'copy' - + label 'demux' + input: path SampleSheet @@ -19,9 +19,7 @@ process PREP_DEMUXSTAT { } process DEMUX_STATS { - publishDir path: "${params.outdir}/Demux" , mode: 'copy' - - //module 'system/R-4.0.4_gcc-9.3.0' // Ne fonctionne pas ! 
+ label 'demux' input: path DemuxStatXML @@ -75,12 +73,7 @@ process ILLUMINA_FILTER { } -process FASTQSCREEN { - publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy' - - module 'bioinfo/FastQ-Screen-0.15.2' - time { 1.h * task.attempt } - +process FASTQSCREEN { tag " $sample" input: @@ -97,7 +90,6 @@ process FASTQSCREEN { } process DUPLICATED_READS { - tag "$sample" input: diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 756d4c5..2afa198 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -3,9 +3,8 @@ */ process BWA_ALIGNMENT { - publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy' - tag "$sample" + label 'bwa' input: @@ -24,11 +23,10 @@ process BWA_ALIGNMENT { } process SAMTOOLS_VIEW { - publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy' - tag "$sample" - label 'samtools_view' + label 'samtools' + label 'alignment' input: tuple val(sample), path(sam) @@ -43,11 +41,10 @@ process SAMTOOLS_VIEW { } process SAMTOOLS_SORT { - publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy' - tag "$sample" label 'samtools' + label 'alignment' input: tuple val(sample), path(bam) @@ -64,11 +61,10 @@ process SAMTOOLS_SORT { } process SAMTOOLS_FLAGSTATS { - publishDir path: "${params.outdir}/alignmentStats/samtools" , mode: 'copy' - tag "$sample" label 'samtools' + label 'alignmentStats' input: tuple val(sample), path(bam) diff --git a/nextflow.config b/nextflow.config index 5042b13..5541f97 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,11 +1,64 @@ // ======================================== -// PARAMS -// ========================================= +// WORKFLOW FLAGS / OPTIONS +//========================================= +params { + // ----- GLOBAL PARAMETERS ----- + inputdir = "" + project = "" + sequencer = "" + machine_id = "" + fc_id = "" + fc_type = "" + lane = "" + demux_uniqueness = "" + + data_nature = "" + species = "" + is_multiplex = 
false + + run_name = "" + run_date = "" + description = "" + split_reads = false + + // DNA / RNA params + reference_genome = "" + make_star_index = false + reference_transcriptome = "" + + // Amplicon / 16S params + min_overlap = "" + max_overlap = "" + + // 10X params -// Global params -params { - // PARAMETRE POUR OUTILS + // MethylSeq params + puc19 = "" + lambda = "" + // NGL + ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current' + insert_to_ngl = true + bi_run_code = '' + sq_xp_code = '' + + // Shared Modules + shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' + + // OTHERS + cluster_options = '' + is_dev_mode = false + DTM_mode = false + host = 'genologin' + email="" + email_dev="jules.sabban@inrae.fr" + email_on_fail="jules.sabban@inrae.fr" + email_bioinfo="get-plage.bioinfo@genotoul.fr" + //email_labo="get-plage.labo@genotoul.fr" + email_labo="" + + + // ----- TOOLS PARAMETERS ----- // Subset fastq files params no_subset = false // to skip subset step -> use every reads to align large_sampling_threshold = 200 // 200 samples run is high multiplexed @@ -25,50 +78,67 @@ params { // FASTP fastp_n_reads = 100000000 - // OTHERS - email="" - email_dev="jules.sabban@inrae.fr" - email_on_fail="jules.sabban@inrae.fr" - email_bioinfo="get-plage.bioinfo@genotoul.fr" - //email_labo="get-plage.labo@genotoul.fr" - email_labo="" - - cluster_options = '' - // skip parameters skip_core_illumina = false - monochrome_logs = true help = false - - config_profile_description = false // ?? - config_profile_contact = false // ?? - config_profile_url = false // ?? 
+} + +// ======================================== +// ANALYSIS PARAMETERS +//========================================= +import java.text.SimpleDateFormat +SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss") + +import java.nio.file.Files +import java.nio.file.Paths + +params.data_location = params.inputdir.toString() + "/" + params.project.toString() +def n_read_files = Files.walk(Paths.get(params.data_location)) + .filter(Files::isRegularFile) + .filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz")) + .count() + +params.n_samples = n_read_files / 2 +params.resource_factor = 0.1 * params.n_samples + +params { + // Dynamics params, depend on others + samplesheet = inputdir.toString() + "/SampleSheet.csv" + nf_uniqueness = uniqueness_format.format(new Date()) + outdir = params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness + + subset_seq = miseq_subset_seq + if ( sequencer =~ /NovaSeq.*/ ) { + if ( n_samples >= large_sampling_threshold ) { + nova_subset_seq = large_indexing_nova_subset_seq + } + subset_seq = nova_subset_seq + } } // ======================================== // PROFILES //========================================= -// Load base.config by default for all pipelines -includeConfig "$baseDir/conf/base.config" +toolsModuleHash = [:] +if (params.host == 'genologin') { + includeConfig "$baseDir/conf/dependencies_genologin.config" +} else if (params.host == 'genobioinfo') { + includeConfig "$baseDir/conf/dependencies_genobioinfo.config" +} -System.out.println "Les configurations de bases sont chargées" +// Load base.config and report.config by default for all pipelines +includeConfig "$baseDir/conf/base.config" +includeConfig "$baseDir/conf/report.config" // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev process.container = "$baseDir/template-nf.sif" profiles { - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } - singularity { singularity.enabled = true } - dev { includeConfig "$baseDir/conf/test.config" } - prod { includeConfig "$baseDir/conf/prod.config" } + dev { includeConfig "$baseDir/conf/test.config" } } -System.out.println "Tous les profiles ont été analysés" - // Avoid this error: // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. @@ -76,4 +146,3 @@ docker.runOptions = '-u \$(id -u):\$(id -g)' // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -System.out.println "Sortie du nextflow.config" \ No newline at end of file diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index beb02dc..5d2e895 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -116,7 +116,7 @@ workflow ILLUMINA_QC { ) // DTM process - if ("$params.is_dev_mode") { + if (params.DTM_mode) { PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report) } -- GitLab From 3383b1c7419eb5d8b151703a680cf021c773d919 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 31 Jul 2023 15:24:59 +0200 Subject: [PATCH 18/21] Config file for genobioinfo server Ref : #70 --- conf/dependencies_genobioinfo.config | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 conf/dependencies_genobioinfo.config diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config new file mode 100644 index 0000000..9ac8895 --- /dev/null +++ b/conf/dependencies_genobioinfo.config @@ 
-0,0 +1,24 @@ +// ======================================== +// GENOBIOINFO MODULES +//========================================= +// ----- CORE ----- // +toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter/0.1'] +toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2'] +toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1'] // version upgraded face to genologin +toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3'] + +// ----- RNA ----- // +toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0'] // version upgraded face to genologin +toolsModuleHash['STAR'] = ['bioinfo/STAR/2.7.5a'] // version upgraded face to genologin + +// ----- DNA ----- // +toolsModuleHash['BWA'] = ['bioinfo/bwa/0.7.17'] +toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools/1.18'] // version upgraded face to genologin + +// ======================================== +// SHARED MODULES +//========================================= +toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/Seqtk/1.3'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14'] +toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/1.18'] // Not installed +toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20'] -- GitLab From acd5b48d1254c8883cb2eeb0f9846aba42f462e5 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 31 Jul 2023 16:55:15 +0200 Subject: [PATCH 19/21] Set R module --- conf/base.config | 4 ++++ conf/dependencies_genobioinfo.config | 1 + conf/dependencies_genologin.config | 1 + modules/local/module_core.nf | 1 - 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 9683cd8..43666af 100644 --- a/conf/base.config +++ b/conf/base.config @@ -52,6 +52,10 @@ process { ] } + withName: DEMUX_STATS { + module = toolsModuleHash['R'] + } + withName: ILLUMINA_FILTER { publishDir = [ path: "${params.outdir}/IlluminaFilter", diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config index 9ac8895..0568a41 100644 --- 
a/conf/dependencies_genobioinfo.config +++ b/conf/dependencies_genobioinfo.config @@ -6,6 +6,7 @@ toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter/0.1'] toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2'] toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1'] // version upgraded face to genologin toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3'] +toolsModuleHash['R'] = ['statistics/R/4.3.0'] // ----- RNA ----- // toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0'] // version upgraded face to genologin diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config index 4dd63eb..7c9fa92 100644 --- a/conf/dependencies_genologin.config +++ b/conf/dependencies_genologin.config @@ -6,6 +6,7 @@ toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter-0.1'] toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2'] toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7'] toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2'] +toolsModuleHash['R'] = ['system/R-4.0.4_gcc-9.3.0'] // ----- RNA ----- // toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0'] diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 9f904ed..cf7e9f6 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -32,7 +32,6 @@ process DEMUX_STATS { script: """ - module load system/R-4.0.4_gcc-9.3.0 demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log """ } -- GitLab From 7fb81834344b3045749acb2bac62109812c4d066 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Wed, 2 Aug 2023 15:23:50 +0200 Subject: [PATCH 20/21] New module for sortmerna on genobioinfo Ref : #70 --- conf/dependencies_genobioinfo.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config index 0568a41..5276bce 100644 --- 
a/conf/dependencies_genobioinfo.config +++ b/conf/dependencies_genobioinfo.config @@ -21,5 +21,5 @@ toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools/1.18'] // version upgraded fa //========================================= toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/Seqtk/1.3'] toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14'] -toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/1.18'] // Not installed +toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6'] // version upgraded face to genologin toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20'] -- GitLab From 0abde469738cb1924de1563c8926a5f29fa91806 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Wed, 2 Aug 2023 15:25:15 +0200 Subject: [PATCH 21/21] Update version number --- conf/report.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/report.config b/conf/report.config index 68b3e9d..2c00805 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.2.4' + version = '1.6.0' } \ No newline at end of file -- GitLab