From 7f90d9fa7aac07090e42d09db448784d41476f3b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 26 Jun 2023 10:40:14 +0200
Subject: [PATCH 01/21] Add threads numbers in bwa and samtools commands

	Ref :#61
---
 conf/prod.config            | 9 ++++++++-
 conf/test.config            | 7 +++++++
 modules/local/module_dna.nf | 6 +++---
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/conf/prod.config b/conf/prod.config
index 3dee02a..117ab99 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -19,12 +19,19 @@ process {
         ]
 	}
 	
-	withLabel: samtools {
+	withLabel: samtools_view {
 		module = ['bioinfo/samtools-1.14']
 		cpus = { 6 * task.attempt }
 	    memory = { 8.GB * task.attempt }
 	    time = { 3.h * task.attempt }
 	}
+
+	withLabel: samtools {
+		module = ['bioinfo/samtools-1.14']
+		cpus = 6
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
 	
 	withLabel: qualimap {
 		module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20']
diff --git a/conf/test.config b/conf/test.config
index 7e37ac0..cfcba99 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -19,6 +19,13 @@ process {
         ]
 	}
 	
+	withLabel: samtools_view {
+		module = ['bioinfo/samtools-1.14']
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+	}
+	
 	withLabel: samtools {
 		module = ['bioinfo/samtools-1.14']
 		cpus = { 1 * task.attempt }
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 8dc0709..3f7f6fc 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -19,7 +19,7 @@ process BWA_ALIGNMENT {
 	def reference = params.reference_genome ?: params.reference_transcriptome
 	def referenceName=file(reference).toString().split('/')[6]
 	"""
-		bwa mem ${reference} ${reads} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log
+		bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log
 	"""
 }
 
@@ -28,7 +28,7 @@ process SAMTOOLS_VIEW {
 	
 	tag "$sample"
 	
-	label 'samtools'
+	label 'samtools_view'
 	
 	input:
 		tuple val(sample), path(sam)
@@ -38,7 +38,7 @@ process SAMTOOLS_VIEW {
 		
 	script:
 	"""
-		samtools view -bS ${sam} > ${sample}.bam
+		samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam
 	"""
 }
 
-- 
GitLab


From db413556e9e7efdcc4eb925bb41300d4aeafdc7f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 26 Jun 2023 10:41:27 +0200
Subject: [PATCH 02/21] Remove workdir if pipeline succeeds

	Ref : #62
---
 workflow/illumina_qc.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 04c0e66..e48b5c3 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -145,6 +145,13 @@ workflow ILLUMINA_QC {
 def end_mail_sent = false
 workflow.onComplete {
 	end_mail_sent = sendFinalMail(format.format(new Date()), params.summary)
+
+	// remove work directory if pipeline is successful
+	if (workflow.success) {
+		println "Pipeline terminÃ© avec succÃ¨s => suppression du workdir : $workflow.workDir"
+		exec:
+			workflow.workDir.deleteDir()
+	}
 }
 
 workflow.onError { }
\ No newline at end of file
-- 
GitLab


From f0bcf5ffc0370006e9c92adce90b0a8a0e1bdd4e Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 21 Jul 2023 16:14:14 +0200
Subject: [PATCH 03/21] Improve method for index counting

	- for 10X
	- for samples whose name is a subset (substring) of another sample's name

	Ref : #57
---
 bin/demuxStatsFromXML.R              | 46 +++++++++++++++-------------
 bin/extractInfoForDemuxStats.pl      | 18 +++++------
 modules/local/module_core.nf         |  4 +--
 sub-workflows/local/core_illumina.nf |  1 +
 4 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
index 78a40fc..247e498 100755
--- a/bin/demuxStatsFromXML.R
+++ b/bin/demuxStatsFromXML.R
@@ -110,7 +110,7 @@ cat("Rassemblement des statistiques par Ã©chantillons.\n")
 for (line in 1:dim(indexNumber)[1]){
 	mySample<-indexNumber[line, "Sample"]
 	mySampleNumber<-indexNumber[line, "NumberOfIndex"]
-	cat("\nEtude de l'Ã©chantillon : " , mySample, "\n")
+	cat("\nEtude de l'Ã©chantillon : " , mySample, "(" , mySampleNumber, "index )\n")
 	# Single Index Case
 	if (mySampleNumber == 1) {
 		df.singleLine<-df[which(df$Sample == mySample),]
@@ -126,9 +126,10 @@ for (line in 1:dim(indexNumber)[1]){
 		#print(sub.df)
 		if (nrow(sub.df) == 0) {
 			cat("Aucun Ã©chantillon trouvÃ© !\n")
-			cat("La recherche de l'Ã©chantillon",mySample, "dans le data.table suivant Ã  Ã©chouÃ©e :\n")
+			cat("La recherche de l'Ã©chantillon", paste0(mySample, sampleName.suffixe), "dans le data.table suivant Ã  Ã©chouÃ©e :\n")
 			print(df)
 		} else {
+			countBarcodesDone = 1
 			# Parcours du sous-data.frame
 			for (l in 1:dim(sub.df)[1]) {
 				sub.df.project<-sub.df[l, "Project"]
@@ -138,17 +139,20 @@ for (line in 1:dim(indexNumber)[1]){
 				sub.df.oneMismatch<-as.numeric(sub.df[l, "bcOneMismatch"])	# bcOneMismatch
 				
 				# PremiÃ¨re iteration
-				if (l == 1 ) {
-					sub.df.project.toAdd<-sub.df.project
-					sub.df.barcode.toAdd<-sub.df.barcode
-					sub.df.bcCount.toAdd<-sub.df.bcCount
-					sub.df.bcPerfect.toAdd<-sub.df.bcPerfect
-					sub.df.oneMismatch.toAdd<-sub.df.oneMismatch
-				} else {
-					sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode)
-					sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount
-					sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect
-					sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch
+				countBarcodesDone = countBarcodesDone + str_count(sub.df.barcode, "\\+")
+				if (countBarcodesDone <= mySampleNumber) {
+					if (l == 1 ) {
+						sub.df.project.toAdd<-sub.df.project
+						sub.df.barcode.toAdd<-sub.df.barcode
+						sub.df.bcCount.toAdd<-sub.df.bcCount
+						sub.df.bcPerfect.toAdd<-sub.df.bcPerfect
+						sub.df.oneMismatch.toAdd<-sub.df.oneMismatch
+					} else {
+						sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode)
+						sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount
+						sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect
+						sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch
+					}
 				}
 			}
 			# Add to data.frame
@@ -180,14 +184,14 @@ if(nrow(tabUndetermined) > 0) { head(tabUndetermined) }
 
 
 # Construction du dataFrame pour intÃ©gration Ã  df2
-df2.Projects<-unique(df2$Project)
-myProject<-df2.Projects[which(df2.Projects != "default")]
+#df2.Projects<-unique(df2$Project)
+#myProject<-df2.Projects[which(df2.Projects != "default")]
 
 ### Pour chaque ligne de tabUndertermined, on ajoute une ligne Ã  df2 :
 if (dim(tabUndetermined)[1] != 0) {
 	df.tabUndetermined<-data.frame()
 	for (i in 1:dim(tabUndetermined)[1]) {
-		df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
+		df.tabUndetermined.tmp<-data.frame("default", "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
 		df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
 	}
 	
@@ -198,11 +202,11 @@ if (dim(tabUndetermined)[1] != 0) {
 }
 
 ## Soustraction des undertermined aux allOthers
-# recuperer les Count de tabUndetermined et soustraire la somme Ã  df2[which(df2$Project == "default"), "bcCount"]
+# recuperer les Count de tabUndetermined et soustraire la somme Ã  df2[which(df2$Barcode == "unknown"), "bcCount"]
 cat("\nQuelques calculs sur les donnÃ©es avant de les exporter.\n")
 cat("\tActualisation du nombre d'index 'AllOthers'.\n")
 undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"]))
-df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count
+df2[which(df2$Barcode == "unknown"), "bcCount"]<-as.numeric(df2[which(df2$Barcode == "unknown"), "bcCount"])-undertermined.count
 
 # Calcul pourcentages de chaque barcode
 cat("\tCalcul du pourcentage sur le nombre de fragments total.\n")
@@ -216,9 +220,9 @@ df2<-cbind(df2, percentOfFragment)
 
 # Export du data.frame
 cat("\nSauvegarde du data.frame.\n")
-myProject<-"DEBUG"
+#myProject<-"DEBUG"
 # mettre des 0 Ã  la place des NA dans df2
-write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv"))
+write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats.tsv"))
 # Ecrire un fichier par valeur de myProject ! Cas ou il y a plusieurs projets sur la mÃªme lane.
-cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n"))
+cat(paste0("\tLe fichier suivant Ã  Ã©tÃ© crÃ©Ã© :\t", launchDir, "/DemultiplexStats.tsv\n"))
 cat("\nFin normale du script, on sort.\n")
diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl
index eddd760..f3a51a0 100755
--- a/bin/extractInfoForDemuxStats.pl
+++ b/bin/extractInfoForDemuxStats.pl
@@ -96,11 +96,6 @@ foreach my $line (@lines) {
 		$machineName = $machineName =~ /^NOVASEQ/ ? 'NOVASEQ' : $machineName;
 	}
 
-	# Recherche du nom du projet
-	if ($line =~ /^Infos/) {
-		$projectName = $cur_line[1];
-	}
-	
 	# Recherche des positions des Sample_ID et des Index_ID
 	elsif ($line =~ m/${regexForDataHeader{$machineName}}/) {
 		while ( my ( $indice, $valeur ) = each @cur_line ) { 
@@ -109,13 +104,19 @@ foreach my $line (@lines) {
 		}
 	}
 
-	# Association Sample_ID avec sont nombre d'index
+	# Association Sample_ID avec son nombre d'index
 	elsif ($line =~ m/${regexForSampleLine{$machineName}}/) {
 		my $sample_ID = $cur_line[$sample_ID_position];
 		my $index_number=0;
 		my @cur_index_ID = ();
 		foreach my $pos (@index_ID_position) {
-			if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; }
+			if ($cur_line[$pos] =~ /^SI-T|NT-\w{2}$/) { 
+				$index_number = 2;
+			} elsif ($cur_line[$pos] =~ /^\w{2}-\w{2}-\w{2}$/) {
+				$index_number = 4;
+			} else { 
+				$index_number += 1; 
+			}
 		}
 		$sample_info{$sample_ID} = $index_number;
 	}
@@ -128,8 +129,7 @@ foreach my $k (keys(%sample_info)) {
    $content.="$k\t$sample_info{$k}\n";
 }
 
-$projectName = $projectName eq "" ? 'noName' : $projectName;
-my $file2write = "$projectName.indexNumber";
+my $file2write = "indexNumber.tsv";
 
 open(my $fh, '>', $file2write) or exit 1;
 print $fh $content;
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 9b7f25c..f6c26ee 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -9,7 +9,7 @@ process PREP_DEMUXSTAT {
 		path SampleSheet
 	
 	output:
-		path "*.indexNumber"
+		path "indexNumber.tsv"
 	
 	script:
 	"""
@@ -30,7 +30,7 @@ process DEMUX_STATS {
 	
 	output:
 		path 'demultiplexStats.log', emit: log
-		path "DemultiplexStats_*", emit: demultiplexStatsCSV
+		path "DemultiplexStats.tsv", emit: demultiplexStatsTSV
 	
 	script:
 	"""
diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf
index 6a79fdf..a03cd6f 100644
--- a/sub-workflows/local/core_illumina.nf
+++ b/sub-workflows/local/core_illumina.nf
@@ -47,5 +47,6 @@ workflow CORE_ILLUMINA {
 
     emit:
         fastq = fastq_good
+		demuxStat = DEMUX_STATS.out.demultiplexStatsTSV
 }
 
-- 
GitLab


From 778bbfe6f8408c6049419cd0f9c633deebc27975 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 24 Jul 2023 09:28:45 +0200
Subject: [PATCH 04/21] 5 hours to run fastqc

	Ref: #23
---
 conf/base.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index b99f889..b77a503 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -155,7 +155,7 @@ process {
 		
 		maxRetries = 4
 		module = ['bioinfo/FastQC_v0.11.7']
-		time = { 2.h * task.attempt * params.resource_factor }
+		time = { 5.h * task.attempt * params.resource_factor }
 	}
 
 	withName: FASTQSCREEN {
-- 
GitLab


From 40661eb565d4ffc6aa78b3726aa996517bcad18c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 24 Jul 2023 15:10:28 +0200
Subject: [PATCH 05/21] Improvement of subsampling step

	- simplification of calculations for subsampling
	- increase memory for subsampling process
	- add params to skip subsampling step

	Ref: #63 & #40
---
 conf/base.config                     |  5 +----
 nextflow.config                      | 13 ++++++-------
 sub-workflows/local/core_pipeline.nf | 29 +++++++++++++---------------
 3 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index b77a503..70333a1 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -82,17 +82,13 @@ params.n_samples = n_read_files / 2
 params.resource_factor = 0.1 * params.n_samples
 
 params {
-	bytes_subset_seq = miseq_subset_byte
 	subset_seq = miseq_subset_seq
 	if ( sequencer =~ /NovaSeq.*/ ) {
 		if ( n_samples >= large_sampling_threshold ) {
-			nova_subset_byte = large_indexing_nova_subset_byte
 			nova_subset_seq = large_indexing_nova_subset_seq
 		}
-		bytes_subset_seq = nova_subset_byte
 		subset_seq = nova_subset_seq
 	}
-	System.out.println "Seuil de taille de fichier pour subset : " + bytes_subset_seq + " bytes."
 	System.out.println "Nombre de reads pour subset : " + subset_seq + "."
 }
 
@@ -247,6 +243,7 @@ process {
         ext.args = '-s100'
         ext.args2 = params.subset_seq
 
+		memory  = { 5.GB * task.attempt }
         module = 'bioinfo/seqtk-1.3'
 
         publishDir = [
diff --git a/nextflow.config b/nextflow.config
index 157085f..6e44f5d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,18 +1,17 @@
 // ========================================
 //				PARAMS
 // =========================================
+
 // Global params
 params {	
 	// PARAMETRE POUR OUTILS
 
 	// Subset fastq files params
-	large_sampling_threshold = 200				// 200 samples run is high multiplexed
-	miseq_subset_byte = 20000000				// in byte <=> 20 000 reads
-	miseq_subset_seq = 20000					// in reads
-	nova_subset_byte = 700000000				// in byte <=> 1 000 000 reads
-	nova_subset_seq = 1000000					// in reads
-	large_indexing_nova_subset_byte = 350000000	// in byte <=> 500 000 reads
-	large_indexing_nova_subset_seq = 500000		// in reads
+	no_subset = false										// to skip subset step -> use every reads to align
+	large_sampling_threshold = 200							// 200 samples run is high multiplexed
+	miseq_subset_seq = "50000"								// in reads must be a string
+	nova_subset_seq = "50000000"							// in reads 
+	large_indexing_nova_subset_seq = "500000"				// in reads
 
 	// RNA QC
 	sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases'
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index edbb825..272b1f2 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -43,28 +43,25 @@ workflow CORE {
 		// ----------- Recherche Duplicats
 		GUNZIP(ch_read)
 		
-		GUNZIP.out.branch{
-			large : it[1].size() >= params.bytes_subset_seq
-			small : it[1].size() < params.bytes_subset_seq
-		}.set{unzip_reads_split}
+		// ----------- Sous-Ã©chantillonnage
+		if (params.no_subset) {
+			unzipped_fastq = GUNZIP.out
+		} else {
+			SEQTK_SAMPLE(GUNZIP.out)
+			unzipped_fastq = SEQTK_SAMPLE.out
+		}
 
-		unzip_reads_split.large.count().map{it}.subscribe onNext: { println it + "  large fastq (more than ${params.subset_seq} reads)" }
-		unzip_reads_split.small.count().map{it}.subscribe onNext: { println it + " small fastq" }
-
-		// Do subset only on large fastq files
-		SEQTK_SAMPLE(unzip_reads_split.large)
-		DUPLICATED_READS(unzip_reads_split.small
-				.mix(SEQTK_SAMPLE.out)
-				.collect{it[1]}
-				.flatten()
-				.map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] }
-				.groupTuple()
+		DUPLICATED_READS(unzipped_fastq
+			.collect{it[1]}
+			.flatten()
+			.map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] }
+			.groupTuple()
 		) // need fastq paired !!!
 		
 	emit:
 		fastqc_report = FASTQC.out.zip ?: Channel.empty()
 		fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty()
 		fastp_report = DUPLICATED_READS.out.json
-		subset_fastq = unzip_reads_split.small.mix(SEQTK_SAMPLE.out)
+		subset_fastq = unzipped_fastq
 		fastq_md5 = MD5SUM.out
 }
-- 
GitLab


From f8a07b3e6780cb7038cd24901796ba877e5d045c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:08:19 +0200
Subject: [PATCH 06/21] BWA runs on R1 and R2 for DNA data

	Ref: #66
---
 workflow/illumina_qc.nf | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index e48b5c3..9c75eca 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -94,7 +94,12 @@ workflow ILLUMINA_QC {
 	CORE(fastq)
 
 	if (params.data_nature == 'DNA') {
-		DNA_QC(CORE.out.subset_fastq)
+		DNA_QC(CORE.out.subset_fastq
+			.collect{it[1]}
+			.flatten()
+			.map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] }
+			.groupTuple()
+		)
 		ch_mqc = ch_mqc.mix(
 			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]),
 			DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([])
-- 
GitLab


From c4a4794e00e933430367ee86594f03ec12e09483 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:11:00 +0200
Subject: [PATCH 07/21] Adaptive time limit for md5sum generation

	Ref: #65
---
 conf/base.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/base.config b/conf/base.config
index 70333a1..f193e7f 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -286,6 +286,7 @@ process {
     }
 
 	withName: MD5SUM {
+		time = { 3.h * task.attempt * params.resource_factor }
 		publishDir = [
             path: { "${params.outdir}/fastq" },
             mode: 'copy',
-- 
GitLab


From 50ca307e51ef50c855602601c4b0a26aba2ba771 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:13:16 +0200
Subject: [PATCH 08/21] Work dir is not removed when using dev profile

	Ref: #62
---
 conf/prod.config        | 1 +
 conf/test.config        | 1 +
 workflow/illumina_qc.nf | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/conf/prod.config b/conf/prod.config
index 117ab99..4b0873f 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -4,6 +4,7 @@
 params {
 	ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current'
 	shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current'
+	is_dev_mode = false
 }
 
 // ========================================
diff --git a/conf/test.config b/conf/test.config
index cfcba99..9c48729 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -4,6 +4,7 @@
 params {
 	ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/'
 	shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/'
+	is_dev_mode = true
 }
 
 // ========================================
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 9c75eca..9beae69 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -152,7 +152,7 @@ workflow.onComplete {
 	end_mail_sent = sendFinalMail(format.format(new Date()), params.summary)
 
 	// remove work directory if pipeline is successful
-	if (workflow.success) {
+	if (workflow.success && !params.is_dev_mode) {
 		println "Pipeline terminÃ© avec succÃ¨s => suppression du workdir : $workflow.workDir"
 		exec:
 			workflow.workDir.deleteDir()
-- 
GitLab


From 8d3c91482ce534ad872b24edff1218cffeebc8f5 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:33:53 +0200
Subject: [PATCH 09/21] Fastp subsampling set in base.config file

	Ref: #68
---
 conf/base.config             | 8 ++++++++
 modules/local/module_core.nf | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/conf/base.config b/conf/base.config
index f193e7f..ae6461b 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -129,6 +129,14 @@ process {
 			mode: 'copy',
 			pattern: "*.log"
 		]
+		publishDir = [
+			path: "${params.outdir}/Duplicats",
+			mode: 'copy',
+			pattern: "*.json"
+		]
+
+		ext.args = "--reads_to_process ${params.fastp_n_reads}"
+
 		module = ['bioinfo/fastp-0.23.2']
 		time = { 5.h * task.attempt }
 		memory = { 3.GB * task.attempt }
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index f6c26ee..2127340 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -110,6 +110,7 @@ process DUPLICATED_READS {
 	shell:
 	R1_name=file(fastq[0]).simpleName
 	R2_name=file(fastq[1]).simpleName
+	def args = task.ext.args ?: ''
 	'''
 		fastp \
 		-i !{fastq[0]} \
@@ -120,6 +121,7 @@ process DUPLICATED_READS {
 		--disable_quality_filtering \
 		--disable_length_filtering \
 		--json !{R1_name}_fastp.json \
+		$args \
 		2> !{R1_name}.log
 	'''
 }
-- 
GitLab


From f42ba02a5adc3e757a1500c0fda1e0f3033fd27c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:36:03 +0200
Subject: [PATCH 10/21] params for fastp subsampling

	Ref : #68
---
 nextflow.config | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nextflow.config b/nextflow.config
index 6e44f5d..5042b13 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,6 +22,9 @@ params {
 	sortmerna_euk_18s = sortmerna_db_path + '/silva-euk-18s-id95.fasta'
 	sortmerna_euk_28s = sortmerna_db_path + '/silva-euk-28s-id98.fasta'
 
+	// FASTP
+	fastp_n_reads = 100000000
+
 	// OTHERS
 	email=""
 	email_dev="jules.sabban@inrae.fr"
-- 
GitLab


From 44daa011f63cfd4b2effc199990614f49bfca34c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:40:31 +0200
Subject: [PATCH 11/21] Run/ReadSet demuxStat treatment insertion

	Ref: #60
---
 modules/local/module_NGL-Bi.nf | 52 +++++++++++++++-------------------
 workflow/illumina_qc.nf        | 34 ++++++++++++++--------
 2 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index 96f29d5..6dfe3de 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -1,4 +1,7 @@
-params.outdir=''
+/*
+ * Ensemble de process pour l'interraction avec NGL-Bi
+ * Process pour la crÃ©ation de traitement SAV
+ */
 
 
 process prepareReadSetCreation {
@@ -17,38 +20,29 @@ process prepareReadSetCreation {
 	"""
 }
 
-process readsetNGLBiCreation {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created'
+process TREATMENT_DEMUXSTAT {
+	publishDir path: params.outdir + "/ngl/", pattern: '*.log',  mode: 'copy'
 	
-	executor = 'local'
-	beforeScript = "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
-	errorStrategy = { 'ignore' }
-	
-	input :
-		path infoFile
-	
-	output :
-		path 'ReadsetsNGL-Bi.created', emit: readSetFile
-		path 'ReadsetsNGL-BiCreation.log', emit: readSetLog
-
-	script :
-	"""
-		createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
-		
-	"""
-}
+	label 'ngl'
 
-process checkErrorFromNGLBi {
-	publishDir path: "${params.outdir}/NGLBi" , mode: 'copy'
-	
 	input:
-		path logFile
-		
+		val nglCode
+		path csvFile
+		val lane
+
 	output:
-		path 'ReadsetsNGL-BiCreation.log'
-		
+		path("*.log")
+		val 1, emit: ready
+
 	script:
+	laneOption = lane ? "--lane $lane" : ''
+	forceOption = workflow.resume ? "--force" : ''
 	"""
-		checkErrorNGLScripts.pl --file $logFile
+		perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\
+			--code $nglCode \\
+			--stat $csvFile \\
+			${laneOption} \\
+			${forceOption} \\
+			1> treatment_demux_${lane}.log
 	"""
-}
\ No newline at end of file
+}
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 9beae69..ccb7fd7 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -57,15 +57,19 @@ createDir = file(params.outdir).mkdir()
 // -------------------------------------------------
 // 					INCLUDES
 // -------------------------------------------------
-include { NGLBI			} from "$baseDir/sub-workflows/local/begin_nglbi.nf"
-include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf"
-include { CORE			} from "$baseDir/sub-workflows/local/core_pipeline.nf"
-include { DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
-include { RNA_QC		} from "$baseDir/sub-workflows/local/rna_qc.nf"
-include { MULTIQC		} from "${params.shared_modules}/multiqc.nf"
-include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
-include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf"
-include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW')
+include {	NGLBI		} from "$baseDir/sub-workflows/local/begin_nglbi.nf"
+include {	CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf"
+include {	CORE		} from "$baseDir/sub-workflows/local/core_pipeline.nf"
+include {	DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
+include {	RNA_QC		} from "$baseDir/sub-workflows/local/rna_qc.nf"
+include { 	PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf"
+include {	TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN;
+			TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS
+						} from "$baseDir/modules/local/module_NGL-Bi.nf"
+include {	MULTIQC		} from "${params.shared_modules}/multiqc.nf"
+include {	workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
+include {	UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf"
+include {	READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW')
 // -------------------------------------------------
 // 					 EMAIL ON START
 // -------------------------------------------------
@@ -84,11 +88,17 @@ workflow ILLUMINA_QC {
 		NGLBI()
 	}
 
-	if ( ! params.skip_core_illumina ) {
+	if ( params.skip_core_illumina ) {
+		fastq = ch_read
+	} else {
 		CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read)
 		fastq = CORE_ILLUMINA.out.fastq
-	} else {
-		fastq = ch_read
+
+		if (params.insert_to_ngl){
+			// Add demultiplexStat treatments
+			TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat, params.lane)
+			TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat, '')
+		}
 	}
 
 	CORE(fastq)
-- 
GitLab


From 12eae4ff09cce1ad1be563cff6282f50e11a0399 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 11:42:25 +0200
Subject: [PATCH 12/21] Use NF variable for fastp args

	Ref : #68
---
 modules/local/module_core.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 2127340..94f9b76 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -121,7 +121,7 @@ process DUPLICATED_READS {
 		--disable_quality_filtering \
 		--disable_length_filtering \
 		--json !{R1_name}_fastp.json \
-		$args \
+		!{args} \
 		2> !{R1_name}.log
 	'''
 }
-- 
GitLab


From feb43679add83a621b46307e3109b2db8adcb929 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 13:28:59 +0200
Subject: [PATCH 13/21] Move Qualimap process from here to shared_modules

	Ref: #67
---
 conf/base.config              | 20 ++++++++++++++++++++
 conf/prod.config              | 11 +----------
 modules/local/module_dna.nf   | 23 -----------------------
 sub-workflows/local/dna_qc.nf |  3 +--
 4 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index ae6461b..7a80be6 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -301,4 +301,24 @@ process {
             pattern: "*.md5sum"
         ]
 	}
+
+	withName: QUALIMAP {
+		module = 'bioinfo/qualimap-31-08-20'
+		cpus = { 8 * task.attempt }
+	    memory = { 2.GB * task.attempt }
+	    time = { 3.h * task.attempt }
+
+		// Both rules live in ONE list: a second 'publishDir =' assignment would replace the first.
+		publishDir = [
+			[
+				path: "${params.outdir}/alignmentStats/qualimap",
+				mode: 'copy',
+				pattern: "*/*.html"
+			], [
+				path: "${params.outdir}/alignmentStats/qualimap",
+				mode: 'copy',
+				pattern: "*/*.txt"
+			]
+		]
+	}
 }
\ No newline at end of file
diff --git a/conf/prod.config b/conf/prod.config
index 4b0873f..5bf85b8 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -32,16 +32,7 @@ process {
 		cpus = 6
 	    memory = { 8.GB * task.attempt }
 	    time = { 3.h * task.attempt }
-	}
-	
-	withLabel: qualimap {
-		module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20']
-		beforeScript='unset DISPLAY'
-		cpus = { 8 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}
-	
+	}	
 	
 	withName: BWA_ALIGNMENT {
 		cpus = { 6 * task.attempt }
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 3f7f6fc..756d4c5 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -83,29 +83,6 @@ process SAMTOOLS_FLAGSTATS {
 	"""
 }
 
-process QUALIMAP {
-	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.html"
-	publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.txt"
-
-	tag "$sample"
-
-	label 'qualimap'
-	
-	errorStrategy = { 'ignore' }
-
-	input:
-		tuple val(sample), path(bam)
-
-	output:
-		tuple val(sample), path("*.log"), emit: log
-		tuple val(sample), path("*/*"), emit: all	// ${sample}_stats/*
-		tuple val(sample), path("${sample}"), emit: report
-
-	script:
-	"""
-		qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log
-	"""
-}
 
 
 
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index 57e1a08..f41af2e 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -14,9 +14,8 @@ include { 	BWA_ALIGNMENT;
 			SAMTOOLS_VIEW;
 			SAMTOOLS_SORT;
 			SAMTOOLS_FLAGSTATS;
-			QUALIMAP;
 } from "$baseDir/modules/local/module_dna.nf"
-
+include { 	QUALIMAP } from "${params.shared_modules}/qualimap.nf"
 
 // -------------------------------------------------
 // 					WORKFLOW
-- 
GitLab


From 1f92bda250d7407fbdeeec7d858e0e48a76d4411 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 13:29:57 +0200
Subject: [PATCH 14/21] Add filter on output qualimap file name

	Ref: #67
---
 assets/multiqc_config.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 47abb58..593ca97 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -35,6 +35,7 @@ table_columns_visible:
 
 ## Sample name formatting
 extra_fn_clean_exts:
+  - "_qualimap_results"
   - "_filtered"
   - "_unmerged"
   - "_flagstat"
-- 
GitLab


From 28d4e18120fa09aa430b920a0e5eb77f819e136c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 13:43:45 +0200
Subject: [PATCH 15/21] New resources for DTM

	Read and extract metrics from qualimap and fastp reports
	Create CSV file

	Ref : #69
---
 bin/parse_reports.sh        | 28 ++++++++++++++++++++++++++++
 conf/test.config            |  6 ++++++
 modules/local/module_DTM.nf | 22 ++++++++++++++++++++++
 workflow/illumina_qc.nf     |  6 ++++++
 4 files changed, 62 insertions(+)
 create mode 100755 bin/parse_reports.sh
 create mode 100644 modules/local/module_DTM.nf

diff --git a/bin/parse_reports.sh b/bin/parse_reports.sh
new file mode 100755
index 0000000..a7d46ac
--- /dev/null
+++ b/bin/parse_reports.sh
@@ -0,0 +1,28 @@
+# Usage: bash parse_reports.sh <TAG> <FASTP_JSON> <QUALIMAP_DIR>  -> writes ./<TAG>.stat and ./<TAG>.csv
+TAG=$1                                   # prefix of both output files and value of the CSV "Sample" column
+FASTP_REPORT=$2                          # fastp JSON report, queried with jq
+QUALIMAP_REPORT=$3/genome_results.txt    # genome_results.txt inside the given qualimap directory
+O_STAT="./${TAG}.stat"
+O_CSV="./${TAG}.csv"
+
+## Get values (sed -n '.../p' keeps only the captured value; the trailing sed calls strip spaces / thousands separators)
+DUPLI=$(jq '.duplication.rate' "$FASTP_REPORT")
+TOT_SEQ=$(( $(sed -n 's/number of reads = \(.*\)/\1/p' "$QUALIMAP_REPORT" | sed 's/ //g' | sed 's/,//g') / 2 ))  # read count halved; presumably read pairs - confirm
+INSERT=$(sed -n 's/median insert size = \(.*\)/\1/p' "$QUALIMAP_REPORT" | sed 's/ //g')
+GC_PERCENT=$(sed -n 's/GC percentage = \(.*%\)/\1/p' "$QUALIMAP_REPORT" | sed 's/ //g')
+GEN_COV=$(grep ">= 1X" "$QUALIMAP_REPORT" | sed -n 's/There is a \(.*%\) of.*/\1/p' | sed 's/ //g')
+MEAN_COV=$(sed -n 's/mean coverageData.*= \(.*X\)/\1/p' "$QUALIMAP_REPORT" | sed 's/ //g')
+ALIGN=$(sed -n 's/number of mapped reads =.*(\(.*%\))/\1/p' "$QUALIMAP_REPORT" | sed 's/ //g')
+
+## Write stat file (appended to, one "key: value" per line)
+echo "duplication_rate: $DUPLI" >> "$O_STAT"
+echo "total_sequences: $TOT_SEQ" >> "$O_STAT"
+echo "mean_insert_size: $INSERT" >> "$O_STAT"
+echo "GC_percent: $GC_PERCENT" >> "$O_STAT"
+echo "genome_cov_percent: $GEN_COV" >> "$O_STAT"
+echo "mean_cov: $MEAN_COV" >> "$O_STAT"
+echo "align_percent: $ALIGN" >> "$O_STAT"
+
+## Write export file (semicolon-separated: header row, then one data row; overwrites any existing CSV)
+echo "Sample;Tot_seq;Duplication_rate;Mean_insert_size;%GC;%Genome_cov;Mean_cov;%Align" > "$O_CSV"
+echo "$TAG;$TOT_SEQ;$DUPLI;$INSERT;$GC_PERCENT;$GEN_COV;$MEAN_COV;$ALIGN" >> "$O_CSV"
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
index 9c48729..0424a4d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -47,6 +47,12 @@ process {
 	    memory = { 8.GB * task.attempt }
 	    time = { 3.d * task.attempt }
 	}
+
+	withName: PARSE_REPORTS {
+		executor = 'local'
+	    memory = { 500.MB * task.attempt }
+	    time = { 5.m * task.attempt }
+	}
 }
 
 
diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf
new file mode 100644
index 0000000..6685386
--- /dev/null
+++ b/modules/local/module_DTM.nf
@@ -0,0 +1,22 @@
+/*
+ *	Module pour la gestion des analyses particuliÃ¨res dans le cadre d'un DTM
+*/
+
+process PARSE_REPORTS {
+	publishDir path: "${params.outdir}/DTM" , mode: 'copy'
+	
+	tag "$sample"
+	
+	input:
+		tuple val(sample), path(fastp_json_report)	// sample name + fastp JSON report
+		tuple val(sample), path(qualimap_folder)	// sample name + qualimap output directory
+		// NOTE(review): both tuples bind `sample` and arrive on separate channels; confirm the caller keeps them in the same sample order
+	output:
+		tuple val(sample), path("*.csv"), emit: csv	// CSV written by parse_reports.sh
+		// NOTE(review): parse_reports.sh also writes a .stat file that is not declared as an output here
+	script:
+	"""
+		bash parse_reports.sh $sample $fastp_json_report $qualimap_folder
+	"""
+}
+
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index ccb7fd7..beb02dc 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -114,6 +114,12 @@ workflow ILLUMINA_QC {
 			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]),
 			DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([])
 		)
+
+		// DTM process
+		if ("$params.is_dev_mode") {
+			PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report)
+		}
+
 	} else if (params.data_nature =~ 'RNA') {
 		RNA_QC(CORE.out.subset_fastq, ch_sortmerna_db)
 		ch_mqc = ch_mqc.mix(
-- 
GitLab


From 48f10fccb4cddc5a97ceb7e8275b8e9c1fc0d6c7 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Fri, 28 Jul 2023 14:29:49 +0200
Subject: [PATCH 16/21] Remove readset creation script

	We now use the script in NGL-Bi_Client project
---
 bin/createNGLBiReadSets.pl | 127 -------------------------------------
 1 file changed, 127 deletions(-)
 delete mode 100755 bin/createNGLBiReadSets.pl

diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl
deleted file mode 100755
index e5cdf2e..0000000
--- a/bin/createNGLBiReadSets.pl
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/perl -w
-binmode STDIN,  ':encoding(UTF-8)';
-binmode STDOUT, ':encoding(UTF-8)';
-binmode STDERR, ':encoding(UTF-8)';
-
-=head1 NAME
-
- createNGLBiReadSets.pl
- 
-=head1 DESCRIPTION
-
- Performe readSets creation on NGL-Bi
- 
-=head1 SYNOPSIS
-
- createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
-
-=head1 OPTIONS
-
- --infoFile=s : path to the info file
- --env_ngl_bi=s : environment varible of ngl-bi
- 
-=head1 EXEMPLES
-
- perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV>
-
-=head1 AUTHOR
-
- Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr)
- 
-=cut
-
-###################################################################
-#
-#						LIBRAIRIES
-#
-###################################################################
-use strict;
-use Getopt::Long;
-use Log::Log4perl  qw(:easy);;
-
-##################################################################
-#
-#						INITIALISATION
-#
-##################################################################
-Log::Log4perl -> easy_init( {   level    => $TRACE,
-                                utf8     => 1,
-                                layout   => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' } );
-
-my $logger = Log::Log4perl -> get_logger();
-
-my $infoFile="";
-my $env_ngl_bi = "";
-
-GetOptions ('infoFile=s' => \$infoFile,
-			"env_ngl_bi=s" => \$env_ngl_bi, 	# environnement path of NGL-Bi
-);
-
-if ($env_ngl_bi eq "" || $infoFile eq "" ) {
-	$logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n");
-}
-
-my $experimentName="";
-my $runName="";
-my $laneNumber="";
-my $script_path="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl"; # RÃ©pertoire des scripts de l'API NGL
-
-##################################################################
-#
-#						NGL-Bi ENVIRONMENT
-#
-##################################################################
-
-$ENV{APIPERL}=$env_ngl_bi;
-$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf";
-$logger = Log::Log4perl -> get_logger('loadConfFile');
-unless ($ENV{CONFFILE}) {
-	$logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment");
-}
-my $dbconf_file = $ENV{CONFFILE};
-unless (-f $dbconf_file) {
-	$logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. It's necessary for continue.");
-}
-open my $handle, '<', $dbconf_file;
-chomp ( my @lines = <$handle> );
-close $handle;
-foreach my $line (@lines) {
-	$line =~ s/#.*//o;
-	unless ($line) {next;}
-	if ($line =~ /(.*)=(.*)/o) {
-		my $key = $1;
-		my $value = $2;
-		$key =~ s/^\s*//o;
-		$key =~ s/\s*$//o;
-		$value =~ s/^\s*//o;
-		$value =~ s/^\s*//o;
-		$ENV{$key} = $value;
-	} else {
-		$logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$_'");
-	}
-}
-
-unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib";
-unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib";
-
-require illumina;
-require json;
-$logger -> info("\tVariables d'environnement pour NGL-Bi charÃ©es.");
-
-##################################################################
-#
-#						INFO FILE READING
-#
-##################################################################
-$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!");
-$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!");
-$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!");
-
-chomp($experimentName);
-chomp($runName);
-chomp($laneNumber);
-
-
-my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber";
-$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets);
-my $result_commandNGLBiReadSets = `$commandNGLBiReadSets 2>&1`; $? and $logger -> logdie("[Erreur]Lancement de createNGL-BiReadSets.pl\n".$result_commandNGLBiReadSets);
\ No newline at end of file
-- 
GitLab


From f561c94153dbbc0a2d83b45d4bf8b14a1f2f4b91 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 31 Jul 2023 15:24:27 +0200
Subject: [PATCH 17/21] Reorganization of config files and process

	Ref: #64
---
 conf/base.config                   | 195 +++++++++++++----------------
 conf/dependencies_genologin.config |  24 ++++
 conf/functions.config              |   4 +-
 conf/genomes.config                |  29 -----
 conf/prod.config                   |  47 -------
 conf/test.config                   |  54 --------
 modules/local/module_DTM.nf        |   4 +-
 modules/local/module_NGL-Bi.nf     |   2 -
 modules/local/module_core.nf       |  16 +--
 modules/local/module_dna.nf        |  14 +--
 nextflow.config                    | 133 +++++++++++++++-----
 workflow/illumina_qc.nf            |   2 +-
 12 files changed, 222 insertions(+), 302 deletions(-)
 create mode 100644 conf/dependencies_genologin.config
 delete mode 100644 conf/genomes.config
 delete mode 100644 conf/prod.config

diff --git a/conf/base.config b/conf/base.config
index 7a80be6..9683cd8 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -1,62 +1,8 @@
 // ========================================
-//				PARAMS
+//			BASE CONFIGURATION
 //=========================================
-System.out.println "Chargement des paramÃ¨tres de base"
-// Fixed params
+// Print of analysis parameters
 params {
-	// EMPTY INITIALISATION OF INPUT PARAMS
-	// General params
-	outdir = "./"			// base output directory for all analysis
-	inputdir = ""
-	project = ""
-	sequencer = ""
-	machine_id = ""
-	fc_id = ""
-	fc_type = ""
-	lane = ""
-	demux_uniqueness = ""
-
-	data_nature = ""
-	species = ""
-	is_multiplex = false
-	
-	run_name = ""
-	run_date = ""
-	description = ""
-	split_reads = false
-	
-	// DNA / RNA params
-	reference_genome = ""
-	make_star_index = false
-	reference_transcriptome = ""
-
-	// Amplicon / 16S params
-	min_overlap = ""
-	max_overlap = ""
-
-	// 10X params
-
-
-	// MethylSeq params
-	puc19 = ""
-	lambda = ""
-
-	// NGL
-	insert_to_ngl = true
-	bi_run_code = ''
-	sq_xp_code = ''
-}
-
-params.samplesheet = params.inputdir.toString() + "/SampleSheet.csv"
-params.data_location = params.inputdir.toString() + "/" + params.project.toString()
-
-// Dynamic params
-import java.text.SimpleDateFormat
-SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss")
-params {
-	nf_uniqueness = uniqueness_format.format(new Date())
-	outdir= params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness
-
 	System.out.println ""
 	System.out.println "run_name : "+run_name
 	System.out.println "data : "+data_nature
@@ -70,28 +16,6 @@ params {
 	System.out.println ""
 }
 
-// Dynamic params depending on samples number
-import java.nio.file.Files
-import java.nio.file.Paths
-def n_read_files = Files.walk(Paths.get(params.data_location))
-	.filter(Files::isRegularFile)
-	.filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz"))
-	.count()
-
-params.n_samples = n_read_files / 2
-params.resource_factor = 0.1 * params.n_samples
-
-params {
-	subset_seq = miseq_subset_seq
-	if ( sequencer =~ /NovaSeq.*/ ) {
-		if ( n_samples >= large_sampling_threshold ) {
-			nova_subset_seq = large_indexing_nova_subset_seq
-		}
-		subset_seq = nova_subset_seq
-	}
-	System.out.println "Nombre de reads pour subset : " + subset_seq + "."
-}
-
 // ========================================
 //				PROCESS
 //=========================================
@@ -108,8 +32,26 @@ process {
 	maxRetries = 2
 	maxErrors = '-1'
 
-	// ----- WithName
+	// ----- DTM
+	withName: PARSE_REPORTS {
+		executor = 'local'
+	    memory = { 500.MB * task.attempt }
+	    time = { 5.m * task.attempt }
+
+		publishDir = [
+			path: "${params.outdir}/DTM",
+			mode: 'copy'
+		]
+	}
+	
 	// ----- CORE ----- //
+	withLabel: demux {
+		publishDir = [
+			path: "${params.outdir}/Demux",
+			mode: 'copy'
+		]
+	}
+
 	withName: ILLUMINA_FILTER {
 		publishDir = [
 			path: "${params.outdir}/IlluminaFilter",
@@ -117,8 +59,8 @@ process {
 			pattern: '*.gz'/*,
 			saveAs: { filename -> "${name}.fastq.gz" }*/
 		]
-	
-		module = ['bioinfo/fastq_illumina_filter-0.1']
+
+		module = toolsModuleHash['ILLUMINA_FILTER']
 		cpus = { 3 * task.attempt }
 		time = { 4.h * task.attempt }
 	}
@@ -127,17 +69,12 @@ process {
 		publishDir = [
 			path: "${params.outdir}/Duplicats",
 			mode: 'copy',
-			pattern: "*.log"
-		]
-		publishDir = [
-			path: "${params.outdir}/Duplicats",
-			mode: 'copy',
-			pattern: "*.json"
+			pattern: "*.{log,json}"
 		]
 
 		ext.args = "--reads_to_process ${params.fastp_n_reads}"
 
-		module = ['bioinfo/fastp-0.23.2']
+		module = toolsModuleHash['FASTP']
 		time = { 5.h * task.attempt }
 		memory = { 3.GB * task.attempt }
 		cpus = { 3 * task.attempt }
@@ -157,27 +94,46 @@ process {
 			saveAs: { filename -> "${name}.html" }
 		]
 		
+		module = toolsModuleHash['FASTQC'] 
 		maxRetries = 4
-		module = ['bioinfo/FastQC_v0.11.7']
 		time = { 5.h * task.attempt * params.resource_factor }
 	}
 
 	withName: FASTQSCREEN {
-		ext.args = [
-            "--conf ${params.inputdir}/fastq_screen.conf"
-		].join(' ')
+		time = { 1.h * task.attempt }
+		module = toolsModuleHash['FASTQSCREEN']
+
+		ext.args = "--conf ${params.inputdir}/fastq_screen.conf"
+
+		publishDir = [
+			path: "${params.outdir}/ContaminationSearch/FastQ-Screen",
+			mode: 'copy'
+		]
+	}
+
+	// ----- DNA ----- //
+	withLabel: bwa {
+		module = toolsModuleHash['BWA']
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.d * task.attempt }
+
+		publishDir = [
+			path: "${params.outdir}/alignment/bwa",
+			mode: 'copy'
+		]
 	}
 
 	// ----- RNA ----- //
 	withName: SALMON_INDEX {
-		module = ['bioinfo/salmon-1.9.0']
+		module = toolsModuleHash['SALMON']
 		time = { 1.h * task.attempt }
 		memory = { 3.GB * task.attempt }
 		cpus = 8 
 	}
 
 	withName: SALMON_QUANT {
-		module = ['bioinfo/salmon-1.9.0']
+		module = toolsModuleHash['SALMON']
 		time = { 1.h * task.attempt }
 		memory = { 3.GB * task.attempt }
 		cpus = 8 
@@ -189,12 +145,14 @@ process {
 		]
 	}
 
-	withName: STAR_INDEX {	
+	withName: STAR_INDEX {
+		module = toolsModuleHash['STAR']
 		memory = { 50.GB * task.attempt }
 		cpus = 8
 	}
 
 	withName: STAR_ALIGN {	
+		module = toolsModuleHash['STAR']
 		memory = { 20.GB * task.attempt }
 		cpus = 2
 	}
@@ -203,21 +161,37 @@ process {
 	withLabel: littleJob {
 		executor = 'local'
 	}
-	
-	withLabel: cigar {
-		module = ['system/Python-3.7.4:bioinfo/samtools-1.14']
+
+	withLabel: ngl {
+		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf"
+		publishDir = [
+            path: { "${params.outdir}/ngl" },
+            mode: 'copy',
+			pattern: "*.{log,created}"
+        ]
 	}
 
-	// ----- DNA ----- //
-	withLabel: bwa {
-		module = ['/tools/share/Modules/bioinfo/bwa-0.7.17']
-		beforeScript = "module list"
+	withLabel: samtools {
+		module = toolsModuleHash['SAMTOOLS']
+		cpus = { 6 * task.attempt }
+	    memory = { 8.GB * task.attempt }
+	    time = { 3.h * task.attempt }
 	}
 
-	// ----- RNA ----- //
-	withLabel: star {
-		module = ['bioinfo/STAR-2.7.10a_alpha_220314']
+	withLabel: alignment {
+		publishDir = [
+			path: "${params.outdir}/alignment/samtools",
+			mode: 'copy'
+		]
+	}
+
+	withLabel: alignmentStats {
+		publishDir = [
+			path: "${params.outdir}/alignmentStats/samtools",
+			mode: 'copy'
+		]
 	}
+
 }
 
 // ========================================
@@ -225,8 +199,8 @@ process {
 //=========================================
 process {
 	withName: SAMTOOLS_FAIDX {
+		module = toolsModuleHash['SAMTOOLS']
 		beforeScript = "module purge"
-		module = ['bioinfo/samtools-1.16.1']
 	}
 
     withName: GZIP {
@@ -252,7 +226,7 @@ process {
         ext.args2 = params.subset_seq
 
 		memory  = { 5.GB * task.attempt }
-        module = 'bioinfo/seqtk-1.3'
+		module = toolsModuleHash['SEQTK_SAMPLE']
 
         publishDir = [
             path: { "${params.outdir}/subset" },
@@ -269,7 +243,7 @@ process {
         ].join(' ')
 
 		beforeScript = "module purge"
-		module = 'bioinfo/MultiQC-1.14'
+		module = toolsModuleHash['MULTIQC']
 		memory = { 10.GB * task.attempt * params.resource_factor }
 
 		publishDir = [
@@ -281,7 +255,7 @@ process {
     }
 
 	withName: SORTMERNA {
-        module  = 'bioinfo/sortmerna-4.3.2'
+		module  = toolsModuleHash['SORTMERNA']
         memory  = { 2.GB * task.attempt }
         time    = { 10.h * task.attempt }
         cpus    = { 1 * task.attempt }
@@ -303,7 +277,7 @@ process {
 	}
 
 	withName: QUALIMAP {
-		module = 'bioinfo/qualimap-31-08-20'
+		module = toolsModuleHash['QUALIMAP']
 		cpus = { 8 * task.attempt }
 	    memory = { 2.GB * task.attempt }
 	    time = { 3.h * task.attempt }
@@ -319,6 +293,5 @@ process {
 			mode: 'copy',
 			pattern: "*/*.txt"
 		]
-
 	}
 }
\ No newline at end of file
diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config
new file mode 100644
index 0000000..4dd63eb
--- /dev/null
+++ b/conf/dependencies_genologin.config
@@ -0,0 +1,24 @@
+// ========================================
+//			GENOLOGIN MODULES (values are used for the process 'module' directive in base.config; expects toolsModuleHash to be initialised by the including config)
+//=========================================
+// ----- CORE ----- //
+toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter-0.1']
+toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2']
+toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7']
+toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2']
+
+// ----- RNA ----- //
+toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0']
+toolsModuleHash['STAR'] = ['bioinfo/STAR-2.7.10a_alpha_220314']
+
+// ----- DNA ----- //
+toolsModuleHash['BWA'] = ['/tools/share/Modules/bioinfo/bwa-0.7.17']	// NOTE(review): absolute module path, unlike the other entries - confirm intentional
+toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools-1.16.1']	// used by both the 'samtools' label and SAMTOOLS_FAIDX in base.config
+
+// ========================================
+//			   SHARED MODULES
+//=========================================
+toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/seqtk-1.3']
+toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC-1.14']
+toolsModuleHash['SORTMERNA'] = ['bioinfo/sortmerna-4.3.2']
+toolsModuleHash['QUALIMAP'] = ['bioinfo/qualimap-31-08-20']
diff --git a/conf/functions.config b/conf/functions.config
index f16fa59..2099a7a 100644
--- a/conf/functions.config
+++ b/conf/functions.config
@@ -67,7 +67,7 @@ def customMailSend(body, subject, email_address) {
     if (email_address == null) {
         email_address = params.email_bioinfo
     }
-    if (workflow.profile == 'dev') {
+    if (params.is_dev_mode) {
         email_address = params.email_dev
         try {
             def sending = ['echo', '-e' , body ].execute() | [ 'mail', '-s', subject, email_address  ].execute()
@@ -177,7 +177,7 @@ def sendFinalMail(formatted_date, summary) {
 	if (!params.email && params.email_on_fail && !workflow.success) {
 		email_address = params.email_on_fail
 	}
-    if (workflow.profile == 'dev') {
+    if (params.is_dev_mode) {
         email_address = params.email_dev
     }
 	// Render the TXT template
diff --git a/conf/genomes.config b/conf/genomes.config
deleted file mode 100644
index b8ef761..0000000
--- a/conf/genomes.config
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * -------------------------------------------------
- *  Nextflow config file for Genomes paths and indexes
- * -------------------------------------------------
- * Defines reference genomes, using Genome paths
- * Can be used by any config that customises the base
-  */
-
-params {
-  genomes {
-    'GRCh37' {
-      bed12   = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
-      fasta   = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
-      star    = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
-      bowtie2 = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
-      bwa     = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/"
-    }
-    'GRCm38' {
-      bed12   = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed"
-      fasta   = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa"
-      gtf     = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
-      star    = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/"
-      bowtie2 = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/Bowtie2Index/"
-      bwa     = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/BWAIndex/"
-    }
-
-  }
-}
diff --git a/conf/prod.config b/conf/prod.config
deleted file mode 100644
index 5bf85b8..0000000
--- a/conf/prod.config
+++ /dev/null
@@ -1,47 +0,0 @@
-// ========================================
-//				PARAMS
-//=========================================
-params {
-	ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current'
-	shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current'
-	is_dev_mode = false
-}
-
-// ========================================
-//				PROCESSES
-//=========================================
-process {
-	withLabel: ngl {
-		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf"
-		publishDir = [
-            path: { "${params.outdir}/ngl" },
-            mode: 'copy',
-			pattern: "*.{log,created}"
-        ]
-	}
-	
-	withLabel: samtools_view {
-		module = ['bioinfo/samtools-1.14']
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}
-
-	withLabel: samtools {
-		module = ['bioinfo/samtools-1.14']
-		cpus = 6
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}	
-	
-	withName: BWA_ALIGNMENT {
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.d * task.attempt }
-	}
-}
-
-// ========================================
-//				CONFIG FILES
-//=========================================
-includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
index 0424a4d..23c2af3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -6,57 +6,3 @@ params {
 	shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/'
 	is_dev_mode = true
 }
-
-// ========================================
-//				PROCESSES
-//=========================================
-process {
-	withLabel: ngl {
-		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/dev_illumina_qc.conf"
-		publishDir = [
-            path: { "${params.outdir}/ngl" },
-            mode: 'copy',
-			pattern: "*.{log,created}"
-        ]
-	}
-	
-	withLabel: samtools_view {
-		module = ['bioinfo/samtools-1.14']
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.h * task.attempt }
-	}
-	
-	withLabel: samtools {
-		module = ['bioinfo/samtools-1.14']
-		cpus = { 1 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 10.m * task.attempt }
-	}
-	
-	withLabel: qualimap {
-		module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20']
-		beforeScript='unset DISPLAY'
-		cpus = { 1 * task.attempt }
-	    memory = { 2.GB * task.attempt }
-	    time = { 10.m * task.attempt }
-	}
-	
-	withName: BWA_ALIGNMENT {
-		cpus = { 6 * task.attempt }
-	    memory = { 8.GB * task.attempt }
-	    time = { 3.d * task.attempt }
-	}
-
-	withName: PARSE_REPORTS {
-		executor = 'local'
-	    memory = { 500.MB * task.attempt }
-	    time = { 5.m * task.attempt }
-	}
-}
-
-
-// ========================================
-//				CONFIG FILES
-//=========================================
-includeConfig "$baseDir/conf/report.config"
\ No newline at end of file
diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf
index 6685386..451b3cd 100644
--- a/modules/local/module_DTM.nf
+++ b/modules/local/module_DTM.nf
@@ -2,9 +2,7 @@
  *	Module pour la gestion des analyses particuliÃ¨res dans le cadre d'un DTM
 */
 
-process PARSE_REPORTS {
-	publishDir path: "${params.outdir}/DTM" , mode: 'copy'
-	
+process PARSE_REPORTS {	
 	tag "$sample"
 	
 	input:
diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index 6dfe3de..e243b2e 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -21,8 +21,6 @@ process prepareReadSetCreation {
 }
 
 process TREATMENT_DEMUXSTAT {
-	publishDir path: params.outdir + "/ngl/", pattern: '*.log',  mode: 'copy'
-	
 	label 'ngl'
 
 	input:
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 94f9b76..9f904ed 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -3,8 +3,8 @@
 */
 
 process PREP_DEMUXSTAT {
-	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
-	
+	label 'demux'
+
 	input:
 		path SampleSheet
 	
@@ -19,9 +19,7 @@ process PREP_DEMUXSTAT {
 }
 
 process DEMUX_STATS {
-	publishDir path: "${params.outdir}/Demux" , mode: 'copy'
-	
-	//module 'system/R-4.0.4_gcc-9.3.0'		// Ne fonctionne pas !
+	label 'demux'
 	
 	input:
 		path DemuxStatXML
@@ -75,12 +73,7 @@ process ILLUMINA_FILTER {
 	
 }
 
-process FASTQSCREEN {
-	publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy'
-	
-	module 'bioinfo/FastQ-Screen-0.15.2'
-	time { 1.h * task.attempt }
-	
+process FASTQSCREEN {	
 	tag " $sample"
 	
 	input:
@@ -97,7 +90,6 @@ process FASTQSCREEN {
 }
 
 process DUPLICATED_READS {
-	
 	tag "$sample"
 
 	input:
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 756d4c5..2afa198 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -3,9 +3,8 @@
 */
 
 process BWA_ALIGNMENT {
-	publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy'
-	
 	tag "$sample"
+
 	label 'bwa'
 	
 	input:
@@ -24,11 +23,10 @@ process BWA_ALIGNMENT {
 }
 
 process SAMTOOLS_VIEW { 
-	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
-	
 	tag "$sample"
 	
-	label 'samtools_view'
+	label 'samtools'
+	label 'alignment'
 	
 	input:
 		tuple val(sample), path(sam)
@@ -43,11 +41,10 @@ process SAMTOOLS_VIEW {
 }
 
 process SAMTOOLS_SORT {
-	publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy'
-
 	tag "$sample"
 	
 	label 'samtools'
+	label 'alignment'
 
 	input:
 		tuple val(sample), path(bam)
@@ -64,11 +61,10 @@ process SAMTOOLS_SORT {
 }
 
 process SAMTOOLS_FLAGSTATS {
-	publishDir path: "${params.outdir}/alignmentStats/samtools" , mode: 'copy'
-
 	tag "$sample"
 	
 	label 'samtools'
+	label 'alignmentStats'
 
 	input:
 		tuple val(sample), path(bam)
diff --git a/nextflow.config b/nextflow.config
index 5042b13..5541f97 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,11 +1,64 @@
 // ========================================
-//				PARAMS
-// =========================================
+//			WORKFLOW FLAGS / OPTIONS
+//=========================================
+params {
+	// ----- GLOBAL PARAMETERS -----
+	inputdir = ""
+	project = ""
+	sequencer = ""
+	machine_id = ""
+	fc_id = ""
+	fc_type = ""
+	lane = ""
+	demux_uniqueness = ""
+
+	data_nature = ""
+	species = ""
+	is_multiplex = false
+	
+	run_name = ""
+	run_date = ""
+	description = ""
+	split_reads = false
+	
+	// DNA / RNA params
+	reference_genome = ""
+	make_star_index = false
+	reference_transcriptome = ""
+
+	// Amplicon / 16S params
+	min_overlap = ""
+	max_overlap = ""
+
+	// 10X params
 
-// Global params
-params {	
-	// PARAMETRE POUR OUTILS
+	// MethylSeq params
+	puc19 = ""
+	lambda = ""
 
+	// NGL
+	ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current'
+	insert_to_ngl = true
+	bi_run_code = ''
+	sq_xp_code = ''
+
+	// Shared Modules
+	shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current'
+
+	// OTHERS
+	cluster_options = ''
+	is_dev_mode = false
+	DTM_mode = false
+	host = 'genologin'
+	email=""
+	email_dev="jules.sabban@inrae.fr"
+	email_on_fail="jules.sabban@inrae.fr"
+	email_bioinfo="get-plage.bioinfo@genotoul.fr"
+	//email_labo="get-plage.labo@genotoul.fr"
+	email_labo=""
+
+
+	// ----- TOOLS PARAMETERS -----
 	// Subset fastq files params
 	no_subset = false										// to skip subset step -> use every reads to align
 	large_sampling_threshold = 200							// 200 samples run is high multiplexed
@@ -25,50 +78,67 @@ params {
 	// FASTP
 	fastp_n_reads = 100000000
 
-	// OTHERS
-	email=""
-	email_dev="jules.sabban@inrae.fr"
-	email_on_fail="jules.sabban@inrae.fr"
-	email_bioinfo="get-plage.bioinfo@genotoul.fr"
-	//email_labo="get-plage.labo@genotoul.fr"
-	email_labo=""
-
-	cluster_options = ''
-
 	// skip parameters
 	skip_core_illumina = false
 
-	monochrome_logs = true
 	help = false
-	
-	config_profile_description = false	// ??
-	config_profile_contact = false	// ??
-	config_profile_url = false	// ??
+}
+
+// ========================================
+//			ANALYSIS PARAMETERS
+//=========================================
+import java.text.SimpleDateFormat
+SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss")
+
+import java.nio.file.Files
+import java.nio.file.Paths
+
+params.data_location = params.inputdir.toString() + "/" + params.project.toString()
+def n_read_files = Files.walk(Paths.get(params.data_location))
+	.filter(Files::isRegularFile)
+	.filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz"))
+	.count()
+
+params.n_samples = n_read_files / 2
+params.resource_factor = 0.1 * params.n_samples
+
+params {
+	// Dynamics params, depend on others
+	samplesheet = inputdir.toString() + "/SampleSheet.csv"
+	nf_uniqueness = uniqueness_format.format(new Date())
+	outdir = params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness
+
+	subset_seq = miseq_subset_seq	
+	if ( sequencer =~ /NovaSeq.*/ ) {
+		if ( n_samples >= large_sampling_threshold ) {
+			nova_subset_seq = large_indexing_nova_subset_seq
+		}
+		subset_seq = nova_subset_seq
+	}
 }
 
 // ========================================
 //				PROFILES
 //=========================================
-// Load base.config by default for all pipelines
-includeConfig "$baseDir/conf/base.config"
+toolsModuleHash = [:]
+if (params.host == 'genologin') {
+	includeConfig "$baseDir/conf/dependencies_genologin.config"
+} else if (params.host == 'genobioinfo') {
+	includeConfig "$baseDir/conf/dependencies_genobioinfo.config"
+}
 
-System.out.println "Les configurations de bases sont chargÃ©es"
+// Load base.config and report.config by default for all pipelines
+includeConfig "$baseDir/conf/base.config"
+includeConfig "$baseDir/conf/report.config"
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
 process.container = "$baseDir/template-nf.sif"
 
 profiles {
-	conda { process.conda = "$baseDir/environment.yml" }
-	debug { process.beforeScript = 'echo $HOSTNAME' }
-	docker { docker.enabled = true }
-	singularity { singularity.enabled = true }
-	dev { includeConfig "$baseDir/conf/test.config" }
-	prod { includeConfig "$baseDir/conf/prod.config" }
+	dev		{ includeConfig "$baseDir/conf/test.config" }
 }
 
-System.out.println "Tous les profiles ont Ã©tÃ© analysÃ©s"
-
 // Avoid this error:
 // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
 // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default.
@@ -76,4 +146,3 @@ docker.runOptions = '-u \$(id -u):\$(id -g)'
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
-System.out.println "Sortie du nextflow.config"
\ No newline at end of file
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index beb02dc..5d2e895 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -116,7 +116,7 @@ workflow ILLUMINA_QC {
 		)
 
 		// DTM process
-		if ("$params.is_dev_mode") {
+		if (params.DTM_mode) {
 			PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report)
 		}
 
-- 
GitLab


From 3383b1c7419eb5d8b151703a680cf021c773d919 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 31 Jul 2023 15:24:59 +0200
Subject: [PATCH 18/21] Config file for genobioinfo server

	Ref : #70
---
 conf/dependencies_genobioinfo.config | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 conf/dependencies_genobioinfo.config

diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config
new file mode 100644
index 0000000..9ac8895
--- /dev/null
+++ b/conf/dependencies_genobioinfo.config
@@ -0,0 +1,24 @@
+// ========================================
+//			GENOBIOINFO MODULES
+//=========================================
+// ----- CORE ----- //
+toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter/0.1']
+toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2']
+toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1']  // version upgraded face to genologin
+toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3']
+
+// ----- RNA ----- //
+toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0']  // version upgraded face to genologin
+toolsModuleHash['STAR'] = ['bioinfo/STAR/2.7.5a']    // version upgraded face to genologin
+
+// ----- DNA ----- //
+toolsModuleHash['BWA'] = ['bioinfo/bwa/0.7.17']
+toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools/1.18']   // version upgraded face to genologin
+
+// ========================================
+//			   SHARED MODULES
+//=========================================
+toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/Seqtk/1.3']
+toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14']
+toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/1.18']  // Not installed
+toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20']
-- 
GitLab


From acd5b48d1254c8883cb2eeb0f9846aba42f462e5 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 31 Jul 2023 16:55:15 +0200
Subject: [PATCH 19/21] Set R module for DEMUX_STATS from host dependency config

---
 conf/base.config                     | 4 ++++
 conf/dependencies_genobioinfo.config | 1 +
 conf/dependencies_genologin.config   | 1 +
 modules/local/module_core.nf         | 1 -
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index 9683cd8..43666af 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -52,6 +52,10 @@ process {
 		]
 	}
 
+	withName: DEMUX_STATS {
+		module = toolsModuleHash['R']
+	}
+
 	withName: ILLUMINA_FILTER {
 		publishDir = [
 			path: "${params.outdir}/IlluminaFilter",
diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config
index 9ac8895..0568a41 100644
--- a/conf/dependencies_genobioinfo.config
+++ b/conf/dependencies_genobioinfo.config
@@ -6,6 +6,7 @@ toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter/0.1']
 toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2']
 toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1']  // version upgraded face to genologin
 toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3']
+toolsModuleHash['R'] = ['statistics/R/4.3.0']
 
 // ----- RNA ----- //
 toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0']  // version upgraded face to genologin
diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config
index 4dd63eb..7c9fa92 100644
--- a/conf/dependencies_genologin.config
+++ b/conf/dependencies_genologin.config
@@ -6,6 +6,7 @@ toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter-0.1']
 toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2']
 toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7']
 toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2']
+toolsModuleHash['R'] = ['system/R-4.0.4_gcc-9.3.0']
 
 // ----- RNA ----- //
 toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0']
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 9f904ed..cf7e9f6 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -32,7 +32,6 @@ process DEMUX_STATS {
 	
 	script:
 	"""
-		module load system/R-4.0.4_gcc-9.3.0
 		demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log
 	"""
 }
-- 
GitLab


From 7fb81834344b3045749acb2bac62109812c4d066 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 2 Aug 2023 15:23:50 +0200
Subject: [PATCH 20/21] New module for sortmerna on genobioinfo

	Ref : #70
---
 conf/dependencies_genobioinfo.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config
index 0568a41..5276bce 100644
--- a/conf/dependencies_genobioinfo.config
+++ b/conf/dependencies_genobioinfo.config
@@ -21,5 +21,5 @@ toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools/1.18']   // version upgraded fa
 //=========================================
 toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/Seqtk/1.3']
 toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14']
-toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/1.18']  // Not installed
+toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6']  // version upgraded compared to genologin
 toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20']
-- 
GitLab


From 0abde469738cb1924de1563c8926a5f29fa91806 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 2 Aug 2023 15:25:15 +0200
Subject: [PATCH 21/21] Update version number to 1.6.0

---
 conf/report.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/report.config b/conf/report.config
index 68b3e9d..2c00805 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
 	description = "Workflow for Illumina data quality control"
 	mainScript = 'main.nf'
 	nextflowVersion = '>=0.32.0'
-	version = '1.2.4'
+	version = '1.6.0'
 }
\ No newline at end of file
-- 
GitLab