From eecdb982e0d4e88b95f51464d07300d6d2c3d3c4 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 26 Jul 2023 15:48:44 +0200 Subject: [PATCH 01/23] Germplasm V1, V2, Mcpd unified --- .../gnpis-pg-to-json/germplasmV2.sql | 360 ++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql new file mode 100644 index 0000000..afbd05e --- /dev/null +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql @@ -0,0 +1,360 @@ +---https://hashrocket.com/blog/posts/faster-json-generation-with-postgresql +-- Warning: The name of this file is used as the elasticsearch document type name + +-- Example of usage: +-- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f germplasmV2.sql -v faidareURL=https://urgi.versailles.inrae.fr/faidare > germplasmV2.json + +-- Making sure to stop psql with non-zero exit code on any error +\set ON_ERROR_STOP true + +SELECT json_build_object( + 'accessionNames', array( + SELECT a.accession_name + UNION + SELECT DISTINCT(accsyn.accession_synonym_name) + FROM accession_synonym_t accsyn + WHERE accsyn.accession_id = a.accession_id + ), + 'accessionNumber', a.accession_number, + 'acquisitionDate', a.collecting_date, + 'acquisitionSourcecode', collecting_site.environment_type, + 'acquisitionSourceCodeDescription', -- environmentType + 'alternadeIDs', null, + 'ancestralData', a.pedigree, + 'biologicalStatusOfAccessionCode', biostat.name_en, + 'biologicalStatusOfAccessionDescription', null, + 'breedingInstitutes', json_build_object( + 'instituteCode', bi.fao_code, + 'instituteName', bi.institution_name, + 'acronym', bi.acronym, + 'organisation', bi.organization, + 'instituteType', bi_type.name_en, + 'webSite', bi.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN bi_address.address is not null and 
bi_address.address != '' THEN bi_address.address END, + CASE WHEN bi_address.postal_box is not null and bi_address.postal_box != '' THEN bi_address.postal_box END, + CASE WHEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) != '' THEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) END, + CASE WHEN bi_address_country.name_en is not null and bi_address_country.name_en != '' THEN bi_address_country.name_en END + )), + 'logo', bi.institution_logo + ), + 'breedingMethodDbId', null, + 'breedingMethodName', null, + 'collection', array( + SELECT json_build_object( + 'id', co.collection_id, + 'name', ( + CASE + WHEN tr.translated_name is not null THEN tr.translated_name + ELSE co.collection_code + END + ), + 'type', ot.name_en, + 'germplasmCount', ( + SELECT count(distinct ac2.accession_id) + FROM accession_collection_t ac2 + WHERE ac2.collection_id = co.collection_id + ) + ) + FROM collections_t co + JOIN accession_collection_t ac ON co.collection_id = ac.collection_id + LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id + LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id + WHERE ac.accession_id = a.accession_id + GROUP BY co.collection_id, ot.name_en, tr.translated_name + ), + 'collectinInfo', json_build_object( + 'collectingDate', a.collecting_date, + 'collectingInstitutes', array( + SELECT json_build_object( + 'instituteName', ci.institution_name, + 'instituteCode', ci.fao_code, + 'acronym', ci.acronym, + 'organisation', ci.organization, + 'instituteType', ci_type.name_en, + 'webSite', ci.internet_site, + 'instituteAddress', trim(concat_ws(', ', + CASE WHEN ci_address.address is not null and ci_address.address != '' THEN ci_address.address END, + CASE WHEN ci_address.postal_box is not null and ci_address.postal_box != '' THEN ci_address.postal_box END, + CASE WHEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) != '' THEN 
trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) END, + CASE WHEN ci_address_country.name_en is not null and ci_address_country.name_en != '' THEN ci_address_country.name_en END + )), + 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || ci.institution_logo + ) + FROM institution_t ci + LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id + LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id + LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id + WHERE ci.institution_id = a.collector_institution_id + ), + 'collectingMissionIdentifier', null, + 'collectingNumber', a.collecting_number, + 'collectingSite', json_build_object( + 'coordinateUncertainty', null, + 'elevation', collecting_site.elevation, + 'geoReferencingMethod', null, + 'latitudeDecimal', collecting_site.latitude, + 'latitudeDegrees', -- formule collecting_site.latitude, + 'locationDescription', collecting_site.description, + 'longitudeDecimal', collecting_site.longitude, + 'longitudeDegrees', -- formule collecting_site.longitude, + 'spatialReferenceSystem', null, + 'locationDbId', collecting_site.site_id, + 'locationName', collecting_site.site_name + ), + 'materialType', cmt.name_en, + 'collectors', a.collectors_list + ), + 'commonCropName', ( + CASE + WHEN ta.dataset_id = (select dataset_id from dataset_t where dataset_name = 'Forest tree taxa') AND a.taxon_group != 'Cherry' AND a.taxon_group != 'Walnut' THEN 'Forest tree' + ELSE a.taxon_group + END + ), + 'countryOfOriginCode', originCountry.name_en, + 'originSite', json_build_object( + 'siteId', origin_site.site_id, + 'siteName', origin_site.site_name, + 'latitude', origin_site.latitude, + 'longitude', origin_site.longitude, + 'siteType', origin_site.site_type + ), + 'defaultDisplayName', a.accession_name, + 'documentationURL', null, + 'donorInfo', array( + SELECT json_build_object( + 
'donorAccessionNumber', a.donor_accession_number, + 'donorAccessionPUI', null, + 'donorInstitute', json_build_object( + 'instituteName', donor.institution_name, + 'instituteCode', donor.fao_code, + 'acronym', donor.acronym, + 'organisation', donor.organization, + 'instituteType', donor_type.name_en, + 'webSite', donor.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN address.address is not null and address.address != '' THEN address.address END, + CASE WHEN address.postal_box is not null and address.postal_box != '' THEN address.postal_box END, + CASE WHEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) != '' THEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) END, + CASE WHEN address_country.name_en is not null and address_country.name_en != '' THEN address_country.name_en END + )), + 'logo', donor.institution_logo + ), + 'donationDate', a.donation_date + ) + FROM institution_t donor + LEFT JOIN ontology_term_t donor_type ON donor_type.ontology_term_id = donor.institution_type_id + LEFT JOIN address_t address ON address.address_id = donor.address_id + LEFT JOIN ontology_term_t address_country ON address_country.ontology_term_id = address.country_id + WHERE donor.institution_id = a.donor_institution_id + ), + 'externalReferences', --, + 'genus', ( + CASE + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus + ELSE '' + END + ), + 'germplasmDbId', text(a.accession_id), + 'germplasmName', a.accession_name, + 'germplasmPUI', a.puid, + 'instituteCode', hi.fao_code, + 'instituteName', hi.institution_name, + 'holdingInstitute', json_build_object( + 'instituteName', hi.institution_name, + 'instituteCode', hi.fao_code, + 'acronym', hi.acronym, + 'organisation', hi.organization, + 'instituteType', hi_type.name_en, + 'webSite', hi.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN hi_address.address is not null and hi_address.address != '' THEN hi_address.address END, + CASE WHEN hi_address.postal_box is 
not null and hi_address.postal_box != '' THEN hi_address.postal_box END, + CASE WHEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) != '' THEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) END, + CASE WHEN hi_address_country.name_en is not null and hi_address_country.name_en != '' THEN hi_address_country.name_en END + )), + 'logo', hi.institution_logo + ), + 'holdingGenbank', json_build_object ( + 'instituteName', ( + CASE + WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name + ELSE '' + END + ), + 'instituteCode', grc.grc_code, + 'webSite', grc.internet_site, + 'logo', + CASE + WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' + ELSE '' + END + ), + 'mlsStatus', ( + SELECT + CASE string_agg(distinct(ott.textual_code), ', ') + WHEN 'MLS' THEN '1' + ELSE '0' + END + FROM accession_distributor_t ad + JOIN ontology_term_t ott ON ott.ontology_term_id = ad.distribution_status_id + WHERE ad.accession_id = a.accession_id + ), + 'pedigree', a.pedigree, + 'remarks', agg_accession_comments.accession_comments, + 'species', ta.species, + 'speciesAuthority', species.author_list, + 'safetyDuplicateInstitutes', array( + 'instituteCode', hi.fao_code, + 'instituteName', hi.institution_name, + ), + 'storageType', null, + 'subtaxa', trim(concat_ws(' ', + CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, + CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, + CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, + CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, + CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, + CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. 
' || ta.subgroup END + )), + 'subtaxaAuthority', ( + CASE + WHEN (ta.subspecies is not null AND ta.subspecies != '') + OR (ta.variety is not null AND ta.variety != '') + OR (ta.cultivar is not null AND ta.cultivar != '') + OR (ta.form is not null AND ta.form != '') + OR (ta.tax_group is not null AND ta.tax_group != '') + OR (ta.subgroup is not null AND ta.subgroup != '') + THEN ta.author_list + ELSE '' + END + ), + 'synonyms', null, + 'taxonIds', ta.taxon_external_ids +) + +FROM accession_t a + +------------------- +-- PREVIOUS JOIN -- +------------------- + +-- taxon +JOIN taxon_t ta ON a.taxon_id = ta.taxon_id + +-- Recursive join to get species authors +LEFT JOIN ( + WITH RECURSIVE tax(descendant_id, rank, taxon_id, parent_id) AS ( + SELECT descendant_id, bt.name, tax.taxon_id, tax.parent_id, tax.author_list + FROM taxon_t tax + JOIN taxon_path_t tax_path ON tax_path.ascendant_id = tax.taxon_id + JOIN bio_type_t bt ON bt.bio_type_id = tax.rank_id + UNION ALL + SELECT tax.descendant_id, bt.name, parent_tax.taxon_id, parent_tax.parent_id, parent_tax.author_list + FROM tax + JOIN taxon_t parent_tax ON parent_tax.taxon_id = tax.parent_id + JOIN bio_type_t bt ON bt.bio_type_id = parent_tax.rank_id + ) + SELECT * FROM tax + WHERE RANK = 'species' + LIMIT 1 +) AS species ON species.descendant_id = ta.taxon_id + +-- biological status +LEFT JOIN ontology_term_t biostat ON biostat.ontology_term_id = a.biological_status_id + +-- genetic nature +LEFT JOIN ontology_term_t genetic_nature ON genetic_nature.ontology_term_id = a.genetic_nature_id + +-- aggregates acccession comments in one line +LEFT JOIN ( + SELECT acc.accession_id, string_agg(distinct(translated_name), ', ') AS accession_comments + FROM translations_t tra + JOIN accession_t acc ON tra.commented_accession_id = acc.accession_id + GROUP BY acc.accession_id +) AS agg_accession_comments ON agg_accession_comments.accession_id = a.accession_id + +-- photo +LEFT JOIN photo_t photo ON photo.photo_id = a.photo_id + +-- 
holding institution +LEFT JOIN institution_t hi ON hi.institution_id = a.holding_institution_id +LEFT JOIN ontology_term_t hi_type ON hi_type.ontology_term_id = hi.institution_type_id +LEFT JOIN address_t hi_address ON hi_address.address_id = hi.address_id +LEFT JOIN ontology_term_t hi_address_country ON hi_address_country.ontology_term_id = hi_address.country_id + +-- grc +LEFT JOIN grc_t grc ON grc.grc_id = a.grc_id +LEFT JOIN ( + SELECT grc_tra.named_grc_id, grc_tra.translated_name + FROM translations_t grc_tra + JOIN languages_t l ON grc_tra.language_id = l.language_id + WHERE l.language_code = 'en' +) AS grc_name ON grc_name.named_grc_id = a.grc_id + +-- presence status +LEFT JOIN ontology_term_t pres_status ON pres_status.ontology_term_id = a.presence_status_id + +-- genealogy +LEFT JOIN genealogy_t genea on genea.genealogy_id = a.genealogy_id +LEFT JOIN accession_t fp ON fp.accession_id = genea.first_parent_id +LEFT JOIN ontology_term_t fp_type ON fp_type.ontology_term_id = genea.first_parent_type_id +LEFT JOIN accession_t sp ON sp.accession_id = genea.second_parent_id +LEFT JOIN ontology_term_t sp_type ON sp_type.ontology_term_id = genea.second_parent_type_id + +-- origin site +LEFT JOIN ( + SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id +) AS origin_site ON origin_site.site_id = a.origin_site_id + +-- Recursive join on site_t.geographical_location_id = ontology_term_t.ontology_term_id +-- Select geographical location level = COUNTRY or OLD_COUNTRY +LEFT JOIN ( + WITH RECURSIVE geo(site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en) AS ( + SELECT location.site_id, level.textual_code, geo.ontology_term_id, geo.parent_id, geo.textual_code, geo.name_en + FROM ontology_term_t geo + JOIN site_t location ON location.geographical_location_id = geo.ontology_term_id + LEFT JOIN ontology_term_t level ON level.ontology_term_id 
= geo.term_level_id + UNION ALL + SELECT geo.site_id, level.textual_code, parent_geo.ontology_term_id, parent_geo.parent_id, parent_geo.textual_code, parent_geo.name_en + FROM geo + JOIN ontology_term_t parent_geo ON parent_geo.ontology_term_id = geo.parent_id + LEFT JOIN ontology_term_t level ON level.ontology_term_id = parent_geo.term_level_id + ) + SELECT DISTINCT ON (site_id) site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en + FROM geo + WHERE level_textual_code = 'COUNTRY' OR level_textual_code = 'OLD_COUNTRY' +) AS originCountry ON originCountry.site_id = a.origin_site_id + +-- collecting site +LEFT JOIN ( + SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id +) AS collecting_site ON collecting_site.site_id = a.site_id + +-- collector +LEFT JOIN institution_t ci ON ci.institution_id = a.collector_institution_id +LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id +LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id +LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id + +-- collected material type +LEFT JOIN ontology_term_t cmt ON cmt.ontology_term_id = a.collected_material_type_id + +-- breeder +LEFT JOIN institution_t bi ON bi.institution_id = a.breeder_institution_id +LEFT JOIN ontology_term_t bi_type ON bi_type.ontology_term_id = bi.institution_type_id +LEFT JOIN address_t bi_address ON bi_address.address_id = bi.address_id +LEFT JOIN ontology_term_t bi_address_country ON bi_address_country.ontology_term_id = bi_address.country_id + +-- dataset +LEFT JOIN dataset_t ds ON ds.dataset_id = a.dataset_id + +WHERE a.accession_id between :startPageId and :endPageId +ORDER BY a.accession_id +--ORDER BY RANDOM() LIMIT 500 +--LIMIT 5 +; \ No newline at end of file -- GitLab From 46a4e9bed3298306a31ec84274cfa8af080e989e 
Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 26 Jul 2023 16:15:05 +0200 Subject: [PATCH 02/23] Other fields --- .../gnpis-pg-to-json/germplasmV2.sql | 296 +++++++++++++++++- 1 file changed, 295 insertions(+), 1 deletion(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql index afbd05e..90d4c60 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql @@ -230,7 +230,301 @@ SELECT json_build_object( END ), 'synonyms', null, - 'taxonIds', ta.taxon_external_ids + 'taxonIds', ta.taxon_external_ids, + + -- OTHER IN GERMPLASM -- + 'groupId', a.group_id, + 'seedSource', null, + 'source', 'INRAE-URGI', + 'typeOfGermplasmStorageCode', null, + 'genusSpecies', ( + CASE + WHEN ta.species is not null AND ta.species != '' THEN + CASE + WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species + ELSE ta.species + END + ELSE '' + END + ), + 'speciesAuthority', species.author_list, + 'genusSpeciesSubtaxa', ( + CASE + WHEN (ta.subspecies is not null AND ta.subspecies != '') + OR (ta.variety is not null AND ta.variety != '') + OR (ta.cultivar is not null AND ta.cultivar != '') + OR (ta.form is not null AND ta.form != '') + OR (ta.tax_group is not null AND ta.tax_group != '') + OR (ta.subgroup is not null AND ta.subgroup != '') + THEN concat_ws(' ', + CASE + WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species + ELSE ta.species + END, + CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, + CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. 
' || ta.variety END, + CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, + CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, + CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, + CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END + ) + ELSE '' + END + ), + + 'taxonSynonyms', array( + SELECT distinct ts.taxon_synonym_name as ta_synonym_name + FROM taxon_t ta + JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id + JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id + JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id + WHERE ot.textual_code = 'SCIENTIFIC' + AND ta.taxon_id = a.taxon_id + ), + 'taxonCommonNames', array( + SELECT distinct ts.taxon_synonym_name as ta_common_name + FROM taxon_t ta + JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id + JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id + JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id + WHERE ot.textual_code != 'SCIENTIFIC' + AND ta.taxon_id = a.taxon_id + ), + 'taxonComment', ta.taxon_comment_en, + 'geneticNature', genetic_nature.name_en, + 'photo', json_build_object( + 'file', ( + 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || + CASE + WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.file_name + ELSE photo.file_name + END + ), + 'thumbnailFile', ( + 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || + CASE + WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.thumbnail_file_name + ELSE photo.thumbnail_file_name + END + ), + 'photoName', photo.photo_name_en, + 'description', photo.description_en, + 'copyright', trim(concat_ws(' ', photo.author, photo.photo_year)) + ), + + 'holdingGenbank', json_build_object ( + 'instituteName', ( + CASE + WHEN 
grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name + ELSE '' + END + ), + 'instituteCode', grc.grc_code, + 'webSite', grc.internet_site, + 'logo', + CASE + WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' + ELSE '' + END + ), + 'accessionHolder', ( + CASE + WHEN (select string_agg(distinct(coll.collection_code), ', ') + from accession_collection ac + join collections coll on coll.collection_id = ac.collection_id + where a.accession_id = ac.accession_id + group by ac.accession_id) like '%BRC4Forest_CMD%' THEN + CASE + WHEN ds.dataset_name = 'Salicaceae' THEN 'Forest BRC - Orleans' + WHEN ds.dataset_name = 'Pinus Portal' or ds.dataset_name = 'Quercus Portal' THEN 'Forest BRC - Pierroton' + WHEN ds.dataset_name = 'PlantaExp' THEN 'Forest BRC - Avignon' + ELSE 'Forest BRC' + END + --WHEN grc.grc_code is not null and grc.grc_code != 'BRC4Forest' THEN replace(grc.grc_code, '_', ' ') + ELSE '' + END + ), + 'presenceStatus', pres_status.name_en, + 'genealogy', json_build_object( + 'crossingPlan', genea.crossing_plan, + 'crossingYear', genea.crossing_year, + 'familyCode', genea.family_code, + 'firstParentName', fp.accession_name, + 'firstParentPUI', fp.puid, + 'firstParentType', fp_type.name_en, + 'secondParentName', CASE WHEN sp.accession_name != 'None' THEN sp.accession_name ELSE '' END, + 'secondParentPUI', CASE WHEN sp.accession_name != 'None' THEN sp.puid ELSE '' END, + 'secondParentType', CASE WHEN sp.accession_name != 'None' THEN sp_type.name_en ELSE '' END, + 'sibblings', array( + SELECT json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ) + FROM accession_t acc + WHERE acc.genealogy_id = genea.genealogy_id AND acc.accession_id != a.accession_id + ) + ), + 'children', array( + SELECT json_build_object( + 'firstParentName', p1.accession_name, + 'firstParentPUI', p1.puid, + 'secondParentName', CASE WHEN p2.accession_name != 'None' THEN 
p2.accession_name ELSE '' END, + 'secondParentPUI', CASE WHEN p2.accession_name != 'None' THEN p2.puid ELSE '' END, + 'sibblings', array( + SELECT json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ) + FROM accession_t acc + WHERE acc.genealogy_id = g.genealogy_id + ) + ) + FROM genealogy_t g + JOIN accession_t p1 ON p1.accession_id = g.first_parent_id + LEFT JOIN accession_t p2 ON p2.accession_id = g.second_parent_id + WHERE p1.accession_id = a.accession_id OR p2.accession_id = a.accession_id + ), + 'descriptors', array( + SELECT json_build_object( + 'name', ot.name_en, + 'value', ad.descriptor_value + ) + FROM accession_descriptor_t ad + JOIN ontology_term_t ot ON ot.ontology_term_id = ad.descriptor_id + WHERE ad.accession_id = a.accession_id + ), + 'originSite', json_build_object( + 'siteId', origin_site.site_id, + 'siteName', origin_site.site_name, + 'latitude', origin_site.latitude, + 'longitude', origin_site.longitude, + 'siteType', origin_site.site_type + ), + 'collectingSite', json_build_object( + 'siteId', collecting_site.site_id, + 'siteName', collecting_site.site_name, + 'latitude', collecting_site.latitude, + 'longitude', collecting_site.longitude, + 'siteType', collecting_site.site_type + ), + 'evaluationSites', array( + SELECT json_build_object( + 'siteId', s.site_id, + 'siteName', s.site_name, + 'latitude', s.latitude, + 'longitude', s.longitude, + 'siteType', ot.name_en + ) + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + JOIN trial_t t ON t.site_id = s.site_id + JOIN trial_lot_t tl ON tl.trials_id = t.trial_id + JOIN lot_t l ON l.lot_id = tl.lots_id + WHERE l.accession_id = a.accession_id + GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en + ), + 'distributors', array( + SELECT json_build_object( + 'institute', json_build_object( + 'instituteName', di.institution_name, + 'instituteCode', di.fao_code, + 'acronym', di.acronym, + 'organisation', di.organization, + 'instituteType', 
di_type.name_en, + 'webSite', di.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN di_address.address is not null AND di_address.address != '' THEN di_address.address END, + CASE WHEN di_address.postal_box is not null AND di_address.postal_box != '' THEN di_address.postal_box END, + CASE WHEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) != '' THEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) END, + CASE WHEN di_address_country.name_en is not null AND di_address_country.name_en != '' THEN di_address_country.name_en END + )), + 'logo', di.institution_logo + ), + 'accessionNumber', ad.distributor_accession_number, + 'distributionStatus', ot.name_en + ) + FROM accession_distributor_t ad + JOIN ontology_term_t ot ON ot.ontology_term_id = ad.distribution_status_id + JOIN institution_t di ON di.institution_id = ad.distributor_institution_id + LEFT JOIN ontology_term_t di_type ON di_type.ontology_term_id = di.institution_type_id + LEFT JOIN address_t di_address ON di_address.address_id = di.address_id + LEFT JOIN ontology_term_t di_address_country ON di_address_country.ontology_term_id = di_address.country_id + WHERE ad.accession_id = a.accession_id + ), + 'panel', array( + SELECT json_build_object( + 'id', pa.panel_id, + 'name', pa.panel_name, + 'germplasmCount', ( + SELECT count(distinct l2.accession_id) + FROM lot_t l2 + JOIN panel_lot_t pl2 on pl2.lot_id = l2.lot_id + WHERE pl2.panel_id = pa.panel_id + ) + ) + FROM panel_t pa + JOIN panel_lot_t pl ON pa.panel_id = pl.panel_id + JOIN lot_t l ON pl.lot_id = l.lot_id + WHERE l.accession_id = a.accession_id + GROUP BY pa.panel_id, pa.panel_name + ), + 'collection', array( + SELECT json_build_object( + 'id', co.collection_id, + 'name', ( + CASE + WHEN tr.translated_name is not null THEN tr.translated_name + ELSE co.collection_code + END + ), + 'type', ot.name_en, + 'germplasmCount', ( + SELECT count(distinct ac2.accession_id) + FROM 
accession_collection_t ac2 + WHERE ac2.collection_id = co.collection_id + ) + ) + FROM collections_t co + JOIN accession_collection_t ac ON co.collection_id = ac.collection_id + LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id + LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id + WHERE ac.accession_id = a.accession_id + GROUP BY co.collection_id, ot.name_en, tr.translated_name + ), + 'population', array( + SELECT json_build_object( + 'id', prg.population_id, + 'name', prg.population_name, + 'type', ot.name_en, + 'germplasmRef', json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ), + 'germplasmCount', ( + SELECT count(distinct aprg2.accession_id) + FROM accession_population_rg_t aprg2 + WHERE aprg2.population_id = prg.population_id + ) + ) + FROM population_rg_t prg + JOIN accession_population_rg_t aprg ON aprg.population_id = prg.population_id + JOIN ontology_term_t ot ON ot.ontology_term_id = prg.population_type_id + LEFT JOIN accession_t acc ON acc.accession_id = prg.ref_accession_id + WHERE aprg.accession_id = a.accession_id + OR acc.accession_id = a.accession_id + GROUP BY prg.population_id, prg.population_name, ot.name_en, acc.puid, acc.accession_name + ), + + -- List of trial number in which this germplasm is used + 'studyDbIds', array( + SELECT DISTINCT t.trial_number + FROM trial_t t + JOIN trial_lot_t tl ON tl.trials_id = t.trial_id + JOIN lot_t l ON l.lot_id = tl.lots_id + WHERE l.accession_id = a.accession_id + ) ) FROM accession_t a -- GitLab From 685ea5706ea6bac2a2ef7f713b80be80c6bbe4ac Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 26 Jul 2023 16:19:52 +0200 Subject: [PATCH 03/23] MCPD fields --- etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql index 90d4c60..718d02e 
100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql @@ -524,7 +524,13 @@ SELECT json_build_object( JOIN trial_lot_t tl ON tl.trials_id = t.trial_id JOIN lot_t l ON l.lot_id = tl.lots_id WHERE l.accession_id = a.accession_id - ) + ), + + -- IN MCPD -- + 'breederAccessionNumber', a.breeder_accession_number, + 'breedingCreationYear', a.breeding_creation_year, + 'catalogRegistrationYear', a.catalog_registration_year, + 'catalogDeregistrationYear', a.catalog_deregistration_year ) FROM accession_t a -- GitLab From c215d9d2d232c48f1511e6d26ab76b333ac34bc8 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Thu, 3 Aug 2023 14:06:24 +0200 Subject: [PATCH 04/23] Minors corrections --- .../gnpis-pg-to-json/germplasmV2.sql | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql index 718d02e..9e49977 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql @@ -204,9 +204,9 @@ SELECT json_build_object( 'remarks', agg_accession_comments.accession_comments, 'species', ta.species, 'speciesAuthority', species.author_list, - 'safetyDuplicateInstitutes', array( + 'safetyDuplicateInstitutes', json_build_object ( 'instituteCode', hi.fao_code, - 'instituteName', hi.institution_name, + 'instituteName', hi.institution_name ), 'storageType', null, 'subtaxa', trim(concat_ws(' ', @@ -629,10 +629,20 @@ LEFT JOIN ( ) AS originCountry ON originCountry.site_id = a.origin_site_id -- collecting site +-- LEFT JOIN ( +-- SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type +-- FROM site_t s +-- JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id +--) AS collecting_site ON collecting_site.site_id = a.site_id + +-- collecting site Mcpd LEFT JOIN ( - SELECT 
s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + SELECT s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, ot.name_en AS site_type, ot2.numeric_code AS environment_type, string_agg(distinct(t.translated_name), ', ') AS description + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + LEFT JOIN ontology_term_t ot2 ON ot2.ontology_term_id = s.environment_type_id + LEFT JOIN translations_t t ON t.commented_site_id = s.site_id + GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, site_type, environment_type ) AS collecting_site ON collecting_site.site_id = a.site_id -- collector -- GitLab From 4dd23c12e6f07201b6678146039b19db848b7129 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Fri, 4 Aug 2023 15:20:02 +0200 Subject: [PATCH 05/23] Typo --- etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql index 9e49977..b5d0934 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql @@ -566,7 +566,7 @@ LEFT JOIN ontology_term_t biostat ON biostat.ontology_term_id = a.biological_sta -- genetic nature LEFT JOIN ontology_term_t genetic_nature ON genetic_nature.ontology_term_id = a.genetic_nature_id --- aggregates acccession comments in one line +-- aggregates accession comments in one line LEFT JOIN ( SELECT acc.accession_id, string_agg(distinct(translated_name), ', ') AS accession_comments FROM translations_t tra -- GitLab From 3d9b7669d8c75d85c03bca2317ab8de5d2f991ce Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Thu, 10 Aug 2023 10:51:58 +0200 Subject: [PATCH 06/23] Correct request --- .../gnpis-pg-to-json/germplasm.sql | 
977 ++++++++++-------- .../gnpis-pg-to-json/germplasmV2.sql | 670 ------------ 2 files changed, 544 insertions(+), 1103 deletions(-) delete mode 100644 etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql index 7e838b7..ad9aab6 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql @@ -2,442 +2,543 @@ -- Warning: The name of this file is used as the elasticsearch document type name -- Example of usage: --- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f germplasm.sql -v faidareURL=https://urgi.versailles.inrae.fr/faidare > germplasm.json +-- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f germplasmV2.sql -v faidareURL=https://urgi.versailles.inrae.fr/faidare > germplasmV2.json -- Making sure to stop psql with non-zero exit code on any error \set ON_ERROR_STOP true SELECT json_build_object( - 'groupId', a.group_id, - 'documentationURL', NULL, - 'germplasmDbId', text(a.accession_id), - 'defaultDisplayName', a.accession_name, - 'accessionNumber', a.accession_number, - 'germplasmName', a.accession_name, - 'germplasmPUI', a.puid, - 'pedigree', a.pedigree, - 'seedSource', null, - 'source', 'INRAE-URGI', - 'synonyms', array( - SELECT distinct accsyn.accession_synonym_name AS synonyms - FROM accession_synonym_t accsyn - WHERE accsyn.accession_id = a.accession_id - ), - 'commonCropName', ( - CASE - WHEN ta.dataset_id = (select dataset_id from dataset_t where dataset_name = 'Forest tree taxa') AND a.taxon_group != 'Cherry' AND a.taxon_group != 'Walnut' THEN 'Forest tree' - ELSE a.taxon_group - END - ), - 'instituteCode', hi.fao_code, - 'instituteName', hi.institution_name, - 'biologicalStatusOfAccessionCode', biostat.name_en, - 'countryOfOriginCode', originCountry.name_en, - 'typeOfGermplasmStorageCode', null, - 'taxonIds', 
ta.taxon_external_ids, - 'genus', ( - CASE - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus - ELSE '' - END - ), - 'species', ta.species, - 'genusSpecies', ( - CASE - WHEN ta.species is not null AND ta.species != '' THEN - CASE - WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species - ELSE ta.species - END - ELSE '' - END - ), - 'speciesAuthority', species.author_list, - 'subtaxa', trim(concat_ws(' ', - CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, - CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, - CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, - CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, - CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, - CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END - )), - 'genusSpeciesSubtaxa', ( - CASE - WHEN (ta.subspecies is not null AND ta.subspecies != '') - OR (ta.variety is not null AND ta.variety != '') - OR (ta.cultivar is not null AND ta.cultivar != '') - OR (ta.form is not null AND ta.form != '') - OR (ta.tax_group is not null AND ta.tax_group != '') - OR (ta.subgroup is not null AND ta.subgroup != '') - THEN concat_ws(' ', - CASE - WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species - ELSE ta.species - END, - CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, - CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, - CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. 
' || ta.cultivar END, - CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, - CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, - CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END - ) - ELSE '' - END - ), - 'subtaxaAuthority', ( - CASE - WHEN (ta.subspecies is not null AND ta.subspecies != '') - OR (ta.variety is not null AND ta.variety != '') - OR (ta.cultivar is not null AND ta.cultivar != '') - OR (ta.form is not null AND ta.form != '') - OR (ta.tax_group is not null AND ta.tax_group != '') - OR (ta.subgroup is not null AND ta.subgroup != '') - THEN ta.author_list - ELSE '' - END - ), - 'donors', array( - SELECT json_build_object( - 'donorInstituteCode', donor.fao_code, - 'donorGermplasmPUI', null, - 'donorAccessionNumber', a.donor_accession_number, - 'donorInstitute', json_build_object( - 'instituteName', donor.institution_name, - 'instituteCode', donor.fao_code, - 'acronym', donor.acronym, - 'organisation', donor.organization, - 'instituteType', donor_type.name_en, - 'webSite', donor.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN address.address is not null and address.address != '' THEN address.address END, - CASE WHEN address.postal_box is not null and address.postal_box != '' THEN address.postal_box END, - CASE WHEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) != '' THEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) END, - CASE WHEN address_country.name_en is not null and address_country.name_en != '' THEN address_country.name_en END - )), - 'logo', donor.institution_logo - ), - 'donationDate', a.donation_date - ) - FROM institution_t donor - LEFT JOIN ontology_term_t donor_type ON donor_type.ontology_term_id = donor.institution_type_id - LEFT JOIN address_t address ON address.address_id = donor.address_id - LEFT JOIN ontology_term_t address_country ON address_country.ontology_term_id = 
address.country_id - WHERE donor.institution_id = a.donor_institution_id - ), - 'acquisitionDate', a.collecting_date, - 'taxonSynonyms', array( - SELECT distinct ts.taxon_synonym_name as ta_synonym_name - FROM taxon_t ta - JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id - JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id - JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id - WHERE ot.textual_code = 'SCIENTIFIC' - AND ta.taxon_id = a.taxon_id - ), - 'taxonCommonNames', array( - SELECT distinct ts.taxon_synonym_name as ta_common_name - FROM taxon_t ta - JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id - JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id - JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id - WHERE ot.textual_code != 'SCIENTIFIC' - AND ta.taxon_id = a.taxon_id - ), - 'taxonComment', ta.taxon_comment_en, - 'geneticNature', genetic_nature.name_en, - 'comment', agg_accession_comments.accession_comments, - 'photo', json_build_object( - 'file', ( - 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || - CASE - WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.file_name - ELSE photo.file_name - END - ), - 'thumbnailFile', ( - 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || - CASE - WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.thumbnail_file_name - ELSE photo.thumbnail_file_name - END - ), - 'photoName', photo.photo_name_en, - 'description', photo.description_en, - 'copyright', trim(concat_ws(' ', photo.author, photo.photo_year)) - ), - 'holdingInstitute', json_build_object( - 'instituteName', hi.institution_name, - 'instituteCode', hi.fao_code, - 'acronym', hi.acronym, - 'organisation', hi.organization, - 'instituteType', hi_type.name_en, - 'webSite', hi.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN hi_address.address is not null and 
hi_address.address != '' THEN hi_address.address END, - CASE WHEN hi_address.postal_box is not null and hi_address.postal_box != '' THEN hi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) != '' THEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) END, - CASE WHEN hi_address_country.name_en is not null and hi_address_country.name_en != '' THEN hi_address_country.name_en END - )), - 'logo', hi.institution_logo - ), - 'holdingGenbank', json_build_object ( - 'instituteName', ( - CASE - WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name - ELSE '' - END - ), - 'instituteCode', grc.grc_code, - 'webSite', grc.internet_site, - 'logo', - CASE - WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' - ELSE '' - END - ), - 'accessionHolder', ( - CASE - WHEN (select string_agg(distinct(coll.collection_code), ', ') - from accession_collection ac - join collections coll on coll.collection_id = ac.collection_id - where a.accession_id = ac.accession_id - group by ac.accession_id) like '%BRC4Forest_CMD%' THEN - CASE - WHEN ds.dataset_name = 'Salicaceae' THEN 'Forest BRC - Orleans' - WHEN ds.dataset_name = 'Pinus Portal' or ds.dataset_name = 'Quercus Portal' THEN 'Forest BRC - Pierroton' - WHEN ds.dataset_name = 'PlantaExp' THEN 'Forest BRC - Avignon' - ELSE 'Forest BRC' - END - --WHEN grc.grc_code is not null and grc.grc_code != 'BRC4Forest' THEN replace(grc.grc_code, '_', ' ') - ELSE '' - END - ), - 'presenceStatus', pres_status.name_en, - 'genealogy', json_build_object( - 'crossingPlan', genea.crossing_plan, - 'crossingYear', genea.crossing_year, - 'familyCode', genea.family_code, - 'firstParentName', fp.accession_name, - 'firstParentPUI', fp.puid, - 'firstParentType', fp_type.name_en, - 'secondParentName', CASE WHEN sp.accession_name != 'None' THEN sp.accession_name ELSE '' 
END, - 'secondParentPUI', CASE WHEN sp.accession_name != 'None' THEN sp.puid ELSE '' END, - 'secondParentType', CASE WHEN sp.accession_name != 'None' THEN sp_type.name_en ELSE '' END, - 'sibblings', array( - SELECT json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ) - FROM accession_t acc - WHERE acc.genealogy_id = genea.genealogy_id AND acc.accession_id != a.accession_id - ) - ), - 'children', array( - SELECT json_build_object( - 'firstParentName', p1.accession_name, - 'firstParentPUI', p1.puid, - 'secondParentName', CASE WHEN p2.accession_name != 'None' THEN p2.accession_name ELSE '' END, - 'secondParentPUI', CASE WHEN p2.accession_name != 'None' THEN p2.puid ELSE '' END, - 'sibblings', array( - SELECT json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ) - FROM accession_t acc - WHERE acc.genealogy_id = g.genealogy_id - ) - ) - FROM genealogy_t g - JOIN accession_t p1 ON p1.accession_id = g.first_parent_id - LEFT JOIN accession_t p2 ON p2.accession_id = g.second_parent_id - WHERE p1.accession_id = a.accession_id OR p2.accession_id = a.accession_id - ), - 'descriptors', array( - SELECT json_build_object( - 'name', ot.name_en, - 'value', ad.descriptor_value - ) - FROM accession_descriptor_t ad - JOIN ontology_term_t ot ON ot.ontology_term_id = ad.descriptor_id - WHERE ad.accession_id = a.accession_id - ), - 'originSite', json_build_object( - 'siteId', origin_site.site_id, - 'siteName', origin_site.site_name, - 'latitude', origin_site.latitude, - 'longitude', origin_site.longitude, - 'siteType', origin_site.site_type - ), - 'collectingSite', json_build_object( - 'siteId', collecting_site.site_id, - 'siteName', collecting_site.site_name, - 'latitude', collecting_site.latitude, - 'longitude', collecting_site.longitude, - 'siteType', collecting_site.site_type - ), - 'evaluationSites', array( - SELECT json_build_object( - 'siteId', s.site_id, - 'siteName', s.site_name, - 'latitude', s.latitude, - 'longitude', s.longitude, - 
'siteType', ot.name_en - ) - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id - JOIN trial_t t ON t.site_id = s.site_id - JOIN trial_lot_t tl ON tl.trials_id = t.trial_id - JOIN lot_t l ON l.lot_id = tl.lots_id - WHERE l.accession_id = a.accession_id - GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en - ), - 'collector', json_build_object( - 'institute', json_build_object( - 'instituteName', ci.institution_name, - 'instituteCode', ci.fao_code, - 'acronym', ci.acronym, - 'organisation', ci.organization, - 'instituteType', ci_type.name_en, - 'webSite', ci.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN ci_address.address is not null and ci_address.address != '' THEN ci_address.address END, - CASE WHEN ci_address.postal_box is not null and ci_address.postal_box != '' THEN ci_address.postal_box END, - CASE WHEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) != '' THEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) END, - CASE WHEN ci_address_country.name_en is not null and ci_address_country.name_en != '' THEN ci_address_country.name_en END - )), - 'logo', ci.institution_logo - ), - 'accessionNumber', a.collecting_number, - 'accessionCreationDate', a.collecting_date, - 'materialType', cmt.name_en, - 'collectors', a.collectors_list - ), - 'breeder', json_build_object( - 'institute', json_build_object( - 'instituteName', bi.institution_name, - 'instituteCode', bi.fao_code, - 'acronym', bi.acronym, - 'organisation', bi.organization, - 'instituteType', bi_type.name_en, - 'webSite', bi.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN bi_address.address is not null and bi_address.address != '' THEN bi_address.address END, - CASE WHEN bi_address.postal_box is not null and bi_address.postal_box != '' THEN bi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) != '' THEN 
trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) END, - CASE WHEN bi_address_country.name_en is not null and bi_address_country.name_en != '' THEN bi_address_country.name_en END - )), - 'logo', bi.institution_logo - ), - 'accessionNumber', a.breeder_accession_number, - 'accessionCreationDate', a.breeding_creation_year, - 'registrationYear', a.catalog_registration_year, - 'deregistrationYear', a.catalog_deregistration_year - ), - 'distributors', array( - SELECT json_build_object( - 'institute', json_build_object( - 'instituteName', di.institution_name, - 'instituteCode', di.fao_code, - 'acronym', di.acronym, - 'organisation', di.organization, - 'instituteType', di_type.name_en, - 'webSite', di.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN di_address.address is not null AND di_address.address != '' THEN di_address.address END, - CASE WHEN di_address.postal_box is not null AND di_address.postal_box != '' THEN di_address.postal_box END, - CASE WHEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) != '' THEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) END, - CASE WHEN di_address_country.name_en is not null AND di_address_country.name_en != '' THEN di_address_country.name_en END - )), - 'logo', di.institution_logo - ), - 'accessionNumber', ad.distributor_accession_number, - 'distributionStatus', ot.name_en - ) - FROM accession_distributor_t ad - JOIN ontology_term_t ot ON ot.ontology_term_id = ad.distribution_status_id - JOIN institution_t di ON di.institution_id = ad.distributor_institution_id - LEFT JOIN ontology_term_t di_type ON di_type.ontology_term_id = di.institution_type_id - LEFT JOIN address_t di_address ON di_address.address_id = di.address_id - LEFT JOIN ontology_term_t di_address_country ON di_address_country.ontology_term_id = di_address.country_id - WHERE ad.accession_id = a.accession_id - ), - 'panel', array( - SELECT json_build_object( - 'id', 
pa.panel_id, - 'name', pa.panel_name, - 'germplasmCount', ( - SELECT count(distinct l2.accession_id) - FROM lot_t l2 - JOIN panel_lot_t pl2 on pl2.lot_id = l2.lot_id - WHERE pl2.panel_id = pa.panel_id - ) - ) - FROM panel_t pa - JOIN panel_lot_t pl ON pa.panel_id = pl.panel_id - JOIN lot_t l ON pl.lot_id = l.lot_id - WHERE l.accession_id = a.accession_id - GROUP BY pa.panel_id, pa.panel_name - ), - 'collection', array( - SELECT json_build_object( - 'id', co.collection_id, - 'name', ( - CASE - WHEN tr.translated_name is not null THEN tr.translated_name - ELSE co.collection_code - END - ), - 'type', ot.name_en, - 'germplasmCount', ( - SELECT count(distinct ac2.accession_id) - FROM accession_collection_t ac2 - WHERE ac2.collection_id = co.collection_id - ) - ) - FROM collections_t co - JOIN accession_collection_t ac ON co.collection_id = ac.collection_id - LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id - LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id - WHERE ac.accession_id = a.accession_id - GROUP BY co.collection_id, ot.name_en, tr.translated_name - ), - 'population', array( - SELECT json_build_object( - 'id', prg.population_id, - 'name', prg.population_name, - 'type', ot.name_en, - 'germplasmRef', json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ), - 'germplasmCount', ( - SELECT count(distinct aprg2.accession_id) - FROM accession_population_rg_t aprg2 - WHERE aprg2.population_id = prg.population_id - ) - ) - FROM population_rg_t prg - JOIN accession_population_rg_t aprg ON aprg.population_id = prg.population_id - JOIN ontology_term_t ot ON ot.ontology_term_id = prg.population_type_id - LEFT JOIN accession_t acc ON acc.accession_id = prg.ref_accession_id - WHERE aprg.accession_id = a.accession_id - OR acc.accession_id = a.accession_id - GROUP BY prg.population_id, prg.population_name, ot.name_en, acc.puid, acc.accession_name - ), + 'accessionNames', array( + SELECT a.accession_name + UNION + 
SELECT DISTINCT(accsyn.accession_synonym_name) + FROM accession_synonym_t accsyn + WHERE accsyn.accession_id = a.accession_id + ), + 'accessionNumber', a.accession_number, + 'acquisitionDate', a.collecting_date, + 'acquisitionSourcecode', collecting_site.environment_type, + -- environmentType 'acquisitionSourceCodeDescription', + 'alternadeIDs', null, + 'ancestralData', a.pedigree, + 'biologicalStatusOfAccessionCode', biostat.name_en, + 'biologicalStatusOfAccessionDescription', null, + 'breedingInstitutes', json_build_object( + 'instituteCode', bi.fao_code, + 'instituteName', bi.institution_name, + 'acronym', bi.acronym, + 'organisation', bi.organization, + 'instituteType', bi_type.name_en, + 'webSite', bi.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN bi_address.address is not null and bi_address.address != '' THEN bi_address.address END, + CASE WHEN bi_address.postal_box is not null and bi_address.postal_box != '' THEN bi_address.postal_box END, + CASE WHEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) != '' THEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) END, + CASE WHEN bi_address_country.name_en is not null and bi_address_country.name_en != '' THEN bi_address_country.name_en END + )), + 'logo', bi.institution_logo + ), + 'breedingMethodDbId', null, + 'breedingMethodName', null, + 'collection', array( + SELECT json_build_object( + 'id', co.collection_id, + 'name', ( + CASE + WHEN tr.translated_name is not null THEN tr.translated_name + ELSE co.collection_code + END + ), + 'type', ot.name_en, + 'germplasmCount', ( + SELECT count(distinct ac2.accession_id) + FROM accession_collection_t ac2 + WHERE ac2.collection_id = co.collection_id + ) + ) + FROM collections_t co + JOIN accession_collection_t ac ON co.collection_id = ac.collection_id + LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id + LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id 
+ WHERE ac.accession_id = a.accession_id + GROUP BY co.collection_id, ot.name_en, tr.translated_name + ), + 'collectinInfo', json_build_object( + 'collectingDate', a.collecting_date, + 'collectingInstitutes', array( + SELECT json_build_object( + 'instituteName', ci.institution_name, + 'instituteCode', ci.fao_code, + 'acronym', ci.acronym, + 'organisation', ci.organization, + 'instituteType', ci_type.name_en, + 'webSite', ci.internet_site, + 'instituteAddress', trim(concat_ws(', ', + CASE WHEN ci_address.address is not null and ci_address.address != '' THEN ci_address.address END, + CASE WHEN ci_address.postal_box is not null and ci_address.postal_box != '' THEN ci_address.postal_box END, + CASE WHEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) != '' THEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) END, + CASE WHEN ci_address_country.name_en is not null and ci_address_country.name_en != '' THEN ci_address_country.name_en END + )), + 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || ci.institution_logo + ) + FROM institution_t ci + LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id + LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id + LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id + WHERE ci.institution_id = a.collector_institution_id + ), + 'collectingMissionIdentifier', null, + 'collectingNumber', a.collecting_number, + 'collectingSite', json_build_object( + 'coordinateUncertainty', null, + 'elevation', collecting_site.elevation, + 'geoReferencingMethod', null, + 'latitudeDecimal', collecting_site.latitude, + -- formule collecting_site.latitude 'latitudeDegrees', , + 'locationDescription', collecting_site.description, + 'longitudeDecimal', collecting_site.longitude, + -- formule collecting_site.longitude 'longitudeDegrees', , + 'spatialReferenceSystem', 
null, + 'locationDbId', collecting_site.site_id, + 'locationName', collecting_site.site_name + ), + 'materialType', cmt.name_en, + 'collectors', a.collectors_list + ), + 'commonCropName', ( + CASE + WHEN ta.dataset_id = (select dataset_id from dataset_t where dataset_name = 'Forest tree taxa') AND a.taxon_group != 'Cherry' AND a.taxon_group != 'Walnut' THEN 'Forest tree' + ELSE a.taxon_group + END + ), + 'countryOfOriginCode', originCountry.name_en, + 'originSite', json_build_object( + 'siteId', origin_site.site_id, + 'siteName', origin_site.site_name, + 'latitude', origin_site.latitude, + 'longitude', origin_site.longitude, + 'siteType', origin_site.site_type + ), + 'defaultDisplayName', a.accession_name, + 'documentationURL', null, + 'donorInfo', array( + SELECT json_build_object( + 'donorAccessionNumber', a.donor_accession_number, + 'donorAccessionPUI', null, + 'donorInstitute', json_build_object( + 'instituteName', donor.institution_name, + 'instituteCode', donor.fao_code, + 'acronym', donor.acronym, + 'organisation', donor.organization, + 'instituteType', donor_type.name_en, + 'webSite', donor.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN address.address is not null and address.address != '' THEN address.address END, + CASE WHEN address.postal_box is not null and address.postal_box != '' THEN address.postal_box END, + CASE WHEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) != '' THEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) END, + CASE WHEN address_country.name_en is not null and address_country.name_en != '' THEN address_country.name_en END + )), + 'logo', donor.institution_logo + ), + 'donationDate', a.donation_date + ) + FROM institution_t donor + LEFT JOIN ontology_term_t donor_type ON donor_type.ontology_term_id = donor.institution_type_id + LEFT JOIN address_t address ON address.address_id = donor.address_id + LEFT JOIN ontology_term_t address_country ON address_country.ontology_term_id = 
address.country_id + WHERE donor.institution_id = a.donor_institution_id + ), +-- 'externalReferences', null, + 'genus', ( + CASE + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus + ELSE '' + END + ), + 'germplasmDbId', text(a.accession_id), + 'germplasmName', a.accession_name, + 'germplasmPUI', a.puid, + 'instituteCode', hi.fao_code, + 'instituteName', hi.institution_name, + 'holdingInstitute', json_build_object( + 'instituteName', hi.institution_name, + 'instituteCode', hi.fao_code, + 'acronym', hi.acronym, + 'organisation', hi.organization, + 'instituteType', hi_type.name_en, + 'webSite', hi.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN hi_address.address is not null and hi_address.address != '' THEN hi_address.address END, + CASE WHEN hi_address.postal_box is not null and hi_address.postal_box != '' THEN hi_address.postal_box END, + CASE WHEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) != '' THEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) END, + CASE WHEN hi_address_country.name_en is not null and hi_address_country.name_en != '' THEN hi_address_country.name_en END + )), + 'logo', hi.institution_logo + ), + 'holdingGenbank', json_build_object ( + 'instituteName', ( + CASE + WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name + ELSE '' + END + ), + 'instituteCode', grc.grc_code, + 'webSite', grc.internet_site, + 'logo', ( + CASE + WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' + ELSE '' + END + ) + ), + 'mlsStatus', ( + SELECT + CASE string_agg(distinct(ott.textual_code), ', ') + WHEN 'MLS' THEN '1' + ELSE '0' + END + FROM accession_distributor_t ad + JOIN ontology_term_t ott ON ott.ontology_term_id = ad.distribution_status_id + WHERE ad.accession_id = a.accession_id + ), + 'pedigree', a.pedigree, + 'remarks', agg_accession_comments.accession_comments, + 
'species', ta.species, + 'speciesAuthority', species.author_list, + 'safetyDuplicateInstitutes', json_build_object ( + 'instituteCode', hi.fao_code, + 'instituteName', hi.institution_name + ), + 'storageType', null, + 'subtaxa', trim(concat_ws(' ', + CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, + CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, + CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, + CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, + CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, + CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END + )), + 'subtaxaAuthority', ( + CASE + WHEN (ta.subspecies is not null AND ta.subspecies != '') + OR (ta.variety is not null AND ta.variety != '') + OR (ta.cultivar is not null AND ta.cultivar != '') + OR (ta.form is not null AND ta.form != '') + OR (ta.tax_group is not null AND ta.tax_group != '') + OR (ta.subgroup is not null AND ta.subgroup != '') + THEN ta.author_list + ELSE '' + END + ), + 'synonyms', null, + 'taxonIds', ta.taxon_external_ids, + -- OTHER IN GERMPLASM -- + 'groupId', a.group_id, + 'seedSource', null, + 'source', 'INRAE-URGI', + 'typeOfGermplasmStorageCode', null, + 'genusSpecies', ( + CASE + WHEN ta.species is not null AND ta.species != '' THEN + CASE + WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species + ELSE ta.species + END + ELSE '' + END + ), + 'speciesAuthority', species.author_list, + 'genusSpeciesSubtaxa', ( + CASE + WHEN (ta.subspecies is not null AND ta.subspecies != '') + OR (ta.variety is not null AND ta.variety != '') + OR (ta.cultivar is not null AND ta.cultivar != '') + OR (ta.form is not null AND ta.form != '') + OR 
(ta.tax_group is not null AND ta.tax_group != '') + OR (ta.subgroup is not null AND ta.subgroup != '') + THEN concat_ws(' ', + CASE + WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species + WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species + ELSE ta.species + END, + CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, + CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, + CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, + CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, + CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, + CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END + ) + ELSE '' + END + ), - -- List of trial number in which this germplasm is used - 'studyDbIds', array( - SELECT DISTINCT t.trial_number - FROM trial_t t - JOIN trial_lot_t tl ON tl.trials_id = t.trial_id - JOIN lot_t l ON l.lot_id = tl.lots_id - WHERE l.accession_id = a.accession_id - ) -) + 'taxonSynonyms', array( + SELECT distinct ts.taxon_synonym_name as ta_synonym_name + FROM taxon_t ta + JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id + JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id + JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id + WHERE ot.textual_code = 'SCIENTIFIC' + AND ta.taxon_id = a.taxon_id + ), + 'taxonCommonNames', array( + SELECT distinct ts.taxon_synonym_name as ta_common_name + FROM taxon_t ta + JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id + JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id + JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id + WHERE ot.textual_code != 'SCIENTIFIC' + AND ta.taxon_id = a.taxon_id + ), + 'taxonComment', ta.taxon_comment_en, + 
'geneticNature', genetic_nature.name_en, + 'photo', json_build_object( + 'file', ( + 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || + CASE + WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.file_name + ELSE photo.file_name + END + ), + 'thumbnailFile', ( + 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || + CASE + WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.thumbnail_file_name + ELSE photo.thumbnail_file_name + END + ), + 'photoName', photo.photo_name_en, + 'description', photo.description_en, + 'copyright', trim(concat_ws(' ', photo.author, photo.photo_year)) + ) +)::jsonb || + json_build_object( + 'holdingGenbank', json_build_object ( + 'instituteName', ( + CASE + WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name + ELSE '' + END + ), + 'instituteCode', grc.grc_code, + 'webSite', grc.internet_site, + 'logo', + CASE + WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' + ELSE '' + END + ), + 'accessionHolder', ( + CASE + WHEN (select string_agg(distinct(coll.collection_code), ', ') + from accession_collection ac + join collections coll on coll.collection_id = ac.collection_id + where a.accession_id = ac.accession_id + group by ac.accession_id) like '%BRC4Forest_CMD%' THEN + CASE + WHEN ds.dataset_name = 'Salicaceae' THEN 'Forest BRC - Orleans' + WHEN ds.dataset_name = 'Pinus Portal' or ds.dataset_name = 'Quercus Portal' THEN 'Forest BRC - Pierroton' + WHEN ds.dataset_name = 'PlantaExp' THEN 'Forest BRC - Avignon' + ELSE 'Forest BRC' + END + --WHEN grc.grc_code is not null and grc.grc_code != 'BRC4Forest' THEN replace(grc.grc_code, '_', ' ') + ELSE '' + END + ), + 'presenceStatus', pres_status.name_en, + 'genealogy', json_build_object( + 'crossingPlan', genea.crossing_plan, + 'crossingYear', genea.crossing_year, + 'familyCode', 
genea.family_code, + 'firstParentName', fp.accession_name, + 'firstParentPUI', fp.puid, + 'firstParentType', fp_type.name_en, + 'secondParentName', CASE WHEN sp.accession_name != 'None' THEN sp.accession_name ELSE '' END, + 'secondParentPUI', CASE WHEN sp.accession_name != 'None' THEN sp.puid ELSE '' END, + 'secondParentType', CASE WHEN sp.accession_name != 'None' THEN sp_type.name_en ELSE '' END, + 'sibblings', array( + SELECT json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ) + FROM accession_t acc + WHERE acc.genealogy_id = genea.genealogy_id AND acc.accession_id != a.accession_id + ) + ), + 'children', array( + SELECT json_build_object( + 'firstParentName', p1.accession_name, + 'firstParentPUI', p1.puid, + 'secondParentName', CASE WHEN p2.accession_name != 'None' THEN p2.accession_name ELSE '' END, + 'secondParentPUI', CASE WHEN p2.accession_name != 'None' THEN p2.puid ELSE '' END, + 'sibblings', array( + SELECT json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ) + FROM accession_t acc + WHERE acc.genealogy_id = g.genealogy_id + ) + ) + FROM genealogy_t g + JOIN accession_t p1 ON p1.accession_id = g.first_parent_id + LEFT JOIN accession_t p2 ON p2.accession_id = g.second_parent_id + WHERE p1.accession_id = a.accession_id OR p2.accession_id = a.accession_id + ), + 'descriptors', array( + SELECT json_build_object( + 'name', ot.name_en, + 'value', ad.descriptor_value + ) + FROM accession_descriptor_t ad + JOIN ontology_term_t ot ON ot.ontology_term_id = ad.descriptor_id + WHERE ad.accession_id = a.accession_id + ), + 'originSite', json_build_object( + 'siteId', origin_site.site_id, + 'siteName', origin_site.site_name, + 'latitude', origin_site.latitude, + 'longitude', origin_site.longitude, + 'siteType', origin_site.site_type + ), + 'collectingSite', json_build_object( + 'siteId', collecting_site.site_id, + 'siteName', collecting_site.site_name, + 'latitude', collecting_site.latitude, + 'longitude', collecting_site.longitude, 
+ 'siteType', collecting_site.site_type + ), + 'evaluationSites', array( + SELECT json_build_object( + 'siteId', s.site_id, + 'siteName', s.site_name, + 'latitude', s.latitude, + 'longitude', s.longitude, + 'siteType', ot.name_en + ) + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + JOIN trial_t t ON t.site_id = s.site_id + JOIN trial_lot_t tl ON tl.trials_id = t.trial_id + JOIN lot_t l ON l.lot_id = tl.lots_id + WHERE l.accession_id = a.accession_id + GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en + ), + 'distributors', array( + SELECT json_build_object( + 'institute', json_build_object( + 'instituteName', di.institution_name, + 'instituteCode', di.fao_code, + 'acronym', di.acronym, + 'organisation', di.organization, + 'instituteType', di_type.name_en, + 'webSite', di.internet_site, + 'address', trim(concat_ws(', ', + CASE WHEN di_address.address is not null AND di_address.address != '' THEN di_address.address END, + CASE WHEN di_address.postal_box is not null AND di_address.postal_box != '' THEN di_address.postal_box END, + CASE WHEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) != '' THEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) END, + CASE WHEN di_address_country.name_en is not null AND di_address_country.name_en != '' THEN di_address_country.name_en END + )), + 'logo', di.institution_logo + ), + 'accessionNumber', ad.distributor_accession_number, + 'distributionStatus', ot.name_en + ) + FROM accession_distributor_t ad + JOIN ontology_term_t ot ON ot.ontology_term_id = ad.distribution_status_id + JOIN institution_t di ON di.institution_id = ad.distributor_institution_id + LEFT JOIN ontology_term_t di_type ON di_type.ontology_term_id = di.institution_type_id + LEFT JOIN address_t di_address ON di_address.address_id = di.address_id + LEFT JOIN ontology_term_t di_address_country ON di_address_country.ontology_term_id = di_address.country_id + 
WHERE ad.accession_id = a.accession_id + ), + 'panel', array( + SELECT json_build_object( + 'id', pa.panel_id, + 'name', pa.panel_name, + 'germplasmCount', ( + SELECT count(distinct l2.accession_id) + FROM lot_t l2 + JOIN panel_lot_t pl2 on pl2.lot_id = l2.lot_id + WHERE pl2.panel_id = pa.panel_id + ) + ) + FROM panel_t pa + JOIN panel_lot_t pl ON pa.panel_id = pl.panel_id + JOIN lot_t l ON pl.lot_id = l.lot_id + WHERE l.accession_id = a.accession_id + GROUP BY pa.panel_id, pa.panel_name + ), + 'collection', array( + SELECT json_build_object( + 'id', co.collection_id, + 'name', ( + CASE + WHEN tr.translated_name is not null THEN tr.translated_name + ELSE co.collection_code + END + ), + 'type', ot.name_en, + 'germplasmCount', ( + SELECT count(distinct ac2.accession_id) + FROM accession_collection_t ac2 + WHERE ac2.collection_id = co.collection_id + ) + ) + FROM collections_t co + JOIN accession_collection_t ac ON co.collection_id = ac.collection_id + LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id + LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id + WHERE ac.accession_id = a.accession_id + GROUP BY co.collection_id, ot.name_en, tr.translated_name + ), + 'population', array( + SELECT json_build_object( + 'id', prg.population_id, + 'name', prg.population_name, + 'type', ot.name_en, + 'germplasmRef', json_build_object( + 'pui', acc.puid, + 'name', acc.accession_name + ), + 'germplasmCount', ( + SELECT count(distinct aprg2.accession_id) + FROM accession_population_rg_t aprg2 + WHERE aprg2.population_id = prg.population_id + ) + ) + FROM population_rg_t prg + JOIN accession_population_rg_t aprg ON aprg.population_id = prg.population_id + JOIN ontology_term_t ot ON ot.ontology_term_id = prg.population_type_id + LEFT JOIN accession_t acc ON acc.accession_id = prg.ref_accession_id + WHERE aprg.accession_id = a.accession_id + OR acc.accession_id = a.accession_id + GROUP BY prg.population_id, prg.population_name, ot.name_en, 
acc.puid, acc.accession_name + ), + + -- List of trial number in which this germplasm is used + 'studyDbIds', array( + SELECT DISTINCT t.trial_number + FROM trial_t t + JOIN trial_lot_t tl ON tl.trials_id = t.trial_id + JOIN lot_t l ON l.lot_id = tl.lots_id + WHERE l.accession_id = a.accession_id + ), + + -- IN MCPD -- + 'breederAccessionNumber', a.breeder_accession_number, + 'breedingCreationYear', a.breeding_creation_year, + 'catalogRegistrationYear', a.catalog_registration_year, + 'catalogDeregistrationYear', a.catalog_deregistration_year +)::jsonb FROM accession_t a +------------------- +-- PREVIOUS JOIN -- +------------------- + -- taxon JOIN taxon_t ta ON a.taxon_id = ta.taxon_id @@ -465,7 +566,7 @@ LEFT JOIN ontology_term_t biostat ON biostat.ontology_term_id = a.biological_sta -- genetic nature LEFT JOIN ontology_term_t genetic_nature ON genetic_nature.ontology_term_id = a.genetic_nature_id --- aggregates acccession comments in one line +-- aggregates accession comments in one line LEFT JOIN ( SELECT acc.accession_id, string_agg(distinct(translated_name), ', ') AS accession_comments FROM translations_t tra @@ -528,10 +629,20 @@ LEFT JOIN ( ) AS originCountry ON originCountry.site_id = a.origin_site_id -- collecting site +-- LEFT JOIN ( +-- SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type +-- FROM site_t s +-- JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id +--) AS collecting_site ON collecting_site.site_id = a.site_id + +-- collecting site Mcpd LEFT JOIN ( - SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + SELECT s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, ot.name_en AS site_type, ot2.numeric_code AS environment_type, string_agg(distinct(t.translated_name), ', ') AS description + FROM site_t s + JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id + LEFT JOIN 
ontology_term_t ot2 ON ot2.ontology_term_id = s.environment_type_id + LEFT JOIN translations_t t ON t.commented_site_id = s.site_id + GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, site_type, environment_type ) AS collecting_site ON collecting_site.site_id = a.site_id -- collector @@ -556,4 +667,4 @@ WHERE a.accession_id between :startPageId and :endPageId ORDER BY a.accession_id --ORDER BY RANDOM() LIMIT 500 --LIMIT 5 -; +; \ No newline at end of file diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql deleted file mode 100644 index b5d0934..0000000 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmV2.sql +++ /dev/null @@ -1,670 +0,0 @@ ----https://hashrocket.com/blog/posts/faster-json-generation-with-postgresql --- Warning: The name of this file is used as the elasticsearch document type name - --- Example of usage: --- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f germplasmV2.sql -v faidareURL=https://urgi.versailles.inrae.fr/faidare > germplasmV2.json - --- Making sure to stop psql with non-zero exit code on any error -\set ON_ERROR_STOP true - -SELECT json_build_object( - 'accessionNames', array( - SELECT a.accession_name - UNION - SELECT DISTINCT(accsyn.accession_synonym_name) - FROM accession_synonym_t accsyn - WHERE accsyn.accession_id = a.accession_id - ), - 'accessionNumber', a.accession_number, - 'acquisitionDate', a.collecting_date, - 'acquisitionSourcecode', collecting_site.environment_type, - 'acquisitionSourceCodeDescription', -- environmentType - 'alternadeIDs', null, - 'ancestralData', a.pedigree, - 'biologicalStatusOfAccessionCode', biostat.name_en, - 'biologicalStatusOfAccessionDescription', null, - 'breedingInstitutes', json_build_object( - 'instituteCode', bi.fao_code, - 'instituteName', bi.institution_name, - 'acronym', bi.acronym, - 'organisation', bi.organization, - 'instituteType', bi_type.name_en, - 'webSite', 
bi.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN bi_address.address is not null and bi_address.address != '' THEN bi_address.address END, - CASE WHEN bi_address.postal_box is not null and bi_address.postal_box != '' THEN bi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) != '' THEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) END, - CASE WHEN bi_address_country.name_en is not null and bi_address_country.name_en != '' THEN bi_address_country.name_en END - )), - 'logo', bi.institution_logo - ), - 'breedingMethodDbId', null, - 'breedingMethodName', null, - 'collection', array( - SELECT json_build_object( - 'id', co.collection_id, - 'name', ( - CASE - WHEN tr.translated_name is not null THEN tr.translated_name - ELSE co.collection_code - END - ), - 'type', ot.name_en, - 'germplasmCount', ( - SELECT count(distinct ac2.accession_id) - FROM accession_collection_t ac2 - WHERE ac2.collection_id = co.collection_id - ) - ) - FROM collections_t co - JOIN accession_collection_t ac ON co.collection_id = ac.collection_id - LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id - LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id - WHERE ac.accession_id = a.accession_id - GROUP BY co.collection_id, ot.name_en, tr.translated_name - ), - 'collectinInfo', json_build_object( - 'collectingDate', a.collecting_date, - 'collectingInstitutes', array( - SELECT json_build_object( - 'instituteName', ci.institution_name, - 'instituteCode', ci.fao_code, - 'acronym', ci.acronym, - 'organisation', ci.organization, - 'instituteType', ci_type.name_en, - 'webSite', ci.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN ci_address.address is not null and ci_address.address != '' THEN ci_address.address END, - CASE WHEN ci_address.postal_box is not null and ci_address.postal_box != '' THEN ci_address.postal_box END, - CASE WHEN 
trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) != '' THEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) END, - CASE WHEN ci_address_country.name_en is not null and ci_address_country.name_en != '' THEN ci_address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || ci.institution_logo - ) - FROM institution_t ci - LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id - LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id - LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id - WHERE ci.institution_id = a.collector_institution_id - ), - 'collectingMissionIdentifier', null, - 'collectingNumber', a.collecting_number, - 'collectingSite', json_build_object( - 'coordinateUncertainty', null, - 'elevation', collecting_site.elevation, - 'geoReferencingMethod', null, - 'latitudeDecimal', collecting_site.latitude, - 'latitudeDegrees', -- formule collecting_site.latitude, - 'locationDescription', collecting_site.description, - 'longitudeDecimal', collecting_site.longitude, - 'longitudeDegrees', -- formule collecting_site.longitude, - 'spatialReferenceSystem', null, - 'locationDbId', collecting_site.site_id, - 'locationName', collecting_site.site_name - ), - 'materialType', cmt.name_en, - 'collectors', a.collectors_list - ), - 'commonCropName', ( - CASE - WHEN ta.dataset_id = (select dataset_id from dataset_t where dataset_name = 'Forest tree taxa') AND a.taxon_group != 'Cherry' AND a.taxon_group != 'Walnut' THEN 'Forest tree' - ELSE a.taxon_group - END - ), - 'countryOfOriginCode', originCountry.name_en, - 'originSite', json_build_object( - 'siteId', origin_site.site_id, - 'siteName', origin_site.site_name, - 'latitude', origin_site.latitude, - 'longitude', origin_site.longitude, - 'siteType', origin_site.site_type - ), - 'defaultDisplayName', 
a.accession_name, - 'documentationURL', null, - 'donorInfo', array( - SELECT json_build_object( - 'donorAccessionNumber', a.donor_accession_number, - 'donorAccessionPUI', null, - 'donorInstitute', json_build_object( - 'instituteName', donor.institution_name, - 'instituteCode', donor.fao_code, - 'acronym', donor.acronym, - 'organisation', donor.organization, - 'instituteType', donor_type.name_en, - 'webSite', donor.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN address.address is not null and address.address != '' THEN address.address END, - CASE WHEN address.postal_box is not null and address.postal_box != '' THEN address.postal_box END, - CASE WHEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) != '' THEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) END, - CASE WHEN address_country.name_en is not null and address_country.name_en != '' THEN address_country.name_en END - )), - 'logo', donor.institution_logo - ), - 'donationDate', a.donation_date - ) - FROM institution_t donor - LEFT JOIN ontology_term_t donor_type ON donor_type.ontology_term_id = donor.institution_type_id - LEFT JOIN address_t address ON address.address_id = donor.address_id - LEFT JOIN ontology_term_t address_country ON address_country.ontology_term_id = address.country_id - WHERE donor.institution_id = a.donor_institution_id - ), - 'externalReferences', --, - 'genus', ( - CASE - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus - ELSE '' - END - ), - 'germplasmDbId', text(a.accession_id), - 'germplasmName', a.accession_name, - 'germplasmPUI', a.puid, - 'instituteCode', hi.fao_code, - 'instituteName', hi.institution_name, - 'holdingInstitute', json_build_object( - 'instituteName', hi.institution_name, - 'instituteCode', hi.fao_code, - 'acronym', hi.acronym, - 'organisation', hi.organization, - 'instituteType', hi_type.name_en, - 'webSite', hi.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN hi_address.address is not null 
and hi_address.address != '' THEN hi_address.address END, - CASE WHEN hi_address.postal_box is not null and hi_address.postal_box != '' THEN hi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) != '' THEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) END, - CASE WHEN hi_address_country.name_en is not null and hi_address_country.name_en != '' THEN hi_address_country.name_en END - )), - 'logo', hi.institution_logo - ), - 'holdingGenbank', json_build_object ( - 'instituteName', ( - CASE - WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name - ELSE '' - END - ), - 'instituteCode', grc.grc_code, - 'webSite', grc.internet_site, - 'logo', - CASE - WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' - ELSE '' - END - ), - 'mlsStatus', ( - SELECT - CASE string_agg(distinct(ott.textual_code), ', ') - WHEN 'MLS' THEN '1' - ELSE '0' - END - FROM accession_distributor_t ad - JOIN ontology_term_t ott ON ott.ontology_term_id = ad.distribution_status_id - WHERE ad.accession_id = a.accession_id - ), - 'pedigree', a.pedigree, - 'remarks', agg_accession_comments.accession_comments, - 'species', ta.species, - 'speciesAuthority', species.author_list, - 'safetyDuplicateInstitutes', json_build_object ( - 'instituteCode', hi.fao_code, - 'instituteName', hi.institution_name - ), - 'storageType', null, - 'subtaxa', trim(concat_ws(' ', - CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, - CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, - CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, - CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, - CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. 
' || ta.tax_group END, - CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END - )), - 'subtaxaAuthority', ( - CASE - WHEN (ta.subspecies is not null AND ta.subspecies != '') - OR (ta.variety is not null AND ta.variety != '') - OR (ta.cultivar is not null AND ta.cultivar != '') - OR (ta.form is not null AND ta.form != '') - OR (ta.tax_group is not null AND ta.tax_group != '') - OR (ta.subgroup is not null AND ta.subgroup != '') - THEN ta.author_list - ELSE '' - END - ), - 'synonyms', null, - 'taxonIds', ta.taxon_external_ids, - - -- OTHER IN GERMPLASM -- - 'groupId', a.group_id, - 'seedSource', null, - 'source', 'INRAE-URGI', - 'typeOfGermplasmStorageCode', null, - 'genusSpecies', ( - CASE - WHEN ta.species is not null AND ta.species != '' THEN - CASE - WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species - ELSE ta.species - END - ELSE '' - END - ), - 'speciesAuthority', species.author_list, - 'genusSpeciesSubtaxa', ( - CASE - WHEN (ta.subspecies is not null AND ta.subspecies != '') - OR (ta.variety is not null AND ta.variety != '') - OR (ta.cultivar is not null AND ta.cultivar != '') - OR (ta.form is not null AND ta.form != '') - OR (ta.tax_group is not null AND ta.tax_group != '') - OR (ta.subgroup is not null AND ta.subgroup != '') - THEN concat_ws(' ', - CASE - WHEN ta.subgenus is not null AND ta.subgenus != '' and ta.subgenus != '-' THEN ta.subgenus || ' ' || ta.species - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus || ' ' || ta.species - ELSE ta.species - END, - CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, - CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, - CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. 
' || ta.cultivar END, - CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, - CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, - CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. ' || ta.subgroup END - ) - ELSE '' - END - ), - - 'taxonSynonyms', array( - SELECT distinct ts.taxon_synonym_name as ta_synonym_name - FROM taxon_t ta - JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id - JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id - JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id - WHERE ot.textual_code = 'SCIENTIFIC' - AND ta.taxon_id = a.taxon_id - ), - 'taxonCommonNames', array( - SELECT distinct ts.taxon_synonym_name as ta_common_name - FROM taxon_t ta - JOIN taxon_synonym_taxon_t tst ON tst.taxons_id = ta.taxon_id - JOIN taxon_synonym_t ts ON ts.taxon_synonym_id = tst.taxon_synonyms_id - JOIN ontology_term_t ot ON ot.ontology_term_id = ts.name_type_id - WHERE ot.textual_code != 'SCIENTIFIC' - AND ta.taxon_id = a.taxon_id - ), - 'taxonComment', ta.taxon_comment_en, - 'geneticNature', genetic_nature.name_en, - 'photo', json_build_object( - 'file', ( - 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || - CASE - WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.file_name - ELSE photo.file_name - END - ), - 'thumbnailFile', ( - 'https://urgi.versailles.inrae.fr/files/siregal/images/accession/' || - CASE - WHEN grc.grc_code is not null and grc.grc_code != '' THEN grc.grc_code || '/' || photo.thumbnail_file_name - ELSE photo.thumbnail_file_name - END - ), - 'photoName', photo.photo_name_en, - 'description', photo.description_en, - 'copyright', trim(concat_ws(' ', photo.author, photo.photo_year)) - ), - - 'holdingGenbank', json_build_object ( - 'instituteName', ( - CASE - WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name - ELSE 
'' - END - ), - 'instituteCode', grc.grc_code, - 'webSite', grc.internet_site, - 'logo', - CASE - WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' - ELSE '' - END - ), - 'accessionHolder', ( - CASE - WHEN (select string_agg(distinct(coll.collection_code), ', ') - from accession_collection ac - join collections coll on coll.collection_id = ac.collection_id - where a.accession_id = ac.accession_id - group by ac.accession_id) like '%BRC4Forest_CMD%' THEN - CASE - WHEN ds.dataset_name = 'Salicaceae' THEN 'Forest BRC - Orleans' - WHEN ds.dataset_name = 'Pinus Portal' or ds.dataset_name = 'Quercus Portal' THEN 'Forest BRC - Pierroton' - WHEN ds.dataset_name = 'PlantaExp' THEN 'Forest BRC - Avignon' - ELSE 'Forest BRC' - END - --WHEN grc.grc_code is not null and grc.grc_code != 'BRC4Forest' THEN replace(grc.grc_code, '_', ' ') - ELSE '' - END - ), - 'presenceStatus', pres_status.name_en, - 'genealogy', json_build_object( - 'crossingPlan', genea.crossing_plan, - 'crossingYear', genea.crossing_year, - 'familyCode', genea.family_code, - 'firstParentName', fp.accession_name, - 'firstParentPUI', fp.puid, - 'firstParentType', fp_type.name_en, - 'secondParentName', CASE WHEN sp.accession_name != 'None' THEN sp.accession_name ELSE '' END, - 'secondParentPUI', CASE WHEN sp.accession_name != 'None' THEN sp.puid ELSE '' END, - 'secondParentType', CASE WHEN sp.accession_name != 'None' THEN sp_type.name_en ELSE '' END, - 'sibblings', array( - SELECT json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ) - FROM accession_t acc - WHERE acc.genealogy_id = genea.genealogy_id AND acc.accession_id != a.accession_id - ) - ), - 'children', array( - SELECT json_build_object( - 'firstParentName', p1.accession_name, - 'firstParentPUI', p1.puid, - 'secondParentName', CASE WHEN p2.accession_name != 'None' THEN p2.accession_name ELSE '' END, - 'secondParentPUI', CASE WHEN p2.accession_name != 'None' THEN p2.puid ELSE '' 
END, - 'sibblings', array( - SELECT json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ) - FROM accession_t acc - WHERE acc.genealogy_id = g.genealogy_id - ) - ) - FROM genealogy_t g - JOIN accession_t p1 ON p1.accession_id = g.first_parent_id - LEFT JOIN accession_t p2 ON p2.accession_id = g.second_parent_id - WHERE p1.accession_id = a.accession_id OR p2.accession_id = a.accession_id - ), - 'descriptors', array( - SELECT json_build_object( - 'name', ot.name_en, - 'value', ad.descriptor_value - ) - FROM accession_descriptor_t ad - JOIN ontology_term_t ot ON ot.ontology_term_id = ad.descriptor_id - WHERE ad.accession_id = a.accession_id - ), - 'originSite', json_build_object( - 'siteId', origin_site.site_id, - 'siteName', origin_site.site_name, - 'latitude', origin_site.latitude, - 'longitude', origin_site.longitude, - 'siteType', origin_site.site_type - ), - 'collectingSite', json_build_object( - 'siteId', collecting_site.site_id, - 'siteName', collecting_site.site_name, - 'latitude', collecting_site.latitude, - 'longitude', collecting_site.longitude, - 'siteType', collecting_site.site_type - ), - 'evaluationSites', array( - SELECT json_build_object( - 'siteId', s.site_id, - 'siteName', s.site_name, - 'latitude', s.latitude, - 'longitude', s.longitude, - 'siteType', ot.name_en - ) - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id - JOIN trial_t t ON t.site_id = s.site_id - JOIN trial_lot_t tl ON tl.trials_id = t.trial_id - JOIN lot_t l ON l.lot_id = tl.lots_id - WHERE l.accession_id = a.accession_id - GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en - ), - 'distributors', array( - SELECT json_build_object( - 'institute', json_build_object( - 'instituteName', di.institution_name, - 'instituteCode', di.fao_code, - 'acronym', di.acronym, - 'organisation', di.organization, - 'instituteType', di_type.name_en, - 'webSite', di.internet_site, - 'address', trim(concat_ws(', ', - CASE WHEN 
di_address.address is not null AND di_address.address != '' THEN di_address.address END, - CASE WHEN di_address.postal_box is not null AND di_address.postal_box != '' THEN di_address.postal_box END, - CASE WHEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) != '' THEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) END, - CASE WHEN di_address_country.name_en is not null AND di_address_country.name_en != '' THEN di_address_country.name_en END - )), - 'logo', di.institution_logo - ), - 'accessionNumber', ad.distributor_accession_number, - 'distributionStatus', ot.name_en - ) - FROM accession_distributor_t ad - JOIN ontology_term_t ot ON ot.ontology_term_id = ad.distribution_status_id - JOIN institution_t di ON di.institution_id = ad.distributor_institution_id - LEFT JOIN ontology_term_t di_type ON di_type.ontology_term_id = di.institution_type_id - LEFT JOIN address_t di_address ON di_address.address_id = di.address_id - LEFT JOIN ontology_term_t di_address_country ON di_address_country.ontology_term_id = di_address.country_id - WHERE ad.accession_id = a.accession_id - ), - 'panel', array( - SELECT json_build_object( - 'id', pa.panel_id, - 'name', pa.panel_name, - 'germplasmCount', ( - SELECT count(distinct l2.accession_id) - FROM lot_t l2 - JOIN panel_lot_t pl2 on pl2.lot_id = l2.lot_id - WHERE pl2.panel_id = pa.panel_id - ) - ) - FROM panel_t pa - JOIN panel_lot_t pl ON pa.panel_id = pl.panel_id - JOIN lot_t l ON pl.lot_id = l.lot_id - WHERE l.accession_id = a.accession_id - GROUP BY pa.panel_id, pa.panel_name - ), - 'collection', array( - SELECT json_build_object( - 'id', co.collection_id, - 'name', ( - CASE - WHEN tr.translated_name is not null THEN tr.translated_name - ELSE co.collection_code - END - ), - 'type', ot.name_en, - 'germplasmCount', ( - SELECT count(distinct ac2.accession_id) - FROM accession_collection_t ac2 - WHERE ac2.collection_id = co.collection_id - ) - ) - FROM collections_t co - JOIN 
accession_collection_t ac ON co.collection_id = ac.collection_id - LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id - LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id - WHERE ac.accession_id = a.accession_id - GROUP BY co.collection_id, ot.name_en, tr.translated_name - ), - 'population', array( - SELECT json_build_object( - 'id', prg.population_id, - 'name', prg.population_name, - 'type', ot.name_en, - 'germplasmRef', json_build_object( - 'pui', acc.puid, - 'name', acc.accession_name - ), - 'germplasmCount', ( - SELECT count(distinct aprg2.accession_id) - FROM accession_population_rg_t aprg2 - WHERE aprg2.population_id = prg.population_id - ) - ) - FROM population_rg_t prg - JOIN accession_population_rg_t aprg ON aprg.population_id = prg.population_id - JOIN ontology_term_t ot ON ot.ontology_term_id = prg.population_type_id - LEFT JOIN accession_t acc ON acc.accession_id = prg.ref_accession_id - WHERE aprg.accession_id = a.accession_id - OR acc.accession_id = a.accession_id - GROUP BY prg.population_id, prg.population_name, ot.name_en, acc.puid, acc.accession_name - ), - - -- List of trial number in which this germplasm is used - 'studyDbIds', array( - SELECT DISTINCT t.trial_number - FROM trial_t t - JOIN trial_lot_t tl ON tl.trials_id = t.trial_id - JOIN lot_t l ON l.lot_id = tl.lots_id - WHERE l.accession_id = a.accession_id - ), - - -- IN MCPD -- - 'breederAccessionNumber', a.breeder_accession_number, - 'breedingCreationYear', a.breeding_creation_year, - 'catalogRegistrationYear', a.catalog_registration_year, - 'catalogDeregistrationYear', a.catalog_deregistration_year -) - -FROM accession_t a - -------------------- --- PREVIOUS JOIN -- -------------------- - --- taxon -JOIN taxon_t ta ON a.taxon_id = ta.taxon_id - --- Recursive join to get species authors -LEFT JOIN ( - WITH RECURSIVE tax(descendant_id, rank, taxon_id, parent_id) AS ( - SELECT descendant_id, bt.name, tax.taxon_id, tax.parent_id, 
tax.author_list - FROM taxon_t tax - JOIN taxon_path_t tax_path ON tax_path.ascendant_id = tax.taxon_id - JOIN bio_type_t bt ON bt.bio_type_id = tax.rank_id - UNION ALL - SELECT tax.descendant_id, bt.name, parent_tax.taxon_id, parent_tax.parent_id, parent_tax.author_list - FROM tax - JOIN taxon_t parent_tax ON parent_tax.taxon_id = tax.parent_id - JOIN bio_type_t bt ON bt.bio_type_id = parent_tax.rank_id - ) - SELECT * FROM tax - WHERE RANK = 'species' - LIMIT 1 -) AS species ON species.descendant_id = ta.taxon_id - --- biological status -LEFT JOIN ontology_term_t biostat ON biostat.ontology_term_id = a.biological_status_id - --- genetic nature -LEFT JOIN ontology_term_t genetic_nature ON genetic_nature.ontology_term_id = a.genetic_nature_id - --- aggregates accession comments in one line -LEFT JOIN ( - SELECT acc.accession_id, string_agg(distinct(translated_name), ', ') AS accession_comments - FROM translations_t tra - JOIN accession_t acc ON tra.commented_accession_id = acc.accession_id - GROUP BY acc.accession_id -) AS agg_accession_comments ON agg_accession_comments.accession_id = a.accession_id - --- photo -LEFT JOIN photo_t photo ON photo.photo_id = a.photo_id - --- holding institution -LEFT JOIN institution_t hi ON hi.institution_id = a.holding_institution_id -LEFT JOIN ontology_term_t hi_type ON hi_type.ontology_term_id = hi.institution_type_id -LEFT JOIN address_t hi_address ON hi_address.address_id = hi.address_id -LEFT JOIN ontology_term_t hi_address_country ON hi_address_country.ontology_term_id = hi_address.country_id - --- grc -LEFT JOIN grc_t grc ON grc.grc_id = a.grc_id -LEFT JOIN ( - SELECT grc_tra.named_grc_id, grc_tra.translated_name - FROM translations_t grc_tra - JOIN languages_t l ON grc_tra.language_id = l.language_id - WHERE l.language_code = 'en' -) AS grc_name ON grc_name.named_grc_id = a.grc_id - --- presence status -LEFT JOIN ontology_term_t pres_status ON pres_status.ontology_term_id = a.presence_status_id - --- genealogy -LEFT JOIN 
genealogy_t genea on genea.genealogy_id = a.genealogy_id -LEFT JOIN accession_t fp ON fp.accession_id = genea.first_parent_id -LEFT JOIN ontology_term_t fp_type ON fp_type.ontology_term_id = genea.first_parent_type_id -LEFT JOIN accession_t sp ON sp.accession_id = genea.second_parent_id -LEFT JOIN ontology_term_t sp_type ON sp_type.ontology_term_id = genea.second_parent_type_id - --- origin site -LEFT JOIN ( - SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id -) AS origin_site ON origin_site.site_id = a.origin_site_id - --- Recursive join on site_t.geographical_location_id = ontology_term_t.ontology_term_id --- Select geographical location level = COUNTRY or OLD_COUNTRY -LEFT JOIN ( - WITH RECURSIVE geo(site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en) AS ( - SELECT location.site_id, level.textual_code, geo.ontology_term_id, geo.parent_id, geo.textual_code, geo.name_en - FROM ontology_term_t geo - JOIN site_t location ON location.geographical_location_id = geo.ontology_term_id - LEFT JOIN ontology_term_t level ON level.ontology_term_id = geo.term_level_id - UNION ALL - SELECT geo.site_id, level.textual_code, parent_geo.ontology_term_id, parent_geo.parent_id, parent_geo.textual_code, parent_geo.name_en - FROM geo - JOIN ontology_term_t parent_geo ON parent_geo.ontology_term_id = geo.parent_id - LEFT JOIN ontology_term_t level ON level.ontology_term_id = parent_geo.term_level_id - ) - SELECT DISTINCT ON (site_id) site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en - FROM geo - WHERE level_textual_code = 'COUNTRY' OR level_textual_code = 'OLD_COUNTRY' -) AS originCountry ON originCountry.site_id = a.origin_site_id - --- collecting site --- LEFT JOIN ( --- SELECT s.site_id, s.site_name, s.latitude, s.longitude, ot.name_en AS site_type --- FROM site_t s --- JOIN ontology_term_t ot ON 
ot.ontology_term_id = s.site_type_id ---) AS collecting_site ON collecting_site.site_id = a.site_id - --- collecting site Mcpd -LEFT JOIN ( - SELECT s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, ot.name_en AS site_type, ot2.numeric_code AS environment_type, string_agg(distinct(t.translated_name), ', ') AS description - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id - LEFT JOIN ontology_term_t ot2 ON ot2.ontology_term_id = s.environment_type_id - LEFT JOIN translations_t t ON t.commented_site_id = s.site_id - GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, site_type, environment_type -) AS collecting_site ON collecting_site.site_id = a.site_id - --- collector -LEFT JOIN institution_t ci ON ci.institution_id = a.collector_institution_id -LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id -LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id -LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id - --- collected material type -LEFT JOIN ontology_term_t cmt ON cmt.ontology_term_id = a.collected_material_type_id - --- breeder -LEFT JOIN institution_t bi ON bi.institution_id = a.breeder_institution_id -LEFT JOIN ontology_term_t bi_type ON bi_type.ontology_term_id = bi.institution_type_id -LEFT JOIN address_t bi_address ON bi_address.address_id = bi.address_id -LEFT JOIN ontology_term_t bi_address_country ON bi_address_country.ontology_term_id = bi_address.country_id - --- dataset -LEFT JOIN dataset_t ds ON ds.dataset_id = a.dataset_id - -WHERE a.accession_id between :startPageId and :endPageId -ORDER BY a.accession_id ---ORDER BY RANDOM() LIMIT 500 ---LIMIT 5 -; \ No newline at end of file -- GitLab From 22193565f6e47345aa99896a9a1e45de680e5ad5 Mon Sep 17 00:00:00 2001 From: Nicolas Francillonne <nicolas.francillonne@inrae.fr> Date: Mon, 14 Aug 2023 17:13:59 +0200 Subject: [PATCH 07/23] Ajout 
du code numerique et de la description du status biologique des accessions --- etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql index ad9aab6..7d5d6c9 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql @@ -21,8 +21,8 @@ SELECT json_build_object( -- environmentType 'acquisitionSourceCodeDescription', 'alternadeIDs', null, 'ancestralData', a.pedigree, - 'biologicalStatusOfAccessionCode', biostat.name_en, - 'biologicalStatusOfAccessionDescription', null, + 'biologicalStatusOfAccessionCode', biostat.numeric_code, + 'biologicalStatusOfAccessionDescription', biostat.name_en, 'breedingInstitutes', json_build_object( 'instituteCode', bi.fao_code, 'instituteName', bi.institution_name, -- GitLab From b633481e7d2636fd7bdd0c56ce32a0e2a6b38895 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Mon, 23 Oct 2023 15:42:03 +0200 Subject: [PATCH 08/23] Add BrapiV2 fields - study.sql --- .../gnpis-pg-to-json/study.sql | 287 ++++++++++-------- 1 file changed, 159 insertions(+), 128 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index 56a6b5d..ef0617b 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -12,69 +12,15 @@ -- EXTRACT PHENOTYPING STUDIES SELECT json_build_object( - 'groupId', study_pheno.group_id, - - -- Study = GnpIS trial - 'documentationURL', ( - CASE study_pheno.group_id - WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'ephesis/ephesis/viewer.do#trialCard/trialId=', study_pheno.trial_id) - END - ), - 'studyDbId', text(study_pheno.trial_number), - 'studyName', study_pheno.name, - 'name', study_pheno.name, - 'startDate', to_char(study_pheno.date_begin, 'YYYY-MM-dd'), - 
'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), 'active', status.textual_code LIKE '%ACTIVE%', - 'studyType', ( - CASE - WHEN type.name_en is null THEN 'Phenotyping Study' - ELSE type.name_en - END - ), - 'lastUpdate', json_build_object( - 'version', NULL, - -- Print ISO date format ex: "2017-06-21T16:06:56Z" - 'timestamp', to_char(study_pheno.update_date::timestamp at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') - ), - - -- Trials = GnpIS trial set - 'trialDbId', NULL, - 'trialName', NULL, - 'trialDbIds', array( - SELECT DISTINCT text(trial.trial_set_id) - FROM trial_set_t trial - JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id AND trialStudy.trials_id = study_pheno.trial_id - ), - - -- Program = GnpIS project - 'programDbId', text(program.project_code), - 'programName', program.project_name, - - -- Seasons = GnpIS phenotyping campaigns - 'seasons', array( - SELECT DISTINCT season.name - FROM phenotyping_campaign_t season - WHERE study_pheno.trial_id = season.trial_id - ), - -- Location = GnpIS site - 'locationDbId', text(location.site_id), - 'locationName', location.site_name, - -- Location for study details => fetch location detail from location index - 'location', json_build_object('locationDbId', text(location.site_id)), - - -- Data links = GnpIS trial data files - 'dataLinks', array( - SELECT json_build_object( - 'name', dataLink.file_name, - 'type', dataLinkType.name, - 'url', 'https://urgi.versailles.inrae.fr/files' || dataLink.file_path || '/' || dataLink.file_name - ) - FROM data_file_t dataLink - JOIN bio_type_t dataLinkType ON dataLinkType.bio_type_id = dataLink.file_type_id - WHERE dataLink.trial_id = study_pheno.trial_id + -- Additional info = GnpIS trial info + 'additionalInfo', ( + SELECT json_object_agg(key, value) + FROM trial_info_t studyAdditionalInfo + WHERE studyAdditionalInfo.trial_id = study_pheno.trial_id ), + 'commonCropName', NULL, -- Contacts = GnpIS contacts 'contacts', array( @@ -96,13 
+42,37 @@ SELECT json_build_object( JOIN trial_contact_t studyContact ON contact.contact_id = studyContact.contact_id AND studyContact.trial_id = study_pheno.trial_id ), - -- Observation variables = GnpIS observation variables - 'observationVariableDbIds', array( - SELECT DISTINCT observationVariable.term_identifier - FROM observation_variable_t observationVariable - JOIN trial_observation_variable_t tov ON observationVariable.observation_variable_id = tov.observation_variables_id AND tov.trials_id = study_pheno.trial_id + 'culturalPractices', NULL, + + -- Data links = GnpIS trial data files + 'dataLinks', array( + SELECT json_build_object( + 'dataFormat', NULL, + 'description', NULL, + 'fileFormat', NULL, + 'name', dataLink.file_name, + 'provenance', NULL, + 'scientificType', dataLinkType.name, -- BrAPI v2 dataLinks field name + 'url', 'https://urgi.versailles.inrae.fr/files' || dataLink.file_path || '/' || dataLink.file_name, + 'version', NULL + ) + FROM data_file_t dataLink + JOIN bio_type_t dataLinkType ON dataLinkType.bio_type_id = dataLink.file_type_id + WHERE dataLink.trial_id = study_pheno.trial_id + ), -- TODO: review which fields this object should contain + + -- Study = GnpIS trial + 'documentationURL', ( + CASE study_pheno.group_id + WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'ephesis/ephesis/viewer.do#trialCard/trialId=', study_pheno.trial_id) + END ), + 'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), + 'environmentParameters', NULL, + 'experimentalDesign', NULL, -- BrAPI v2 study field name + 'externalReferences', NULL, + -- Germplasm = GnpIS accessions 'germplasmDbIds', array( SELECT DISTINCT text(germplasm.accession_id) @@ -110,12 +80,62 @@ SELECT json_build_object( JOIN lot_t lot ON lot.accession_id = germplasm.accession_id JOIN trial_lot_t studyLot ON studyLot.lots_id = lot.lot_id AND studyLot.trials_id = study_pheno.trial_id ), + 'groupId', study_pheno.group_id, + 'growthFacility', NULL, + 'lastUpdate', json_build_object( + 'version', NULL, + -- Print ISO date format ex: "2017-06-21T16:06:56Z" + 'timestamp',
to_char(study_pheno.update_date::timestamp at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') + ), + 'license', NULL, - -- Additional info = GnpIS trial info - 'additionalInfo', ( - SELECT json_object_agg(key, value) - FROM trial_info_t studyAdditionalInfo - WHERE studyAdditionalInfo.trial_id = study_pheno.trial_id + -- Location for study details => fetch location detail from location index + 'location', json_build_object('locationDbId', text(location.site_id)), + -- Location = GnpIS site + 'locationDbId', text(location.site_id), + 'locationName', location.site_name, + 'name', study_pheno.name, + 'observationLevels', NULL, + 'observationUnitsDescription', NULL, + + -- Observation variables = GnpIS observation variables + 'observationVariableDbIds', array( + SELECT DISTINCT observationVariable.term_identifier + FROM observation_variable_t observationVariable + JOIN trial_observation_variable_t tov ON observationVariable.observation_variable_id = tov.observation_variables_id AND tov.trials_id = study_pheno.trial_id + ), + + -- Program = GnpIS project + 'programDbId', text(program.project_code), + 'programName', program.project_name, + + -- Seasons = GnpIS phenotyping campaigns + 'seasons', array( + SELECT DISTINCT season.name + FROM phenotyping_campaign_t season + WHERE study_pheno.trial_id = season.trial_id + ), + + 'startDate', to_char(study_pheno.date_begin, 'YYYY-MM-dd'), + 'studyCode', NULL, + 'studyDbId', text(study_pheno.trial_number), + 'studyDescription', NULL, + 'studyName', study_pheno.name, + 'studyPUI', NULL, + 'studyType', ( + CASE + WHEN type.name_en is null THEN 'Phenotyping Study' + ELSE type.name_en + END + ), + + -- Trials = GnpIS trial set + 'trialDbId', NULL, + 'trialName', NULL, + 'trialDbIds', array( + SELECT DISTINCT text(trial.trial_set_id) + FROM trial_set_t trial + JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id AND trialStudy.trials_id = study_pheno.trial_id ) ) FROM trial_t study_pheno @@ -138,62 +158,10
@@ WHERE study_pheno.trial_id = :trialId OR :trialId IS NULL -- EXTRACT GENOTYPING STUDIES SELECT json_build_object( - 'groupId', study_geno.group_id, - -- Study = GnpIS genotyping experiment - 'documentationURL', ( - CASE study_geno.group_id - WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'GnpSNP/snp/genotyping/form.do#results/experimentIds=', study_geno.genotyping_experiment_id) - END - ), - 'studyDbId', text(study_geno.genotyping_experiment_id), - 'studyName', study_geno.genotyping_experiment_name, - 'name', study_geno.genotyping_experiment_name, - 'startDate', NULL, - 'endDate', NULL, 'active', NULL, - 'studyType', ( - CASE - WHEN type.name is null THEN 'Genotyping Study' - ELSE type.name - END - ), - 'lastUpdate', json_build_object( - 'version', NULL, - -- Print ISO date format ex: "2017-06-21T16:06:56Z" - 'timestamp', to_char(study_geno.update_date::timestamp at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') - ), - - -- Trials = NA - 'trialDbId', NULL, - 'trialName', NULL, - 'trialDbIds', NULL, - - -- Program = GnpIS project - 'programDbId', text(program.project_code), - 'programName', program.project_name, - - -- Seasons = NA - 'seasons', NULL, - - -- Location = NA - 'locationDbId', NULL, - 'locationName', NULL, - -- Location for study details => fetch location detail from location index - 'location', NULL, - - -- Data links = GnpIS data files - 'dataLinks', array( - SELECT json_build_object( - 'name', dataLink.file_name, - 'type', dataLinkType.name, - 'url', 'https://urgi.versailles.inrae.fr/files' || dataLink.file_path || '/' || dataLink.file_name - ) - FROM data_file_t dataLink - JOIN bio_type_t dataLinkType ON dataLinkType.bio_type_id = dataLink.file_type_id - WHERE dataLink.file_id = study_geno.datafile_id - ), - + 'additionalInfo', NULL, + 'commonCropName', NULL, -- Contacts = GnpIS contacts 'contacts', array( SELECT json_build_object( @@ -213,9 +181,35 @@ SELECT json_build_object( JOIN institution_t contactInstitution ON contact.institution_id =
contactInstitution.institution_id WHERE contact.contact_id = study_geno.contact_id ), + 'culturalPractices', NULL, - -- Observation variables = NA - 'observationVariableDbIds', NULL, + -- Data links = GnpIS data files + 'dataLinks', array( + SELECT json_build_object( + 'dataFormat', NULL, + 'description', NULL, + 'fileFormat', NULL, + 'name', dataLink.file_name, + 'provenance', NULL, + 'scientificType', dataLinkType.name, -- BrAPI v2 dataLinks field name + 'url', 'https://urgi.versailles.inrae.fr/files' || dataLink.file_path || '/' || dataLink.file_name, + 'version', NULL + ) + FROM data_file_t dataLink + JOIN bio_type_t dataLinkType ON dataLinkType.bio_type_id = dataLink.file_type_id + WHERE dataLink.file_id = study_geno.datafile_id + ), + + -- Study = GnpIS genotyping experiment + 'documentationURL', ( + CASE study_geno.group_id + WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'GnpSNP/snp/genotyping/form.do#results/experimentIds=', study_geno.genotyping_experiment_id) + END + ), + 'endDate', NULL, + 'environmentParameters', NULL, + 'experimentalDesign', NULL, -- BrAPI v2 study field name + 'externalReferences', NULL, -- Germplasm = GnpIS accessions 'germplasmDbIds', array( @@ -225,9 +219,46 @@ SELECT json_build_object( JOIN genotyping_exp_lot_t studyLot ON studyLot.lot_id = lot.lot_id WHERE studyLot.genotyping_experiment_id = study_geno.genotyping_experiment_id ), + 'groupId', study_geno.group_id, + 'growthFacility', NULL, + 'lastUpdate', json_build_object( + 'version', NULL, + -- Print ISO date format ex: "2017-06-21T16:06:56Z" + 'timestamp', to_char(study_geno.update_date::timestamp at time zone 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') + ), + 'license', NULL, - -- Additional info = NA - 'additionalInfo', NULL + -- Location for study details => fetch location detail from location index + 'location', NULL, + 'locationDbId', NULL, + 'locationName', NULL, + 'name', study_geno.genotyping_experiment_name, + 'observationLevels', NULL, + 'observationUnitsDescription', NULL, + 'observationVariableDbIds', NULL, + + -- Program = GnpIS project +
'programDbId', text(program.project_code), + 'programName', program.project_name, + 'seasons', NULL, + 'startDate', NULL, + 'studyCode', NULL, + 'studyDbId', text(study_geno.genotyping_experiment_id), + 'studyDescription', NULL, + 'studyName', study_geno.genotyping_experiment_name, + 'studyPUI', NULL, + 'studyType', ( + CASE + WHEN type.name is null THEN 'Genotyping Study' + ELSE type.name + END + ), + + -- Trials = NA + 'trialDbId', NULL, + 'trialName', NULL, + 'trialDbIds', NULL + ) FROM genotyping_experiment_t study_geno -- GitLab From d2a956ed84064aeb8ce32744d447f3c014670a63 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Mon, 23 Oct 2023 17:40:28 +0200 Subject: [PATCH 09/23] WIP: location.sql --- .../gnpis-pg-to-json/location.sql | 127 ++++++++++-------- 1 file changed, 69 insertions(+), 58 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql index 10d1eb2..2d058b0 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql @@ -8,44 +8,9 @@ \set ON_ERROR_STOP true SELECT json_build_object( - 'groupId', location.group_id, - - -- Location = GnpIS site - 'documentationURL', NULL, - 'locationDbId', text(location.site_id), + 'locationDbId', text(location.site_id), 'locationName', location.site_name, - 'name', location.site_name, --removed in BrAPI v2 'abbreviation', NULL, - 'locationType', locationType.name_en, - - -- Country = GnpIS geographical location ontology term - 'countryCode', locationCountry.textual_code, - 'countryName', locationCountry.name_en, - - -- Institution = GnpIS site landowner - 'instituteName', locationInstitution.institution_name, - 'instituteAddress', nullif(concat_ws( - ', ', - -- POSTAL_BOX ROAD - nullif(concat_ws( - ' ', - nullif(locationInstitutionAddress.postal_box, ''), - nullif(locationInstitutionAddress.address, '') - ), ''), - -- POSTCODE CITY CEDEX - nullif(concat_ws( - ' 
', - nullif(nullif(locationInstitutionAddress.postcode, ''), '_'), - nullif(locationInstitutionAddress.city, ''), - nullif(locationInstitutionAddress.cedex, '') - ), ''), - -- COUNTRY - nullif(locationInstitutionAddressCountry.name_en, '') - ), ''), - - 'altitude', location.elevation, - 'latitude', location.latitude, - 'longitude', location.longitude, -- Additional info = GnpIS site info 'additionalInfo', ( @@ -55,22 +20,11 @@ SELECT json_build_object( FROM site_info_t locationAdditionalInfo WHERE locationAdditionalInfo.site_id = location.site_id - UNION - SELECT 'Site status' AS key, ot.name_en AS value - FROM ontology_term_t ot - WHERE location.status_id = ot.ontology_term_id - UNION SELECT 'Coordinates precision' AS key, ot.name_en AS value FROM ontology_term_t ot WHERE location.coordinates_precision_id = ot.ontology_term_id - UNION - SELECT 'Slope' AS key, location.slope AS value - - UNION - SELECT 'Exposure'AS key, location.exposure AS value - UNION SELECT 'Geographical location'AS key, string_agg(geoLocation.name_en, ' > ') AS value FROM site_t site @@ -99,22 +53,79 @@ SELECT json_build_object( FROM ontology_term_t directionFromCity WHERE location.direction_from_city_id = directionFromCity.ontology_term_id - UNION - SELECT 'Environment type' AS key, environmentType.name_en AS value - FROM ontology_term_t environmentType - WHERE location.environment_type_id = environmentType.ontology_term_id - - UNION - SELECT 'Topography' AS key, topography.name_en AS value - FROM ontology_term_t topography - WHERE topography.ontology_term_id = location.topography_id - UNION SELECT 'Comment' AS key, string_agg(distinct(tra.translated_name), ', ') AS value FROM translations_t tra WHERE tra.commented_site_id = location.site_id --group by location.site_id ) AS addInfo - ) + ), + + 'coordinateDescription', NULL, + 'coordinateUncertainty', NULL, + 'coordinates', json_build_object( + 'geometry', NULL, + 'type', NULL + ), + + -- Country = GnpIS geographical location ontology term + 
'countryCode', locationCountry.textual_code, + 'countryName', locationCountry.name_en, + + 'documentationURL', NULL, + 'environmentType', ( + SELECT environmentType.name_en + FROM ontology_term_t environmentType + WHERE location.environment_type_id = environmentType.ontology_term_id + ), + 'exposure', location.exposure, + 'externalReferences', json_build_object( + 'referenceID', NULL, + 'referenceId', NULL, + 'referenceSource', NULL + ), + 'groupId', location.group_id, + + -- Institution = GnpIS site landowner + 'instituteAddress', nullif(concat_ws( + ', ', + -- POSTAL_BOX ROAD + nullif(concat_ws( + ' ', + nullif(locationInstitutionAddress.postal_box, ''), + nullif(locationInstitutionAddress.address, '') + ), ''), + -- POSTCODE CITY CEDEX + nullif(concat_ws( + ' ', + nullif(nullif(locationInstitutionAddress.postcode, ''), '_'), + nullif(locationInstitutionAddress.city, ''), + nullif(locationInstitutionAddress.cedex, '') + ), ''), + -- COUNTRY + nullif(locationInstitutionAddressCountry.name_en, '') + ), ''), + 'instituteName', locationInstitution.institution_name, + 'locationType', locationType.name_en, + + 'parentLocationDbId', NULL, + 'parentLocationName', NULL, + 'siteStatus', ( + SELECT ot.name_en + FROM ontology_term_t ot + WHERE location.status_id = ot.ontology_term_id + ), + 'slope', location.slope, + 'topography', ( + SELECT topography.name_en + FROM ontology_term_t topography + WHERE topography.ontology_term_id = location.topography_id + ), + + + 'altitude', location.elevation, + 'latitude', location.latitude, + 'longitude', location.longitude + ) FROM site_t location -- GitLab From e70051082d19b586c3d6e68f659fbf7e234577c8 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Mon, 23 Oct 2023 17:40:37 +0200 Subject: [PATCH 10/23] WIP: trial.sql --- .../gnpis-pg-to-json/trial.sql | 108 ++++++++++-------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql 
b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql index 98a8c45..b1f85cd 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql @@ -8,58 +8,72 @@ \set ON_ERROR_STOP true SELECT json_build_object( - 'groupId', trial.group_id, - 'documentationURL', ( - CASE trial.group_id - WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'ephesis/ephesis/viewer.do#dataResults/trialSetIds=', trial.trial_set_id) - END - ), + 'trialDbId', text(trial.trial_set_id), + 'trialName', trial.trial_set_name, - -- Trial = GnpIS trial set - 'trialDbId', text(trial.trial_set_id), - 'trialPUI', nullif(trial.pui, ''), - 'trialName', trial.trial_set_name, - 'trialType', trialType.name, - 'startDate', NULL, - 'endDate', NULL, - 'active', NULL, - 'datasetAuthorship', json_build_object( - 'license', NULL, - 'datasetPUI', nullif(trial.pui, '') - ), + 'active', NULL, + 'additionalInfo', NULL, + 'commonCropName', NULL, - -- Contacts = GnpIS trial contacts - 'contacts', array( - SELECT json_build_object( - 'contactDbId', text(contact.contact_id), - 'name', contact.first_name || ' ' || contact.last_name, - 'institutionName', nullif(contactInstitution.institution_name, 'unknown'), - 'email', contact.email, - 'type', studyContact.contact_type, - 'orcid', NULL - ) - FROM contact_t contact - JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id - JOIN trial_contact_t studyContact ON contact.contact_id = studyContact.contact_id AND studyContact.trial_id = trialStudy.trials_id - LEFT JOIN institution_t contactInstitution ON contact.institution_id = contactInstitution.institution_id - GROUP BY contact.contact_id, contactInstitution.institution_name, studyContact.contact_type - ), + -- Contacts = GnpIS trial contacts + 'contacts', array( + SELECT json_build_object( + 'contactDbId', text(contact.contact_id), + 'name', contact.first_name || ' ' || contact.last_name, + 'instituteName', nullif(contactInstitution.institution_name, 'unknown'), + 
'email', contact.email, + 'type', studyContact.contact_type, + 'orcid', NULL + ) + FROM contact_t contact + JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id + JOIN trial_contact_t studyContact ON contact.contact_id = studyContact.contact_id AND studyContact.trial_id = trialStudy.trials_id + LEFT JOIN institution_t contactInstitution ON contact.institution_id = contactInstitution.institution_id + GROUP BY contact.contact_id, contactInstitution.institution_name, studyContact.contact_type + ), - -- Studies = GnpIS trials - 'studies', array( - SELECT json_build_object( - 'studyDbId', text(study.trial_number), - 'studyName', study.name, - 'locationDbId', text(studyLocation.site_id), - 'locationName', studyLocation.site_name - ) - FROM trial_t study - JOIN trial_trial_set_t trialStudy ON trialStudy.trials_id = study.trial_id AND trialStudy.trial_sets_id = trial.trial_set_id - LEFT JOIN site_t studyLocation ON studyLocation.site_id = study.site_id + 'datasetAuthorships', json_build_object( + 'datasetPUI', nullif(trial.pui, ''), + 'license', NULL, + 'publicReleaseDate', NULL, + 'submissionDate', NULL + ), + 'documentationURL', ( + CASE trial.group_id + WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'ephesis/ephesis/viewer.do#dataResults/trialSetIds=', trial.trial_set_id) + END + ), + 'endDate', NULL, + 'externalReferences', json_build_object( + 'referenceID', NULL, + 'referenceId', NULL, + 'referenceSource', NULL + ), + 'groupId', trial.group_id, + 'programDbId', NULL, + 'programName', NULL, + 'publications', json_build_object( + 'publicationPUI', NULL, + 'publicationReference', NULL ), + 'startDate', NULL, + + -- Studies = GnpIS trials + 'studies', array( + SELECT json_build_object( + 'studyDbId', text(study.trial_number), + 'studyName', study.name, + 'locationDbId', text(studyLocation.site_id), + 'locationName', studyLocation.site_name + ) + FROM trial_t study + JOIN trial_trial_set_t trialStudy ON trialStudy.trials_id = study.trial_id AND 
trialStudy.trial_sets_id = trial.trial_set_id + LEFT JOIN site_t studyLocation ON studyLocation.site_id = study.site_id + ), + + 'trialPUI', nullif(trial.pui, ''), + 'trialType', trialType.name - -- Additional info - 'additionalInfo', NULL ) FROM trial_set_t trial -- GitLab From 5a62ad77cfbdf539805010de03069f24020a29fc Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Mon, 23 Oct 2023 17:40:56 +0200 Subject: [PATCH 11/23] WIP: program.sql --- .../gnpis-pg-to-json/program.sql | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql index 61f115b..90290c0 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql @@ -8,14 +8,22 @@ \set ON_ERROR_STOP true SELECT json_build_object( + + 'programDbId', text(program.project_code), + 'programName', program.project_name, + + 'abbreviation', program.project_code, + 'additionalInfo', NULL, + 'commonCropName', NULL, + 'documentationURL', NULL, + 'externalReferences', json_build_object( + 'referenceID', NULL, + 'referenceId', NULL, + 'referenceSource', NULL + ), + 'fundingInformation', NULL, 'groupId', program.group_id, - - -- Program = GnpIS project - 'documentationURL', NULL, - 'programDbId', text(program.project_code), - 'programName', program.project_name, - 'abbreviation', program.project_code, - 'commonCropName', NULL, + 'leadPerson', ( SELECT programLeadPerson.first_name || ' ' || programLeadPerson.last_name FROM project_coordinator_t programLead @@ -25,7 +33,8 @@ SELECT json_build_object( ), --removed in BrAPI v2 --'leadPersonDbId', leadPerson.contact_id, --'leadPersonName', leadPerson.name, - 'objective', NULL + 'objective', NULL, + 'programType', NULL ) FROM project_t program -- GitLab From 14d028f8e24aeb3cabeda488d03bc2fce2854831 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Mon, 
23 Oct 2023 17:41:43 +0200 Subject: [PATCH 12/23] Delete specific germplasmMcpd --- .../gnpis-pg-to-json/germplasmMcpd.sql | 345 ------------------ 1 file changed, 345 deletions(-) delete mode 100644 etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql deleted file mode 100644 index 2785ced..0000000 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql +++ /dev/null @@ -1,345 +0,0 @@ ----https://hashrocket.com/blog/posts/faster-json-generation-with-postgresql --- Warning: The name of this file is used as the elasticsearch document type name - --- Example of usage: --- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f germplasmMcpd.sql -v faidareURL=https://urgi.versailles.inrae.fr/faidare > germplasmMcpd.json - --- Making sure to stop psql with non-zero exit code on any error -\set ON_ERROR_STOP true - -SELECT json_build_object( - 'groupId', a.group_id, - 'germplasmDbId', text(a.accession_id), - 'germplasmPUI', a.puid, - 'accessionNumber', a.accession_number, - 'alternateIDs', array[ - a.accession_number, - a.puid, - text(a.accession_id) - ], - 'accessionNames', array( - select a.accession_name - UNION - select distinct(accsyn.accession_synonym_name) - from accession_synonym_t accsyn - where accsyn.accession_id = a.accession_id - ), - 'commonCropName', ( - CASE - WHEN ta.dataset_id = (select dataset_id from dataset_t where dataset_name = 'Forest tree taxa') AND a.taxon_group != 'Cherry' AND a.taxon_group != 'Walnut' THEN 'Forest tree' - ELSE a.taxon_group - END - ), - 'acquisitionDate', a.collecting_date, - 'acquisitionSourceCode', collecting_site.environment_type, - 'ancestralData', a.pedigree, - 'biologicalStatusOfAccessionCode', biostat.numeric_code, - 'breedingInstitutes', array( - SELECT json_build_object( - 'instituteName', bi.institution_name, - 'instituteCode', bi.fao_code, - 'acronym', 
bi.acronym, - 'organisation', bi.organization, - 'instituteType', bi_type.name_en, - 'webSite', bi.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN bi_address.address is not null and bi_address.address != '' THEN bi_address.address END, - CASE WHEN bi_address.postal_box is not null and bi_address.postal_box != '' THEN bi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) != '' THEN trim(concat_ws(' ', bi_address.postcode, bi_address.city, bi_address.cedex)) END, - CASE WHEN bi_address_country.name_en is not null and bi_address_country.name_en != '' THEN bi_address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || bi.institution_logo - ) - FROM institution_t bi - LEFT JOIN ontology_term_t bi_type ON bi_type.ontology_term_id = bi.institution_type_id - LEFT JOIN address_t bi_address ON bi_address.address_id = bi.address_id - LEFT JOIN ontology_term_t bi_address_country ON bi_address_country.ontology_term_id = bi_address.country_id - WHERE bi.institution_id = a.breeder_institution_id - ), - 'breederAccessionNumber', a.breeder_accession_number, - 'breedingCreationYear', a.breeding_creation_year, - 'catalogRegistrationYear', a.catalog_registration_year, - 'catalogDeregistrationYear', a.catalog_deregistration_year, - 'collectingInfo', json_build_object( - 'collectingDate', a.collecting_date, - 'collectingInstitutes', array( - SELECT json_build_object( - 'instituteName', ci.institution_name, - 'instituteCode', ci.fao_code, - 'acronym', ci.acronym, - 'organisation', ci.organization, - 'instituteType', ci_type.name_en, - 'webSite', ci.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN ci_address.address is not null and ci_address.address != '' THEN ci_address.address END, - CASE WHEN ci_address.postal_box is not null and ci_address.postal_box != '' THEN ci_address.postal_box END, - CASE WHEN trim(concat_ws(' ', 
ci_address.postcode, ci_address.city, ci_address.cedex)) != '' THEN trim(concat_ws(' ', ci_address.postcode, ci_address.city, ci_address.cedex)) END, - CASE WHEN ci_address_country.name_en is not null and ci_address_country.name_en != '' THEN ci_address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || ci.institution_logo - ) - FROM institution_t ci - LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id - LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id - LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id - WHERE ci.institution_id = a.collector_institution_id - ), - 'collectingMissionIdentifier', null, - 'collectingNumber', a.collecting_number, - 'collectors', a.collectors_list, - 'materialType', cmt.name_en, - 'collectingSite', json_build_object( - 'locationDbId', text(collecting_site.site_id), - 'locationName', collecting_site.site_name, - 'coordinateUncertainty', null, - 'elevation', collecting_site.elevation, - 'georeferencingMethod', null, - 'latitudeDecimal', collecting_site.latitude, - 'latitudeDegrees', null, - 'locationDescription', collecting_site.description, - 'longitudeDecimal', collecting_site.longitude, - 'longitudeDegrees', null, - 'spatialReferenceSystem', null - ) - ), - 'countryOfOriginCode', originCountry.name_en, - 'originLocationDbId', text(origin_site.site_id), - 'originLocationName', origin_site.site_name, - 'donorInfo', array( - SELECT json_build_object( - 'donorAccessionNumber', a.donor_accession_number, - 'donorInstitute', json_build_object( - 'instituteName', donor.institution_name, - 'instituteCode', donor.fao_code, - 'acronym', donor.acronym, - 'organisation', donor.organization, - 'instituteType', donor_type.name_en, - 'webSite', donor.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN address.address is not null and address.address != '' THEN 
address.address END, - CASE WHEN address.postal_box is not null and address.postal_box != '' THEN address.postal_box END, - CASE WHEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) != '' THEN trim(concat_ws(' ', address.postcode, address.city, address.cedex)) END, - CASE WHEN address_country.name_en is not null and address_country.name_en != '' THEN address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || donor.institution_logo - ), - 'donationDate', a.donation_date - ) - FROM institution_t donor - LEFT JOIN ontology_term_t donor_type ON donor_type.ontology_term_id = donor.institution_type_id - LEFT JOIN address_t address ON address.address_id = donor.address_id - LEFT JOIN ontology_term_t address_country ON address_country.ontology_term_id = address.country_id - WHERE donor.institution_id = a.donor_institution_id - ), - 'genus', ( - CASE - WHEN ta.genus is not null AND ta.genus != '' THEN ta.genus - ELSE '' - END - ), - 'species', ta.species, - 'speciesAuthority', species.author_list, - 'subtaxon', trim(concat_ws(' ', - CASE WHEN ta.subspecies is not null AND ta.subspecies != '' THEN 'subsp. ' || ta.subspecies END, - CASE WHEN ta.variety is not null AND ta.variety != '' THEN 'var. ' || ta.variety END, - CASE WHEN ta.cultivar is not null AND ta.cultivar != '' THEN 'cv. ' || ta.cultivar END, - CASE WHEN ta.form is not null AND ta.form != '' THEN 'f. ' || ta.form END, - CASE WHEN ta.tax_group is not null AND ta.tax_group != '' THEN 'gr. ' || ta.tax_group END, - CASE WHEN ta.subgroup is not null AND ta.subgroup != '' THEN 'sgr. 
' || ta.subgroup END - )), - 'subtaxonAuthority', ( - CASE - WHEN (ta.subspecies is not null AND ta.subspecies != '') - OR (ta.variety is not null AND ta.variety != '') - OR (ta.cultivar is not null AND ta.cultivar != '') - OR (ta.form is not null AND ta.form != '') - OR (ta.tax_group is not null AND ta.tax_group != '') - OR (ta.subgroup is not null AND ta.subgroup != '') - THEN ta.author_list - ELSE '' - END - ), - 'instituteCode', hi.fao_code, - 'holdingInstitute', json_build_object( - 'instituteName', hi.institution_name, - 'instituteCode', hi.fao_code, - 'acronym', hi.acronym, - 'organisation', hi.organization, - 'instituteType', hi_type.name_en, - 'webSite', hi.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN hi_address.address is not null and hi_address.address != '' THEN hi_address.address END, - CASE WHEN hi_address.postal_box is not null and hi_address.postal_box != '' THEN hi_address.postal_box END, - CASE WHEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) != '' THEN trim(concat_ws(' ', hi_address.postcode, hi_address.city, hi_address.cedex)) END, - CASE WHEN hi_address_country.name_en is not null and hi_address_country.name_en != '' THEN hi_address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || hi.institution_logo - ), - 'holdingGenbank', json_build_object ( - 'instituteName', ( - CASE WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name END - ), - 'instituteCode', grc.grc_code, - 'webSite', grc.internet_site, - 'logo', CASE WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inra.fr/files/siregal/images/grc/inra_brc_en.png' END - ), - 'mlsStatus', ( - select - CASE string_agg(distinct(ott.textual_code), ', ') - WHEN 'MLS' THEN '1' - ELSE '0' - END - from accession_distributor_t ad - join ontology_term_t ott on ott.ontology_term_id = ad.distribution_status_id - where ad.accession_id 
= a.accession_id - - ), - 'geneticNature', genetic_nature.name_en, - 'presenceStatus', pres_status.name_en, - 'remarks', agg_accession_comments.accession_comments, - 'safetyDuplicateInstitues', array[ - json_build_object( - 'instituteCode', null, - 'instituteName', null - ) - ], - 'storageTypeCodes', array[''], - 'distributorInfos', array( - SELECT json_build_object( - 'institute', json_build_object( - 'instituteName', di.institution_name, - 'instituteCode', di.fao_code, - 'acronym', di.acronym, - 'organisation', di.organization, - 'instituteType', di_type.name_en, - 'webSite', di.internet_site, - 'instituteAddress', trim(concat_ws(', ', - CASE WHEN di_address.address is not null AND di_address.address != '' THEN di_address.address END, - CASE WHEN di_address.postal_box is not null AND di_address.postal_box != '' THEN di_address.postal_box END, - CASE WHEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) != '' THEN trim(concat_ws(' ', di_address.postcode, di_address.city, di_address.cedex)) END, - CASE WHEN di_address_country.name_en is not null AND di_address_country.name_en != '' THEN di_address_country.name_en END - )), - 'logo', 'https://urgi.versailles.inra.fr/files/siregal/images/institution/' || di.institution_logo - ), - 'accessionNumber', ad.distributor_accession_number, - 'distributionStatus', ot.name_en - ) - FROM accession_distributor_t ad - JOIN ontology_term_t ot ON ot.ontology_term_id = ad.distribution_status_id - JOIN institution_t di ON di.institution_id = ad.distributor_institution_id - LEFT JOIN ontology_term_t di_type ON di_type.ontology_term_id = di.institution_type_id - LEFT JOIN address_t di_address ON di_address.address_id = di.address_id - LEFT JOIN ontology_term_t di_address_country ON di_address_country.ontology_term_id = di_address.country_id - WHERE ad.accession_id = a.accession_id - ) -) -FROM accession_t a - --- taxon -JOIN taxon_t ta ON a.taxon_id = ta.taxon_id - --- Recursive join to get species authors 
-LEFT JOIN ( - WITH RECURSIVE tax(taxon_id, rank, parent_id, author_list) AS ( - SELECT tax.taxon_id, bt.name, tax.parent_id, tax.author_list - FROM taxon_t tax - JOIN bio_type_t bt ON bt.bio_type_id = tax.rank_id - UNION ALL - SELECT tax.taxon_id, bt.name, parent_tax.parent_id, parent_tax.author_list - FROM tax - JOIN taxon_t parent_tax ON parent_tax.taxon_id = tax.parent_id - JOIN bio_type_t bt ON bt.bio_type_id = parent_tax.rank_id - ) - SELECT * FROM tax - WHERE rank = 'species' -) AS species ON species.taxon_id = ta.taxon_id - --- biological status -LEFT JOIN ontology_term_t biostat ON biostat.ontology_term_id = a.biological_status_id - --- genetic nature -LEFT JOIN ontology_term_t genetic_nature ON genetic_nature.ontology_term_id = a.genetic_nature_id - --- aggregates accession comments in one line -LEFT JOIN ( - SELECT acc.accession_id, string_agg(distinct(translated_name), ', ') AS accession_comments - FROM translations_t tra - JOIN accession_t acc ON tra.commented_accession_id = acc.accession_id - GROUP BY acc.accession_id -) AS agg_accession_comments ON agg_accession_comments.accession_id = a.accession_id - --- holding institution -LEFT JOIN institution_t hi ON hi.institution_id = a.holding_institution_id -LEFT JOIN ontology_term_t hi_type ON hi_type.ontology_term_id = hi.institution_type_id -LEFT JOIN address_t hi_address ON hi_address.address_id = hi.address_id -LEFT JOIN ontology_term_t hi_address_country ON hi_address_country.ontology_term_id = hi_address.country_id - --- grc -LEFT JOIN grc_t grc ON grc.grc_id = a.grc_id -LEFT JOIN ( - SELECT grc_tra.named_grc_id, grc_tra.translated_name - FROM translations_t grc_tra - JOIN languages_t l ON grc_tra.language_id = l.language_id - WHERE l.language_code = 'en' -) AS grc_name ON grc_name.named_grc_id = a.grc_id - --- presence status -LEFT JOIN ontology_term_t pres_status ON pres_status.ontology_term_id = a.presence_status_id - --- origin site -LEFT JOIN ( - SELECT s.site_id, s.site_name, s.latitude, 
s.longitude, ot.name_en AS site_type - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id -) AS origin_site ON origin_site.site_id = a.origin_site_id - --- Recursive join on site_t.geographical_location_id = ontology_term_t.ontology_term_id --- Select geographical location level = COUNTRY or OLD_COUNTRY -LEFT JOIN ( - WITH RECURSIVE geo(site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en) AS ( - SELECT location.site_id, level.textual_code, geo.ontology_term_id, geo.parent_id, geo.textual_code, geo.name_en - FROM ontology_term_t geo - JOIN site_t location ON location.geographical_location_id = geo.ontology_term_id - LEFT JOIN ontology_term_t level ON level.ontology_term_id = geo.term_level_id - UNION ALL - SELECT geo.site_id, level.textual_code, parent_geo.ontology_term_id, parent_geo.parent_id, parent_geo.textual_code, parent_geo.name_en - FROM geo - JOIN ontology_term_t parent_geo ON parent_geo.ontology_term_id = geo.parent_id - LEFT JOIN ontology_term_t level ON level.ontology_term_id = parent_geo.term_level_id - ) - SELECT DISTINCT ON (site_id) site_id, level_textual_code, ontology_term_id, parent_id, textual_code, name_en - FROM geo - WHERE level_textual_code = 'COUNTRY' OR level_textual_code = 'OLD_COUNTRY' -) AS originCountry ON originCountry.site_id = a.origin_site_id - --- collecting site -LEFT JOIN ( - SELECT s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, ot.name_en AS site_type, ot2.numeric_code AS environment_type, string_agg(distinct(t.translated_name), ', ') AS description - FROM site_t s - JOIN ontology_term_t ot ON ot.ontology_term_id = s.site_type_id - LEFT JOIN ontology_term_t ot2 ON ot2.ontology_term_id = s.environment_type_id - LEFT JOIN translations_t t ON t.commented_site_id = s.site_id - GROUP BY s.site_id, s.site_name, s.latitude, s.longitude, s.elevation, site_type, environment_type -) AS collecting_site ON collecting_site.site_id = a.site_id - --- collector -LEFT JOIN 
institution_t ci ON ci.institution_id = a.collector_institution_id -LEFT JOIN ontology_term_t ci_type ON ci_type.ontology_term_id = ci.institution_type_id -LEFT JOIN address_t ci_address ON ci_address.address_id = ci.address_id -LEFT JOIN ontology_term_t ci_address_country ON ci_address_country.ontology_term_id = ci_address.country_id - --- breeder -LEFT JOIN institution_t bi ON bi.institution_id = a.breeder_institution_id -LEFT JOIN ontology_term_t bi_type ON bi_type.ontology_term_id = bi.institution_type_id -LEFT JOIN address_t bi_address ON bi_address.address_id = bi.address_id -LEFT JOIN ontology_term_t bi_address_country ON bi_address_country.ontology_term_id = bi_address.country_id - --- collected material type -LEFT JOIN ontology_term_t cmt ON cmt.ontology_term_id = a.collected_material_type_id - --- dataset -LEFT JOIN dataset_t ds ON ds.dataset_id = a.dataset_id -WHERE a.accession_id between :startPageId and :endPageId ---ORDER BY a.insertion_date ---ORDER BY RANDOM() LIMIT 10 -; -- GitLab From 969e7b666649bea924bb81d47e355ba750b6fba2 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 25 Oct 2023 17:15:07 +0200 Subject: [PATCH 13/23] Typo --- etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index ef0617b..53a769e 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -70,7 +70,7 @@ SELECT json_build_object( 'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), 'environmentParameters', NULL, - 'experimentaDesign', NULL, + 'experimentalDesign', NULL, 'externalReferences', NULL, -- Germplasm = GnpIS accessions @@ -208,7 +208,7 @@ SELECT json_build_object( ), 'endDate', NULL, 'environmentParameters', NULL, - 'experimentaDesign', NULL, + 'experimentalDesign', NULL, 'externalReferences', NULL, -- Germplasm = 
GnpIS accessions -- GitLab From d221492cf32c96cc4b1951e2568de53b8e56eb3a Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 15 Nov 2023 15:17:42 +0100 Subject: [PATCH 14/23] Some corrections --- etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index 53a769e..9d8ba22 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -70,7 +70,7 @@ SELECT json_build_object( 'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), 'environmentParameters', NULL, - 'experimentalDesign', NULL, + 'experimentalDesign', trial_t.trial_design, 'externalReferences', NULL, -- Germplasm = GnpIS accessions @@ -117,9 +117,9 @@ SELECT json_build_object( ), 'startDate', to_char(study_pheno.date_begin, 'YYYY-MM-dd'), - 'studyCode', NULL, + 'studyCode', trial_t.trial_number, 'studyDbId', text(study_pheno.trial_number), - 'studyDescription', NULL, + 'studyDescription', trial_t.description, 'studyName', study_pheno.name, 'studyPUI', NULL, 'studyType', ( @@ -208,7 +208,7 @@ SELECT json_build_object( ), 'endDate', NULL, 'environmentParameters', NULL, - 'experimentalDesign', NULL, + 'experimentalDesign', trial_t.trial_design, 'externalReferences', NULL, -- Germplasm = GnpIS accessions @@ -242,7 +242,7 @@ SELECT json_build_object( 'programName', program.project_name, 'seasons', NULL, 'startDate', NULL, - 'studyCode', NULL, + 'studyCode', trial_t.trial_number, 'studyDbId', text(study_geno.genotyping_experiment_id), 'studyDescription', NULL, 'studyName', study_geno.genotyping_experiment_name, -- GitLab From 2d62e148f7a4fbe3ad68496a556807f2df15be4f Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 15 Nov 2023 15:53:03 +0100 Subject: [PATCH 15/23] Corrections --- .../gnpis-pg-to-json/program.sql | 5 ++- 
.../gnpis-pg-to-json/study.sql | 32 ++++++++++++++++--- .../gnpis-pg-to-json/trial.sql | 7 +++- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql index 90290c0..dbfc081 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql @@ -13,7 +13,10 @@ SELECT json_build_object( 'programName', program.project_name, 'abbreviation', program.project_code, - 'additionalInfo', NULL, + 'additionalInfo', ( + SELECT project_t.short_remark + FROM project_t + ), 'commonCropName', NULL, 'documentationURL', NULL, 'externalReferences', json_build_object( diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index 9d8ba22..1b91ede 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -59,7 +59,7 @@ SELECT json_build_object( FROM data_file_t dataLink JOIN bio_type_t dataLinkType ON dataLinkType.bio_type_id = dataLink.file_type_id WHERE dataLink.trial_id = study_pheno.trial_id - ), -- REVOIR LES CHAMPS QU'IL CONTIENT + ), -- Study = GnpIS trial 'documentationURL', ( @@ -69,8 +69,19 @@ SELECT json_build_object( ), 'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), - 'environmentParameters', NULL, - 'experimentalDesign', trial_t.trial_design, + 'environmentParameters', json_build_object( + "description", NULL, + "parameterName", NULL, + "parameterPUI", NULL, + "unit", NULL, + "unitPUI", NULL, + "value", NULL, + "valuePUI", NULL + ), + "experimentalDesign", json_build_object( + "PUI", NULL, + "description", trial_t.trial_design + ), 'externalReferences', NULL, -- Germplasm = GnpIS accessions @@ -207,8 +218,19 @@ SELECT json_build_object( END ), 'endDate', NULL, - 'environmentParameters', NULL, - 'experimentalDesign', trial_t.trial_design, + 'environmentParameters', json_build_object( + 
"description", NULL, + "parameterName", NULL, + "parameterPUI", NULL, + "unit", NULL, + "unitPUI", NULL, + "value", NULL, + "valuePUI", NULL + ), + "experimentalDesign", json_build_object( + "PUI", NULL, + "description", trial_t.trial_design + ), 'externalReferences', NULL, -- Germplasm = GnpIS accessions diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql index b1f85cd..2b0ac75 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql @@ -51,7 +51,12 @@ SELECT json_build_object( ), 'groupId', trial.group_id, 'programDbId', NULL, - 'programName', NULL, + 'programName', ( + SELECT project_t.title + FROM project_t, trial_t + WHEN trial_t.project_id = project_t.project_id + LIMIT 1 + ), 'publications', json_build_object( 'publicationPUI', NULL, 'publicationReference', NULL -- GitLab From 0f179d46b294cbcecb19273c0ca7fdf386aa43a8 Mon Sep 17 00:00:00 2001 From: Cyril Pommier <cyril.pommier@inrae.fr> Date: Fri, 22 Dec 2023 15:31:05 +0100 Subject: [PATCH 16/23] Correct quote errors. Remove unused trialId parameter. Add commented filter on trial_set_id to export exemplar datasets. --- .../gnpis-pg-to-json/study.sql | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index 1b91ede..86d2129 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -1,10 +1,9 @@ ---https://hashrocket.com/blog/posts/faster-json-generation-with-postgresql -- Warning: The name of this file is used as the elasticsearch document type name --- Warning: The 'trialId' argument must always be set. 
Set it to NULL if you want to export all Ephesis trials as BrAPI studies: 'psql -v trialId=NULL' -- Example of usage: --- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f study.sql -v trialId=NULL -v gnpisBaseURL=https://urgi.versailles.inrae.fr > study.json +-- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f study.sql -v gnpisBaseURL=https://urgi.versailles.inrae.fr > study.json -- Making sure to stop psql with non-zero exit code on any error \set ON_ERROR_STOP true @@ -63,24 +62,24 @@ SELECT json_build_object( -- Study = GnpIS trial 'documentationURL', ( - CASE study_pheno.group_id + CASE study_pheno.group_id--That's wrong, there should always be a URL even for confidential data WHEN 0 THEN CONCAT(:'gnpisBaseURL', 'ephesis/ephesis/viewer.do#trialCard/trialId=', study_pheno.trial_id) END ), 'endDate', to_char(study_pheno.date_end, 'YYYY-MM-dd'), 'environmentParameters', json_build_object( - "description", NULL, - "parameterName", NULL, - "parameterPUI", NULL, - "unit", NULL, - "unitPUI", NULL, - "value", NULL, - "valuePUI", NULL + 'description', NULL, + 'parameterName', NULL, + 'parameterPUI', NULL, + 'unit', NULL, + 'unitPUI', NULL, + 'value', NULL, + 'valuePUI', NULL ), - "experimentalDesign", json_build_object( - "PUI", NULL, - "description", trial_t.trial_design + 'experimentalDesign', json_build_object( + 'PUI', NULL, + 'description', study_pheno.trial_design ), 'externalReferences', NULL, @@ -128,9 +127,9 @@ SELECT json_build_object( ), 'startDate', to_char(study_pheno.date_begin, 'YYYY-MM-dd'), - 'studyCode', trial_t.trial_number, + 'studyCode', study_pheno.trial_number, 'studyDbId', text(study_pheno.trial_number), - 'studyDescription', trial_t.description, + 'studyDescription', study_pheno.description, 'studyName', study_pheno.name, 'studyPUI', NULL, 'studyType', ( @@ -160,8 +159,8 @@ LEFT JOIN site_t location ON study_pheno.site_id = location.site_id -- Program LEFT JOIN project_t program ON 
study_pheno.project_id = program.project_id +--JOIN trial_trial_set_t tts on tts.trials_id=study_pheno.trial_id WHERE tts.trial_sets_id in (24, 7 ) -WHERE study_pheno.trial_id = :trialId OR :trialId IS NULL --ORDER BY RANDOM() LIMIT 500 ; @@ -219,18 +218,18 @@ SELECT json_build_object( ), 'endDate', NULL, 'environmentParameters', json_build_object( - "description", NULL, - "parameterName", NULL, - "parameterPUI", NULL, - "unit", NULL, - "unitPUI", NULL, - "value", NULL, - "valuePUI", NULL - ), - "experimentalDesign", json_build_object( - "PUI", NULL, - "description", trial_t.trial_design + 'description', NULL, + 'parameterName', NULL, + 'parameterPUI', NULL, + 'unit', NULL, + 'unitPUI', NULL, + 'value', NULL, + 'valuePUI', NULL ), + --'experimentalDesign', json_build_object( -- meaningless for geno study + -- 'PUI', NULL, + -- 'description', study_geno.trial_design + --), 'externalReferences', NULL, -- Germplasm = GnpIS accessions @@ -264,7 +263,7 @@ SELECT json_build_object( 'programName', program.project_name, 'seasons', NULL, 'startDate', NULL, - 'studyCode', trial_t.trial_number, + 'studyCode', NULL, 'studyDbId', text(study_geno.genotyping_experiment_id), 'studyDescription', NULL, 'studyName', study_geno.genotyping_experiment_name, -- GitLab From a19f8c422d6a9d0aaead03a34d30830375fd2ec4 Mon Sep 17 00:00:00 2001 From: Cyril Pommier <cyril.pommier@inrae.fr> Date: Fri, 22 Dec 2023 16:36:12 +0100 Subject: [PATCH 17/23] Add commented filter on trial_set_id to export exemplar datasets. 
--- etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql | 2 ++ .../gnpis-pg-to-json/germplasmAttribute.sql | 1 + .../gnpis-pg-to-json/germplasmPedigree.sql | 1 + .../gnpis-pg-to-json/germplasmProgeny.sql | 1 + etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql | 1 + etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql | 8 +++----- etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql | 4 +++- 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql index 7d5d6c9..32a3d17 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql @@ -664,6 +664,8 @@ LEFT JOIN ontology_term_t bi_address_country ON bi_address_country.ontology_term LEFT JOIN dataset_t ds ON ds.dataset_id = a.dataset_id WHERE a.accession_id between :startPageId and :endPageId +--AND exists (select 1 from lot_t l JOIN trial_lot_t tl on tl.lots_id=l.lot_id JOIN trial_trial_set_t tts on tts.trials_id=tl.trials_id WHERE l.accession_id=a.accession_id AND tts.trial_sets_id in (24, 7)) + ORDER BY a.accession_id --ORDER BY RANDOM() LIMIT 500 --LIMIT 5 diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql index bd65ded..dc5ea9e 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql @@ -26,6 +26,7 @@ SELECT json_build_object( ) FROM accession_t germplasm WHERE exists (select 1 from accession_descriptor_t ad where germplasm.accession_id = ad.accession_id) +--AND exists (select 1 from lot_t l JOIN trial_lot_t tl on tl.lots_id=l.lot_id JOIN trial_trial_set_t tts on tts.trials_id=tl.trials_id WHERE l.accession_id=germplasm.accession_id AND tts.trial_sets_id in (24, 7)) --ORDER BY RANDOM() LIMIT 500 ; diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql 
b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql index e8fb160..310f7cd 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql @@ -66,6 +66,7 @@ LEFT JOIN accession_t fp ON fp.accession_id = genea.first_parent_id LEFT JOIN ontology_term_t fp_type ON fp_type.ontology_term_id = genea.first_parent_type_id LEFT JOIN accession_t sp ON sp.accession_id = genea.second_parent_id LEFT JOIN ontology_term_t sp_type ON sp_type.ontology_term_id = genea.second_parent_type_id +--WHERE exists (select 1 from lot_t l JOIN trial_lot_t tl on tl.lots_id=l.lot_id JOIN trial_trial_set_t tts on tts.trials_id=tl.trials_id WHERE l.accession_id=a.accession_id AND tts.trial_sets_id in (24, 7)) --ORDER BY RANDOM() LIMIT 500 ; diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql index 5cb25ab..c4232ff 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql @@ -54,5 +54,6 @@ SELECT json_build_object( ) FROM accession_t a WHERE exists (select 1 from genealogy_t ge where ge.first_parent_id = a.accession_id or ge.second_parent_id = a.accession_id) +--AND exists (select 1 from lot_t l JOIN trial_lot_t tl on tl.lots_id=l.lot_id JOIN trial_trial_set_t tts on tts.trials_id=tl.trials_id WHERE l.accession_id=a.accession_id AND tts.trial_sets_id in (24, 7)) --ORDER BY RANDOM() LIMIT 500 ; diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql index 2d058b0..e486999 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql @@ -153,6 +153,7 @@ LEFT JOIN ontology_term_t locationType ON location.site_type_id = locationType.o LEFT JOIN institution_t locationInstitution ON locationInstitution.institution_id = location.landowner_id LEFT 
JOIN address_t locationInstitutionAddress ON locationInstitutionAddress.address_id = locationInstitution.address_id LEFT JOIN ontology_term_t locationInstitutionAddressCountry ON locationInstitutionAddressCountry.ontology_term_id = locationInstitutionAddress.country_id +--JOIN trial_t t on t.site_id=location.site_id JOIN trial_trial_set_t tts on tts.trials_id=t.trial_id WHERE tts.trial_sets_id in (24, 7 ) --ORDER BY RANDOM() LIMIT 500 ; diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql index 22387d8..7466233 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql @@ -1,10 +1,8 @@ ---https://hashrocket.com/blog/posts/faster-json-generation-with-postgresql -- Warning: The name of this file is used as the elasticsearch document type name --- Warning: The 'trialId' argument must always be set. Set it to NULL if you want to export for all trials: 'psql -v trialId=NULL' - -- Example of usage: --- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f observationUnit.sql -v trialId=NULL > observationUnit.json +-- psql --host shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -A -t -f observationUnit.sql > observationUnit.json -- Making sure to stop psql with non-zero exit code on any error \set ON_ERROR_STOP true @@ -21,8 +19,7 @@ WITH observation_unit_document AS ( JOIN phenotype_t observation ON observationUnit.study_subject_id = observation.study_subject_id LEFT JOIN phenotyping_campaign_t season ON observation.phenotyping_campaign_id = season.phenotyping_campaign_id JOIN trial_t study ON study.trial_id = observationUnit.trial_id - WHERE (:trialId IS NULL OR study.trial_id = :trialId) - AND observationUnit.study_subject_id between :startPageId and :endPageId + WHERE observationUnit.study_subject_id between :startPageId and :endPageId GROUP BY docId, 
observationUnit.study_subject_id, season.phenotyping_campaign_id --ORDER BY RANDOM() LIMIT 5000 ) @@ -159,6 +156,7 @@ LEFT JOIN project_t program ON program.project_id = study.project_id LEFT JOIN site_t location ON location.site_id = study.site_id JOIN lot_t lot ON observationUnit.lot_id = lot.lot_id JOIN accession_t germplasm ON lot.accession_id = germplasm.accession_id +--JOIN trial_trial_set_t tts on tts.trials_id=study.trial_id WHERE tts.trial_sets_id in (24, 7 ) --ORDER BY RANDOM() LIMIT 100 ; diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql index 2b0ac75..7dc7b01 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql @@ -54,7 +54,7 @@ SELECT json_build_object( 'programName', ( SELECT project_t.title FROM project_t, trial_t - WHEN trial_t.project_id = project_t.project_id + WHERE trial_t.project_id = project_t.project_id LIMIT 1 ), 'publications', json_build_object( @@ -84,5 +84,7 @@ FROM trial_set_t trial LEFT JOIN bio_type_t trialType ON trial.trial_set_type_id = trialType.bio_type_id +--WHERE trial.trial_set_id in (24, 7 ) --42 = Drops) + --ORDER BY RANDOM() LIMIT 500 ; -- GitLab From d90a31097c3f935f8f8a84759d98c2efac16496b Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Thu, 4 Jan 2024 12:05:57 +0100 Subject: [PATCH 18/23] Add accession synonym --- etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql index 32a3d17..1b4b7e3 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql @@ -230,7 +230,16 @@ SELECT json_build_object( ELSE '' END ), - 'synonyms', null, + 'synonyms', array( + SELECT json_build_object( + 'synonym', array ( + SELECT distinct 
accsyn.accession_synonym_name AS synonyms + FROM accession_synonym_t accsyn + WHERE accsyn.accession_id = a.accession_id + ), + 'type', null + ) + ), 'taxonIds', ta.taxon_external_ids, -- OTHER IN GERMPLASM -- 'groupId', a.group_id, -- GitLab From 4de395dbc7dcd35cc87d175a1f5177bea62cc199 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Fri, 19 Jan 2024 10:23:29 +0100 Subject: [PATCH 19/23] program.sql correction --- etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql index dbfc081..f807e82 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql @@ -11,11 +11,11 @@ SELECT json_build_object( 'programDbId', text(program.project_code), 'programName', program.project_name, - 'abbreviation', program.project_code, - 'additionalInfo', ( + 'additionalInfo',( SELECT project_t.short_remark FROM project_t + LIMIT 1 ), 'commonCropName', NULL, 'documentationURL', NULL, @@ -26,16 +26,13 @@ SELECT json_build_object( ), 'fundingInformation', NULL, 'groupId', program.group_id, - 'leadPerson', ( SELECT programLeadPerson.first_name || ' ' || programLeadPerson.last_name FROM project_coordinator_t programLead JOIN contact_t programLeadPerson ON programLeadPerson.contact_id = programLead.contact_coordinator_id WHERE programLead.project_coordinator_id = program.project_id LIMIT 1 - ), --removed in BrAPI v2 - --'leadPersonDbId', leadPerson.contact_id, - --'leadPersonName', leadPerson.name, + ), 'objective', NULL, 'programType', NULL ) -- GitLab From 651175f2e73354dedb0adc9d4ae1cf0989314a7b Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Tue, 23 Jan 2024 10:50:48 +0100 Subject: [PATCH 20/23] Add coordinates values --- etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql | 13 ++++++++++--- 1 file changed, 10 
insertions(+), 3 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql index e486999..0cbec95 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql @@ -63,8 +63,16 @@ SELECT json_build_object( 'coordinateDescription', NULL, 'coordinateUncertainty', NULL, 'coordinates', json_build_object( - 'geometry', NULL, - 'type', NULL + 'geometry', json_build_object ( + 'coordinates', ( + CASE + WHEN location.longitude is not null AND location.latitude is not null THEN location.longitude || ', ' || location.latitude + WHEN location.longitude is not null AND location.latitude is not null AND location.elevation is not null THEN location.longitude || ', ' || location.latitude || ', ' || location.elevation + END + ), + 'type', CASE WHEN location.longitude is not null AND location.latitude is not null THEN 'Point' END + ), + 'type', CASE WHEN location.longitude is not null AND location.latitude is not null THEN 'Feature' END ), -- Country = GnpIS geographical location ontology term @@ -121,7 +129,6 @@ SELECT json_build_object( WHERE topography.ontology_term_id = location.topography_id ), - 'altitude', location.elevation, 'latitude', location.latitude, 'longitude', location.longitude -- GitLab From a6d00795bdfdc1b6609b389475fbd6e69b53ec95 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Wed, 20 Mar 2024 15:28:56 +0100 Subject: [PATCH 21/23] Add trialDbId and trialName --- etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index 86d2129..b5d9b09 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -140,8 +140,18 @@ SELECT json_build_object( ), -- Trials = GnpIS trial set - 
'trialDbId', NULL, - 'trialName', NULL, + 'trialDbId', ( + SELECT text(trial.trial_set_id) + FROM trial_set_t trial + JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id AND trialStudy.trials_id = study_pheno.trial_id + LIMIT 1 + ), + 'trialName', ( + SELECT trial.trial_set_name + FROM trial_set_t trial + JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id AND trialStudy.trials_id = study_pheno.trial_id + LIMIT 1 + ), 'trialDbIds', array( SELECT DISTINCT text(trial.trial_set_id) FROM trial_set_t trial -- GitLab From 7bf8514e24f92a2c088bb70ba3bf9651bb030b51 Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Thu, 23 May 2024 11:56:21 +0200 Subject: [PATCH 22/23] Correct request --- etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql index b5d9b09..58eb868 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql +++ b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql @@ -141,7 +141,7 @@ SELECT json_build_object( -- Trials = GnpIS trial set 'trialDbId', ( - SELECT text(trial.trial_set_id) + SELECT DISTINCT text(trial.trial_set_id) FROM trial_set_t trial JOIN trial_trial_set_t trialStudy ON trialStudy.trial_sets_id = trial.trial_set_id AND trialStudy.trials_id = study_pheno.trial_id LIMIT 1 -- GitLab From 6843ab6ae421ca354f4c3fb099009ee2c0ddb46c Mon Sep 17 00:00:00 2001 From: "maud.marty" <maud.marty@inrae.fr> Date: Thu, 23 May 2024 11:58:27 +0200 Subject: [PATCH 23/23] Typos --- .../gnpis-pg-to-json/germplasm.sql | 40 +------------------ 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql index 1b4b7e3..97b39b5 100644 --- a/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql +++ 
b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql @@ -63,7 +63,7 @@ SELECT json_build_object( WHERE ac.accession_id = a.accession_id GROUP BY co.collection_id, ot.name_en, tr.translated_name ), - 'collectinInfo', json_build_object( + 'collectingInfo', json_build_object( 'collectingDate', a.collecting_date, 'collectingInstitutes', array( SELECT json_build_object( @@ -324,21 +324,6 @@ SELECT json_build_object( ) )::jsonb || json_build_object( - 'holdingGenbank', json_build_object ( - 'instituteName', ( - CASE - WHEN grc_name.translated_name is not null and grc_name.translated_name != '' THEN grc_name.translated_name - ELSE '' - END - ), - 'instituteCode', grc.grc_code, - 'webSite', grc.internet_site, - 'logo', - CASE - WHEN grc.grc_code is not null THEN 'https://urgi.versailles.inrae.fr/files/siregal/images/grc/inra_brc_en.png' - ELSE '' - END - ), 'accessionHolder', ( CASE WHEN (select string_agg(distinct(coll.collection_code), ', ') @@ -480,29 +465,6 @@ SELECT json_build_object( WHERE l.accession_id = a.accession_id GROUP BY pa.panel_id, pa.panel_name ), - 'collection', array( - SELECT json_build_object( - 'id', co.collection_id, - 'name', ( - CASE - WHEN tr.translated_name is not null THEN tr.translated_name - ELSE co.collection_code - END - ), - 'type', ot.name_en, - 'germplasmCount', ( - SELECT count(distinct ac2.accession_id) - FROM accession_collection_t ac2 - WHERE ac2.collection_id = co.collection_id - ) - ) - FROM collections_t co - JOIN accession_collection_t ac ON co.collection_id = ac.collection_id - LEFT JOIN translations_t tr ON co.collection_id = tr.named_collection_id - LEFT JOIN ontology_term_t ot ON ot.ontology_term_id = co.collection_type_id - WHERE ac.accession_id = a.accession_id - GROUP BY co.collection_id, ot.name_en, tr.translated_name - ), 'population', array( SELECT json_build_object( 'id', prg.population_id, -- GitLab