Specify IPT integration
After setting up the Schema Mapper and running the Data Exporter (see Specify to Darwin Core mapping), I found out that the command line tool to update the exported data, ExpCmdLine, requires a graphics card, which makes that I cannot run it from the server and use it to schedule updates. Therefore, inspired by Andy's presentation of the DwCA tool in Specify 7, I decided to try and write an SQL query that I could use directly in the IPT. This worked after a bit of trial and error.
This is the rather impressive-looking (if I may say so myself) query I came up with.
select
co.CollectionObjectID as id,
co.GUID as occurrenceID,
-- record level terms
'PhysicalObject' as "type",
co.TimestampModified as modified,
'https://creativecommons.org/licenses/by/4.0/legalcode' AS license,
'Royal Botanic Gardens Board' AS rightsHolder,
'MEL' as institutionCode,
'MEL' as collectionCode,
'PreservedSpecimen' as basisOfRecord,
-- Occurrence
concat('MEL ', co.CatalogNumber) as catalogNumber,
ce.VerbatimLocality as occurrenceRemarks,
ce.StationFieldNumber as recordNumber,
collectorstring(ce.CollectingEventID, '|', true) as recordedBy,
recorded_by_id(ce.CollectingEventID) as recordedByID,
dwc_reproductive_condition(co.CollectionObjectID) as reproductiveCondition,
dwc_establishment_means(co.CollectionObjectID) as establishmentMeans,
'present' as occurrenceStatus,
preparations(co.CollectionObjectID) as preparations,
associated_sequences(co.CollectionObjectID) as associatedSequences,
-- associatedTaxa,
-- Organism
previous_identifications(co.CollectionObjectID) previousIdentifications,
-- Event
ce.GUID as eventID,
ctr.CollectingTripName as parentEventID,
concat_ws('/', dateWithPrecision(ce.StartDate, ce.StartDatePrecision), dateWithPrecision(ce.EndDate, ce.EndDatePrecision)) as eventDate,
if(ce.EndDate is null and ce.StartDatePrecision=1, dayofyear(ce.StartDate), null) as startDayOfYear,
if(ce.EndDate is null, year(ce.StartDate), null) as "year",
if(ce.EndDate is null and ce.StartDatePrecision in (1, 2), month(ce.StartDate), null) as "month",
if(ce.EndDate is null and ce.StartDatePrecision=1, day(ce.StartDate), null) as "day",
ce.VerbatimDate as verbatimEventDate,
ce.Remarks as habitat,
-- Location
l.GUID as locationID,
hg.higherGeography,
ld.WaterBody as waterBody,
ld.IslandGroup as islandGroup,
ld.Island as island,
hg.continent,
hg.Country as country,
g.Text1 as countryCode,
hg.State as stateProvince,
hg.County as county,
l.LocalityName as verbatimLocality,
l.LocalityName as locality,
CASE
WHEN l.VerbatimElevation IS NOT NULL THEN l.VerbatimElevation
ELSE
CASE l.Text1
WHEN 'ft' THEN CASE WHEN l.MaxElevation IS NULL THEN CONCAT_WS(' ', l.MinElevation, l.Text1) ELSE CONCAT(l.MinElevation, '–', l.MaxElevation, ' ', l.Text1) END
ELSE NULL
END
END as verbatimElevation,
CASE l.Text1 WHEN 'ft' THEN round(l.MinElevation * 0.3048) ELSE l.MinElevation END as minimumElevationInMeters,
CASE
WHEN l.MaxElevation Is Null THEN
CASE l.Text1 WHEN 'ft' THEN round(l.MinElevation * 0.3048) ELSE l.MinElevation END
ELSE
CASE l.Text1 WHEN 'ft' THEN round(l.MaxElevation * 0.3048) ELSE l.MaxElevation END
END as maximumElevationInMeters,
CASE
WHEN EndDepth IS NULL THEN
CASE StartDepthUnit
WHEN '2' THEN CONCAT(StartDepth, ' ft')
WHEN '3' THEN CONCAT(StartDepth, ' fathoms')
ELSE NULL
END
ELSE
CASE StartDepthUnit
WHEN '2' THEN CONCAT(StartDepth, '–', EndDepth, ' ft')
WHEN '3' THEN CONCAT(StartDepth, '–', ENDDepth, ' fathoms')
ELSE NULL
END
END as verbatimDepth,
CASE StartDepthUnit WHEN '1' THEN StartDepth WHEN '2' THEN ROUND(StartDepth * 0.3048) WHEN '3' THEN ROUND(StartDepth * 1.8288) END as minimumDepthInMeters,
CASE
WHEN EndDepth IS NULL THEN
CASE StartDepthUnit WHEN '1' THEN StartDepth WHEN '2' THEN ROUND(StartDepth * 0.3048) WHEN '3' THEN ROUND(StartDepth * 1.8288) END
ELSE
CASE StartDepthUnit WHEN '1' THEN EndDepth WHEN '2' THEN ROUND(EndDepth * 0.3048) WHEN '3' THEN ROUND(EndDepth * 1.8288) END
END as maximumDepthInMeters,
if(l.Latitude1 is not null and l.Longitude1 is not null, l.Lat1Text, null) as verbatimLatitude,
if(l.Latitude1 is not null and l.Longitude1 is not null, l.Long1Text, null) as verbatimLongitude,
gc.OriginalCoordSystem as verbatimCoordinateSystem,
srs_from_datum(l.Datum) as verbatimSRS,
l.Latitude1 as decimalLatitude,
l.Longitude1 as decimalLongitude,
srs_from_datum(l.Datum) as geodeticDatum,
coalesce(ROUND(gc.GeoRefAccuracy), ROUND(gc.MaxUncertaintyEst), coordinate_uncertainty_in_meters(l.OriginalElevationUnit)) as coordinateUncertaintyInMeters,
gc.NamedPlaceExtent as coordinatePrecision,
concat_ws(', ', gca.LastName, gca.FirstName) as georeferencedBy,
gc.GeoRefDetDate as georeferencedDate,
l.LatLongMethod as georeferenceProtocol,
gc.Text1 as georeferenceSources,
replace(gc.GeoRefVerificationStatus, 'Corrected', 'Verified') as georeferenceVerificationStatus,
gc.GeoRefRemarks as georeferenceRemarks,
-- Identification
d.GUID as identificationID,
concat_ws(', ', da.LastName, da.FirstName) as identifiedBy,
identified_by_id(d.DeterminationID) as identifiedByID,
dateWithPrecision(d.DeterminedDate, d.DeterminedDatePrecision) as dateIdentified,
d.Remarks as identificationRemarks,
dwc_identification_qualifier(d.Qualifier, d.VarQualifier, t.TaxonID) as identificationQualifier,
dwc_type_status(co.CollectionObjectID) as typeStatus,
-- Taxon
if(t.FullName LIKE '% [%' , substring(t.FullName, 1, LOCATE(' [', t.FullName)-1), t.FullName) as scientificName,
t.Author as scientificNameAuthorship,
hc.higherTaxonomy as higherClassification,
hc.kingdom,
hc.phylum,
hc.`class`,
hc.`order`,
hc.family,
hc.genus,
hc.specificEpithet,
hc.infraspecificEpithet,
hc.taxonRank,
t.Remarks as taxonRemarks,
'ICN' as nomenclaturalCode,
t.EsaStatus as nomenclaturalStatus
from collectionobject co
join collectingevent ce on co.CollectingEventID=ce.CollectingEventID
left join collectingtrip ctr ON ce.CollectingTripID=ctr.CollectingTripID
join locality l on ce.LocalityID=l.LocalityID
left join geography g on l.GeographyID=g.GeographyID
left join aux_highergeography_test hg on g.GeographyID=hg.GeographyID
left join localitydetail ld on l.LocalityID=ld.LocalityID
left join geocoorddetail gc on l.LocalityID=gc.LocalityID
left join agent gca on gc.AgentID=gca.AgentID
left join determination d on co.CollectionObjectID=d.CollectionObjectID and d.IsCurrent=true
left join agent da on d.DeterminerID=da.AgentID
left join taxon t on d.TaxonID=t.TaxonID
left join aux_highertaxonomy_test hc on t.TaxonID=hc.TaxonID
where co.CollectionID=4
This query uses all the functions described in Specify to Darwin Core mapping, but there is no need anymore to store the returned values in the database. The only extra thing that is needed are tables to cache the higher classification and higher geography, which I had already in place for use with the BioCASe provider. Without those tables the query would run very slow indeed and most likely time out.
As it is now, the IPT takes just over 20 minutes to publish our entire data set of almost a million records. The Darwin Core Archive creation tool in Specify 7 is almost twice as quick, but we cannot use the archives that are produced with Specify 7, as our data is stored in a different way than we want to export/publish it in and we also have extensions with external data (for images for example).
Extra functions
These extra functions were written to do what is done by formatters and aggregators in Specify.
collectorstring()
-- collector_string
DROP function IF EXISTS `collectorstring`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `collectorstring`(p_collectingeventid int, p_separator varchar(4), p_primary bit) RETURNS varchar(256) CHARSET utf8
READS SQL DATA
BEGIN
DECLARE out_collector_string varchar(256);
IF p_primary=true THEN
SELECT REPLACE(GROUP_CONCAT(IF(!isnull(a.FirstName), CONCAT(a.LastName, ', ', a.FirstName), a.LastName) ORDER BY c.OrderNumber SEPARATOR 'placeholder-sep'), 'placeholder-sep', p_separator)
INTO out_collector_string
FROM collector c
JOIN agent a ON c.AgentID=a.AgentID
WHERE c.CollectingEventID=p_collectingeventid AND c.IsPrimary=p_primary
GROUP BY c.CollectingEventID;
ELSE
SELECT REPLACE(GROUP_CONCAT(IF(!isnull(a.FirstName), CONCAT(a.LastName, ', ', a.FirstName), a.LastName) ORDER BY c.OrderNumber SEPARATOR 'placeholder-sep'), 'placeholder-sep', p_separator)
INTO out_collector_string
FROM collector c
JOIN agent a ON c.AgentID=a.AgentID
WHERE c.CollectingEventID=p_collectingeventid
GROUP BY c.CollectingEventID;
END IF;
RETURN out_collector_string;
END$$
DELIMITER ;
recorded_by_id()
-- recorded_by_id
DROP function IF EXISTS `recorded_by_id`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `recorded_by_id`(in_collecting_event_id int) RETURNS varchar(256) CHARSET utf8
BEGIN
DECLARE out_recorded_by_id varchar(256);
SELECT GROUP_CONCAT(DISTINCT COALESCE(av1.Name, av2.Name) ORDER BY c.OrderNumber SEPARATOR ' | ')
INTO out_recorded_by_id
FROM collector c
JOIN agent a ON c.AgentID=a.agentID
LEFT JOIN agentvariant av1 ON a.AgentID=av1.AgentID AND av1.VarType=11
LEFT JOIN agentvariant av2 ON a.AgentID=av2.AgentID AND av2.VarType=9
WHERE c.CollectingEventID=in_collecting_event_id AND (av1.AgentVariantID IS NOT NULL OR av2.AgentVariantID IS NOT NULL)
GROUP BY c.CollectingEventID;
RETURN out_recorded_by_id;
END$$
DELIMITER ;
preparations()
-- preparations
DROP function IF EXISTS `preparations`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `preparations`(in_collection_object_id INTEGER) RETURNS varchar(256) CHARSET utf8
BEGIN
DECLARE out_preparations VARCHAR(256);
SELECT GROUP_CONCAT(DISTINCT pt.Name ORDER BY pt.PrepTypeID SEPARATOR ' | ')
INTO out_preparations
FROM preparation p
JOIN preptype pt ON p.PrepTypeID=pt.PrepTypeID
WHERE p.CollectionObjectID=in_collection_object_id AND pt.PrepTypeID NOT IN (13, 15, 16, 17, 18, 24)
GROUP BY p.CollectionObjectID;
RETURN out_preparations;
END$$
DELIMITER ;
associated_sequences()
-- associated_sequences
DROP function IF EXISTS `associated_sequences`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `associated_sequences`(in_collection_object_id int) RETURNS varchar(256) CHARSET utf8
BEGIN
DECLARE out_associated_sequences varchar(256);
SELECT group_concat(concat('https://www.ncbi.nlm.nih.gov/nuccore/', GenBankAccessionNumber) SEPARATOR ' | ')
INTO out_associated_sequences
FROM dnasequence
WHERE CollectionObjectID=in_collection_object_id
GROUP BY CollectionObjectID;
RETURN out_associated_sequences;
END$$
DELIMITER ;
previous_identifications()
-- previous_identifications
DROP function IF EXISTS `previous_identifications`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `previous_identifications`(in_collection_object_id INTEGER) RETURNS text CHARSET utf8
BEGIN
DECLARE out_previous_identifications TEXT;
select
group_concat(identification_string(concat_ws(', ', concat_ws(' ', if(t.FullName LIKE '% [%' , substring(t.FullName, 1, LOCATE(' [', t.FullName)-1), t.FullName), t.author), t.EsaStatus),
concat_ws(' ', a.LastName, a.FirstName), dateWithPrecision(d.DeterminedDate, d.DeterminedDatePrecision), d.Remarks) SEPARATOR ' | ') as previousIdentifications
into out_previous_identifications
from determination d
join taxon t on d.TaxonID=t.TaxonID
left join agent a on d.DeterminerID=a.AgentID
where d.CollectionObjectID=in_collection_object_id and (d.IsCurrent=false or d.IsCurrent is null) and (d.FeatureOrBasis!='Type status' or d.FeatureOrBasis is null)
group by d.CollectionObjectID;
RETURN out_previous_identifications;
END$$
DELIMITER ;
identification_string()
-- identification_string
DROP function IF EXISTS `identification_string`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `identification_string`(in_scientific_name varchar(128), in_identified_by varchar(128), in_date_identified varchar(16), in_identification_remarks text) RETURNS text CHARSET utf8
BEGIN
DECLARE out_identification_string TEXT;
SET out_identification_string = in_scientific_name;
IF in_identified_by IS NOT NULL AND in_identified_by!='' THEN
SET out_identification_string = CONCAT(out_identification_string, ', ', in_identified_by);
IF in_date_identified IS NOT NULL AND in_date_identified!='' THEN
SET out_identification_string = CONCAT(out_identification_string, ', ', in_date_identified);
END IF;
ELSE
IF in_date_identified IS NOT NULL AND in_date_identified!='' THEN
SET out_identification_string = CONCAT(out_identification_string, ', ', in_date_identified);
END IF;
END IF;
IF in_identification_remarks IS NOT NULL AND in_identification_remarks!='' THEN
SET out_identification_string = CONCAT(out_identification_string, ', ', in_identification_remarks);
END IF;
RETURN out_identification_string;
END$$
DELIMITER ;
identified_by_id()
-- identified_by_id
DROP function IF EXISTS `identified_by_id`;
DELIMITER $$
USE `melisr`$$
CREATE FUNCTION `identified_by_id`(in_determination_id int) RETURNS varchar(256) CHARSET utf8
BEGIN
DECLARE out_identified_by_id varchar(256);
SELECT GROUP_CONCAT(DISTINCT COALESCE(av1.Name, av2.Name) ORDER BY gp.OrderNumber SEPARATOR ' | ')
INTO out_identified_by_id
FROM determination d
JOIN agent a ON d.DeterminerID=a.AgentID
LEFT JOIN groupperson gp ON a.AgentID=gp.GroupID
LEFT JOIN agent m ON gp.MemberID=m.AgentID
LEFT JOIN agentvariant av1 ON COALESCE(m.AgentID, a.AgentID)=av1.AgentID AND av1.VarType=11
LEFT JOIN agentvariant av2 ON COALESCE(m.AgentID, a.AgentID)=av2.AgentID AND av2.VarType=9
WHERE d.DeterminationID=in_determination_id AND (av1.AgentVariantID IS NOT NULL OR av2.AgentID IS NOT NULL)
GROUP BY d.DeterminationID;
RETURN out_identified_by_id;
END$$
DELIMITER ;
Denormalise trees
Taxon tree
In order for the publication of the Darwin Core Archive to complete in a reasonable amont of time, we need to cache a denormalised version of the Taxon tree. This table stores all taxonomic ranks in a single record, so we can just join it to the Taxon table and do not have to do a lot of recurrent querying.
We also add some other classification related terms, taxonRank
and
higherTaxonomy
(I called it higherTaxonomy
because I got my Darwin Core
terms wrong and have not changed it to higherClassification
yet, as I am not
sure that I am not using this table for anything else).
While you could just update the records that have changed since the last update,
I choose to replace all records every time – even though this takes almost three
hours – as I am not sure every change in the Taxon tree will lead to a change of
the TimestampModified
in the Taxon table. It is not terrible if a Darwin Core
Archive is published before the auxilliary higher taxonomy table has been
updated, as the essential information is in the Taxon table and the higher
classification can be picked up in the next publication. We publish every week.
Table create statement
CREATE TABLE `aux_highertaxonomy_test` (
`TaxonID` int(10) unsigned NOT NULL DEFAULT '0',
`taxonRank` varchar(16) DEFAULT NULL,
`kingdom` varchar(64) DEFAULT NULL,
`phylum` varchar(64) DEFAULT NULL,
`class` varchar(64) DEFAULT NULL,
`order` varchar(64) DEFAULT NULL,
`family` varchar(64) DEFAULT NULL,
`genus` varchar(64) DEFAULT NULL,
`specificEpithet` varchar(128) DEFAULT NULL,
`infraspecificEpithet` varchar(128) DEFAULT NULL,
`higherTaxonomy` text,
`modified` datetime DEFAULT NULL,
PRIMARY KEY (`TaxonID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Procedure to update the higher taxonomy table
DROP PROCEDURE IF EXISTS `reindex_higher_taxonymy`;
DELIMITER $$
CREATE PROCEDURE `reindex_higher_taxonymy` ()
taxa: BEGIN
DECLARE var_taxon_id INTEGER(11);
DECLARE var_node_number INT;
DECLARE var_taxon_rank VARCHAR(16);
DECLARE var_finished INT DEFAULT 0;
DECLARE taxon_cursor CURSOR FOR
SELECT t.TaxonID, t.NodeNumber, td.Name
FROM taxon t
JOIN taxontreedefitem td ON t.TaxonTreeDefItemID=td.TaxonTreeDefItemID
JOIN determination d ON t.TaxonID=d.TaxonID
WHERE d.CollectionMemberID=4
GROUP BY t.TaxonID;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET var_finished = 1;
TRUNCATE aux_highertaxonomy_test;
OPEN taxon_cursor;
taxon_loop: LOOP
FETCH taxon_cursor
INTO var_taxon_id, var_node_number, var_taxon_rank;
CALL replace_higher_taxonomy_record(var_taxon_id, var_node_number, var_taxon_rank);
IF var_finished = 1 THEN
LEAVE taxon_loop;
END IF;
END LOOP taxon_loop;
CLOSE taxon_cursor;
Procedure to replace a higher taxonomy record
DROP procedure IF EXISTS `replace_higher_taxonomy_record`;
DELIMITER $$
USE `melisr`$$
CREATE PROCEDURE `replace_higher_taxonomy_record`(in_taxon_id INTEGER(11), in_node_number INT, in_taxon_rank VARCHAR(16))
BEGIN
DECLARE var_kingdom VARCHAR(64);
DECLARE var_phylum VARCHAR(64);
DECLARE var_class VARCHAR(64);
DECLARE var_order VARCHAR(64);
DECLARE var_family VARCHAR(64);
DECLARE var_genus VARCHAR(64);
DECLARE var_specific_epithet VARCHAR(64);
DECLARE var_infraspecific_epithet VARCHAR(64);
DECLARE var_higher_taxonomy TEXT;
DECLARE var_taxonomy_rank VARCHAR(16);
DECLARE var_taxonomy_name VARCHAR(128);
DECLARE var_taxonomy_full_name VARCHAR(128);
DECLARE var_finished INT DEFAULT 0;
DECLARE higher_taxonomy_cursor CURSOR FOR
SELECT td.Name as var_rank, t.name as var_name, if(t.FullName LIKE '% [%' , substring(t.FullName, 1, LOCATE(' [', t.FullName)-1), t.FullName)
FROM taxon t
JOIN taxontreedefitem td ON t.TaxonTreeDefItemID=td.TaxonTreeDefItemID
WHERE NodeNumber<=in_node_number AND HighestChildNodeNumber>=in_node_number
AND NodeNumber>1 AND t.Name NOT LIKE '%indet.'
ORDER BY td.RankID;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET var_finished = 1;
OPEN higher_taxonomy_cursor;
higher_taxonomy_loop: LOOP
FETCH higher_taxonomy_cursor
INTO var_taxonomy_rank, var_taxonomy_name, var_taxonomy_full_name;
IF var_taxonomy_rank != in_taxon_rank THEN
IF var_higher_taxonomy IS NULL THEN
SET var_higher_taxonomy = var_taxonomy_full_name;
ELSE
SET var_higher_taxonomy = CONCAT_WS(' | ', var_higher_taxonomy, var_taxonomy_full_name);
END IF;
END IF;
CASE var_taxonomy_rank
WHEN 'kingdom' THEN
SET var_kingdom = var_taxonomy_name;
WHEN 'division' THEN
SET var_phylum = var_taxonomy_name;
WHEN 'class' THEN
SET var_class = var_taxonomy_name;
WHEN 'order' THEN
SET var_order = var_taxonomy_name;
WHEN 'family' THEN
SET var_family = var_taxonomy_name;
WHEN 'genus' THEN SET var_genus = var_taxonomy_name;
WHEN 'species' THEN
SET var_specific_epithet = var_taxonomy_name;
WHEN 'subspecies' THEN
SET var_infraspecific_epithet = var_taxonomy_name;
WHEN 'variety' THEN
SET var_infraspecific_epithet = var_taxonomy_name;
WHEN 'subvariety' THEN
SET var_infraspecific_epithet = var_taxonomy_name;
WHEN 'forma' THEN
SET var_infraspecific_epithet = var_taxonomy_name;
WHEN 'subforma' THEN
SET var_infraspecific_epithet = var_taxonomy_name;
ELSE
BEGIN END;
END CASE;
IF var_finished = 1 THEN
LEAVE higher_taxonomy_loop;
END IF;
END LOOP higher_taxonomy_loop;
IF in_taxon_rank='division' THEN
SET in_taxon_rank='phylum';
END IF;
REPLACE INTO aux_highertaxonomy_test (TaxonID, taxonRank,
kingdom, phylum, `class`, `order`, family, genus, specificEpithet, infraspecificEpithet,
higherTaxonomy, modified)
VALUES (in_taxon_id, in_taxon_rank,
var_kingdom, var_phylum, var_class, var_order, var_family, var_genus,
var_specific_epithet, var_infraspecific_epithet, var_higher_taxonomy, now());
CLOSE higher_taxonomy_cursor;
END$$
DELIMITER ;
Geography tree
The Geography tree also needs to be flattened. I have added countryCode
to
this table as well, so it does not need to be stored in a custom field in the
Geography table.
We use the TDWG World Geographical Scheme for Recording Plant Distributions (WGSRPD) in our Geography tree and, while we are moving away from that, the WGSRPD Level 1 regions play an important role in how we store specimens and are printed on our labels, so we cannot change them in our Geography tree. Therefore, I have created a function that maps the WGSRPD Level 1 regions (which we just call 'Continents' in our database) to continents and store the latter in the auxilliary table.
A full re-index of this table takes well under a minute.
Table create statement
CREATE TABLE `aux_highergeography_test` (
`GeographyID` int(11) NOT NULL,
`GeographyRank` varchar(16) DEFAULT NULL,
`Continent` varchar(64) DEFAULT NULL,
`Country` varchar(64) DEFAULT NULL,
`State` varchar(64) DEFAULT NULL,
`County` varchar(64) DEFAULT NULL,
`higherGeography` text,
`countryCode` varchar(4) DEFAULT NULL,
`modified` datetime DEFAULT NULL,
UNIQUE KEY `GeographyIDUNIQ` (`GeographyID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Procedure to update higher geography table
DROP PROCEDURE IF EXISTS `reindex_higher_geography`;
DELIMITER $$
CREATE PROCEDURE `reindex_higher_geography` ()
BEGIN
DECLARE var_geography_id INTEGER(11);
DECLARE var_node_number INT;
DECLARE var_geography_rank VARCHAR(16);
DECLARE var_finished INT DEFAULT 0;
DECLARE geography_cursor CURSOR FOR
SELECT g.GeographyID, g.NodeNumber, gd.Name
FROM geography g
JOIN geographytreedefitem gd ON g.GeographyTreeDefItemID=gd.GeographyTreeDefItemID
JOIN locality l ON g.GeographyID=l.GeographyID
JOIN collectingevent ce ON l.LocalityID=ce.CollectingEventID
JOIN collectionobject co ON ce.CollectingEventID=co.CollectingEventID
WHERE co.CollectionID=4
GROUP BY g.GeographyID;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET var_finished = 1;
TRUNCATE aux_highergeography_test;
OPEN geography_cursor;
geography_loop: LOOP
FETCH geography_cursor
INTO var_geography_id, var_node_number, var_geography_rank;
CALL replace_higher_geography_record(var_geography_id, var_node_number, var_geography_rank);
IF var_finished = 1 THEN
LEAVE geography_loop;
END IF;
END LOOP geography_loop;
CLOSE geography_cursor;
END $$
DELIMITER ;
Procedure to replace a higher geography record
DROP procedure IF EXISTS `replace_higher_geography_record`;
DELIMITER $$
USE `melisr`$$
CREATE PROCEDURE `replace_higher_geography_record`(in_geography_id INTEGER(11), in_node_number INT, in_geography_rank VARCHAR(16))
BEGIN
DECLARE var_continent VARCHAR(64);
DECLARE var_country VARCHAR(64);
DECLARE var_state VARCHAR(64);
DECLARE var_county VARCHAR(64);
DECLARE var_higher_geography TEXT;
DECLARE var_geography_rank VARCHAR(16);
DECLARE var_geography_name VARCHAR(128);
DECLARE var_geography_full_name VARCHAR(128);
DECLARE var_geography_code VARCHAR(16);
DECLARE var_country_code VARCHAR(4);
DECLARE var_finished INT DEFAULT 0;
DECLARE higher_geography_cursor CURSOR FOR
SELECT gd.Name as var_rank, g.name as var_name, g.GeographyCode AS var_code
FROM geography g
JOIN geographytreedefitem gd ON g.GeographyTreeDefItemID=gd.GeographyTreeDefItemID
WHERE g.NodeNumber<=in_node_number AND g.HighestChildNodeNumber>=in_node_number
AND g.NodeNumber>1 AND g.GeographyTreeDefID=1
ORDER BY gd.RankID;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET var_finished = 1;
OPEN higher_geography_cursor;
higher_geography_loop: LOOP
FETCH higher_geography_cursor
INTO var_geography_rank, var_geography_name, var_geography_code;
IF var_geography_rank = 'Continent' THEN
SET var_geography_name = map_continent(var_geography_name);
END IF;
IF var_geography_rank != in_geography_rank THEN
IF var_higher_geography IS NULL THEN
SET var_higher_geography = var_geography_name;
ELSE
SET var_higher_geography = CONCAT_WS(' | ', var_higher_geography, var_geography_name);
END IF;
END IF;
CASE var_geography_rank
WHEN 'continent' THEN
SET var_continent = var_geography_name;
WHEN 'country' THEN
SET var_country = var_geography_name;
SET var_country_code=var_geography_code;
WHEN 'state' THEN
SET var_state = var_geography_name;
WHEN 'county' THEN
SET var_county = var_geography_name;
ELSE BEGIN END;
END CASE;
IF var_finished = 1 THEN
LEAVE higher_geography_loop;
END IF;
END LOOP higher_geography_loop;
REPLACE INTO aux_highergeography_test (GeographyID, geographyRank,
continent, country, `state`, county, higherGeography, countryCode, modified)
VALUES (in_geography_id, in_geography_rank,
var_continent, var_country, var_state, var_county, var_higher_geography, var_country_code, now());
CLOSE higher_geography_cursor;
END$$
DELIMITER ;
map_continent() function
DROP function IF EXISTS `map_continent`;
DELIMITER $$
CREATE FUNCTION `map_continent`(in_continent varchar(64)) RETURNS varchar(32) CHARSET utf8
BEGIN
DECLARE out_continent VARCHAR(32);
SET out_continent = CASE in_continent
WHEN '1. Europe' THEN 'Europe'
WHEN '2. Africa' THEN 'Africa'
WHEN '3. Asia-Temperate' THEN 'Asia'
WHEN '4. Asia-Tropical' THEN 'Asia'
WHEN '5. Australasia' THEN 'Oceania'
WHEN '6. Pacific' THEN 'Oceania'
WHEN '7. Northern America' THEN 'North America'
WHEN '8. Southern America' THEN 'South America'
WHEN '9. Antarctica' THEN 'Antarctica'
ELSE NULL
END;
RETURN out_continent;
END$$
DELIMITER ;