import eu.dnetlib.hope.domains.common.*
import eu.dnetlib.hope.domains.generic.*
import eu.dnetlib.miscutils.functional.string.EscapeHtml
import eu.dnetlib.miscutils.functional.string.EscapeXml

/**
 * Builds {@code OAIHeader} and {@code DCObject} instances from parsed XML nodes
 * of harvested records whose OAI identifiers carry the
 * {@code oai:repohope.cediasbibli.org:} prefix.
 */
public class CEDIASUtils{

    /**
     * Marker (compared case-insensitively) that flags a dc:relation value as a
     * thumbnail reference instead of a regular relation. The text after the
     * marker is the thumbnail URL.
     */
    private static final String THUMBNAIL_PREFIX = 'VIGNETTE :'

    /**
     * Driver namespace for elements in the OAI header.
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def oai_dc = new groovy.xml.Namespace("http://www.openarchives.org/OAI/2.0/oai_dc/", 'oai_dc')
    def dc = new groovy.xml.Namespace("http://purl.org/dc/elements/1.1/", 'dc')
    def xml = new groovy.xml.Namespace("http://www.w3.org/XML/1998/namespace", 'xml')

    def EscapeHtml htmlEscaper = new EscapeHtml()
    def EscapeXml xmlEscaper = new EscapeXml()

    /** When true, dc:source values are added to the collected item ids and a DCResource is attached to the object. */
    def boolean isDigital = true

    /**
     * Reads the first {@code header} element found anywhere below {@code xmlRoot}
     * and copies its fields into a new OAIHeader.
     *
     * @param xmlRoot root node of the harvested record
     * @param repoID  repository identifier stored on the returned header
     * @return the populated OAIHeader
     */
    def getOAIHeader = { Node xmlRoot, String repoID ->
        // NOTE(review): when no header element exists, [0] yields null and the
        // property accesses below fail on a null object.
        Node oaiHeaderNode = xmlRoot.'**'.header[0]
        String oaiIdentifier = oaiHeaderNode.identifier.text()
        String setName = oaiHeaderNode.setSpec.text()

        OAIHeader oaiheader = new OAIHeader(nonDigitalCollection:!isDigital)
        oaiheader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
        oaiheader.setIdentifier(oaiIdentifier)
        oaiheader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
        oaiheader.setRepositoryId(repoID)
        oaiheader.setSetSpec(setName)
        return oaiheader
    }

    /**
     * Converts an oai_dc:dc node into a DCObject.
     *
     * @param dcNode        the oai_dc:dc node; when missing, a message is printed and null is returned
     * @param oaiIdentifier OAI identifier of the record; its repository prefix is stripped to form the local id
     * @param setName       not used by this closure (kept for the caller-facing signature)
     * @param parentID      parent identifier stored on the descriptive-unit metadata
     * @param mdLanguage    language assigned to titles, descriptions, subjects, extents and mediums
     * @param dataProvider  stored as the content-provider name of the returned object
     * @return the DCObject, or null when {@code dcNode} is missing
     */
    def processDCRecord = { Node dcNode, String oaiIdentifier, String setName, String parentID, String mdLanguage, String dataProvider ->
        if(!dcNode){
            println "Skipping record "+oaiIdentifier+". Missing oai_dc:dc node."
            return null
        }

        String localID = oaiIdentifier.replace('oai:repohope.cediasbibli.org:', '')

        // Landing page: only built when the record carries a dc:identifier.
        String dcIdentifier = dcNode.'dc:identifier'?.text()
        LandingPage lPage = null
        if(!dcIdentifier){
            println "record oai: "+oaiIdentifier+" has no dc:identifier!\n"+dcNode
        } else {
            // NOTE(review): this replaceAll substitutes '&' with '&' and therefore changes
            // nothing; presumably an entity (un)escaping was intended - confirm before editing.
            lPage = new LandingPage(localID:localID, href:dcIdentifier.replaceAll('&', '&'))
        }

        String euroRights = dcNode.'dc:rights'[0]?.text()
        List drLanguages = dcNode.'dc:language'*.text()

        // dc:format values containing a '/' are stored as mediums, all others as extents.
        def formatNodes = dcNode.'dc:format'
        List theExtents = toLanguagedValues(formatNodes.findAll{ !it.text().contains('/') }, mdLanguage)
        List theMediums = toLanguagedValues(formatNodes.findAll{ it.text().contains('/') }, mdLanguage)
        List theSubjects = toLanguagedValues(dcNode.'**'.'dc:subject', mdLanguage)

        // Thumbnail markers are kept out of the relations; they feed derivative3 below.
        List relationTexts = dcNode.'dc:relation'*.text()
        List theRelations = relationTexts.findAll{ !isThumbnailRelation(it) }.collect{ new LanguagedValue(value:it) }

        List collItems = [oaiIdentifier]
        if(isDigital){
            collItems.addAll(dcNode.'dc:source'*.text())
        }

        DCMetadata duMD = new DCMetadata(
            localID:localID,
            parentID:parentID,
            parentPID:'',
            collItemIds:collItems,
            language:'fra',
            descriptionLevel:'item',
            issueDates:escapeAll(dcNode.'dc:date'*.text()),
            landingPage:lPage,
            extents:theExtents,
            mediums:theMediums,
            subjects:theSubjects,
            spatials:[],
            temporals:[],
            relations:theRelations,
            provenances:[],
            // Always exactly one entry; its value is null when the record has no dc:rights.
            rights:[ new LanguagedValue(value:euroRights) ]
        )

        List theTitles = toLanguagedValues(dcNode.'**'.'dc:title', mdLanguage)
        List theDescriptions = toLanguagedValues(dcNode.'**'.'dc:description', mdLanguage)
        List theCreators = escapeAll(dcNode.'**'.'dc:creator'*.text())
        List theContributors = escapeAll(dcNode.'**'.'dc:contributor'*.text())
        List thePublishers = escapeAll(dcNode.'**'.'dc:publisher'*.text())

        // Falls back to 'TEXT' when the record has no (or only empty) dc:type.
        def euroType = dcNode.'dc:type'.text() ?: 'TEXT'

        DCObject dcObject = new DCObject(
            europeanaType:euroType,
            cpName:dataProvider,
            titles:theTitles,
            descriptions:theDescriptions,
            creators:theCreators,
            contributors:theContributors,
            publishers:thePublishers,
            du:duMD
        )

        if(isDigital){
            DCResource drMD = new DCResource(languages:drLanguages)
            String thumbnailRelation = relationTexts.find{ isThumbnailRelation(it) }
            String der3URL = thumbnailRelation ? thumbnailUrl(thumbnailRelation) : null
            if(der3URL){
                String urlEscaped = htmlEscaper.evaluate(der3URL)
                drMD.setDerivative3(new Derivative(type:'derivative3', localID:urlEscaped, href:urlEscaped))
            }
            dcObject.setDr(drMD)
            dcObject.setEuropeanaRights(euroRights)
        }
        return dcObject
    }

    /**
     * Tells whether a dc:relation value starts with the thumbnail marker.
     * Upper-casing uses Locale.ROOT so the match does not depend on the JVM default locale.
     */
    private static boolean isThumbnailRelation(String relationText){
        return relationText.toUpperCase(Locale.ROOT).startsWith(THUMBNAIL_PREFIX)
    }

    /** Returns the text following the thumbnail marker, with surrounding whitespace removed. */
    private static String thumbnailUrl(String relationText){
        return relationText.substring(THUMBNAIL_PREFIX.length()).trim()
    }

    /** Wraps the XML-escaped text of each node in a LanguagedValue carrying the given language. */
    private List toLanguagedValues(nodes, String language){
        return nodes.collect{ new LanguagedValue(language:language, value:xmlEscaper.evaluate(it.text())) }
    }

    /** XML-escapes every string of the given list. */
    private List escapeAll(List texts){
        return texts.collect{ xmlEscaper.evaluate(it) }
    }
}