import java.util.concurrent.BlockingQueue

import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer
import org.apache.commons.lang.StringUtils

import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.common.*
import eu.dnetlib.hope.domains.generic.*
import eu.dnetlib.miscutils.functional.string.EscapeHtml

/**
 * Transformation script for BDIC records.
 *
 * Takes XML record strings from {@code inputQueue} until the marker string "END"
 * is received. Each record is parsed, its OAI header and oai_dc payload are mapped
 * onto an {@code OAIHeader} and a {@code DCObject}, and the result of rendering the
 * "genericTemplate" StringTemplate is put on {@code resultsQueue}. Records without
 * any dc:language value are skipped. When the input is exhausted, "END" is put on
 * the results queue.
 */
class BDICGenericDefault implements RunnableGroovy {

    /** Directory the StringTemplate group is loaded from. */
    String templatePath = "/var/lib/hope-scripts/templates"

    BlockingQueue resultsQueue, inputQueue

    /**
     * Driver namespace for elements in the OAI header.
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def oai_dc = new groovy.xml.Namespace("http://www.openarchives.org/OAI/2.0/oai_dc/", 'oai_dc')
    def dc = new groovy.xml.Namespace("http://purl.org/dc/elements/1.1/", 'dc')
    /** XML namespace, used to read the xml:lang attribute of elements. */
    def xml = new groovy.xml.Namespace("http://www.w3.org/XML/1998/namespace", 'xml')

    /** Sets whose dc:date values are mapped to creation dates; all other sets get issue dates. */
    def setsWithCreationDate = [
        'default.NOTICES.OAI_VALOIS',
        'default.NOTICES.PHOTOGRAPHIES_ELIE_KAGAN',
        'default.NOTICES.GUERRE_ESPAGNE',
        'default.NOTICES.1914_1918_ARCHIVES'
    ]

    /**
     * Consumes the input queue until "END" is taken, transforming every record that
     * has at least one dc:language and publishing the rendered template on the
     * results queue. Puts "END" on the results queue once the input is consumed.
     */
    @Override
    public void run() {
        StringTemplateGroup group = new StringTemplateGroup("genericGroup", templatePath, DefaultTemplateLexer.class)
        StringTemplate genericTemplate = group.getInstanceOf("genericTemplate")
        println "got template at path "+templatePath
        // NOTE(review): records are parsed exactly as they arrive on the queue; if they can
        // come from untrusted sources, confirm this parser rejects DTDs / external entities.
        def parser = new XmlParser(false, true)
        println "got parser"
        def input
        String dataProvider = "Bibliothèque de Documentation Internationale Contemporaine (BDIC)"
        while (!((input = this.inputQueue.take()).equals("END"))) {
            Node xmlRoot = parser.parseText(input)
            Node oaiHeaderNode = xmlRoot.'**'.header[0]
            String oaiIdentifier = oaiHeaderNode.identifier.text()
            String setName = oaiHeaderNode.setSpec.text()
            OAIHeader oaiheader = buildHeader(oaiHeaderNode, oaiIdentifier, setName)

            Node dcNode = xmlRoot.'**'.'oai_dc:dc'[0]
            //TODO: put in upper case
            //translate( normalize-space( oai:metadata/oai_dc:dc/dc:language), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
            // Safe navigation: a record without an oai_dc:dc element yields null here and is
            // skipped below instead of failing on the property access.
            List drLanguages = dcNode?.'dc:language'*.text()

            //if no language --> skip records
            if (drLanguages) {
                DCMetadata duMD = buildDescriptiveUnit(dcNode, oaiIdentifier, setName)
                DCResource drMD = new DCResource(languages:drLanguages)
                Derivative der3 = buildThumbnail(dcNode, oaiIdentifier)
                if (der3 != null) {
                    //only some thumbnails for BDIC
                    drMD.setDerivative3(der3)
                }
                DCObject dcObject = buildDcObject(dcNode, dataProvider, duMD, drMD)

                genericTemplate.setAttribute("oaiheader", oaiheader)
                genericTemplate.setAttribute("dc", dcObject)
                String res = genericTemplate.toString()
                resultsQueue.put(res)
            } else {
                println "No languages found. Skipping record "+oaiIdentifier
            }
            // Clear the template attributes before the next record, whether or not this one was emitted.
            genericTemplate.reset()
        }
        println "consumed input queue"
        this.resultsQueue.put("END")
    }

    /** Copies the header fields of the record into a new OAIHeader. */
    private OAIHeader buildHeader(Node oaiHeaderNode, String oaiIdentifier, String setName) {
        OAIHeader oaiheader = new OAIHeader()
        oaiheader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
        oaiheader.setIdentifier(oaiIdentifier)
        oaiheader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
        oaiheader.setRepositoryId(oaiHeaderNode.'dri:repositoryId'.text())
        oaiheader.setSetSpec(setName)
        return oaiheader
    }

    /** Wraps each node as a LanguagedValue made of its xml:lang attribute and its text. */
    private List toLanguagedValues(nodes) {
        return nodes.collect { new LanguagedValue(language:it.attribute(xml.lang), value:it.text()) }
    }

    /** Builds the DCMetadata (landing page, subjects, coverages, relations, rights, dates) for one record. */
    private DCMetadata buildDescriptiveUnit(Node dcNode, String oaiIdentifier, String setName) {
        String dcIdentifier = dcNode.'dc:identifier'.text()
        LandingPage lPage = new LandingPage(localID:oaiIdentifier, href:dcIdentifier)

        List theSpatials = []
        List theTemporals = []
        for (Node coverage in dcNode.'**'.'dc:coverage') {
            String lang = coverage.attribute(xml.lang)
            String val = coverage.text()
            LanguagedValue coverageValue = new LanguagedValue(language:lang, value:val)
            // A coverage whose first three characters are digits is treated as temporal.
            // Values shorter than three characters cannot match and are kept as spatial
            // (substring(0,3) would otherwise throw on them).
            if (val.length() >= 3 && StringUtils.isNumeric(val.substring(0, 3))) {
                theTemporals.add(coverageValue)
            } else {
                theSpatials.add(coverageValue)
            }
        }

        // Only dc:relation values starting with 'FONDS :' (case-insensitive) are kept as relations.
        List theRelations = toLanguagedValues(
                dcNode.'dc:relation'.findAll { it.text().toUpperCase().startsWith('FONDS :') })

        List dates = dcNode.'dc:date'*.text()
        boolean datesAreCreationDates = setsWithCreationDate.contains(setName)
        List createDates = datesAreCreationDates ? dates : []
        //default.NOTICES.jourtranch & others
        List issueDates = datesAreCreationDates ? [] : dates

        List dc_sources = dcNode.'dc:source'*.text()
        return new DCMetadata(localID:oaiIdentifier,
                collItemIds:[oaiIdentifier]+ dc_sources,
                language:'fra', descriptionLevel:'item',
                creationDates:createDates, issueDates:issueDates,
                landingPage:lPage,
                extents:[], mediums:[],
                subjects:toLanguagedValues(dcNode.'**'.'dc:subject'),
                spatials:theSpatials, temporals:theTemporals,
                relations:theRelations,
                provenances:[],
                rights:toLanguagedValues(dcNode.'**'.'dc:rights'))
    }

    /**
     * Builds the 'derivative3' Derivative from the first dc:relation starting with
     * 'VIGNETTE : ' (case-insensitive). The part of the URL after the first '?' becomes the
     * local ID; both the ID and the URL are passed through EscapeHtml.
     *
     * @return the derivative, or null when the record has no such relation or the URL has
     *         no query part to derive an ID from
     */
    private Derivative buildThumbnail(Node dcNode, String oaiIdentifier) {
        // 11 == length of the 'VIGNETTE : ' prefix
        String der3URL = dcNode.'dc:relation'*.text().find { it.toUpperCase().startsWith('VIGNETTE : ') }?.substring(11)
        if (!der3URL) {
            return null
        }
        String[] der3URLSplit = der3URL.split('\\?')
        if (der3URLSplit.length < 2) {
            // No '?' (or nothing after it): indexing [1] would throw, so skip the thumbnail only.
            println "Thumbnail URL has no query part. Skipping thumbnail of record "+oaiIdentifier
            return null
        }
        EscapeHtml escaper = new EscapeHtml()
        String der3Id = escaper.evaluate(der3URLSplit[1])
        String urlEscaped = escaper.evaluate(der3URL)
        return new Derivative(type:'derivative3', localID:der3Id, href:urlEscaped)
    }

    /** Assembles the DCObject handed to the template from the dc node and the prepared du/dr parts. */
    private DCObject buildDcObject(Node dcNode, String dataProvider, DCMetadata duMD, DCResource drMD) {
        // Safe navigation: when no dc:type / dc:rights element is tagged xml:lang="eng" the
        // value stays null instead of raising a NullPointerException.
        def euroType = dcNode.'dc:type'.find { it.attribute(xml.lang).equals("eng") }?.text()?.toUpperCase()
        String euroRights = dcNode.'dc:rights'.find { it.attribute(xml.lang).equals("eng") }?.text()?.toUpperCase()
        return new DCObject(europeanaType:euroType, europeanaRights:euroRights, cpName:dataProvider,
                titles:toLanguagedValues(dcNode.'**'.'dc:title'),
                descriptions:toLanguagedValues(dcNode.'**'.'dc:description'),
                creators:dcNode.'**'.'dc:creator'*.text(),
                contributors:dcNode.'**'.'dc:contributor'*.text(),
                publishers:dcNode.'**'.'dc:publisher'*.text(),
                du:duMD, dr:drMD)
    }

    /** Sets the queue that rendered records and the final "END" marker are put on. */
    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    /** Sets the queue that XML record strings are taken from. */
    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /** Puts the "END" marker on the results queue. */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put("END")
    }

    /** Intentionally a no-op: the repository id is read from each record's header instead. */
    @Override
    public void setRepositoryId(String repoId) {}
}