import java.util.concurrent.BlockingQueue

import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer
import org.apache.commons.lang.StringEscapeUtils

import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.archive.*
import eu.dnetlib.hope.domains.common.*
import eu.dnetlib.miscutils.functional.string.EscapeHtml

/**
 * Transforms the XML records of the "CGIL Executive bodies minutes" dataset into EAD
 * domain objects and renders them through the "archiveTemplate" StringTemplate.
 *
 * Records are taken from {@link #inputQueue} until the "END" marker is read; every rendered
 * record is put on {@link #resultsQueue}, followed by a final "END" marker.
 */
class CGILExecBodiesEAD implements RunnableGroovy {

    /** Marker string that terminates both the input and the results queue. */
    private static final String END_OF_QUEUE = "END"

    def String templatePath = "/var/lib/hope-scripts/templates"
    def BlockingQueue resultsQueue, inputQueue;
    // Only referenced by the commented-out thumbnail URL in generateItemList.
    def String thumbnailHost = "http://localhost";
    /**
     * Driver namespace for elements in the OAI header.
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')

    /**
     * Consumes the input queue record by record, renders each record with the archive
     * template and pushes the resulting string on the results queue.
     */
    @Override
    public void run() {
        StringTemplateGroup group = new StringTemplateGroup("archiveGroup", templatePath, DefaultTemplateLexer.class)
        StringTemplate archiveTemplate = group.getInstanceOf("archiveTemplate")
        println "got template at path " + templatePath
        // non-validating, namespace-aware parser
        def parser = new XmlParser(false, true)
        println "got parser"
        def input
        while (!((input = this.inputQueue.take()).equals(END_OF_QUEUE))) {
            println "processing record"
            Node xmlRoot = parser.parseText(input)

            Node oaiHeaderNode = xmlRoot.'**'.header[0]
            String repoId = oaiHeaderNode.'dri:repositoryId'.text()
            OAIHeader oaiheader = new OAIHeader()
            oaiheader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
            oaiheader.setIdentifier(oaiHeaderNode.identifier.text())
            oaiheader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
            oaiheader.setRepositoryId(repoId)
            oaiheader.setSetSpec(oaiHeaderNode.setSpec.text())

            String dataProvider = "Confederazione Generale Italiana del Lavoro"
            String datasetName = "CGIL Executive bodies minutes"
            def header = generateEadHeader(dataProvider, datasetName) as EadHeader

            Node metadataNode = xmlRoot.'**'.metadata[0]
            def archDesc = generateEadArchDesc(metadataNode, repoId) as EadArchDesc

            archiveTemplate.setAttribute("oaiheader", oaiheader)
            archiveTemplate.setAttribute("header", header)
            archiveTemplate.setAttribute("archdescs", [archDesc])
            String res = archiveTemplate.toString()
            // NOTE(review): as written this replaces "&" with "&" and therefore changes nothing;
            // the intended pattern/replacement may have been an XML entity - confirm.
            res = res.replaceAll("&", "&")
            resultsQueue.put(res)
            println "result pushed"
            // the same template instance is reused for the next record
            archiveTemplate.reset()
        }
        println "consumed input queue"
        this.resultsQueue.put(END_OF_QUEUE)
    }

    /**
     * Builds the EAD header for the dataset.
     * The eadid is the dataset name with blanks replaced by underscores.
     */
    def generateEadHeader = { provider, datasetName ->
        EadHeader eadHeader = new EadHeader(
                mainagencycode: "HOPE - Heritage of the People's Europe",
                eadid: datasetName.replace(' ', '_'),
                titleproper: new UnitTitle(title: datasetName),
                creationProvider: provider,
                languages: [
                    new Language(code: 'ita', text: 'Italian')
                ])
        return eadHeader
    }

    /**
     * Builds the archival description from the 'c' element whose did/unitid text equals
     * the hard-coded collection id; each direct 'c' child becomes a mid level.
     *
     * xmlRoot points to the start of ead, that is it is on the 'oai:metadata' tag,
     * just before any ead tag.
     *
     * Throws IllegalStateException when the collection element is not present.
     */
    def generateEadArchDesc = { xmlRoot, repoId ->
        def collectionID = '0000000518'
        Node collNode = xmlRoot.'**'.c.find { it.did.unitid.text().equals(collectionID) }
        if (collNode == null) {
            // Fail with context instead of a bare NullPointerException further down.
            throw new IllegalStateException("No 'c' element with unitid '" + collectionID +
                    "' found in record of repository '" + repoId + "'")
        }
        EadArchDesc eadArchDesc = new EadArchDesc()
        EadDid eadArchDid = generateArchDid(collNode)
        eadArchDesc.setDid(eadArchDid)
        eadArchDesc = generateCFields(eadArchDesc, collNode)

        // every direct child is mapped to a mid level, whatever its @level is
        List rootLevels = collNode.c
        List midlevels = rootLevels.collect { buildMidLevel(it) }
        eadArchDesc.setNestedLevels(midlevels)
        return eadArchDesc
    }

    /**
     * Recursively maps the direct 'c' children of containerNode whose @level is not 'item'
     * to mid levels.
     */
    def generateNestedLevels = { containerNode ->
        List nestedLevels = containerNode.c.findAll { it.'@level' != 'item' }
        //println "Found "+nestedLevels.size()+" nested c levels - no item - for c element with unitid: "+containerNode.did.unitid.text()
        List eadNestedLevels = nestedLevels.collect { buildMidLevel(it) }
        return eadNestedLevels
    }

    /**
     * Maps the direct 'c' children of container whose @level is 'item' to EadItem objects,
     * including the digital object locations when a daogrp/daoloc is present.
     */
    def generateItemList = { Node container ->
        List itemNodes = container.c.findAll { it.'@level' == 'item' }
        //println "Found "+itemNodes.size()+" item levels under c element with unitid: "+container.did.unitid.text()
        List items = itemNodes.collect { itemNode ->
            EadDid did = generateDid(itemNode)
            //TODO: check this: transcription should be a list of transcription strings
            EadItem eadItem = new EadItem(type: "TEXT", rights: "Rights Reserved - Free Access", languages: ['ita'])
            eadItem = generateCFields(eadItem, itemNode) as EadItem
            eadItem.setDid(did)
            int numberOfEntries = 3
            eadItem.setNumberOfEntries(numberOfEntries)

            //daolocs:
            Node daoGrpNode = itemNode.daogrp[0]
            if (daoGrpNode != null) {
                def dao = daoGrpNode.daoloc[0] as Node
                if (dao != null) {
                    //concat("http://cgil.maas.ccr.it/cgil/AJAXAttachment.ashx?resource=", "//daogrp/daoloc[1][@href]/=text()")
                    String daoGrpId = daoGrpNode.'@id'
                    // [1..-1] drops the first character of the href (a leading '\').
                    String der2URL = "http://cgil.maas.ccr.it/cgil/AJAXAttachment.ashx?resource=" + dao.@href[1..-1]
                    EadDaoLoc der2 = new EadDaoLoc(href: der2URL, label: "derivative2")
                    //def EadDaoLoc der3 = new EadDaoLoc(href:thumbnailHost+"/"+daoGrpId+".jpg", label:"derivative3")
                    EadDaoLoc der3 = new EadDaoLoc(href: "http://node1.d.hope.research-infrastructures.eu/unavailable.html", label: "derivative3")
                    eadItem.setDaolocs([der2, der3])
                    eadItem.setDaogrpId(daoGrpId)
                }
            }
            return eadItem
        }
        return items
    }

    /**
     * Fills the descriptive fields shared by every 'c'-like target object (level, bioghist,
     * scopecontent, control accesses, landing page, Europeana type/rights, ...) from cElement
     * and returns the same eadC instance.
     */
    def generateCFields = { eadC, cElement ->
        String cId = cElement.did.unitid.find { it.'@type' == "nrecord" }.text()
        //eadC.setId(cId)
        eadC.setLevel(cElement.@level)

        eadC.setBioghistPars(italianValues(cElement.bioghist.p*.text()))
        eadC.setCustodHistPars(italianValues(cElement.custodhist.p*.text()))
        eadC.setAcquinfoPars(italianValues(cElement.acqinfo.p*.text()))

        // A scopecontent with a head contributes "head: paragraph" for each non-empty paragraph,
        // one without head contributes its paragraphs unchanged.
        List scopeContents = []
        cElement.scopecontent.each { scope ->
            String head = scope?.head.text()
            if (head) {
                scope?.p.each { par ->
                    if (par?.text()) {
                        scopeContents.add(head + ": " + par.text())
                    }
                }
            } else {
                scopeContents.addAll(scope?.p*.text())
            }
        }
        eadC.setScopecontentPars(scopeContents.collect { new LanguagedValue(value: new EscapeHtml().evaluate(it), language: 'ita') })

        eadC.setArrangmentPars(italianValues(cElement.arrangement.p*.text()))
        eadC.setAccessrestrictPars(italianValues(cElement.accessrestrict.p*.text()))
        eadC.setPhystechPars(italianValues(cElement.phystech.p*.text()))
        eadC.setOtherfindaids(italianValues(cElement.otherfindaid.p*.text()))
        eadC.setRelatedmaterials(italianValues(cElement.relatedmaterial*.text()))
        eadC.setSeparatedmaterials(italianValues(cElement.separatedmaterial*.text()))
        eadC.setBibliographies(italianValues(cElement.bibliography*.text()))

        // Person and corporate names found anywhere below controlaccess or scopecontent,
        // de-duplicated through a Set.
        def persnames = (cElement.'**'.controlaccess.'**'.persname*.text() + cElement.'**'.scopecontent.'**'.persname*.text()) as Set
        def corpnames = (cElement.'**'.controlaccess.'**'.corpname*.text() + cElement.'**'.scopecontent.'**'.corpname*.text()) as Set
        EadControlAccess caAgents = new EadControlAccess(
                corpNames: italianValues(corpnames.asList()),
                persNames: italianValues(persnames.asList()))

        List accessList = cElement.controlaccess.collect { access ->
            EadControlAccess controlAccess = new EadControlAccess(controlaccess: access.text())
            controlAccess.setSubjects(italianValues(access.subject*.text()))
            controlAccess.setGeognames(italianValues(access.geogname*.text()))
            return controlAccess
        }
        accessList.add(caAgents)
        eadC.setControlAccesses(accessList)

        //landing page:
        //concat("cgil.maas.ccr.it/cgil/HAPConsole.aspx?dns=hap:localhost/cgilpubblico/", "//c/did/unitid[@type='nrecord']/text()")
        EadDaoLoc landingPage = new EadDaoLoc()
        landingPage.setHref(new EscapeHtml().evaluate("http://cgil.maas.ccr.it/cgil/HAPConsole.aspx?dns=hap:localhost/cgilpubblico/" + cId))
        landingPage.setLabel("landing_page")
        //landingPage.setId(eadId+"_"+it.@id)
        eadC.setLandingPage(landingPage)

        eadC.setEuropeanaType('TEXT');
        eadC.setEuropeanaRights("Rights Reserved - Free Access")
        return eadC
    }

    /**
     * Builds the EadDid of a 'c' element: local id (unitid of type 'nrecord'), titles,
     * parent titles, dates, physical description, languages, origination and notes.
     * Notes and odds are merged into the notes list; odds is left empty.
     */
    def generateDid = { cElement ->
        Node unitidNode = cElement.did.unitid.find { it.'@type' == "nrecord" }
        def cId = unitidNode.text()
        //def callNumber = cElement.did.unitid.find{it.'@type' .equals("call number")}.text().normalize()
        List cUnitTitles = cElement.did.unittitle.collect { title ->
            new UnitTitle(
                    title: title?.text(),
                    date: title.unitdate?.text(),
                    normalDate: title.unitdate?.find { it.'@normal' }?.text())
        }

        // //c/did/note[@type="genealogia"]
        List genealogiaTitles = []
        def notes = cElement.did?.note?.find { it?.'@type' == 'genealogia' }?.'**'?.unittitle*.text()
        if (notes) {
            genealogiaTitles.add(new UnitTitle(title: notes.join('. ')))
        } else {
            // fallback: unittitle elements that are direct children of the parent node
            genealogiaTitles.addAll(cElement.parent()?.unittitle*.text().collect { new UnitTitle(title: it) })
        }

        // value and normal fall back on each other when one of them is missing
        List dates = cElement.did.unitdate.collect { unitdate ->
            String normalised = unitdate.'@normal'
            String date = unitdate?.text()
            if (!normalised) {
                normalised = date
            }
            if (!date) {
                date = normalised
            }
            return new EadDate(value: date, normal: normalised)
        }

        def physDesc = new EadPhysDesc(
                physdesc: cElement.did.physdesc*.physfacet*.text().join('. '),
                extents: italianValues(cElement.did.physdesc*.extent*.text()))

        def langMaterials = cElement.did.langmaterial.language
        List languages
        if (langMaterials) {
            languages = langMaterials.collect { lang ->
                // Explicit expression: a language without text defaults to Italian.
                String languageName = lang.text()?.normalize()
                languageName ? new Language(code: "", text: languageName) : new Language(code: "ita", text: 'Italian')
            }
        } else {
            languages = [
                new Language(code: "ita", text: 'Italian')
            ]
        }

        def notesList = cElement.note.findAll { it.'@label' == "note" || it.'@label' == "osservazioni" }.p*.text()
                .collect { new LanguagedValue(value: StringEscapeUtils.escapeXml(it), language: 'ita') }
        def oddsList = italianValues(cElement.odd.p*.text())

        def eadDid = new EadDid(
                localId: cId,
                langmaterials: languages,
                repository: unitidNode.'@repositorycode',
                unitdates: dates,
                originations: [
                    new EadOrigination(origination: cElement.did.origination?.find { it.'@label'.equals('literal') }?.text())
                ],
                physicalDescriptions: [physDesc],
                unittitles: cUnitTitles,
                parentTitles: genealogiaTitles,
                odds: [],
                notes: notesList + oddsList)
        return eadDid
    }

    /*
     * Same as generateDid, but with odds and notes kept in separate lists.
     *
     * If the mapping is updated to have separate odds and notes also for c elements,
     * this closure could be dropped and the generic one above updated and used instead.
     *
     * NOTE(review): unlike generateDid, the notes are not XML-escaped here - confirm this is intended.
     */
    def generateArchDid = { archElement ->
        def notesList = italianValues(archElement.note.findAll { it.'@label' == "note" || it.'@label' == "osservazioni" }.p*.text())
        def oddsList = italianValues(archElement.odd.p*.text())
        def eadDid = generateDid(archElement)
        eadDid.setOdds(oddsList)
        eadDid.setNotes(notesList)
        return eadDid
    }

    /** Wraps every given text in a LanguagedValue tagged with language 'ita'. */
    private List italianValues(texts) {
        return texts.collect { new LanguagedValue(value: it, language: 'ita') }
    }

    /** Builds one mid level (did, common fields, nested levels, items) from a 'c' node. */
    private EadMidLevel buildMidLevel(node) {
        EadMidLevel level = new EadMidLevel()
        EadDid eadDid = generateDid(node)
        level.setDid(eadDid)
        level = generateCFields(level, node)
        List nestedLevels = generateNestedLevels(node)
        level.setNestedLevels(nestedLevels)
        def items = generateItemList(node)
        level.setItems(items)
        return level
    }

    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /** Puts the "END" marker on the results queue. */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put(END_OF_QUEUE)
    }

    /** Intentionally empty: the repository id is read from each record's OAI header instead. */
    @Override
    public void setRepositoryId(String repoId) {}
}