import java.util.concurrent.*
import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer
import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.common.*
import eu.dnetlib.hope.domains.library.DateStm
import eu.dnetlib.hope.domains.library.MarcRecord
import eu.dnetlib.hope.domains.library.MarcTitle
import eu.dnetlib.hope.domains.library.MarcTranslatedTitle

/**
 * Queue-driven transformation script for the "OSAColdWarPeriodicalCollection" records.
 *
 * When run, it first renders one collection-level record through the
 * "libraryTemplate" StringTemplate and puts it on the results queue. It then takes
 * items from the input queue until the "END" sentinel arrives, rewrites each one with
 * {@link #processRecord}, and puts the serialized result on the results queue.
 * Records for which processRecord returns null are dropped.
 */
class OSAPeriodicalsNDLibrary implements RunnableGroovy {

    // Queues are injected through setResultsQueue / setInputQueue before run() is called.
    def BlockingQueue resultsQueue, inputQueue;

    // Template group loaded from a fixed directory; "libraryTemplate" is only used
    // for the collection-level record emitted at the start of run().
    def String templatePath = "/var/lib/hope-scripts/templates"
    def StringTemplateGroup group = new StringTemplateGroup("libraryGroup", templatePath, DefaultTemplateLexer.class);
    def StringTemplate libraryTemplate = group.getInstanceOf("libraryTemplate");

    // Fixed metadata values for this data provider / collection.
    def String euroType = 'TEXT'
    def String dataProvider = "Open Society Archives at Central European University"
    def String dataProviderLanguage = 'English'
    def String collectionID = "OSAColdWarPeriodicalCollection"
    def String collectionPID = "OSAColdWarPeriodicalCollection"

    // Injected through setRepositoryId; used to build OAI object identifiers.
    def String repositoryId;

    /**
     * Emits the collection record, then processes the input queue until "END".
     * Always finishes by putting "END" on the results queue.
     */
    @Override
    public void run() {
        def OAIHeader collectionHeader = new OAIHeader(repositoryId:repositoryId, identifier:collectionPID, objIdentifier:repositoryId+'::'+collectionPID, nonDigitalCollection:true)
        def MarcRecord collectionRecord = generateCollectionRecord()
        libraryTemplate.setAttribute("oaiheader", collectionHeader)
        libraryTemplate.setAttribute("marc", collectionRecord)
        resultsQueue.put(libraryTemplate.toString())
        libraryTemplate.reset()

        def input;
        // take() blocks until an item is available; "END" is the end-of-input sentinel.
        while(!((input = this.inputQueue.take()).equals("END"))){
            def result = processRecord(input, collectionID, collectionPID, repositoryId)
            // processRecord returns null for records that must be skipped.
            if(result){
                def String outputRecord = groovy.xml.XmlUtil.serialize(result)
                resultsQueue.put(outputRecord)
            }
        }
        println "consumed input queue"
        this.resultsQueue.put("END")
    }

    /**
     * Builds the hard-coded collection-level MarcRecord describing the
     * "Cold War Periodical Collection".
     *
     * @return a new MarcRecord with descriptionLevel 'collection'
     */
    def generateCollectionRecord = {
        def String theAbstract = '''Serial holdings 
of the Research Institute of the Radio Free Europe/Radio Liberty focus on the history, politics and culture of the former Communist countries. '''
        def MarcRecord marcCollection = new MarcRecord(descriptionLevel:'collection',
            localID:collectionID,
            pid:collectionPID,
            landingPage:new LandingPage(localID:'', href:''),
            europeanaType:'',
            language:dataProviderLanguage,
            accessRights:'',
            useRights:'',
            europeanaRights:'',
            leader:'',
            aggregator:"HOPE - Heritage of the people's Europe",
            creationDate:new DateStm(date:'1917-2012', stm:'1917-2012'),
            accumulationDate:new DateStm(date:'', stm:''),
            isbn_020:'',
            issn_022:'',
            providerName:dataProvider,
            providerLanguage:'eng',
            bookNo_091:'',
            extents:[ new LanguagedValue(value:'979') ],
            physDetails:[],
            dimensions:[],
            materialDesignations:[ new LanguagedValue(language:'eng', value:'Newspapers, journals') ],
            accruals:[],
            toc:'',
            theabstracts:[ new LanguagedValue(language:'eng', value:theAbstract) ],
            genre:'',
            keywords:[],
            creatorPersons_100:[],
            creatorCorps_110:[],
            contributorMeetings_711:[],
            personSubjects_600:[],
            corporateSubjects_610:[],
            temporalCoverages_648:['1938 -2000'],
            topicSubjects_650:[],
            spatialCoverages_651:[ 'Soviet Union', 'Western Europe', 'United States of America', 'Poland', 'Hungary', 'Czechoslovakia', 'Bulgaria', 'Yugoslavia', 'Romania' ],
            personContributors:[],
            corporateContributors:[],
            creatorMeetings_111:[],
            titles_245:[ new MarcTitle(main:new MarcTranslatedTitle(original:'Cold War Periodical Collection'), language:'eng') ],
            editionStms_250:[],
            repositories:[this.dataProvider],
            languages_041:[ 'bul', 'cze', 'eng', 'alb', 'ara', 'dan', 'ger', 'swe', 'est', 'fre', 'ger', 'ita', 'rus', 'pol', 'spa', 'ukr', 'slv', 'hun', 'srp', 'hrv' ])
        return marcCollection;
    }

    /**
     * Rewrites one input record (an XML string with a header and a metadata/record part).
     *
     * Steps: drop 001 controlfields that do not hold a PID URL, replace all 035
     * datafields with one derived from the PID, replace the header, append the
     * description level and parent identifiers, and replace subfield 'a' of the
     * 040 datafields with the data provider name.
     *
     * @param input          the record as an XML string
     * @param collectionID   local identifier of the parent collection
     * @param collectionPID  persistent identifier of the parent collection
     * @param repositoryId   repository identifier used in the rebuilt header
     * @return the modified parsed document, or null when the record has no
     *         001 controlfield whose text starts with 'http:'
     */
    def processRecord = {String input, String collectionID, String collectionPID, String repositoryId ->
        def parser = new XmlSlurper( false, false )
        def xmlRoot = parser.parseText(input)
        def oaiHeaderNode = xmlRoot.header
        def metadataNode = xmlRoot.metadata.record
        def String id = oaiHeaderNode.identifier.text()

        // Remove every 001 controlfield whose value does not start with 'http:'
        // (the field is supposed to contain a PID).
        metadataNode.controlfield.findAll{it.@tag == '001' && !it.text().startsWith('http:')}.each{node -> node.replaceNode{}}
        def hasPID = metadataNode.controlfield.find{it.@tag == '001' && it.text().startsWith('http:')}
        if(!hasPID){
            println "Record "+id+" with no PID in controlfield 001: the record will be skipped"
            return null
        }
        else{
            // Local identifier = PID URL with the handle prefix removed.
            def String newLocID = hasPID.text() - 'http://hdl.handle.net/10891/'

            // Remove all 035 datafields, then add one back with the local ID generated from the PID URL.
            metadataNode.datafield.findAll{it.@tag == '035'}.each{node -> node.replaceNode{}}
            metadataNode.appendNode {
                datafield(tag:'035') {
                    subfield(newLocID, code:'a')
                }
            }

            // Rebuild the OAI header.
            // NOTE(review): as written, the literals below contain no XML markup and two of
            // them span a physical line break, yet the result is handed to
            // XmlSlurper.parseText. They are reproduced unchanged here -- confirm them
            // against the authoritative copy of this script.
            def String oaiHeader = '
'+id+""+ ""+repositoryId+""+ ""+repositoryId+'::'+id+""+ "true
"
            def updatedHeader = new XmlSlurper( false, false ).parseText( oaiHeader )
            oaiHeaderNode.replaceNode{ node -> mkp.yield(updatedHeader)}

            // Add the description level and the parent identifiers.
            // NOTE(review): same concern as above -- these fragments carry no markup as written.
            def String additionalInfo1 = 'item'
            def String additionalInfo2 = ''+collectionID+''+ ''+collectionPID+''
            def fragmentToAdd1 = new XmlSlurper( false, false ).parseText( additionalInfo1 )
            metadataNode.appendNode(fragmentToAdd1)
            def fragmentToAdd2 = new XmlSlurper( false, false ).parseText( additionalInfo2 )
            metadataNode.appendNode(fragmentToAdd2)

            // Change the value of 040a -- providerName.
            // The filter must COMPARE the code attribute ('=='). A single '=' assigns
            // code="a" to each subfield and always evaluates truthy, which would remove
            // every subfield of the 040 datafields instead of only subfield 'a'.
            metadataNode.datafield.findAll{it.@tag == '040'}.subfield.findAll{it.@code == 'a'}.each{node -> node.replaceNode{}}
            metadataNode.appendNode {
                datafield(tag:'040') {
                    subfield(dataProvider, code:'a')
                }
            }
            return xmlRoot
        }
    }

    /** Sets the queue that receives the transformed records. */
    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    /** Sets the queue from which input records are taken. */
    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /** Puts the "END" sentinel on the results queue (run() also does this when it finishes). */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put("END")
    }

    /** Sets the repository identifier used when building OAI object identifiers. */
    @Override
    public void setRepositoryId(String repoId) {
        this.repositoryId = repoId
    }
}