import java.util.concurrent.BlockingQueue

import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer

import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.archive.EadArchDesc
import eu.dnetlib.hope.domains.archive.EadDaoLoc
import eu.dnetlib.hope.domains.archive.EadDate
import eu.dnetlib.hope.domains.archive.EadDid
import eu.dnetlib.hope.domains.archive.EadHeader
import eu.dnetlib.hope.domains.archive.EadOrigination
import eu.dnetlib.hope.domains.archive.EadPhysDesc
import eu.dnetlib.hope.domains.archive.Language
import eu.dnetlib.hope.domains.archive.UnitTitle
import eu.dnetlib.hope.domains.common.*

/**
 * Transformation job for the "OSAItems" dataset.
 *
 * Takes XML records (as strings) from the input queue until the "END" marker
 * is received, renders each one through the "archiveTemplate" StringTemplate
 * and puts the rendered string on the results queue. When the input is
 * exhausted, "END" is put on the results queue as well.
 */
class OSAItems implements RunnableGroovy {

    /** Directory from which the StringTemplate group is loaded. */
    String templatePath = "/var/lib/hope-scripts/templates"

    BlockingQueue resultsQueue, inputQueue

    OSAUtils utils = new OSAUtils()

    String dataProvider = "Open Society Archives at Central European University"
    String datasetName = "OSAItems"
    String collectionPID = "http://hdl.handle.net/10891/osa:484d852e-1334-4570-a2be-e41230b9e36a"
    String repositoryId

    /**
     * Namespaces
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def foxml = new groovy.xml.Namespace("info:fedora/fedora-system:def/foxml#", 'foxml')
    def mets = new groovy.xml.Namespace("http://www.loc.gov/METS/", 'mets')
    def osa = new groovy.xml.Namespace("http://greenfield.osaarchivum.org/ns/item", 'osa')
    def xlink = new groovy.xml.Namespace("http://www.w3.org/1999/xlink", 'xlink')

    /**
     * Consumes the input queue until "END" is taken, pushing one rendered
     * template string per input record to the results queue, then pushes
     * "END" to the results queue.
     */
    @Override
    public void run() {
        StringTemplateGroup group = new StringTemplateGroup("archiveGroup", templatePath, DefaultTemplateLexer.class)
        StringTemplate archiveTemplate = group.getInstanceOf("archiveTemplate")
        println "got template at path " + templatePath

        // XmlParser(validating = false, namespaceAware = true)
        def parser = new XmlParser(false, true)
        println "got parser"

        // NOTE(review): this single instance is handed to utils.fillEadArchDesc for
        // every record. Confirm that helper overwrites per-record values rather than
        // accumulating them, otherwise state leaks from one record to the next.
        EadArchDesc eadArchDesc = generateCollectionRecord()

        def input
        while (!((input = this.inputQueue.take()).equals("END"))) {
            Node xmlRoot = parser.parseText(input)
            OAIHeader oaiheader = buildOaiHeader(xmlRoot)
            def header = generateEadHeader() as EadHeader

            utils.fillEadArchDesc(xmlRoot, dataProvider, eadArchDesc)

            archiveTemplate.setAttribute("oaiheader", oaiheader)
            archiveTemplate.setAttribute("header", header)
            archiveTemplate.setAttribute("archdescs", [eadArchDesc])

            String res = archiveTemplate.toString()
            resultsQueue.put(res)

            // Clear the attributes so the next record starts from an empty template.
            archiveTemplate.reset()
        }

        println "consumed input queue"
        this.resultsQueue.put("END")
    }

    /**
     * Builds the OAI header bean from the first "header" node found in a
     * depth-first search of the parsed record.
     *
     * @param xmlRoot root node of the parsed input record
     * @return header populated from the record plus this job's repository id
     */
    private OAIHeader buildOaiHeader(Node xmlRoot) {
        Node oaiHeaderNode = xmlRoot.'**'.header[0]

        OAIHeader oaiheader = new OAIHeader()
        oaiheader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
        oaiheader.setIdentifier(oaiHeaderNode.identifier.text())
        oaiheader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
        oaiheader.setRepositoryId(this.repositoryId)
        oaiheader.setSetSpec(oaiHeaderNode.setSpec.text())
        return oaiheader
    }

    /**
     * Creates the EAD header shared by all records of this dataset.
     */
    def generateEadHeader = {
        EadHeader eadHeader = new EadHeader(
                mainagencycode: "Hope",
                eadid: this.collectionPID,
                titleproper: new UnitTitle(title: 'Radio Free Europe Information Items', language: 'eng'),
                creationProvider: this.dataProvider,
                languages: [
                    new Language(code: 'eng', text: 'English')
                ]
                )
        return eadHeader
    }

    /**
     * Creates the collection-level archival description for the dataset.
     */
    def generateCollectionRecord = {
        // The two long texts below are kept exactly as written; they are passed
        // on as runtime values, so do not re-wrap or re-indent them.
        String description = ''' The collection contains 70,292 digitized Information Item reports created by Radio Free Europe’s (RFE) News and Information Department in multiple languages from 1951-1957, covering political, economic, social and cultural issues behind the Iron Curtain. The Items concerned topics ranging from official Communist Party and state apparatus organization to micro-level practices of everyday life. 
Information Items were created at RFE Munich based on one or more sources received from field offices operating between 1951 to 1956: transcripts of interviews and conversations, reports, radio program summaries, incoming letters, unofficial translations of official documents. The bulk of the collection consists of transcripts of interviews conducted with émigrés, people traveling to the West as tourists or on business, and defectors. Sources remained anonymous, and their testimonies were screened for reliability, and processed by national evaluation units for Czechoslovakia, Bulgaria, Hungary, Poland and Romania. The reliability of information, however, cannot always be verified. As finished products, Items were mimeographed and circulated in 140 to 200 copies. While processing was carried out in English, Items also include excerpts from the original language transcription. The collection represents the sole surviving portion of the total Information Items created over the period 1951-1972, and its subject matter includes propaganda and communication, resistance and purges, Party and state apparatus organization, as well as macro-economics, politics and anti-Communist movements, the informal economy (bribery, corruption, black marketeering), political corruption (alibis, spies, deviationists) and mild subversion (jokes, posters). '''

        List spatialCoverages = [
            'Czechoslovakia',
            'Hungary',
            'Poland',
            'Bulgaria',
            'Romania',
            'Soviet Union',
            'Germany (East)',
            'Albania',
            'Austria',
            'France',
            'Italy',
            'Germany (West)',
            'Greece',
            'Sweden',
            'China',
            'Finland',
            'Great Britain',
            'Yugoslavia',
            'Norway'
        ]

        String useRestrict = ''' Ownership of and financial copyrights to the reports belong to RFE/RL, Inc. Reprinted with the permission of Radio Free Europe/Radio Liberty, 1201 Connecticut Ave., N.W., Washington, DC 20036, USA. Users of RFE/RL content cannot alter the meaning, name or integrity of the content. 
RFE/RL reserves the right to revoke permission for use of its content at any time. The sale of RFE/RL content is strictly prohibited. '''

        EadArchDesc eadArchDesc = new EadArchDesc(
                dateCreated: new EadDate(normal: '1951-1957', value: '1951-1957'),
                pid: this.collectionPID,
                landingPage: new EadDaoLoc(href: this.collectionPID),
                temporalCoverages: ['1951 - 1956'],
                spatialCoverages: spatialCoverages,
                europeanaType: 'TEXT',
                europeanaRights: 'Rights reserved – free access',
                level: 'collection',
                scopecontentPars: [
                    new LanguagedValue(language: 'eng', value: description)
                ],
                accessrestrictPars: [
                    new LanguagedValue(value: 'Rights reserved – free access')
                ],
                userrestrictPars: [
                    new LanguagedValue(value: useRestrict, language: 'eng')
                ],
                did: new EadDid(
                    localId: this.datasetName,
                    unittitles: [
                        new UnitTitle(title: 'Radio Free Europe Information Items', language: 'eng')
                    ],
                    repository: this.dataProvider,
                    physicalDescriptions: [
                        new EadPhysDesc(
                            extents: [
                                new LanguagedValue(value: '70292')
                            ],
                            genreforms: [
                                new LanguagedValue(value: 'Field Reports', language: 'eng')
                            ])
                    ],
                    originations: [
                        new EadOrigination(corpnames: [
                            new LanguagedValue(value: 'Radio Free Europe/Radio Liberty (RFE/RL) Research Institute', language: 'eng')
                        ])
                    ],
                    // All codes are ISO 639-2/T three-letter codes. Czech, Romanian and
                    // Slovak previously carried the English language name as the code.
                    langmaterials: [
                        new Language(code: 'eng', text: 'English'),
                        new Language(code: 'deu', text: 'German'),
                        new Language(code: 'ces', text: 'Czech'),
                        new Language(code: 'hun', text: 'Hungarian'),
                        new Language(code: 'pol', text: 'Polish'),
                        new Language(code: 'fra', text: 'French'),
                        new Language(code: 'ron', text: 'Romanian'),
                        new Language(code: 'slk', text: 'Slovak')
                    ])
                )
        return eadArchDesc
    }

    /** Sets the queue that rendered records are pushed to. */
    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    /** Sets the queue that input records are taken from. */
    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /**
     * Puts the "END" marker on the results queue. Note that run() also puts
     * "END" there once the input queue has been consumed.
     */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put("END")
    }

    /** Sets the repository id written into every OAI header. */
    @Override
    public void setRepositoryId(String repoId) {
        this.repositoryId = repoId
    }
}