import java.util.concurrent.BlockingQueue

import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer

import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.archive.EadArchDesc
import eu.dnetlib.hope.domains.archive.EadDaoLoc
import eu.dnetlib.hope.domains.archive.EadDate
import eu.dnetlib.hope.domains.archive.EadDid
import eu.dnetlib.hope.domains.archive.EadHeader
import eu.dnetlib.hope.domains.archive.EadOrigination
import eu.dnetlib.hope.domains.archive.EadPhysDesc
import eu.dnetlib.hope.domains.archive.Language
import eu.dnetlib.hope.domains.archive.UnitTitle
import eu.dnetlib.hope.domains.common.*

/**
 * Queue-driven transformation script for the "OSASituationReport" dataset.
 *
 * Each element taken from the input queue is parsed as XML, combined with a
 * fixed EAD header and a collection-level archive description, rendered through
 * the "archiveTemplate" StringTemplate and put on the results queue. The string
 * "END" on the input queue stops processing and is forwarded to the results queue.
 */
class OSASituationReports implements RunnableGroovy {

    /** Sentinel that terminates the input queue and is written to the results queue. */
    private static final String END_MARKER = "END"

    /** Title used both in the EAD header and in the collection-level description. */
    private static final String COLLECTION_TITLE = 'Radio Free Europe/Radio Liberty Situation Reports'

    /** Directory handed to the StringTemplateGroup as the template root. */
    String templatePath = "/var/lib/hope-scripts/templates"

    BlockingQueue resultsQueue, inputQueue

    OSAUtils utils = new OSAUtils()

    String dataProvider = "Open Society Archives at Central European University"

    String datasetName = "OSASituationReport"

    String collectionPID = "http://hdl.handle.net/10891/osa:f80cb1e2-fb79-4068-af0f-80c7a39465cb"

    String repositoryId

    /**
     * Namespaces
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def foxml = new groovy.xml.Namespace("info:fedora/fedora-system:def/foxml#", 'foxml')
    def mets = new groovy.xml.Namespace("http://www.loc.gov/METS/", 'mets')
    def osa = new groovy.xml.Namespace("http://greenfield.osaarchivum.org/ns/item", 'osa')
    def xlink = new groovy.xml.Namespace("http://www.w3.org/1999/xlink", 'xlink')

    /**
     * Consumes the input queue until the "END" marker is taken, putting one
     * rendered template string on the results queue per input element, then
     * puts "END" on the results queue.
     */
    @Override
    public void run() {
        StringTemplateGroup group = new StringTemplateGroup("archiveGroup", templatePath, DefaultTemplateLexer.class)
        StringTemplate archiveTemplate = group.getInstanceOf("archiveTemplate")
        println "got template at path " + templatePath

        // Non-validating, namespace-aware parser.
        def parser = new XmlParser(false, true)
        println "got parser"

        // NOTE(review): this single EadArchDesc instance is passed to
        // utils.fillEadArchDesc for every record. Whether data from one record
        // carries over into the next depends on OSAUtils, which is not visible
        // here - confirm.
        EadArchDesc eadArchDesc = generateCollectionRecord()

        // The header is built only from fixed fields of this object, so it is
        // created once instead of once per record.
        EadHeader header = generateEadHeader() as EadHeader

        def input
        while (!((input = this.inputQueue.take()).equals(END_MARKER))) {
            Node xmlRoot = parser.parseText(input)
            OAIHeader oaiheader = readOaiHeader(xmlRoot.'**'.header[0])

            utils.fillEadArchDesc(xmlRoot, dataProvider, eadArchDesc)

            archiveTemplate.setAttribute("oaiheader", oaiheader)
            archiveTemplate.setAttribute("header", header)
            archiveTemplate.setAttribute("archdescs", [eadArchDesc])
            resultsQueue.put(archiveTemplate.toString())

            // Clear the attributes so the template can be reused for the next record.
            archiveTemplate.reset()
        }
        println "consumed input queue"
        this.resultsQueue.put(END_MARKER)
    }

    /**
     * Copies the OAI header fields of one record into a new OAIHeader.
     *
     * @param oaiHeaderNode the record's header node
     * @return a header carrying the node's values and this object's repositoryId
     */
    private OAIHeader readOaiHeader(Node oaiHeaderNode) {
        OAIHeader oaiheader = new OAIHeader()
        oaiheader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
        oaiheader.setIdentifier(oaiHeaderNode.identifier.text())
        oaiheader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
        oaiheader.setRepositoryId(this.repositoryId)
        oaiheader.setSetSpec(oaiHeaderNode.setSpec.text())
        return oaiheader
    }

    /**
     * Builds the EAD header from the collection PID, the collection title and
     * the data provider name.
     */
    def generateEadHeader = {
        return new EadHeader(
                mainagencycode: "Hope",
                eadid: this.collectionPID,
                titleproper: new UnitTitle(title: COLLECTION_TITLE, language: 'eng'),
                creationProvider: this.dataProvider,
                languages: [new Language(code: 'eng', text: 'English')]
        )
    }

    /**
     * Builds the collection-level EadArchDesc: dates, coverage, rights
     * statements, scope description and the descriptive identification block.
     */
    def generateCollectionRecord = {
        // Literal kept verbatim, including its line breaks: it is emitted at runtime.
        String description = ''' The collection is comprised of 9283 digital copies of Situation Reports (SRs) released from 1959 through 1989. Developed and published by Radio Free Europe (RFE), the Reports are based upon extensive monitoring of Soviet bloc newspapers (as well as other media) and independent journalistic research conducted by RFE staff members. SRs were disseminated in print both internally and externally. 
Presented as updates on current developments in a news format with limited editorial commentary, SRs address a wide range of topics, including political economy, international relations, and everyday life in Communist Central and Eastern Europe. In certain years, reports contained lists (with brief explanations) of “Agreements Signed,” the “Comings and Goings” of political and religious leaders as well as envoys from both eastern and western countries, and “Miscellaneous” news items. The publication of SRs was initiated to supplement other RFE reportage (Background Reports and Press Surveys (1956)) and broaden the broadcasting activities of RFE national radio desks by providing the latest updates and developments in five Central and Eastern European countries: Bulgaria, Czechoslovakia, Hungary, Poland, and Romania. Although RFE did not broadcast to Albania, SRs for this country were published between 1961-1972. Following the merger of RFE and Radio Liberty (RL) in 1976 and the transfer of Baltic broadcasting from the RL division to the RFE division in 1984, the coverage area of the Reports was expanded to include the Baltic Area (Estonia, Latvia, and Lithuania) and Yugoslavia. Every attempt was made to ensure that SRs met a high journalistic standard and were factually accurate so that they could serve as a source of reliable information about the Soviet bloc. (Editorial markings can occasionally be seen on reports.) Beginning in 1959 and continuing through 1961, active units published reports on a more or less daily basis. During this time, many reports were labeled “Negative” and may contain a sentence or two of information, a statement saying that there was nothing of note to report, or no text at all. These reports have been cataloged, but have not been digitized. The frequency with which reports were issued changed during their run: from semiweekly in 1962, to weekly around 1972, fortnightly in 1979, and approximately monthly in the early 1980s. 
SRs expanded in length and substantive depth over this time. '''

        List spatialCoverages = [
                'Czechoslovakia',
                'Romania',
                'Poland',
                'Hungary',
                'Bulgaria',
                'Albania',
                'Yugoslavia',
                'Soviet Union'
        ]

        String useRestrict = ''' Ownership of and financial copyrights to the reports belong to RFE/RL, Inc. Reprinted with the permission of Radio Free Europe/Radio Liberty, 1201 Connecticut Ave., N.W., Washington, DC 20036, USA. Users of RFE/RL content cannot alter the meaning, name or integrity of the content. RFE/RL reserves the right to revoke permission for use of its content at any time. The sale of RFE/RL content is strictly prohibited. '''

        EadArchDesc eadArchDesc = new EadArchDesc(
                dateCreated: new EadDate(normal: '1959-1989', value: '1959-1989'),
                pid: this.collectionPID,
                landingPage: new EadDaoLoc(href: this.collectionPID),
                //dateAccumulated:new EadDate(normal:'1948-1990', value:'1948-1990'),
                temporalCoverages: ['1959-1989'],
                spatialCoverages: spatialCoverages,
                europeanaType: 'TEXT',
                europeanaRights: 'Rights reserved – free access',
                level: 'collection',
                scopecontentPars: [
                        new LanguagedValue(language: 'eng', value: description)
                ],
                accessrestrictPars: [
                        new LanguagedValue(value: 'Rights reserved – free access')
                ],
                userrestrictPars: [
                        new LanguagedValue(value: useRestrict, language: 'eng')
                ],
                did: new EadDid(
                        localId: this.collectionPID,
                        unittitles: [
                                new UnitTitle(title: COLLECTION_TITLE, language: 'eng')
                        ],
                        repository: this.dataProvider,
                        physicalDescriptions: [
                                new EadPhysDesc(
                                        extents: [
                                                new LanguagedValue(value: '9283')
                                        ],
                                        genreforms: [
                                                new LanguagedValue(value: 'Reports; Bulletins', language: 'eng')
                                        ])
                        ],
                        originations: [
                                new EadOrigination(corpnames: [
                                        new LanguagedValue(value: 'Radio Free Europe/Radio Liberty (RFE/RL) Research Institute', language: 'eng')
                                ])
                        ],
                        langmaterials: [
                                new Language(code: 'eng', text: 'English')
                        ])
        )
        return eadArchDesc
    }

    /** Sets the queue that rendered records and the final "END" marker are put on. */
    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    /** Sets the queue that records are taken from. */
    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /** Puts the "END" marker on the results queue. */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put(END_MARKER)
    }

    /** Sets the repository id copied into every generated OAI header. */
    @Override
    public void setRepositoryId(String repoId) {
        this.repositoryId = repoId
    }
}