import java.util.concurrent.BlockingQueue

import org.antlr.stringtemplate.StringTemplate
import org.antlr.stringtemplate.StringTemplateGroup
import org.antlr.stringtemplate.language.DefaultTemplateLexer

import eu.dnetlib.enabling.manager.msro.hope.groovy.RunnableGroovy
import eu.dnetlib.hope.domains.archive.EadArchDesc
import eu.dnetlib.hope.domains.archive.EadDate
import eu.dnetlib.hope.domains.archive.EadDid
import eu.dnetlib.hope.domains.archive.EadHeader
import eu.dnetlib.hope.domains.archive.EadOrigination
import eu.dnetlib.hope.domains.archive.EadPhysDesc
import eu.dnetlib.hope.domains.archive.Language
import eu.dnetlib.hope.domains.archive.UnitTitle
import eu.dnetlib.hope.domains.common.*

/**
 * Queue-driven transformation script for the "OSAMonitoring" dataset.
 *
 * {@link #run()} takes XML record strings from {@code inputQueue} until the
 * "END" marker is read. For each record it builds an {@code OAIHeader} from the
 * record's {@code header} element, passes the parsed record to
 * {@code OSAUtils.fillEadArchDesc}, renders the "archiveTemplate" StringTemplate
 * and puts the rendered text on {@code resultsQueue}. After the input is
 * consumed it puts "END" on {@code resultsQueue}.
 */
class OSAMonitoring implements RunnableGroovy {

    /** Marker string that terminates both the input and the results queue. */
    private static final String END_MARKER = "END"

    /** Title of the collection, used in the EAD header and in the archdesc unit title. */
    private static final String COLLECTION_TITLE = 'Radio Free Europe/Radio Liberty Hungarian Radio Monitoring'

    /** Rights statement used for both the Europeana rights and the access restriction. */
    private static final String RIGHTS_STATEMENT = 'Rights reserved – free access'

    /** Directory from which the StringTemplate group loads its templates. */
    String templatePath = "/var/lib/hope-scripts/templates"

    BlockingQueue resultsQueue
    BlockingQueue inputQueue

    OSAUtils utils = new OSAUtils()

    String dataProvider = "Open Society Archives at Central European University"
    String datasetName = "OSAMonitoring"
    String collectionPID = "http://hdl.handle.net/10891/osa:8971ff25-e237-4b40-8713-c375c7c37e71"
    String repositoryId

    /**
     * Namespaces
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def foxml = new groovy.xml.Namespace("info:fedora/fedora-system:def/foxml#", 'foxml')
    def mets = new groovy.xml.Namespace("http://www.loc.gov/METS/", 'mets')
    def osa = new groovy.xml.Namespace("http://greenfield.osaarchivum.org/ns/item", 'osa')
    def xlink = new groovy.xml.Namespace("http://www.w3.org/1999/xlink", 'xlink')

    /**
     * Consumes the input queue record by record and publishes one rendered
     * template per record, followed by the "END" marker.
     *
     * NOTE(review): if parsing or rendering throws, "END" is never put on the
     * results queue from here; confirm the caller invokes closeResultQueue()
     * in that case.
     */
    @Override
    public void run() {
        StringTemplateGroup group = new StringTemplateGroup("archiveGroup", templatePath, DefaultTemplateLexer.class)
        StringTemplate archiveTemplate = group.getInstanceOf("archiveTemplate")
        println "got template at path " + templatePath

        // Non-validating, namespace-aware parser.
        def parser = new XmlParser(false, true)
        println "got parser"

        // NOTE(review): a single EadArchDesc instance is handed to
        // fillEadArchDesc for every record; whether state accumulates across
        // records depends on OSAUtils, which is not visible here - confirm.
        EadArchDesc eadArchDesc = generateCollectionRecord()

        // The header content does not depend on the record, so build it once
        // instead of once per iteration.
        EadHeader header = generateEadHeader() as EadHeader

        def input
        while (!(input = this.inputQueue.take()).equals(END_MARKER)) {
            Node xmlRoot = parser.parseText(input)

            // First <header> element found anywhere in the record.
            // NOTE(review): a record without one yields null here and fails
            // with a NullPointerException in buildOaiHeader.
            Node oaiHeaderNode = xmlRoot.'**'.header[0]
            OAIHeader oaiheader = buildOaiHeader(oaiHeaderNode)

            utils.fillEadArchDesc(xmlRoot, dataProvider, eadArchDesc)

            archiveTemplate.setAttribute("oaiheader", oaiheader)
            archiveTemplate.setAttribute("header", header)
            archiveTemplate.setAttribute("archdescs", [eadArchDesc])
            resultsQueue.put(archiveTemplate.toString())

            // Clear the attributes so the next record starts from an empty template.
            archiveTemplate.reset()
        }
        println "consumed input queue"
        this.resultsQueue.put(END_MARKER)
    }

    /**
     * Copies the fields of a record's header element into a new OAIHeader and
     * adds the repository id configured on this script.
     *
     * @param oaiHeaderNode the record's header element
     * @return the populated OAIHeader
     */
    private OAIHeader buildOaiHeader(Node oaiHeaderNode) {
        OAIHeader oaiHeader = new OAIHeader()
        oaiHeader.setDateOfCollection(oaiHeaderNode.'dri:dateOfCollection'.text())
        oaiHeader.setIdentifier(oaiHeaderNode.identifier.text())
        oaiHeader.setObjIdentifier(oaiHeaderNode.'dri:objIdentifier'.text())
        oaiHeader.setRepositoryId(this.repositoryId)
        oaiHeader.setSetSpec(oaiHeaderNode.setSpec.text())
        return oaiHeader
    }

    /**
     * Builds the EAD header for the collection.
     *
     * Hungarian is declared with the code 'hun', matching the code used for
     * the language of the material in generateCollectionRecord; it was
     * previously an empty string.
     */
    def generateEadHeader = {
        return new EadHeader(
                mainagencycode: "Hope",
                eadid: this.collectionPID,
                titleproper: new UnitTitle(title: COLLECTION_TITLE, language: 'eng'),
                creationProvider: this.dataProvider,
                languages: [
                        new Language(code: 'hun', text: 'Hungarian'),
                        new Language(code: 'eng', text: 'English')
                ]
        )
    }

    /**
     * Builds the collection-level EadArchDesc with the fixed descriptive
     * metadata of the collection (dates, coverage, rights, description and
     * descriptive identification).
     */
    def generateCollectionRecord = {
        // The text literals below are kept verbatim, including their leading
        // space and line breaks, because they are emitted as metadata values.
        String description = ''' The collection contains digital copies of 1,072 verbatim transcripts of daily news programs broadcast on two Hungarian state radio stations, Kossuth and Petőfi, from January 1, 1988 to December 31, 1990. The transcripts were prepared by Radio Free Europe/Radio Liberty’s (RFE/RL) Hungarian Monitoring Unit, and include brief summaries and supplementary information on the broadcasts themselves. 
Focusing on prominent daily news and magazine programs about the most important issues of the day in international and domestic politics, economics, and culture, the transcripts documented significant events in Hungary in order to ensure editors of the Hungarian Broadcasting Desk, as well as other RFE/RL editors and departments were kept as fully up-to-date as possible. The Hungarian Monitoring Unit started operating in October 1951 in Munich, and monitored Hungarian state radio stations until the closure of the Hungarian Broadcasting Desk on October 31, 1993. From the outset, the Unit intended to monitor and record both Kossuth and Petőfi Radio. However, because American armed forces radio broadcast at a wavelength very close to that of Petőfi, the Unit had to abandon its aim of around-the-clock monitoring, and instead recorded only early morning and late evening programs. Recordings of program blocs were processed immediately after broadcast, and thus an average of 20 hours of radio broadcasts were transcribed and handed over to the editors the next day. Certain programs, such as programs of particular political or economic relevance or New Year’s Eve broadcasts, were usually included or repeated in the next day’s transcripts or in special editions. The transcripts were prepared and copied seven days a week; in the mid-1970s, one daily issue was about 30-50 pages long, which grew to nearly 100 pages by 1990. 
'''

        List spatialCoverages = [
                'Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
                'Australia', 'Austria', 'Belgium', 'Bulgaria', 'Cambodia',
                'Canada', 'Chile', 'China', 'Colombia', 'Cuba',
                'Cyprus', 'Czechoslovakia', 'Denmark', 'Egypt', 'Ethiopia',
                'Finland', 'France', 'Germany (East)', 'Germany (West)', 'Great Britain',
                'Greece', 'Hungary', 'India', 'Iran', 'Iraq',
                'Ireland', 'Israel', 'Italy', 'Japan', 'Jordan',
                'Korea (North)', 'Korea (South)', 'Kuwait', 'Lebanon', 'Libya',
                'Luxembourg', 'Malta', 'Mongolia', 'Mozambique', 'Namibia',
                'Netherlands', 'Nicaragua', 'Norway', 'Pakistan', 'Panama',
                'Paraguay', 'Philippines', 'Poland', 'Portugal', 'Romania',
                'Saudi Arabia', 'South Africa', 'Soviet Union', 'Spain', 'Sweden',
                'Switzerland', 'Syria', 'Thailand', 'Tunisia', 'Turkey',
                'United States', 'Uruguay', 'Venezuela', 'Vietnam', 'Yugoslavia'
        ]

        String useRestrict = ''' Ownership of and financial copyrights to the reports belong to RFE/RL, Inc. Reprinted with the permission of Radio Free Europe/Radio Liberty, 1201 Connecticut Ave., N.W., Washington, DC 20036, USA. Users of RFE/RL content cannot alter the meaning, name or integrity of the content. RFE/RL reserves the right to revoke permission for use of its content at any time. The sale of RFE/RL content is strictly prohibited. 
'''

        // Descriptive identification (<did>) of the collection.
        EadDid did = new EadDid(
                localId: this.collectionPID,
                unittitles: [
                        new UnitTitle(title: COLLECTION_TITLE, language: 'eng')
                ],
                repository: this.dataProvider,
                physicalDescriptions: [
                        new EadPhysDesc(
                                extents: [new LanguagedValue(value: '1072')],
                                genreforms: [new LanguagedValue(value: 'Broadcasts; Transcripts', language: 'eng')]
                        )
                ],
                originations: [
                        new EadOrigination(corpnames: [
                                new LanguagedValue(value: 'Radio Free Europe/Radio Liberty (RFE/RL) Research Institute', language: 'eng')
                        ])
                ],
                langmaterials: [
                        new Language(code: 'hun', text: 'Hungarian')
                ]
        )

        // dateAccumulated is not set; a 1948-1990 value was left disabled by
        // the original author.
        return new EadArchDesc(
                dateCreated: new EadDate(normal: '1988-1990', value: '1988-1990'),
                pid: this.collectionPID,
                temporalCoverages: ['1988-1990'],
                spatialCoverages: spatialCoverages,
                europeanaType: 'TEXT',
                europeanaRights: RIGHTS_STATEMENT,
                level: 'collection',
                scopecontentPars: [
                        new LanguagedValue(language: 'eng', value: description)
                ],
                accessrestrictPars: [
                        new LanguagedValue(value: RIGHTS_STATEMENT)
                ],
                userrestrictPars: [
                        new LanguagedValue(value: useRestrict, language: 'eng')
                ],
                did: did
        )
    }

    /** Sets the queue on which rendered records are published. */
    @Override
    public void setResultsQueue(BlockingQueue queue) {
        this.resultsQueue = queue
    }

    /** Sets the queue from which XML record strings are read. */
    @Override
    public void setInputQueue(BlockingQueue queue) {
        this.inputQueue = queue
    }

    /**
     * Puts the "END" marker on the results queue.
     *
     * NOTE(review): run() also puts "END" when it finishes normally; confirm
     * that consumers tolerate a second marker if both are invoked.
     */
    @Override
    public void closeResultQueue() {
        this.resultsQueue.put(END_MARKER)
    }

    /** Sets the repository id written into every generated OAI header. */
    @Override
    public void setRepositoryId(String repoId) {
        this.repositoryId = repoId
    }
}