import org.antlr.stringtemplate.StringTemplate
import eu.dnetlib.hope.domains.common.*
import eu.dnetlib.hope.domains.library.*

/**
 * Builds the library (MARC) view of an OSA record and renders it through a StringTemplate.
 *
 * The record is navigated with GPath expressions over a groovy.util.Node tree: MARC fields are
 * read from the 'ITEM-LIB-EN' / 'ITEM-LIB-HU' datastreams, rights from the 'RIGHTS' datastream
 * and digital resources from the 'METS-STRUCTURE' datastream.
 */
class OSALibraryUtils {
    /**
     * Namespaces
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def foxml = new groovy.xml.Namespace("info:fedora/fedora-system:def/foxml#", 'foxml')
    def marc = new groovy.xml.Namespace("http://www.loc.gov/MARC21/slim", 'marc')
    def osa = new groovy.xml.Namespace("http://greenfield.osaarchivum.org/ns/item", 'osa')
    def rdf = new groovy.xml.Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#", 'rdf')
    def xlink = new groovy.xml.Namespace("http://www.w3.org/1999/xlink", 'xlink')

    /**
     * Escapes the XML special characters '&', '>' and '<' in the given string.
     * Returns the empty string for a null or empty input.
     */
    def escapeXml = { String s ->
        if (!s) return ''
        // '&' must be escaped first, otherwise the entities produced below would be re-escaped.
        return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
    }

    /**
     * Renders one record through the given template.
     *
     * The template is reset, then filled with the attributes "resources" (list of MarcResource),
     * "oaiheader" (OAIHeader) and "marc" (MarcRecord), and finally rendered to a string.
     *
     * @param theRecordRoot root node of the record; its '@PID' attribute is appended to
     *                      'http://hdl.handle.net/10891/' to form the record identifier
     * @param template the template to fill; it is reset before use
     * @param repoId repository identifier, used in the OAI header and as prefix of the object identifier
     * @param dateOfColl date of collection, stored in the OAI header
     * @param dataProviderName provider name stored in the MARC record
     * @param dataProviderLanguage provider language stored in the MARC record
     * @param parentID identifier used for both localID and pid of the parent record
     * @return the rendered template, or null when the record is skipped because it has no
     *         'ITEM-LIB-EN' datastream or no 245$a title
     */
    def String transformRecord(Node theRecordRoot, StringTemplate template, String repoId, String dateOfColl, String dataProviderName, String dataProviderLanguage, String parentID){
        template.reset()

        def englishDatastream = datastreamById(theRecordRoot, 'ITEM-LIB-EN')
        def hungarianDatastream = datastreamById(theRecordRoot, 'ITEM-LIB-HU')

        // landing page: concat('http://hdl.handle.net/10891/', //foxml:digitalObject/@PID)
        def landingPagePID = 'http://hdl.handle.net/10891/'+theRecordRoot.'@PID'
        def LandingPage landingpage = new LandingPage(localID:landingPagePID, href:landingPagePID)

        // identifiers
        def String objPID = landingPagePID
        def String repoObjId = (repoId+"::"+objPID)

        // Every mandatory field below is read from the English datastream; without it the record
        // cannot be described, so skip it the same way a record without a title is skipped.
        if(englishDatastream == null){
            println "Skipping record with no ITEM-LIB-EN datastream - "+objPID
            return null
        }

        // oai header
        def OAIHeader oaiheader = new OAIHeader(dateOfCollection:dateOfColl, repositoryId:repoId, identifier:objPID, objIdentifier:repoObjId)

        def String issn = ''
        def String isbn = ''

        // Book number: 099$a followed by 099$f; a missing part contributes nothing
        // (plain concatenation of nulls would produce the literal text "null").
        def Node bookNoNode = firstDatafield(englishDatastream, '099')
        def String bookNo = (subfieldText(bookNoNode, 'a') ?: '') + (subfieldText(bookNoNode, 'f') ?: '')

        def Node title245Node = firstDatafield(englishDatastream, '245')
        def String mainOriginalTitle = subfieldText(title245Node, 'a')
        if(!mainOriginalTitle){
            println "Skipping record with no 245a - title - "+objPID
            return null
        }

        def Node leaderNode = englishDatastream.'**'.'*:leader'[0]
        def String leaderText = leaderNode?.text()
        // The material designation is the character at index 6 of the leader; guard against short leaders.
        def materialDesignation = (leaderText != null && leaderText.length() > 6) ? leaderText.charAt(6) : null

        def MarcTitle marcTitle = new MarcTitle(
                main:new MarcTranslatedTitle(
                        original:mainOriginalTitle,
                        translation:subfieldText(firstDatafield(englishDatastream, '242'), 'a')),
                subtitle:new MarcTranslatedTitle(original:escapeXml(subfieldText(title245Node, 'b'))),
                proper:'',
                parallel:'',
                materialDesignation:materialDesignation,
                responsibility:subfieldText(title245Node, 'c'),
                language:'eng',
                translationLanguage:'eng')

        def publicationNode = firstDatafield(englishDatastream, '260')
        def MarcPublication marcPublication = new MarcPublication(
                placeOfPublication:subfieldText(publicationNode, 'a'),
                publisher:subfieldText(publicationNode, 'b'),
                dateOfPublication:subfieldText(publicationNode, 'c'),
                frequency:'')

        def String descrLevel = 'item'

        def String edmType = subfieldText(firstDatafield(englishDatastream, '337'), 'a')
        if(!edmType) edmType = "NO EDM TYPE PROVIDED"

        // Safe navigation does not short-circuit the rest of a chain in Groovy, so every step
        // after an optional lookup has to be null-safe on its own.
        def rightsNodes = datastreamById(theRecordRoot, 'RIGHTS')?.'**'?.'europeana:rights'
        def String edmRights = rightsNodes ? rightsNodes[0]?.text() : null
        if(!edmRights) edmRights = "NO EDM RIGHTS PROVIDED"

        def Node phys300EN = firstDatafield(englishDatastream, '300')
        def Node phys300HU = firstDatafield(hungarianDatastream, '300')

        def subject600persons = allDatafields(englishDatastream, '600')

        // One entry per 700 field, each entry being the list of its $a values.
        def contributors = allDatafields(englishDatastream, '700').collect{ field -> subfieldTexts(field, 'a') }

        // One keyword per 653 field (first $a only); the value is null when the field has no $a.
        def englishKeys = allDatafields(englishDatastream, '653').collect{ field ->
            new LanguagedValue(value:subfieldText(field, 'a'), language:'eng')
        }
        def hungarianKeys = allDatafields(hungarianDatastream, '653').collect{ field ->
            new LanguagedValue(value:subfieldText(field, 'a'), language:'hun')
        }

        def String resourceLanguage = subfieldText(firstDatafield(englishDatastream, '041'), 'a')
        if(!resourceLanguage) {
            // NOTE(review): the fallback language is 'polish' in the original code - confirm this is intended.
            resourceLanguage = 'polish'
            println "No 041a field: setting language to 'polish'"
        }

        def MarcRecord marcRecord = new MarcRecord(
                leader:leaderText,
                pid:objPID,
                aggregator:'',
                descriptionLevel:descrLevel,
                parentRecord:new MarcRelative(localID:parentID, pid:parentID),
                localID:objPID,
                issn_022:issn,
                isbn_020:isbn,
                landingPage:landingpage,
                providerName:dataProviderName,
                providerLanguage:dataProviderLanguage,
                bookNo_091:bookNo,
                extents:subfieldTexts(phys300EN, 'a').collect{ new LanguagedValue(value:it, language:'eng') },
                physDetails:subfieldTexts(phys300EN, 'b').collect{ new LanguagedValue(value:it, language:'eng') } +
                        subfieldTexts(phys300HU, 'b').collect{ new LanguagedValue(value:it, language:'hun') },
                dimensions:[],
                materialDesignations:[],
                toc:'',
                theabstracts:[
                        new LanguagedValue(language:'eng', value:subfieldText(firstDatafield(englishDatastream, '520'), 'a')),
                        new LanguagedValue(language:'hun', value:subfieldText(firstDatafield(hungarianDatastream, '520'), 'a'))
                ],
                genre:subfieldText(firstDatafield(englishDatastream, '655'), 'a'),
                keywords:englishKeys+hungarianKeys,
                topicSubjects_650:[],
                // The $a values of all 100 / 110 fields, as flat lists of strings.
                creatorPersons_100:subfieldTextsOfAll(allDatafields(englishDatastream, '100'), 'a'),
                creatorCorps_110:subfieldTextsOfAll(allDatafields(englishDatastream, '110'), 'a'),
                personContributors:contributors,
                corporateContributors:[],
                creatorMeetings_111:[],
                contributorMeetings_711:[],
                // All 600$a values followed by all 600$d values.
                personSubjects_600:subfieldTextsOfAll(subject600persons, 'a') + subfieldTextsOfAll(subject600persons, 'd'),
                corporateSubjects_610:[],
                temporalCoverages_648:[],
                spatialCoverages_651:subfieldTexts(firstDatafield(englishDatastream, '651'), 'a'),
                titles_245:[marcTitle],
                editionStms_250:[],
                publication:marcPublication,
                notes:subfieldTexts(firstDatafield(englishDatastream, '500'), 'a').collect{ new LanguagedValue(value:it, language:'eng') },
                europeanaType:edmType,
                europeanaRights:edmRights,
                languages_041:[resourceLanguage])

        // Disabled alternative that derives the parent from the RELS-EXT datastream:
        //   concat('http://hdl.handle.net/10891/', //foxml:datastream[@ID='RELS-EXT']//fedora:isMemberOfCollection/@rdf:resource)
        // def String parentPID = theRecordRoot.'**'.'foxml:datastream'?.find{
        //     it.'@ID'.equals('RELS-EXT')
        // }?.'**'.'fedora:isMemberOfCollection'.'@rdf:resource'
        // def String parentPIDUrl= 'http://hdl.handle.net/10891/'+parentPID
        // marcRecord.setParentRecord(new MarcRelative(localID:parentPIDUrl, pid:parentPIDUrl))
        // marcRecord.setNextRecord(new MarcRelative(localID:nextRecord?.'oai_hope:localIdentifier'?.text(), pid:nextRecord?.'dc:identifier'?.text()))

        def Node metsNode = datastreamById(theRecordRoot, 'METS-STRUCTURE')
        def fileGroups = metsNode?.'**'?.'*:fileGrp'

        // 'low_level' file locations become one resource each (as a 'derivative2').
        def lowLevelLocations = fileGroups?.find{ it.'@USE' == 'low_level' }?.'**'?.'*:fLocat'
        def List derivatives2 = (lowLevelLocations ?: []).collect{ it?.attribute(xlink.href) }

        // The first 'thumbnail' file location becomes the 'derivative3'.
        def thumbnailLocations = fileGroups?.find{ it.'@USE' == 'thumbnail' }?.'**'?.'*:fLocat'
        def String der3Pid = thumbnailLocations ? thumbnailLocations[0]?.attribute(xlink.href) : null
        // NOTE(review): as in the original code, the thumbnail derivative is created even when no
        // thumbnail location exists (all of its identifiers are then null) - confirm the template copes.
        def Derivative der3 = new Derivative(pid:der3Pid, href:der3Pid, localID:der3Pid, type:'derivative3')

        def List marcResources = []
        derivatives2.eachWithIndex{ href, index ->
            def ders = [new Derivative(pid:href, href:href, localID:href, type:'derivative2')]
            // The thumbnail is attached to the first resource only.
            if(index == 0) ders << der3
            marcResources << new MarcResource(pid:href, id:href, type:edmType, rights:edmRights, languages:[resourceLanguage], derivatives:ders)
        }

        template.setAttribute("resources", marcResources)
        template.setAttribute("oaiheader", oaiheader)
        template.setAttribute("marc", marcRecord)
        return template.toString()
    }

    /** Returns the first 'foxml:datastream' descendant of root whose '@ID' equals id, or null. */
    private Node datastreamById(Node root, String id){
        return root?.'**'?.'foxml:datastream'?.find{ it.'@ID' == id }
    }

    /** Returns the first datafield under datastream with the given '@tag', or null (also for a null datastream). */
    private Node firstDatafield(Node datastream, String tag){
        return datastream?.'**'?.'*:datafield'?.find{ it?.'@tag' == tag }
    }

    /** Returns all datafields under datastream with the given '@tag'; empty list for a null datastream. */
    private List allDatafields(Node datastream, String tag){
        return datastream?.'**'?.'*:datafield'?.findAll{ it?.'@tag' == tag } ?: []
    }

    /** Returns the text of the first subfield of field with the given '@code', or null when absent. */
    private String subfieldText(Node field, String code){
        return field?.'*:subfield'?.find{ it.'@code' == code }?.text()
    }

    /** Returns the texts of all subfields of field with the given '@code'; empty list when none. */
    private List subfieldTexts(Node field, String code){
        return field?.'*:subfield'?.findAll{ it.'@code' == code }?.collect{ it?.text() } ?: []
    }

    /** Returns, as one flat list, the texts of all subfields with the given '@code' across all fields. */
    private List subfieldTextsOfAll(List fields, String code){
        return fields.collect{ field -> subfieldTexts(field, code) }.flatten()
    }
}