import org.apache.commons.lang.StringEscapeUtils

import eu.dnetlib.hope.domains.archive.EadControlAccess
import eu.dnetlib.hope.domains.archive.EadDaoLoc
import eu.dnetlib.hope.domains.archive.EadDate
import eu.dnetlib.hope.domains.archive.EadDid
import eu.dnetlib.hope.domains.archive.EadItem
import eu.dnetlib.hope.domains.archive.EadOrigination
import eu.dnetlib.hope.domains.archive.EadPhysDesc
import eu.dnetlib.hope.domains.archive.Language
import eu.dnetlib.hope.domains.archive.UnitTitle
import eu.dnetlib.hope.domains.common.*

/**
 * Helpers that turn a FOXML record (parsed into a groovy.util.Node tree) into
 * the EAD domain objects (EadItem, EadDid, ...).
 *
 * The record is expected to carry per-language descriptive datastreams with the
 * IDs 'ITEM-ARC-EN', 'ITEM-ARC-RU' and 'ITEM-ARC-HU', plus optional 'RIGHTS' and
 * 'METS-STRUCTURE' datastreams. The English datastream is treated as mandatory:
 * it is dereferenced without null checks, so a record without it fails with a
 * NullPointerException. The Russian and Hungarian datastreams are optional.
 */
class OSAUtils {

    /**
     * Namespaces
     */
    def dri = new groovy.xml.Namespace("http://www.driver-repository.eu/namespace/dri", 'dri')
    def foxml = new groovy.xml.Namespace("info:fedora/fedora-system:def/foxml#", 'foxml')
    def mets = new groovy.xml.Namespace("http://www.loc.gov/METS/", 'mets')
    def osa = new groovy.xml.Namespace("http://greenfield.osaarchivum.org/ns/item", 'osa')
    def xlink = new groovy.xml.Namespace("http://www.w3.org/1999/xlink", 'xlink')

    /**
     * Wraps each text in a LanguagedValue tagged with the given language code.
     *
     * @param texts     list of strings; may be null (e.g. when an optional datastream is absent)
     * @param lang      language code stored on every value ('eng', 'rus', 'hun')
     * @param escapeXml when true each text is passed through StringEscapeUtils.escapeXml first
     * @return a new list, empty when texts is null or empty
     */
    private static List languagedValues(texts, String lang, boolean escapeXml = false) {
        return (texts ?: []).collect {
            new LanguagedValue(language: lang, value: escapeXml ? StringEscapeUtils.escapeXml(it) : it)
        }
    }

    /**
     * Wraps each text in a UnitTitle tagged with the given language code.
     * Titles are always XML-escaped.
     *
     * @param texts list of title strings; may be null
     * @param lang  language code stored on every title
     * @return a new list, empty when texts is null or empty
     */
    private static List unitTitles(texts, String lang) {
        return (texts ?: []).collect {
            new UnitTitle(title: StringEscapeUtils.escapeXml(it), language: lang)
        }
    }

    /**
     * Populates the given archive description with the item list generated from
     * the record and an empty list of nested levels.
     *
     * @param xmlRoot      root node of the parsed record
     * @param dataProvider name stored as the repository of the generated EadDid
     * @param eadArchDesc  object to populate; must offer setItems and setNestedLevels
     * @param isVideo      selects the video variant of the extent computation (default false)
     * @return the same eadArchDesc instance
     */
    def fillEadArchDesc = { xmlRoot, dataProvider, eadArchDesc, isVideo = false ->
        def items = generateItemList(xmlRoot, dataProvider, isVideo)
        eadArchDesc.setItems(items)
        eadArchDesc.setNestedLevels([])
        return eadArchDesc
    }

    /**
     * Builds the single EadItem described by the record.
     *
     * @param xmlRoot      root node of the parsed record
     * @param dataProvider name stored as the repository of the generated EadDid
     * @param isVideo      forwarded to generateDid
     * @return a one-element list holding the generated EadItem
     */
    def generateItemList = { Node xmlRoot, String dataProvider, boolean isVideo ->
        // Looks up a foxml:datastream anywhere in the record by its ID attribute.
        def findDatastream = { String id ->
            xmlRoot.'**'.'foxml:datastream'?.find { it.'@ID'.equals(id) }
        }
        def englishDatastream = findDatastream('ITEM-ARC-EN')
        def russianDatastream = findDatastream('ITEM-ARC-RU')
        def hungarianDatastream = findDatastream('ITEM-ARC-HU')
        def rightsDatastream = findDatastream('RIGHTS')
        def metsDatastream = findDatastream('METS-STRUCTURE')

        // Keep the placeholder unless a europeana:rights element is really present.
        // An empty node list is non-null, so testing only for null would replace
        // the placeholder with null.
        String rights = "missing europeana:rights field in input record"
        def rightsNode = rightsDatastream?.'**'?.'europeana:rights'
        def rightsText = rightsNode ? rightsNode[0]?.text() : null
        if (rightsText != null) {
            rights = rightsText
        }

        EadDid did = generateDid(englishDatastream, russianDatastream, hungarianDatastream, dataProvider, isVideo)

        //TODO: check this: transcription should be a list of transcription strings
        EadItem eadItem = new EadItem(
                rights: rights,
                languages: englishDatastream.'**'.'osa:documentLanguage'*.text(),
                type: englishDatastream.'**'.'osa:primaryType'[0]?.text())
        eadItem = generateCFields(eadItem, englishDatastream, russianDatastream, hungarianDatastream, rights) as EadItem
        eadItem.setDid(did)

        int numberOfEntries = 3
        eadItem.setNumberOfEntries(numberOfEntries)

        if (metsDatastream) {
            // xlink:href of the first mets:fLocat inside the fileGrp with the given USE,
            // or null when that fileGrp or its fLocat is missing.
            def hrefFor = { String use ->
                def fileGrp = metsDatastream.'**'.'mets:fileGrp'?.find { it.'@USE'.equals(use) }
                def locations = fileGrp?.'**'?.'mets:fLocat'
                return locations ? locations[0]?.attribute(xlink.href) : null
            }
            String der2Pid = hrefFor('low_level')
            String der3Pid = hrefFor('thumbnail')

            EadDaoLoc der2 = new EadDaoLoc(href: der2Pid, label: "derivative2")
            EadDaoLoc der3 = new EadDaoLoc(href: der3Pid, label: "derivative3")
            eadItem.setDaolocs([der2, der3])
            // Both the group id and the group pid are taken from the low_level derivative.
            eadItem.setDaogrpId(der2Pid)
            eadItem.setDaogrpPid(der2Pid)
        }
        else {
            println "**** id: " + englishDatastream.'**'.'osa:itemPID'[0].text() + " metsNode is null - no derivatives "
        }
        return [eadItem]
    }

    /**
     * Fills the descriptive fields of an EAD component from the language datastreams.
     *
     * @param eadC            component to populate (an EadItem in this file)
     * @param datastreamEN    English datastream; required
     * @param datastreamRU    Russian datastream; may be null
     * @param datastreamHU    Hungarian datastream; may be null
     * @param europeanaRights rights statement stored on the component
     * @return the same eadC instance
     */
    def generateCFields = { eadC, datastreamEN, datastreamRU, datastreamHU, europeanaRights ->
        def pid = "http://hdl.handle.net/10891/" + datastreamEN.'**'.'osa:itemPID'[0].text()
        eadC.setPid(pid)
        eadC.setLevel(datastreamEN.'**'.'osa:descriptionLevel'[0].text())

        List biohistPars =
                OSAUtils.languagedValues(datastreamEN.'**'.'osa:administrativeHistory'*.text(), 'eng') +
                OSAUtils.languagedValues(datastreamRU?.'**'?.'osa:administrativeHistory'*.text(), 'rus') +
                OSAUtils.languagedValues(datastreamHU?.'**'?.'osa:administrativeHistory'*.text(), 'hun')
        eadC.setBioghistPars(biohistPars)

        // Content summaries are XML-escaped; contents tables are stored as they are.
        List scopeContentPars =
                OSAUtils.languagedValues(datastreamEN.'**'.'osa:contentsSummary'*.text(), 'eng', true) +
                OSAUtils.languagedValues(datastreamRU?.'**'?.'osa:contentsSummary'*.text(), 'rus', true) +
                OSAUtils.languagedValues(datastreamHU?.'**'?.'osa:contentsSummary'*.text(), 'hun', true)
        List scopeContentPars2 =
                OSAUtils.languagedValues(datastreamEN.'**'.'osa:contentsTable'*.text(), 'eng') +
                OSAUtils.languagedValues(datastreamRU?.'**'?.'osa:contentsTable'*.text(), 'rus') +
                OSAUtils.languagedValues(datastreamHU?.'**'?.'osa:contentsTable'*.text(), 'hun')
        eadC.setScopecontentPars(scopeContentPars + scopeContentPars2)

        List phystechPars = OSAUtils.languagedValues(datastreamEN.'**'.'osa:physicalCondition'*.text(), 'eng')
        eadC.setPhystechPars(phystechPars)

        def keywords = datastreamEN.'**'.'osa:subjectFree'.collect {
            new EadControlAccess(controlaccess: it.text(), language: 'eng')
        }

        // Sets remove names that occur under more than one element.
        Set persons = (datastreamEN.'**'.'osa:associatedPersonal'.'osa:name'*.text() +
                datastreamEN.'**'.'osa:subjectPersonal'.'osa:name'*.text() +
                datastreamEN.'**'.'osa:subjectPersonalFree'*.text()) as Set
        Set corporates = (datastreamEN.'**'.'osa:associatedCorporate'.'osa:name'*.text() +
                datastreamEN.'**'.'osa:subjectCorporate'.'osa:name'*.text() +
                datastreamEN.'**'.'osa:subjectCorporateFree'*.text()) as Set
        Set subj = (datastreamEN.'**'.'osa:subjectTopical'*.text() +
                datastreamEN.'**'.'osa:subjectLC'*.text()) as Set
        Set places = (datastreamEN.'**'.'osa:associatedPlace'.'osa:place'*.text() +
                datastreamEN.'**'.'osa:associatedCountry'.'osa:country'*.text() +
                datastreamEN.'**'.'osa:spatialCoverage'.'osa:coverage'*.text() +
                datastreamEN.'**'.'osa:spatialCoverageCountry'.'osa:country'*.text() +
                datastreamEN.'**'.'osa:spatialCoverageFree'*.text()) as Set

        EadControlAccess controlAccessPersons = new EadControlAccess(
                persNames: OSAUtils.languagedValues(persons.toList(), 'eng'),
                corpNames: OSAUtils.languagedValues(corporates.asList(), 'eng'),
                subjects: OSAUtils.languagedValues(subj.toList(), 'eng'),
                geognames: OSAUtils.languagedValues(places.toList(), 'eng')
        )
        // List + single element: the aggregated entry is appended after the keyword entries.
        eadC.setControlAccesses(keywords + controlAccessPersons)

        //landing page:
        // NOTE(review): the landing page points at the item handle (pid). A URL built from
        // the parent's @PID used to be computed here but was never used - confirm which
        // one is intended.
        EadDaoLoc landingPage = new EadDaoLoc()
        landingPage.setHref(pid)
        landingPage.setLabel("landing_page")
        //landingPage.setId(eadId+"_"+it.@id)
        eadC.setLandingPage(landingPage)

        // text() on the whole node list concatenates the text of every osa:primaryType element.
        eadC.setEuropeanaType(datastreamEN.'**'.'osa:primaryType'?.text())
        eadC.setEuropeanaRights(europeanaRights)
        return eadC
    }

    /**
     * Builds the EadDid (identification block) of the item: local id, titles,
     * languages, creation date, originations, physical description and notes.
     *
     * @param englishDatastream   English datastream; required
     * @param russianDatastream   Russian datastream; may be null
     * @param hungarianDatastream Hungarian datastream; may be null
     * @param dataProvider        stored as the repository
     * @param isVideo             when true the extent is the text of the first
     *                            osa:subExtentNumber; otherwise it is a count of the
     *                            osa:subExtent elements followed by a per-element breakdown
     * @return the populated EadDid
     */
    def generateDid = { englishDatastream, russianDatastream, hungarianDatastream, dataProvider, isVideo ->
        // Same handle prefix as the pid built in generateCFields.
        def pid = "http://hdl.handle.net/10891/" + englishDatastream.'**'.'osa:itemPID'[0].text()

        List englishTitles = OSAUtils.unitTitles(englishDatastream.'**'?.'osa:primaryTitle'.'osa:title'*.text(), 'eng')
        List russianTitles = OSAUtils.unitTitles(russianDatastream?.'**'?.'osa:primaryTitle'?.'osa:title'*.text(), 'rus')
        List hungarianTitles = OSAUtils.unitTitles(hungarianDatastream?.'**'?.'osa:primaryTitle'?.'osa:title'*.text(), 'hun')
        List titles1 = englishTitles + russianTitles + hungarianTitles

        List altTitles =
                OSAUtils.unitTitles(englishDatastream.'**'?.'osa:alternativeTitle'?.'osa:title'*.text(), 'eng') +
                OSAUtils.unitTitles(russianDatastream?.'**'?.'osa:alternativeTitle'?.'osa:title'*.text(), 'rus') +
                OSAUtils.unitTitles(hungarianDatastream?.'**'?.'osa:alternativeTitle'?.'osa:title'*.text(), 'hun')

        def thePhysicalNode = englishDatastream.'**'.'osa:sourcePhysicalDescriptionWrap'
        def theFinalExtentString = ''
        if (thePhysicalNode) {
            def theExtentNodes = thePhysicalNode.'osa:subExtent'
            if (isVideo) {
                theFinalExtentString = theExtentNodes.'**'.'osa:subExtentNumber'[0]?.text()
            }
            else {
                // "<count> <unit>[s] (<n1> <u1>[s]; <n2> <u2>[s]; ...)"
                def theExtentNumber = theExtentNodes.size()
                def theExtentUnitText = thePhysicalNode.'osa:extentUnit'.text().trim()
                theFinalExtentString = theExtentNumber + " " + theExtentUnitText
                if (theExtentNumber > 1) {
                    theFinalExtentString = theFinalExtentString + 's'
                }
                List pageNumbers = theExtentNodes.collect { subExtent ->
                    def theSubExtNumber = subExtent.'**'.'osa:subExtentNumber'[0].text()
                    def plural = false
                    if (theSubExtNumber.isInteger()) {
                        plural = theSubExtNumber.toInteger() > 1
                    }
                    else {
                        println 'theSubExtNumber should be an integer instead of ' + theSubExtNumber
                    }
                    def theSubExtUnit = subExtent.'**'.'osa:subExtentUnit'[0].text()
                    return theSubExtNumber + ' ' + theSubExtUnit + (plural ? 's' : '')
                }
                theFinalExtentString += " (" + pageNumbers.join('; ') + ")"
            }
        }

        def physDesc1 = [
                new EadPhysDesc(
                        extents: [
                                new LanguagedValue(value: theFinalExtentString, language: 'eng')
                        ],
                        genreforms: OSAUtils.languagedValues(englishDatastream.'**'.'osa:formGenre'.'**'.'osa:genre'*.text(), 'eng'))
        ]

        def langMaterials = englishDatastream.'**'.'osa:documentLanguage'*.text()
        List languages = langMaterials.collect { new Language(code: it, text: it) }

        List notesList =
                OSAUtils.languagedValues(englishDatastream.'**'.'osa:additionalNotes'*.text(), 'eng') +
                OSAUtils.languagedValues(russianDatastream?.'**'?.'osa:additionalNotes'*.text(), 'rus') +
                OSAUtils.languagedValues(hungarianDatastream?.'**'?.'osa:additionalNotes'*.text(), 'hun') +
                OSAUtils.languagedValues(englishDatastream.'**'?.'osa:documentLanguageStatement'*.text(), 'eng')

        // Only the part before the first '-' of each normalized date is kept
        // (presumably the year of an ISO-style date - TODO confirm). A range is
        // written only when start and end differ.
        String dateStartYear = englishDatastream.'**'.'osa:dateOfCreationNormalizedStart'.text().takeWhile { it != '-' }
        String dateEndYear = englishDatastream.'**'.'osa:dateOfCreationNormalizedEnd'.text().takeWhile { it != '-' }
        String unitdate = dateStartYear
        if (dateStartYear != dateEndYear) {
            unitdate += "-" + dateEndYear
        }

        def eadDid = new EadDid(
                localId: pid,
                langmaterials: languages,
                unitdates: [
                        new EadDate(value: unitdate, normal: unitdate)
                ],
                repository: dataProvider,
                originations: [
                        new EadOrigination(
                                persnames: OSAUtils.languagedValues(englishDatastream.'**'.'osa:creatorPersonalFree'.'osa:name'*.text(), 'eng'),
                                corpnames: OSAUtils.languagedValues(englishDatastream.'**'.'osa:creatorCorporateFree'.'osa:name'*.text(), 'eng')
                        )
                ],
                physicalDescriptions: physDesc1,
                unittitles: titles1 + altTitles,
                odds: [],
                notes: notesList)
        return eadDid
    }
}