import eu.dnetlib.hope.domains.archive.*
import eu.dnetlib.hope.domains.common.*

/**
 * Helpers that turn a parsed EAD document of the "Génériques" newspaper
 * collection (a tree of Node objects) into the Ead* domain objects.
 *
 * Every helper is a closure-valued property, so it can be invoked as
 * utils.name(args) or passed around as a value.
 */
class GNQNewsUtils {

    /**
     * Escapes the XML markup characters &, > and < in the given string.
     * Quotes and apostrophes are left untouched.
     *
     * @param s text to escape; may be null or empty
     * @return the escaped text, or '' when s is null or empty
     */
    def escapeXml = { String s ->
        if (!s) {
            return ''
        }
        // '&' must be handled first, otherwise the ampersands introduced by the
        // other two entities would be escaped a second time.
        return s.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
    }

    /**
     * Builds the EadHeader from the eadheader element(s) found anywhere below xmlRoot.
     *
     * @param xmlRoot root node of the parsed EAD document
     * @return header carrying the eadid, the proper title, the fixed agency code "Hope",
     *         the fixed provider "Génériques" and French as the only language
     */
    def generateHeader = { Node xmlRoot ->
        // '**' is the depth-first GPath selector: every eadheader below the root.
        def eadHeaderElement = xmlRoot.'**'.eadheader
        def String dataProvider = "Génériques"
        def EadHeader eadHeader = new EadHeader(
            mainagencycode: "Hope",
            eadid: eadHeaderElement.eadid.text(),
            titleproper: new UnitTitle(title: eadHeaderElement.filedesc.titlestmt.titleproper.text()),
            creationProvider: dataProvider,
            languages: [new Language(code: 'fra', text: 'french')])
        return eadHeader
    }

    /**
     * Builds the collection-level EadArchDesc from hard-coded values only
     * (no input document is read).
     *
     * @return the archdesc of the 'GNQNewsCollection' collection
     */
    def generateSharedArchDesc = { ->
        def String collectionID = 'GNQNewsCollection'
        def EadDaoLoc collectionLandingPage = new EadDaoLoc(
            href: 'http://odysseo.org/ead.html?id=FRAS075GNQ_edfperiodiques',
            label: "landing_page")
        // Never assigned, so the thumbnail handed to EadArchDesc below is null.
        def String collectionThumbnail
        def EadDid collectionDid = new EadDid(
            localId: collectionID,
            repository: 'Génériques',
            unittitles: [new UnitTitle(title: 'Newspapers of migrants in France')],
            physicalDescriptions: [new EadPhysDesc(genreforms: [new LanguagedValue(value: 'text')])],
            langmaterials: [
                new Language(text: 'French', code: 'fra'),
                new Language(text: 'Arabic', code: 'ara'),
                new Language(text: 'Portoguese', code: 'por')
            ])
        def String scopeAbstract = '''This collection includes several newspapers relating to the history of immigration in France (1940-1960). It was started by the founding members of Génériques who, while fighting for immigrants’ rights in France in the early 1970s (in particular Said Bouziri, president of Génériques until his death in June 2009), were careful to save newspapers (as well as posters, archival documents, leaflets, brochures, etc..). The collection has been extended by donations from individuals and not-for-profit organisations. 
People working at Génériques, whose mission consists in the preservation and valorisation of the private archives of immigration have also worked for the preservation and collection of newspapers by working with organisations linked to immigrants." '''
        def EadArchDesc eadArchDesc = new EadArchDesc(
            level: 'collection',
            landingPage: collectionLandingPage,
            thumbnail: collectionThumbnail,
            did: collectionDid,
            dateCreated: new EadDate(normal: '1948-2000', value: '1948-2000'),
            dateAccumulated: new EadDate(normal: '2008-2010', value: '2008-2010'),
            scopecontentPars: [new LanguagedValue(language: 'eng', value: scopeAbstract)],
            acrualsPars: [],
            userrestrictPars: [new LanguagedValue(value: 'Rights Reserved')],
            accessrestrictPars: [new LanguagedValue(value: 'Free Access')],
            europeanaType: 'IMAGE',
            europeanaRights: 'Rights Reserved - Free Access',
            spatialCoverages: [],
            temporalCoverages: [])
        return eadArchDesc
    }

    /**
     * Populates eadArchDesc with the nested levels and the items found under the
     * first archdesc element of the document.
     *
     * @param xmlRoot     root node of the parsed EAD document
     * @param eadArchDesc object to fill; it is modified in place
     * @param repoId      repository identifier, forwarded to the other helpers
     * @return the same eadArchDesc instance
     */
    def fillArchDescSons = { Node xmlRoot, EadArchDesc eadArchDesc, String repoId ->
        def archDescElement = xmlRoot.'**'.archdesc[0]
        def String eadId = xmlRoot.'**'.eadheader.eadid.text()

        def List originationList = archDescElement.did.origination.collect { originationNode ->
            // The text is taken from the origination node being iterated. Reading it
            // through archDescElement.it selects child elements literally named "it",
            // which always yields an empty string.
            return new EadOrigination(
                origination: escapeXml(originationNode.text()),
                corpnames: originationNode.corpname*.text().collect { new LanguagedValue(value: escapeXml(it)) },
                persnames: originationNode.persname*.text().collect { new LanguagedValue(value: escapeXml(it)) })
        }

        def List defaultLanguages = getLanguages(archDescElement, false)
        if (!defaultLanguages) {
            println "No languages in archdesc with eadid: "+eadId+": setting default to french"
            defaultLanguages = [new Language(code: 'fra', text: 'french')]
        }

        def List midLevels = generateMidLevels(archDescElement, repoId, true, eadId, originationList, defaultLanguages)
        eadArchDesc.setNestedLevels(midLevels)

        def items = generateItemList(archDescElement.dsc[0], archDescElement, eadId, repoId, originationList, defaultLanguages)
        eadArchDesc.setItems(items)
        return eadArchDesc
    }

    /**
     * Recursively builds one EadMidLevel per non-item child that carries an id attribute.
     *
     * @param cNode            archdesc node (children are read from its first dsc)
     *                         or a c node (children are its c elements)
     * @param repoId           repository identifier, forwarded to the other helpers
     * @param isArchdesc       true when cNode is the archdesc element
     * @param eadId            identifier of the EAD document
     * @param originationList  originations attached to every generated did
     * @param defaultLanguages languages used when a node declares none
     * @return the mid levels, each with its own items and nested levels
     */
    def generateMidLevels = { cNode, repoId, isArchdesc, eadId, originationList, defaultLanguages ->
        def List midNodes
        if (isArchdesc)
            midNodes = cNode.dsc[0].findAll { it.'@id' }
        else
            midNodes = cNode.c.findAll { it.'@id' }

        // Item-level nodes are handled by generateItemList. They are dropped before
        // mapping so that the result holds no null placeholders for them.
        def List nonItemNodes = midNodes.findAll { it.'@level' != 'item' }

        def List midLevels = nonItemNodes.collect { midNode ->
            def EadMidLevel eadMidLevel = new EadMidLevel()
            def EadDid eadDid = generateDid(midNode, cNode, repoId, originationList, false, defaultLanguages)
            eadMidLevel.setDid(eadDid)
            eadMidLevel = generateCFields(eadMidLevel, cNode, midNode, repoId, eadId, false)

            def items = generateItemList(midNode, cNode, eadId, repoId, originationList, defaultLanguages)
            eadMidLevel.setItems(items)

            def List nestedLevels = generateMidLevels(midNode, repoId, false, eadId, originationList, defaultLanguages)
            eadMidLevel.setNestedLevels(nestedLevels)
            return eadMidLevel
        }
        return midLevels
    }

    /**
     * Builds one EadItem per c child of container that has level 'item' and a dao element.
     * Prints the number of items whose dao href was non-empty.
     *
     * @param container        node whose c children are inspected
     * @param archdescElement  node passed on as context to generateDid / generateCFields
     * @param eadId            identifier of the EAD document
     * @param repoId           repository identifier, forwarded to the other helpers
     * @param originationList  originations attached to every generated did
     * @param defaultLanguages languages used when no item languages are found
     * @return the generated items
     */
    def generateItemList = { Node container, Node archdescElement, String eadId, String repoId, List originationList, List defaultLanguages ->
        def List levelItems = container.c.findAll { it.'@level' == 'item' }
        def List itemNodes = levelItems.findAll { it.dao }
        int processedDao = 0

        def List items = itemNodes.collect { itemNode ->
            EadDid did = generateDid(itemNode, archdescElement, repoId, originationList, true, defaultLanguages)
            def EadItem eadItem = new EadItem(type: "TEXT", rights: "Rights Reserved - Restricted Access")
            eadItem = generateCFields(eadItem, archdescElement, itemNode, repoId, eadId, false) as EadItem
            eadItem.setDid(did)

            // Entry count: a base of 3 plus one per language.
            // NOTE(review): the meaning of the base value is not visible here.
            def int numberOfEntries = 3
            def List languages = getItemLanguages(itemNode, true)
            if (languages == null || languages == [])
                languages = defaultLanguages
            eadItem.setLanguages(languages.collect { it.text })
            numberOfEntries += languages.size()
            eadItem.setNumberOfEntries(numberOfEntries)

            // daolocs: two derivative placeholders with empty hrefs, grouped under
            // the id extracted from the dao href.
            def String dao = itemNode.dao.@href[0]
            if (dao != null && !dao.isEmpty()) {
                processedDao++
                def String digitalObjectID = extractFileName(dao)
                EadDaoLoc der2 = new EadDaoLoc(href: "", label: "derivative2")
                EadDaoLoc der3 = new EadDaoLoc(href: "", label: "derivative3")
                eadItem.setDaolocs([der2, der3])
                eadItem.setDaogrpId(digitalObjectID)
            }
            return eadItem
        }
        println "processed dao nodes: "+processedDao
        return items
    }

    /**
     * Fills the fields shared by all c-like elements: level, biography / custodial
     * history / scope paragraphs, control accesses, Europeana type and rights, and
     * the landing page.
     *
     * @param eadC            object to fill; it is modified in place
     * @param archDescElement context node supplying bioghist, custodhist, scopecontent
     *                        and controlaccess
     * @param cElement        node supplying level, id, scopecontent and controlaccess
     * @param repoId          repository identifier (not used by this helper)
     * @param eadId           identifier of the EAD document, used in the landing page URL
     * @param isArchdesc      when true the landing page URL omits the c parameter
     * @return the same eadC instance
     */
    def generateCFields = { EadCElement eadC, Node archDescElement, Node cElement, String repoId, String eadId, boolean isArchdesc ->
        def String level = cElement.@level
        if (level == null || level == '' || level == 'collection')
            level = 'mid-level'
        eadC.setLevel(level)

        def bioghistPars = archDescElement.bioghist.p*.text().collect { new LanguagedValue(value: escapeXml(it)) }
        eadC.setBioghistPars(bioghistPars)

        def custodHistPars = archDescElement.custodhist.p*.text().collect { new LanguagedValue(value: escapeXml(it)) }
        eadC.setCustodHistPars(custodHistPars)

        // Paragraphs of the element itself come first, then those of the context node.
        def ownScopePars = cElement.scopecontent.p*.text().collect { new LanguagedValue(value: escapeXml(it)) }
        def contextScopePars = archDescElement.scopecontent.p*.text().collect { new LanguagedValue(value: escapeXml(it)) }
        def scopeContentPars = ownScopePars + contextScopePars
        eadC.setScopecontentPars(scopeContentPars)

        def accesses = cElement.controlaccess + archDescElement.controlaccess
        def List accessList = accesses.collect { accessNode ->
            def EadControlAccess controlAccess = new EadControlAccess()
            // The attribute is selected as '@role'. An attribute name containing a
            // space cannot exist in XML, so such a selector would never match.
            def editors = accessNode.corpname.findAll { it.'@role' == 'editor' }
            controlAccess.setCorpNames(editors*.text().collect { new LanguagedValue(value: it) })
            controlAccess.setPersNames(accessNode.persname*.text().collect { new LanguagedValue(value: it) })
            def editionPlaces = accessNode.geogname.findAll { it.'@role' == 'lieu_edition' }
            controlAccess.setGeognames(editionPlaces*.text().collect { new LanguagedValue(value: it) })
            return controlAccess
        }
        eadC.setControlAccesses(accessList)

        eadC.setEuropeanaType('TEXT')
        eadC.setEuropeanaRights("Rights Reserved - Restricted Access")

        // landing page:
        // concat("http://odysseo.org/ead.html?id=", /ead/eadheader/eadid, "&c=", /ead/eadheader/eadid, "_",
        //        c/@id, "&qid=sdx_q24")
        def EadDaoLoc landingPage = new EadDaoLoc()
        if (!isArchdesc)
            landingPage.setHref("http://odysseo.org/ead.html?id="+eadId+"&c="+eadId+'_'+cElement.'@id'+"&qid=sdx_q24")
        else
            landingPage.setHref("http://odysseo.org/ead.html?id="+eadId+"&qid=sdx_q24")
        landingPage.setLabel("landing_page")
        eadC.setLandingPage(landingPage)
        return eadC
    }

    /**
     * Reads languages from controlaccess/subject elements whose source attribute
     * is "liste-langue".
     *
     * @param cNode             node to inspect
     * @param inheritFromParent when true the parent of cNode is inspected instead
     * @return one Language per matching subject, using its text as both code and text
     */
    def getLanguages = { Node cNode, boolean inheritFromParent ->
        def Node current = cNode
        if (inheritFromParent) {
            current = cNode.parent()
        }
        def List langs = current.controlaccess?.subject?.findAll { it.'@source' == "liste-langue" }*.text()
        return langs.collect { new Language(code: it, text: it) }
    }

    /**
     * Reads languages from the did/langmaterial text, split on ';'.
     *
     * @param cNode             node to inspect
     * @param inheritFromParent when true the parent of cNode is inspected instead
     * @return one Language per token, using the token as both code and text
     */
    def getItemLanguages = { Node cNode, boolean inheritFromParent ->
        def Node current = cNode
        if (inheritFromParent) {
            current = cNode.parent()
        }
        def String langs = current.did.langmaterial?.text()
        def List languages = langs.tokenize(';')
        return languages.collect { new Language(code: it, text: it) }
    }

    /**
     * Returns the second segment of a path-like string, split on '\' when the string
     * contains a backslash, otherwise on '/'. When there is no separator, or no
     * second segment (e.g. "dir/"), the input is returned unchanged.
     *
     * NOTE(review): the name suggests the last segment; for two-segment values both
     * readings agree. Confirm the intent for deeper paths before changing it.
     *
     * @param fullName path-like string; must not be null
     * @return the second segment, or fullName when there is none
     */
    def extractFileName = { String fullName ->
        String[] segments = null
        if (fullName.contains('\\')) {
            segments = fullName.split('\\\\')
        } else if (fullName.contains('/')) {
            segments = fullName.split('/')
        }
        // Guard against a missing second segment instead of failing on index 1.
        if (segments != null && segments.length > 1) {
            return segments[1]
        }
        return fullName
    }

    /**
     * Builds the EadDid of a c element.
     *
     * @param cElement         node described by the did
     * @param archDescElement  context node (not used by this helper)
     * @param repoId           repository identifier (not used by this helper)
     * @param originationList  originations stored on the did
     * @param isItemLevel      when true, languages are read from the parent and the
     *                         parent's unit title is prepended to the title
     * @param defaultLanguages languages used when none are found
     * @return the did, with the call number taken from the dao href when present
     */
    def generateDid = { Node cElement, Node archDescElement, String repoId, List originationList, boolean isItemLevel, defaultLanguages ->
        def callNo = ''
        def String dao = cElement.dao?.@href[0]
        if (dao != null && !dao.isEmpty()) {
            callNo = extractFileName(dao)
        }

        def languages = getLanguages(cElement, isItemLevel)
        if (!languages)
            languages = defaultLanguages

        def String theTitle = cElement.did.unittitle?.text()
        // The way we want it to appear in Europeana for the c[dao]:
        // the parent c/did/unittitle/text(), then the c[dao] unittitle, i.e.
        // concat(//c[dao]/../did/unittitle/text(), ', ', //c[dao]/did/unittitle)
        if (isItemLevel) {
            def String parentTitle = cElement.parent().did.unittitle?.text()
            if (parentTitle)
                theTitle = parentTitle +', '+theTitle
        }

        def List cUnitTitles = cElement.did.collect { didNode ->
            return new UnitTitle(title: theTitle, date: didNode.unitdate?.text())
        }

        def eadDid = new EadDid(
            callNumber: callNo,
            localId: cElement.@id,
            langmaterials: languages,
            repository: '',
            originations: originationList,
            physicalDescriptions: [],
            unittitles: cUnitTitles,
            odds: [],
            notes: [])
        return eadDid
    }
}