package eu.dnetlib.oai;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

import java.text.Normalizer;

import org.apache.commons.lang3.StringEscapeUtils;
import org.junit.Test;

/**
 * Miscellaneous string-handling checks: XML escaping of an identifier and the
 * conversion of an XML-escaped, accented name into a plain ASCII token.
 */
public class PublisherMiscTest {

	/**
	 * Checks that XML 1.1 escaping rewrites the ampersand of the identifier as
	 * {@code &amp;} while leaving the copyright sign untouched, so the escaped
	 * value differs from the input.
	 */
	@Test
	public void test() {
		final String id = "NonavCreation.filmportal.de/DIF_NonAVCreation_EUROPA_TM & © Aardman Animations, LTD";
		final String newId = StringEscapeUtils.escapeXml11(id);
		assertEquals("NonavCreation.filmportal.de/DIF_NonAVCreation_EUROPA_TM &amp; © Aardman Animations, LTD", newId);
		assertFalse(id.equals(newId));
	}

	/**
	 * Runs a name containing numeric character references through unescaping,
	 * NFD decomposition and three regex clean-up passes, and verifies both the
	 * unescaped text and the final token. Hyphens and underscores are kept by
	 * every pass.
	 */
	@Test
	public void test2() {
		// Another sample input of the same shape:
		// Hochschulschriftenserver - Universit&#228;t Frankfurt am Main
		String s = "Publikationenserver der Georg-August-Universit&#228;t G&#246;ttingen";
		System.out.println("String to normalize: " + s);

		// resolve the numeric character references (&#228; -> ä, &#246; -> ö)
		s = StringEscapeUtils.unescapeXml(s);
		System.out.println("unescaped: " + s);
		assertEquals("Publikationenserver der Georg-August-Universit\u00e4t G\u00f6ttingen", s);

		// NFD splits each accented letter into base letter + combining mark,
		// so the mark can be stripped on its own below
		s = Normalizer.normalize(s, Normalizer.Form.NFD);
		System.out.println("normalized: " + s);

		// remove tilde, dots... over letters
		s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^-_]]", "");
		// change punctuation into an underscore
		s = s.replaceAll("[\\p{Punct}&&[^-_]]", "_");
		// remove all non-word characters (this is what drops the spaces)
		s = s.replaceAll("[\\W&&[^-_]]", "");
		System.out.println("Converted setSpec to: " + s);

		assertEquals("PublikationenserverderGeorg-August-UniversitatGottingen", s);
	}

}
