package eu.dnetlib.data.transform;

import java.io.IOException;
import java.io.StringWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.List;

import com.google.protobuf.InvalidProtocolBufferException;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.mapreduce.util.DNGFTest;
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
import eu.dnetlib.data.proto.KindProtos.Kind;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.dom4j.DocumentException;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;

public class SolrProtoMapperTest {

	private static final Log log = LogFactory.getLog(SolrProtoMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");

	private final static List<String> dateFormats = Arrays.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");

	private final static String dataset = "{\"kind\": \"entity\",\"entity\": {\"type\": \"dataset\",\"originalId\": [\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"C1293870510-PODAAC\",\"C1293870494-PODAAC\",\"C1293870514-PODAAC\",\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}},{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|dedup_wf_001::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"},{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]},{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"children\": [{\"type\": \"dataset\",\"originalId\": [\"C1293870514-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870514-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ed18768335fcac40ae2cc062abc9e442\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T01:56:49.933Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870494-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870494-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::ac6f36f8c9ef39f815045eaa1182e745\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ARILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T02:51:51.788Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Arctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870523-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870523-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::a26b222d236fd523df9711c21a879911\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANBLT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:01:46.198Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Maps of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},{\"type\": \"dataset\",\"originalId\": [\"C1293870510-PODAAC\",\"http://esipfed.org/ns/fedsearch/1.1/data#\",\"http://esipfed.org/ns/fedsearch/1.1/metadata#\"],\"collectedfrom\": [{\"key\": \"10|wds_________::827fac6a4d016d53b6874573cf37fc05\",\"value\": \"Common Metadata Repository (CMR)\"}],\"pid\": [{\"value\": \"C1293870510-PODAAC\",\"qualifier\": {\"classid\": \"cmrid\",\"classname\": \"cmrid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2016-08-29T14:39:03.816+02:00\",\"id\": \"60|wds_cmr_____::002a1f239e51f7b92df75b7f8902c117\",\"dateoftransformation\": \"\",\"dataset\": {\"metadata\": {\"title\": [{\"value\": \"PODAAC-SEABY-ANILT\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2003-04-10T00:08:15.000Z\",\"qualifier\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"\"},\"publisher\": {\"value\": \"PO.DAAC\"},\"resulttype\": {\"classid\": \"dataset\",\"classname\": \"dataset\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"language\": {\"classid\": \"und\",\"classname\": \"Undetermined\",\"schemeid\": \"dnet:languages\",\"schemename\": \"dnet:languages\"},\"resourcetype\": {\"classid\": \"Dataset\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"size\": {\"value\": \"\"},\"format\": [{\"value\": \"ECHO10\"}],\"version\": {\"value\": \"\"},\"lastmetadataupdate\": {\"value\": \"2012-03-22T03:07:08.129Z\"},\"description\": [{\"value\": \" SeaWinds on ADEOS-II Level 3 Sigma-0 Polar-Stereographic Local-Time-of-Day Browse Images of Antarctic Region \"}]},\"instance\": [{\"licence\": {\"classid\": \"CLOSED\",\"classname\": \"Closed Access\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0021\",\"classname\": \"Dataset\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"\"]}]},\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}}],\"oaiprovenance\": {\"originDescription\": {\"harvestDate\": \"2016-08-29T14:39:03.816+02:00\",\"altered\": true,\"baseURL\": \"https%3A%2F%2Fcmr.earthdata.nasa.gov%2Fsearch%2Fcollections.json%3Fpretty%3Dtrue\",\"identifier\": \"\",\"datestamp\": \"\",\"metadataNamespace\": \"\"}}},\"dataInfo\": {\"inferred\": true,\"deletedbyinference\": false,\"trust\": \"0.9\",\"inferenceprovenance\": \"dedup-similarity-dataset\",\"provenanceaction\": {\"classid\": \"sysimport:dedup\",\"classname\": \"sysimport:dedup\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}}}";

	private String fields;

	@Before
	public void setUp() throws IOException {
		final StringWriter sw = new StringWriter();
		IOUtils.copy(getClass().getResourceAsStream("fields.xml"), sw);
		fields = sw.toString();
		assertNotNull(fields);
		assertFalse(fields.isEmpty());

		log.info(fields);
	}

	@Test
	public void testProto2SolrDocument() throws DocumentException, InvalidProtocolBufferException, JsonFormat.ParseException {
		final SolrProtoMapper mapper = new SolrProtoMapper(fields);

		assertNotNull(mapper);

		DNGF.Builder builder = DNGF.newBuilder();
		JsonFormat.merge(dataset, builder);

		final DNGF oaf = builder.build();

		assertNotNull(oaf.getEntity().getChildrenList());
		assertFalse(oaf.getEntity().getChildrenList().isEmpty());

		log.info("byte[] size: " + oaf.toByteArray().length);

		log.info("json size:   " + JsonFormat.printToString(oaf).length());

		log.info("base64 size: " + Base64.encodeBase64String(oaf.toByteArray()).length());

		final byte[] decodeBase64 = Base64.decodeBase64(Base64.encodeBase64String(oaf.toByteArray()));

		log.info("decoded: " + JsonFormat.printToString(DNGF.parseFrom(decodeBase64)));

		final SolrInputDocument doc = mapper.map(oaf, getParsedDateField("2015-02-15"), "asd", "action-set");

		assertNotNull(doc);

		for (final SolrInputField f : doc.values()) {
			log.info(f);
		}
	}


	/**
	 * method return a solr-compatible string representation of a date
	 *
	 * @param date
	 * @return
	 * @throws DocumentException
	 * @throws ParseException
	 */
	public String getParsedDateField(final String date) {
		for (String formatString : dateFormats) {
			try {
				return new SimpleDateFormat(outFormat).format(new SimpleDateFormat(formatString).parse(date));
			} catch (ParseException e) {}
		}
		throw new IllegalStateException("unable to parse date: " + date);
	}
}
