package eu.dnetlib.data.mapreduce.dedup;

import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.UUID;

import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator;

/**
 * Exploratory test that loads a list of titles from the classpath, pushes them through a
 * {@link PriorityQueue} driven by a {@link MapDocumentComparator} on the title field, drops the
 * groups of documents sharing the same cleaned-up title when the group is too large, and prints
 * the surviving titles to standard output.
 *
 * The test makes no assertions: its output is meant to be inspected manually.
 */
public class TitleOrderingTest {

	/** Classpath location of the fixture; each line read from it becomes one document title. */
	private static final String TITLES_RESOURCE = "eu/dnetlib/data/mapreduce/dedup/titles.txt";

	/** Name of the only field populated on the test documents. */
	private static final String TITLE_FIELD = "title";

	/** Groups containing this many documents (or more) with the same cleaned-up title are skipped. */
	private static final int GROUP_SIZE_LIMIT = 5;

	/** Documents built by {@link #setUp()}, one per line of the fixture. */
	private List<MapDocument> results = Lists.newArrayList();

	/**
	 * Reads the fixture and wraps every line in a {@link MapDocument} holding a single title field
	 * and a random identifier.
	 *
	 * @throws Exception
	 *             if the fixture cannot be opened, read or closed
	 */
	@Before
	public void setUp() throws Exception {

		final InputStream in = new ClassPathResource(TITLES_RESOURCE).getInputStream();
		try {
			// NOTE(review): readLines(InputStream) decodes with the platform default charset;
			// confirm the fixture encoding before pinning an explicit one.
			@SuppressWarnings("unchecked")
			final List<String> lines = IOUtils.readLines(in);
			for (String title : lines) {
				Map<String, List<Field>> fieldMap = Maps.newHashMap();
				List<Field> list = Lists.newArrayList();
				list.add(new Field(Type.String, TITLE_FIELD, title));
				fieldMap.put(TITLE_FIELD, list);
				results.add(new MapDocument("id-" + UUID.randomUUID(), fieldMap));
			}
		} finally {
			// readLines does not close the stream it is given, so release it here.
			in.close();
		}
	}

	/**
	 * Orders the documents through a priority queue, simplifies the queue and prints the title of
	 * every document that survived the simplification.
	 */
	@Test
	public void test() {

		final Queue<MapDocument> queue = new PriorityQueue<MapDocument>(100, new MapDocumentComparator(TITLE_FIELD));

		queue.addAll(results);

		Queue<MapDocument> queue2 = simplifyQueue(queue);

		while (!queue2.isEmpty()) {
			MapDocument doc = queue2.remove();
			System.out.println(doc.values(TITLE_FIELD).get(0).getValue());
		}
	}

	/**
	 * Drains the given queue and returns a FIFO queue with the same documents, minus the runs of
	 * consecutive documents whose cleaned-up title is identical and whose length reaches
	 * {@link #GROUP_SIZE_LIMIT}. Documents without any title value are dropped silently.
	 *
	 * Only consecutive documents are grouped, so the result depends on the order in which the
	 * input queue hands out its elements.
	 * NOTE(review): this presumably relies on the comparator placing equal titles next to each
	 * other; confirm against MapDocumentComparator.
	 *
	 * @param queue
	 *            the queue to consume; it is empty when this method returns
	 * @return the retained documents, in the order they were removed from the input queue
	 */
	private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue) {
		final Queue<MapDocument> q = new LinkedList<MapDocument>();

		String fieldRef = "";
		final List<MapDocument> tempResults = Lists.newArrayList();

		while (!queue.isEmpty()) {
			final MapDocument result = queue.remove();

			if (result.values(TITLE_FIELD).isEmpty()) {
				// nothing to compare on: the document is not carried over
				continue;
			}

			final String field = NGramUtils.cleanupForOrdering(result.values(TITLE_FIELD).get(0).getValue().toString());
			if (!field.equals(fieldRef)) {
				// the title changed: close the current group and start a new one
				flushGroup(fieldRef, tempResults, q);
				tempResults.clear();
				fieldRef = field;
			}
			tempResults.add(result);
		}
		// the last group is still pending once the input is exhausted
		flushGroup(fieldRef, tempResults, q);

		return q;
	}

	/**
	 * Appends the group to the target queue when it is smaller than {@link #GROUP_SIZE_LIMIT},
	 * otherwise reports on standard output that the group has been skipped.
	 *
	 * @param fieldRef
	 *            the cleaned-up title shared by the documents of the group
	 * @param group
	 *            the documents of the group; not modified
	 * @param target
	 *            the queue receiving the retained documents
	 */
	private void flushGroup(final String fieldRef, final List<MapDocument> group, final Queue<MapDocument> target) {
		if (group.size() < GROUP_SIZE_LIMIT) {
			target.addAll(group);
		} else {
			System.out.println("Skipped field: " + fieldRef + " - size: " + group.size());
		}
	}

}
