package eu.dnetlib.data.hadoop.utils;

import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;

import com.google.common.collect.Maps;

import eu.dnetlib.data.hadoop.config.ConfigurationFactory;
import eu.dnetlib.data.hadoop.hdfs.SequenceFileUtils;
import eu.dnetlib.miscutils.collections.Pair;

/**
 * Manual diagnostic test that scans a sequence file of (Text, Text) pairs and prints size statistics (min, max, mean, standard
 * deviation) of the record values, both overall and grouped by the first two characters of the record key.
 *
 * The test is marked with {@link Ignore}; the sequence file path is an hdfs:// URI, so it presumably needs access to that cluster to
 * run (NOTE(review): confirm before enabling).
 */
public class ReadSequenceFileTest {

	/** Location of the sequence file to analyse. */
	private static final Path SEQUENCE_FILE_PATH = new Path("hdfs://nmis-hadoop-cluster/tmp/indexrecords_db_openaireplus_sesam_SESAMALL.seq");

	/** Classpath location of the properties used as defaults for the hadoop configuration. */
	private static final String HADOOP_CONF_FILE = "/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties";

	/** Number of leading key characters used to group the records. */
	private static final int TYPE_PREFIX_LENGTH = 2;

	/** Progress is printed every time this many records have been read. */
	private static final int PROGRESS_STEP = 10000;

	private Configuration conf;

	/**
	 * Builds the hadoop configuration from the properties file found on the classpath.
	 */
	@Before
	public void setUp() {
		final ConfigurationFactory confFactory = new ConfigurationFactory();
		confFactory.setDefaults(new ClassPathResource(HADOOP_CONF_FILE));
		conf = confFactory.getConfiguration();
	}

	/**
	 * Reads every pair of the sequence file, accumulates the byte length of each value and prints the resulting statistics.
	 *
	 * @throws Exception
	 *             propagated from the sequence file reader
	 */
	@Test
	@Ignore
	public void testReadSequenceFile() throws Exception {
		final SummaryStatistics statsAll = new SummaryStatistics();

		final Map<String, SummaryStatistics> stats = Maps.newHashMap();

		int i = 0;
		for (Pair<Text, Text> pair : SequenceFileUtils.read(SEQUENCE_FILE_PATH, conf)) {
			final String id = pair.getKey().toString();

			// Text.getLength() is the number of valid (UTF-8) bytes held by the Text: unlike String.getBytes() it does not depend on
			// the platform default charset and it avoids materializing a String for every record.
			final int length = pair.getValue().getLength();

			final String type = typeOf(id);
			SummaryStatistics typeStats = stats.get(type);
			if (typeStats == null) {
				typeStats = new SummaryStatistics();
				stats.put(type, typeStats);
			}
			statsAll.addValue(length);
			typeStats.addValue(length);

			if (++i % PROGRESS_STEP == 0) {
				System.out.println("Read " + i);
			}
		}

		printStats("ALL", statsAll);
		for (Entry<String, SummaryStatistics> e : stats.entrySet()) {
			printStats(e.getKey(), e.getValue());
		}
	}

	/**
	 * Extracts the grouping prefix from a record key.
	 *
	 * @param id
	 *            the record key
	 * @return the first {@value #TYPE_PREFIX_LENGTH} characters of the key, or the whole key when it is shorter than that (so that a
	 *         short key does not abort the scan with a StringIndexOutOfBoundsException)
	 */
	private static String typeOf(final String id) {
		return id.length() < TYPE_PREFIX_LENGTH ? id : id.substring(0, TYPE_PREFIX_LENGTH);
	}

	/**
	 * Prints the statistics of one group on standard output; every value is divided by 1024 before printing.
	 *
	 * @param type
	 *            label of the group
	 * @param stats
	 *            accumulated sizes, in bytes
	 */
	private void printStats(final String type, final SummaryStatistics stats) {
		System.out.println("************************************");
		System.out.println("Type: " + type);
		System.out.println(String.format("\tmin    : %.2f KBytes", stats.getMin() / 1024));
		System.out.println(String.format("\tmax    : %.2f KBytes", stats.getMax() / 1024));
		System.out.println(String.format("\tavg    : %.2f KBytes", stats.getMean() / 1024));
		// the standard deviation is scaled like the other values, so it carries the same unit
		System.out.println(String.format("\tstdDev : %.2f KBytes", stats.getStandardDeviation() / 1024));
	}
}
