package eu.dnetlib.data;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.csv.CSVParser;
import org.apache.commons.lang.math.RandomUtils;
import org.springframework.beans.factory.annotation.Required;

import com.google.common.collect.Maps;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.WriteConcern;

import eu.dnetlib.miscutils.collections.BloomFilter;

/**
 * Generates a synthetic "pace" dataset of person records (first name, last name,
 * birth date, country) into a MongoDB collection named {@code base}.
 *
 * <p>Each generated tuple is guaranteed (modulo Bloom-filter false negatives never
 * occurring, and false positives only causing harmless retries) to be unique both
 * as a full 4-field tuple and on every 3-field projection. Roughly every 20th
 * record additionally gets an "alias" duplicate differing in exactly one field,
 * and a project-local {@link ErrorGenerator} may inject erroneous copies.
 *
 * <p>Not thread-safe: the working maps in {@link #insertTuples} are reused across
 * iterations.
 */
public class PaceDatasetGenerator {

	/** CSV file providing the pools of candidate field values. Columns used: 1=firstName, 2=lastName, 3=birthDate, 5=country. */
	private static final File NAMES = new File("src/main/eu/dnetlib/data/base/names.csv").getAbsoluteFile();

	/** Target mongo database, injected via {@link #setDb(DB)}. */
	private DB db;

	// NOTE(review): FileReader reads with the platform default charset — confirm the CSV encoding.
	private CSVParser parser;

	/** Per-field pools of candidate values, filled by {@link #init()}. */
	private Map<Field, List<String>> fMap = Maps.newHashMap();

	/** The four attributes carried by every generated record. */
	public enum Field { firstName, lastName, birthDate, country }

	/** Project-local helper that injects erroneous copies of records. */
	private ErrorGenerator errorGen = new ErrorGenerator();

	/** Bloom filter sizing: bits allocated per requested record. */
	private static final int bloomBitsFactor = 10000;

	/** Bloom filter sizing: expected insertions per requested record (projection filters). */
	private static final int bloomSizeFactor = 10;

	/**
	 * Opens the CSV source and prepares an empty value pool for every {@link Field}.
	 *
	 * @throws IOException if the names file cannot be opened
	 */
	public PaceDatasetGenerator() throws IOException {
		parser = new CSVParser(new FileReader(NAMES));

		for (Field f : Field.values()) {
			fMap.put(f, new ArrayList<String>());
		}
	}

	/**
	 * Loads all CSV rows into the per-field value pools, skipping the header row
	 * (index 0).
	 *
	 * @throws IOException if the CSV cannot be read
	 */
	public void init() throws IOException {
		String[][] rows = parser.getAllValues();
		for (int i = 1; i < rows.length; i++) {
			fMap.get(Field.firstName).add(rows[i][1]);
			fMap.get(Field.lastName).add(rows[i][2]);
			fMap.get(Field.birthDate).add(rows[i][3]);
			fMap.get(Field.country).add(rows[i][5]);
		}
	}

	/**
	 * Drops and repopulates the {@code base} collection with roughly
	 * {@code nRecord} records (error/alias copies consume sequence numbers too).
	 *
	 * @param nRecord requested number of records
	 * @return the resulting collection size
	 */
	public long createBaseDataset(int nRecord) {

		DBCollection base = db.getCollection("base");
		base.drop();

		insertTuples(base, nRecord);
		//ensureIndex(base);

		return base.count();
	}

	/** Copies the values of the given fields from {@code src} into {@code dst}. */
	private static void project(Map<String, Object> dst, Map<String, Object> src, Field... fields) {
		for (Field f : fields) {
			dst.put(f.name(), src.get(f.name()));
		}
	}

	/**
	 * Generates and inserts the records, retrying any draw whose full tuple or any
	 * 3-field projection has (probably) been produced before.
	 *
	 * @param base     target collection
	 * @param nRecords requested number of records
	 */
	private void insertTuples(DBCollection base, int nRecords) {

		// Working maps, cleared and reused every iteration; defensive copies are
		// taken before anything outlives the iteration.
		Map<String, Object> objMap = Maps.newHashMap();

		Map<String, Object> fld = Maps.newHashMap(); // firstName + lastName + birthDate
		Map<String, Object> flc = Maps.newHashMap(); // firstName + lastName + country
		Map<String, Object> fdc = Maps.newHashMap(); // firstName + birthDate + country
		Map<String, Object> ldc = Maps.newHashMap(); // lastName + birthDate + country

		// Full-tuple filter. NOTE(review): sized for nRecords expected insertions
		// while the projection filters below use bloomSizeFactor * nRecords —
		// confirm whether the asymmetry is intentional (only unique tuples are added).
		BloomFilter<Map<String, Object>> bf = new BloomFilter<Map<String, Object>>(bloomBitsFactor * nRecords, nRecords);

		BloomFilter<Map<String, Object>> bf_fld = new BloomFilter<Map<String, Object>>(bloomBitsFactor * nRecords, bloomSizeFactor * nRecords);
		BloomFilter<Map<String, Object>> bf_flc = new BloomFilter<Map<String, Object>>(bloomBitsFactor * nRecords, bloomSizeFactor * nRecords);
		BloomFilter<Map<String, Object>> bf_fdc = new BloomFilter<Map<String, Object>>(bloomBitsFactor * nRecords, bloomSizeFactor * nRecords);
		BloomFilter<Map<String, Object>> bf_ldc = new BloomFilter<Map<String, Object>>(bloomBitsFactor * nRecords, bloomSizeFactor * nRecords);

		System.out.println("expectedFalsePositiveProbability: " + bf.expectedFalsePositiveProbability());

		// Counts do-while iterations; starts at 0 so that (loop - nRecords) is the
		// number of retries caused by filter hits. (Previously initialised to -1,
		// which under-reported by one.)
		int loop = 0;

		for (int i = 0; i < nRecords; i++) {
			objMap.clear();

			fld.clear();
			flc.clear();
			fdc.clear();
			ldc.clear();

			// Draw random tuples until neither the full tuple nor any 3-field
			// projection has (probably) been seen before.
			do {
				for (Field f : Field.values()) {
					objMap.put(f.name(), pickRandom(f));
				}

				project(fld, objMap, Field.firstName, Field.lastName, Field.birthDate);
				project(flc, objMap, Field.firstName, Field.lastName, Field.country);
				project(fdc, objMap, Field.firstName, Field.birthDate, Field.country);
				project(ldc, objMap, Field.lastName, Field.birthDate, Field.country);

				loop++;

			} while (bf.contains(objMap) || bf_fld.contains(fld) || bf_flc.contains(flc) || bf_fdc.contains(fdc) || bf_ldc.contains(ldc));

			// Register copies: the working maps are mutated on the next iteration.
			bf.add(Maps.newHashMap(objMap));

			bf_fld.add(Maps.newHashMap(fld));
			bf_flc.add(Maps.newHashMap(flc));
			bf_fdc.add(Maps.newHashMap(fdc));
			bf_ldc.add(Maps.newHashMap(ldc));

			objMap.put("kind", "unique");
			objMap.put("error", ErrorKind.none.name());
			objMap.put("n", i);

			// Either helper may insert an extra record under sequence number i+1
			// (and i+2); skip the numbers they consumed.
			boolean gotError = errorGen.insertError(base, Maps.newHashMap(objMap), i);

			boolean gotDuplicate = addDuplicate(base, bf, Maps.newHashMap(objMap), i, gotError);

			if (gotError) {
				i++;
			}
			if (gotDuplicate) {
				i++;
			}

			base.insert(new BasicDBObject(objMap), WriteConcern.NORMAL);
		}
		System.out.println("finished insertion phase, loops:" + (loop - nRecords));
	}

	/**
	 * For every 20th record, inserts an "alias": a copy differing in exactly one
	 * randomly chosen field, pointing back at the source record via "relatedTo".
	 *
	 * @param base      target collection
	 * @param bf        full-tuple bloom filter (currently unused: the mutated alias
	 *                  is deliberately not re-checked for uniqueness)
	 * @param objMap    private copy of the source record — safe to mutate
	 * @param i         sequence number of the source record
	 * @param prevError whether the error generator already consumed number i + 1
	 * @return true if an alias record was inserted
	 */
	private boolean addDuplicate(DBCollection base, BloomFilter<Map<String, Object>> bf, Map<String, Object> objMap, int i, boolean prevError) {

		if (i % 20 != 0) return false;

		objMap.put("kind", "alias");

		// The alias takes the next free sequence number.
		int j = prevError ? i + 2 : i + 1;

		objMap.put("relatedTo", objMap.get("n"));
		objMap.put("n", j);

		// Perturb exactly one randomly chosen field.
		Field field = pickField();
		objMap.put(field.name(), pickRandom(field));

		base.insert(new BasicDBObject(objMap), WriteConcern.NORMAL);

		return true;
	}

	/** @return a uniformly random {@link Field}. */
	private Field pickField() {
		return Field.values()[RandomUtils.nextInt(Field.values().length)];
	}

	/**
	 * Builds indexes on the error-kind fields plus "n" and "kind".
	 * Currently unused — the call in {@link #createBaseDataset(int)} is disabled.
	 */
	private void ensureIndex(DBCollection base) {
		base.resetIndexCache();

		for (ErrorKind e : ErrorKind.values()) {
			base.ensureIndex(new BasicDBObject(e.name(), 1));
		}

		base.ensureIndex(new BasicDBObject("n", 1));
		base.ensureIndex(new BasicDBObject("kind", 1));

		System.out.println("finished indexing phase");
	}

	/**
	 * @return a uniformly random value from the pool for {@code field}
	 *         (requires {@link #init()} to have populated it)
	 */
	private String pickRandom(Field field) {
		return fMap.get(field).get(RandomUtils.nextInt(fMap.get(field).size()));
	}

	public DB getDb() {
		return db;
	}

	@Required
	public void setDb(DB db) {
		this.db = db;
	}

}
