package eu.dnetlib.efg.stress.vocabulary;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;


/**
 * Supplies vocabulary terms, one at a time, for each {@link FIELD}.
 *
 * <p>On construction, one resource named {@code terms.<field>.txt} is read per
 * field (resolved through {@code getClass().getResourceAsStream}), its lines are
 * filtered, shuffled once, and then served in an endless cycle by
 * {@link #nextTerm(FIELD)}.
 *
 * <p>NOTE(review): the per-field iterators are not synchronized by this class;
 * if one instance is shared between threads, confirm whether callers guard
 * {@link #nextTerm(FIELD)} externally.
 */
public class TermFactory {

	/** Fields for which a vocabulary is loaded; the constant name is part of the resource name. */
	public enum FIELD { title, person, synopsis }

	/** Length threshold: a line is kept only when it is strictly longer than this value. */
	protected static final int MIN_LINE_LENGTH = 3;

	/**
	 * Matches a line that contains at least one punctuation character.
	 * Compiled once instead of on every filtered line; the expression and the
	 * full-match semantics are the same as the former {@code String.matches} call.
	 */
	private static final Pattern CONTAINS_PUNCTUATION = Pattern.compile(".*\\p{Punct}.*");

	/** Endless, pre-shuffled term iterator for each field. */
	private final Map<FIELD, Iterator<String>> terms;

	/** Accepts lines without punctuation that are longer than {@link #MIN_LINE_LENGTH}. */
	private final Predicate<String> lineFilter = new Predicate<String>() {
		@Override
		public boolean apply(String line) {
			return !CONTAINS_PUNCTUATION.matcher(line).matches() && line.length() > MIN_LINE_LENGTH;
		}
	};

	/**
	 * Loads and shuffles the vocabulary of every {@link FIELD}.
	 *
	 * @throws IOException if a vocabulary resource is missing or cannot be read
	 */
	public TermFactory() throws IOException {

		terms = Maps.newConcurrentMap();

		for (FIELD field : FIELD.values()) {
			terms.put(field, Iterables.cycle(getTerms("terms."+ field.name() + ".txt")).iterator());
		}
	}

	/**
	 * Reads a vocabulary resource, keeps the lines accepted by {@link #lineFilter}
	 * and returns them in random order.
	 *
	 * @param termsVocabulary resource name, resolved relative to this object's runtime class
	 * @return a new, shuffled, mutable list of the accepted lines (possibly empty)
	 * @throws IOException if the resource does not exist or reading it fails
	 */
	private List<String> getTerms(final String termsVocabulary) throws IOException {
		final InputStream in = this.getClass().getResourceAsStream(termsVocabulary);
		if (in == null) {
			// getResourceAsStream signals "not found" with null; fail with a message
			// naming the resource instead of an NPE deep inside the reader.
			throw new IOException("Vocabulary resource not found: " + termsVocabulary);
		}

		final List<String> accepted;
		try {
			// Older commons-io versions return a raw List here, hence the suppression.
			@SuppressWarnings("unchecked")
			final List<String> lines = IOUtils.readLines(in);
			// Copy into a fresh list: the filtered view is lazy and cannot be shuffled.
			accepted = Lists.newArrayList(Iterables.filter(lines, lineFilter));
		} finally {
			// The stream was previously left open.
			in.close();
		}

		Collections.shuffle(accepted);
		return accepted;
	}

	/**
	 * Returns the next term of the given field's cycle.
	 *
	 * @param field the field whose vocabulary is consulted
	 * @return the next term; after the last one the cycle restarts from the first
	 * @throws java.util.NoSuchElementException if no line of that field's vocabulary passed the filter
	 */
	public String nextTerm(final FIELD field) {
		return terms.get(field).next();
	}

	/**
	 * Lists the fields for which terms were loaded.
	 *
	 * @return a new list holding the keys of the internal map; changes to it do not affect this factory
	 */
	public List<FIELD> availableTerms() {
		return Lists.newArrayList(terms.keySet());
	}

}
