/**
 * 
 */
package eu.dnetlib.data.textengine.ws;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.dnetlib.common.ws.dataprov.DataProviderException;
import eu.dnetlib.common.ws.dataprov.DataProviderImpl;
import eu.dnetlib.common.ws.dataprov.DataProviderProperties;
import eu.dnetlib.common.ws.dataprov.ResultsResponse;
import eu.dnetlib.data.textengine.categorizer.LanguageCategorizerWrapper;
import eu.dnetlib.data.textengine.extractor.ExtractorException;
import eu.dnetlib.data.textengine.extractor.xml.XMLExtractor;
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
import eu.dnetlib.enabling.resultset.rmi.ResultSetService;
import eu.dnetlib.resultset.api.ICleanable;

/**
 * Data provider that returns, for every record of a registered result set, the
 * language reported by the configured {@link LanguageCategorizerWrapper}.
 * <p>
 * Bulk data registrations are kept in a synchronized map keyed by a generated
 * identifier; expired registrations are dropped by {@link #cleanup()}.
 *
 * @author Jochen Schirrwagen (jochen.schirrwagen@uni-bielefeld.de)
 *
 */
public class TextEngineDataProviderImpl extends DataProviderImpl implements ICleanable {

	private static final Log log = LogFactory.getLog(TextEngineDataProviderImpl.class);

	/** Registered bulk data properties, keyed by the id handed out by {@link #createBulkData}. */
	private final Map<String, TextEngineDataProviderProperties> data;
	private LanguageCategorizerWrapper languageCategorizer;

	// NOTE(review): presumably seconds (one day) - confirm against TextEngineDataProviderProperties.
	private static final int DEFAULT_EXPIRY_TIME = 86400;

	/**
	 * Creates a provider with an empty, synchronized registry of bulk data properties.
	 */
	public TextEngineDataProviderImpl() {
		data = Collections.synchronizedMap(new HashMap<String, TextEngineDataProviderProperties>());
	}

	/**
	 * @return new properties initialised with the default expiry time
	 */
	protected TextEngineDataProviderProperties createDataProviderProperties(){
		return new TextEngineDataProviderProperties(DEFAULT_EXPIRY_TIME);
	}

	/**
	 * Registers the given properties under a freshly generated identifier.
	 *
	 * @param dataProps the properties to register
	 * @return the generated bulk data identifier ("rec-" followed by a random UUID)
	 */
	public String createBulkData(TextEngineDataProviderProperties dataProps){
		String bdId = "rec-" + UUID.randomUUID().toString();

		data.put(bdId, dataProps);
		return bdId;
	}

	/**
	 * Looks up the properties registered for a bulk data identifier.
	 *
	 * @param bd_id the bulk data identifier (mandatory)
	 * @return the registered properties, never <code>null</code>
	 * @throws DataProviderException if nothing usable is registered under <code>bd_id</code>
	 */
	protected TextEngineDataProviderProperties getDataProviderProperties(String bd_id)throws DataProviderException{
		// A single get() instead of containsKey() + get(): the entry may be removed by
		// cleanup() between the two calls, which would hand null to the caller.
		TextEngineDataProviderProperties props = data.get(bd_id);
		if (props == null){
			throw new DataProviderException("param value 'bd_id' " + bd_id + " does not exist.");
		}
		return props;
	}

	/**
	 * Fetches the records in the given range from the result set registered under
	 * <code>bd_id</code>, extracts their text and returns the detected language of each record.
	 *
	 * @param bd_id the bulk data identifier
	 * @param fromPosition first position to fetch, must be &gt;= 1
	 * @param toPosition last position to fetch, must be &gt;= <code>fromPosition</code>
	 * @return one language value per fetched record, in result set order; empty if
	 *         the result set service returned no records
	 * @throws DataProviderException if a parameter is invalid, <code>bd_id</code> is unknown,
	 *         no language categorizer is configured, or fetching/extracting fails
	 */
	public List<String> getBulkData(String bd_id, int fromPosition, int toPosition)
			throws DataProviderException {

		if (fromPosition < 1){
			throw new DataProviderException("param value 'fromPosition' must be >= 1.");
		}
		if (toPosition < fromPosition){
			throw new DataProviderException("param value 'fromPosition' must be <= 'toPosition'.");
		}

		// Resolved outside the try block so that the "does not exist" error is not
		// wrapped a second time by the generic handler below.
		TextEngineDataProviderProperties props = getDataProviderProperties(bd_id);

		if (languageCategorizer == null){
			throw new DataProviderException("no language categorizer has been configured.");
		}

		List<String> recordResults;
		try {
			ResultSetService rsService = props.getResultSetService();
			// it is assumed, that the records are in xml format
			recordResults = rsService.getResult(props.getRsResourceIdentifier(), fromPosition, toPosition, null); //props.getRsMode());
		} catch (Exception e) {
			log.error("Error while fetching records " + fromPosition + "-" + toPosition
					+ " for bd_id " + bd_id, e);
			throw new DataProviderException(e);
		}

		List<String> languageResults = new LinkedList<String>();
		if (recordResults == null){
			// nothing was delivered for the requested range
			return languageResults;
		}

		try{
			for (String text : recordResults){
				// NOTE(review): getBytes() uses the platform default charset - confirm
				// that this matches what XMLExtractor expects.
				BufferedInputStream inStream = new BufferedInputStream(new ByteArrayInputStream(text.getBytes()));
				XMLExtractor extractor = new XMLExtractor();
				String extractedText = extractor.extract(inStream);

				languageResults.add(languageCategorizer.findLanguage(extractedText));
			}
		}catch(ExtractorException e){
			log.error("Error occured while xml-extracting record", e);
			throw new DataProviderException("Error occured while xml-extracting record", e);
		}catch(Exception e){
			log.error("Error while detecting record language for bd_id " + bd_id, e);
			throw new DataProviderException(e);
		}
		// TODO blacklist, whitelist recently not used
		return languageResults;
	}

	/**
	 * Reports the total number of elements of the result set registered under <code>bd_id</code>.
	 *
	 * @param bd_id the bulk data identifier
	 * @return a response whose total is the element count reported by the result set service
	 * @throws DataProviderException if <code>bd_id</code> is unknown or the result set service fails
	 */
	public ResultsResponse getNumberOfResults(String bd_id)
			throws DataProviderException {
		// Same lookup as getBulkData: an unknown id yields a DataProviderException
		// instead of a NullPointerException.
		TextEngineDataProviderProperties props = getDataProviderProperties(bd_id);

		ResultsResponse response = new ResultsResponse();
		try {
			ResultSetService rsService = props.getResultSetService();
			response.setTotal(rsService.getNumberOfElements(props.getRsResourceIdentifier()));
		} catch (ResultSetException e) {
			log.error("Error while counting results for bd_id " + bd_id, e);
			throw new DataProviderException(e);
		}

		return response;
	}

	/**
	 * @param languageCategorizer the languageCategorizer to set
	 */
	public void setLanguageCategorizer(LanguageCategorizerWrapper languageCategorizer) {
		this.languageCategorizer = languageCategorizer;
	}

	/**
	 * @return the languageCategorizer
	 */
	public LanguageCategorizerWrapper getLanguageCategorizer() {
		return languageCategorizer;
	}

	/**
	 * Array variant of {@link #getBulkData(String, int, int)}.
	 *
	 * @param bd_id the bulk data identifier
	 * @param fromPosition first position to fetch, must be &gt;= 1
	 * @param toPosition last position to fetch, must be &gt;= <code>fromPosition</code>
	 * @return the detected languages as an array
	 * @throws DataProviderException under the same conditions as <code>getBulkData</code>
	 */
	public String[] getSimpleBulkData(String bd_id, int fromPosition,
			int toPosition) throws DataProviderException {
		return getBulkData(bd_id, fromPosition, toPosition).toArray(new String[0]);
	}

	/**
	 * Removes every registration whose expiration time lies before the current time.
	 */
	public void cleanup() {
		log.debug("starting cleanup operations...");
		long currentTime = System.currentTimeMillis();
		// Iterating a synchronized map requires holding its lock manually.
		synchronized (data) {
			Iterator<Map.Entry<String, TextEngineDataProviderProperties>> entriesIt = data.entrySet().iterator();
			while (entriesIt.hasNext()) {
				Map.Entry<String, TextEngineDataProviderProperties> entry = entriesIt.next();
				DataProviderProperties currentProps = entry.getValue();
				// A null registration can never be served, so it is dropped as well
				// instead of aborting the whole sweep with a NullPointerException.
				if (currentProps == null || currentTime > currentProps.getExpirationTime()) {
					log.debug("removing data prov: " + entry.getKey());
					entriesIt.remove();
				}
			}
		}
		log.debug("cleanup operations finished");
	}

}
