package eu.dnetlib.testWebCrawl;

import gr.uoa.di.resourcediscovery.MalformedConfigurationException;
import gr.uoa.di.resourcediscovery.MethodProvider;
import gr.uoa.di.resourcediscovery.MethodProviderFileStorageImpl;
import gr.uoa.di.resourcediscovery.UnknownMethodException;
import gr.uoa.di.resourcediscovery.methods.XPathAndCrawl;

import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;
import org.xml.sax.SAXException;

/**
 * Exercises {@link XPathAndCrawl#getResources} against a single URL using a
 * file-backed {@link MethodProvider}.
 *
 * NOTE(review): the test depends on the file {@code /tmp/method-map.xml} and on
 * the remote URL being usable on the machine running it; presumably it performs
 * real network access -- confirm before adding it to an offline build.
 */
public class testCrawl {

	/**
	 * Requests the resources of one arXiv URL restricted to the
	 * {@code application/pdf} MIME type and expects at least one result.
	 * The elapsed wall-clock time, in whole seconds, is printed to standard
	 * output whether the test passes or fails.
	 *
	 * @throws MalformedConfigurationException propagated from the code under test
	 * @throws UnknownMethodException propagated from the code under test
	 * @throws IOException propagated from the code under test or from URL construction
	 * @throws SAXException propagated from the code under test
	 */
	@Test
	public void test() throws MalformedConfigurationException, UnknownMethodException, IOException, SAXException {

		long startMillis = System.currentTimeMillis();
		try {
			String methodMapFile = "/tmp/method-map.xml";
			List<String> mimeTypes = Arrays.asList("application/pdf");
			MethodProvider provider = new MethodProviderFileStorageImpl(methodMapFile);
			URL targetUrl = new URL("http://arxiv.org/abs/0908.4286.pdf");
			// Second constructor argument is deliberately null, as in the original test.
			// NOTE(review): its meaning is not visible from this file -- confirm.
			XPathAndCrawl crawler = new XPathAndCrawl(mimeTypes, null);
			List<String> resources = crawler.getResources(targetUrl, provider);

			// Fail with a readable message instead of a NullPointerException
			// should the crawler return null.
			Assert.assertNotNull("getResources returned null", resources);
			Assert.assertFalse("The length should be > 0", resources.isEmpty());
		} finally {
			// Report the duration even when the crawl throws or an assertion fails.
			long elapsedMillis = System.currentTimeMillis() - startMillis;
			System.out.println(elapsedMillis / 1000);
		}
	}
}
