package eu.dnetlib.data.collective.harvest.provider; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.StringWriter; import java.util.concurrent.BlockingQueue; import org.apache.commons.compress.tar.TarEntry; import org.apache.commons.compress.tar.TarInputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import eu.dnetlib.data.collective.harvest.provider.DataProvider.FileType; /** * * TODO: Why is this code type parametric but actually only reads strings from files? * * @author marko * * @param */ public class ArchiveWalker { private static final Log log = LogFactory.getLog(ArchiveWalker.class); // NOPMD by marko on 11/24/08 5:02 PM private BlockingQueue queue; private FileType type; private File source; public ArchiveWalker(BlockingQueue queue, FileType type, File source) { super(); this.queue = queue; this.type = type; this.source = source; } @SuppressWarnings("unchecked") public void doWalk() throws IOException { TarInputStream tis = new TarInputStream(new FileInputStream(source)); TarEntry entry; while ((entry = tis.getNextEntry()) != null) { if (entry.isDirectory()) continue; StringWriter buf = new StringWriter(); IOUtils.copy(tis, buf); enqueue(queue, (T) buf.toString()); } finish(); } /** * Adds the element to the queue */ private void enqueue(BlockingQueue queue, T element) { try { queue.put(element); } catch (InterruptedException e) { log.warn("ops... ", e); // TODO: I hate this things, copied from claudio's code. } } @SuppressWarnings("unchecked") protected void finish() { enqueue(queue, (T) FileWalker.done); } public BlockingQueue getQueue() { return queue; } public void setQueue(BlockingQueue queue) { this.queue = queue; } public FileType getType() { return type; } public void setType(FileType type) { this.type = type; } public File getSource() { return source; } public void setSource(File source) { this.source = source; } }