input_document=/share/transformers/documentssimilarity/2015-04-23 output_documents_similarity=${workingDir}/out mapredChildJavaOpts=-Xmx20g parallel=20 removal_rate=0.99 removal_least_used=20 tfidfTopnTermPerDocument=20 similarityTopnDocumentPerDocument=20 sample=1.0 remove_sideproducts=false