Commit 35b0e7ca authored by Shyam Upadhyay
Browse files

save

parent 2ce3f0c7
......@@ -12,7 +12,7 @@ PACKAGE_PREFIX="edu.illinois.cs.cogcomp"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.eventlinking.evaluation.SoftEvaluation"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.eventlinking.Experiments"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiLinkStructureIndexer"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiEventOrigins"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiEventOrigins"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.WikiRedirectClusters"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.RedirectClustersIndexer"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.CitationIndexer"
......
......@@ -16,6 +16,7 @@ import org.apache.lucene.document.TextField;
import edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell;
import edu.illinois.cs.cogcomp.edison.sentences.Constituent;
import edu.illinois.cs.cogcomp.edison.sentences.Sentence;
import edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView;
import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation;
import edu.illinois.cs.cogcomp.edison.sentences.ViewNames;
......@@ -26,13 +27,13 @@ import edu.illinois.cs.cogcomp.wikifier.utils.lucene.Lucene;
public class IndexWikiLinkStructure {
public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex";
public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex2"; // 1 had no sentences
// public static String indexPath = "testIndex";
public static void index() throws IOException {
String dumpPath;
// the REAL stuff
dumpPath = "/shared/bronte/cheng88/wikidump/enwiki-latest-pages-articles2014Jan.xml.bz2";
// for test driving purposes
// dumpPath ="enwiki-latest-pages-articles1.xml-p000000010p000010000.bz2";
String bz2Filename = dumpPath;
......@@ -71,17 +72,21 @@ public class IndexWikiLinkStructure {
List<Document> ans = new ArrayList<>();
SpanLabelView wiki = (SpanLabelView) ta.getView(ViewNames.WIKIFIER);
for (Constituent cons : wiki.getConstituents()) {
// System.out.println(cons.getSurfaceString() + " ----> "
// + cons.getLabel());
String target = cons.getLabel();
String src = page.getTitle();
String edgeLabel = cons.getSurfaceString();
Sentence sentence = ta.getSentence(cons.getSentenceId());
// we will index the edges
if (src != null && target != null && edgeLabel != null) {
Document doc = new Document();
doc.add(new StringField("src", src, Store.YES));
doc.add(new StringField("target", target, Store.YES));
doc.add(new StringField("edgeLabel", edgeLabel, Store.YES));
doc.add(new StringField("sentence",sentence.getText(),Store.YES));
ans.add(doc);
}
}
......
......@@ -134,9 +134,11 @@ public class WikiLinkStructureIndexer {
List<Document> docs = indexer.searchTitleInLinks("World_War_II"); // 469
// IndexReader reader = indexer.searcher.getIndexReader();
for (Document doc : docs) {
// System.out.println("SRC "+doc.get("src"));
System.out.println("~~~~~~~~~~~~~~");
System.out.println("SRC "+doc.get("src"));
System.out.println("TARGET " + doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
System.out.println("EDGE "+doc.get("edgeLabel"));
System.out.println("SENT "+doc.get("sentence"));
}
System.out.println("ANS SIZE: " + docs.size());
// for(int i=0;i<indexer.numDocs();i++)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment