Skip to content
Snippets Groups Projects
Commit 35b0e7ca authored by Shyam Upadhyay
Browse files

save

parent 2ce3f0c7
No related branches found
No related tags found
No related merge requests found
......@@ -12,7 +12,7 @@ PACKAGE_PREFIX="edu.illinois.cs.cogcomp"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.eventlinking.evaluation.SoftEvaluation"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.eventlinking.Experiments"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiLinkStructureIndexer"
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiEventOrigins"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiEventOrigins"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.WikiRedirectClusters"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.RedirectClustersIndexer"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.CitationIndexer"
......
......@@ -16,6 +16,7 @@ import org.apache.lucene.document.TextField;
import edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell;
import edu.illinois.cs.cogcomp.edison.sentences.Constituent;
import edu.illinois.cs.cogcomp.edison.sentences.Sentence;
import edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView;
import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation;
import edu.illinois.cs.cogcomp.edison.sentences.ViewNames;
......@@ -26,13 +27,13 @@ import edu.illinois.cs.cogcomp.wikifier.utils.lucene.Lucene;
public class IndexWikiLinkStructure {
public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex";
public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex2"; // 1 had no sentences
// public static String indexPath = "testIndex";
public static void index() throws IOException {
String dumpPath;
// the REAL stuff
dumpPath = "/shared/bronte/cheng88/wikidump/enwiki-latest-pages-articles2014Jan.xml.bz2";
// for test driving purposes
// dumpPath ="enwiki-latest-pages-articles1.xml-p000000010p000010000.bz2";
String bz2Filename = dumpPath;
......@@ -71,17 +72,21 @@ public class IndexWikiLinkStructure {
List<Document> ans = new ArrayList<>();
SpanLabelView wiki = (SpanLabelView) ta.getView(ViewNames.WIKIFIER);
for (Constituent cons : wiki.getConstituents()) {
// System.out.println(cons.getSurfaceString() + " ----> "
// + cons.getLabel());
String target = cons.getLabel();
String src = page.getTitle();
String edgeLabel = cons.getSurfaceString();
Sentence sentence = ta.getSentence(cons.getSentenceId());
// we will index the edges
if (src != null && target != null && edgeLabel != null) {
Document doc = new Document();
doc.add(new StringField("src", src, Store.YES));
doc.add(new StringField("target", target, Store.YES));
doc.add(new StringField("edgeLabel", edgeLabel, Store.YES));
doc.add(new StringField("sentence",sentence.getText(),Store.YES));
ans.add(doc);
}
}
......
......@@ -134,9 +134,11 @@ public class WikiLinkStructureIndexer {
List<Document> docs = indexer.searchTitleInLinks("World_War_II"); // 469
// IndexReader reader = indexer.searcher.getIndexReader();
for (Document doc : docs) {
// System.out.println("SRC "+doc.get("src"));
System.out.println("~~~~~~~~~~~~~~");
System.out.println("SRC "+doc.get("src"));
System.out.println("TARGET " + doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
System.out.println("EDGE "+doc.get("edgeLabel"));
System.out.println("SENT "+doc.get("sentence"));
}
System.out.println("ANS SIZE: " + docs.size());
// for(int i=0;i<indexer.numDocs();i++)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment