Skip to content
Snippets Groups Projects
Commit 2ce3f0c7 authored by Shyam Upadhyay's avatar Shyam Upadhyay
Browse files

save

parent 4bf95819
No related branches found
No related tags found
No related merge requests found
......@@ -26,15 +26,15 @@ import edu.illinois.cs.cogcomp.wikifier.utils.lucene.Lucene;
public class IndexWikiLinkStructure {
// public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex";
public static String indexPath = "testIndex";
public static String indexPath = "/shared/bronte/upadhya3/data/WikiLinkStructureIndex";
// public static String indexPath = "testIndex";
public static void index() throws IOException {
String dumpPath;
// dumpPath = "/shared/bronte/cheng88/wikidump/enwiki-latest-pages-articles2014Jan.xml.bz2";
dumpPath = "/shared/bronte/cheng88/wikidump/enwiki-latest-pages-articles2014Jan.xml.bz2";
// for test driving purposes
dumpPath ="enwiki-latest-pages-articles1.xml-p000000010p000010000.bz2";
// dumpPath ="enwiki-latest-pages-articles1.xml-p000000010p000010000.bz2";
String bz2Filename = dumpPath;
try {
......
......@@ -32,8 +32,11 @@ import edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell;
import edu.illinois.cs.cogcomp.wikifier.utils.lucene.Lucene;
/**
* @author cheng88
* @modified upadhya3
* Indexes the edge structure of the Wikipedia graph. Every edge is stored as a
* doc with 3 fields: the src page, the target page and the surface form of the link.
* The links in a page are available through the wikifier view.
*
* @author upadhya3
*
*/
......@@ -62,7 +65,8 @@ public class WikiLinkStructureIndexer {
}
public WikiLinkStructureIndexer(String path, boolean readOnly) {
logger.warn("Init wiki link structure indexer in read mode from ..." + path);
logger.warn("Init wiki link structure indexer in read mode from ..."
+ path);
try {
IndexReader reader = Lucene.reader(path);
searcher = new IndexSearcher(reader);
......@@ -88,7 +92,7 @@ public class WikiLinkStructureIndexer {
List<Document> results = Lists.newArrayList();
try {
Query q = new TermQuery(new Term("target", target_title));
// System.out.println(q.toString());
// System.out.println(q.toString());
for (ScoreDoc sc : searcher.search(q, 10000).scoreDocs) {
results.add(searcher.doc(sc.doc));
}
......@@ -113,36 +117,35 @@ public class WikiLinkStructureIndexer {
}
}
/**
* Runs the indexing step by delegating to
* {@code IndexWikiLinkStructure.index()}.
*
* @throws IOException propagated from {@code IndexWikiLinkStructure.index()}
*/
public static void indexData() throws IOException
{
public static void indexData() throws IOException {
IndexWikiLinkStructure.index();
}
/**
* Manual smoke test for the link-structure index. Opens the index located at
* {@code IndexWikiLinkStructure.indexPath}, queries it with
* {@code searchTitleInLinks("World_War_II")}, prints the {@code target} field
* of every returned document and finally the number of documents returned.
* NOTE(review): the trailing numbers in the comments (469, 589) look like hit
* counts observed on a particular index -- confirm before relying on them.
*
* @throws Exception declared by the signature; NOTE(review): the concrete
* failure modes depend on the constructor and on
* {@code searchTitleInLinks}, which are not fully visible here
*/
public static void testIndex() throws Exception {
WikiLinkStructureIndexer indexer = new WikiLinkStructureIndexer(IndexWikiLinkStructure.indexPath);
// List<Document> docs = indexer.searchTitleInLinks("United_States"); // 589 on small
WikiLinkStructureIndexer indexer = new WikiLinkStructureIndexer(
IndexWikiLinkStructure.indexPath);
// List<Document> docs = indexer.searchTitleInLinks("United_States"); //
// 589 on small
List<Document> docs = indexer.searchTitleInLinks("World_War_II"); // 469
// IndexReader reader = indexer.searcher.getIndexReader();
for(Document doc:docs)
{
// System.out.println("SRC "+doc.get("src"));
System.out.println("TARGET "+doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
// IndexReader reader = indexer.searcher.getIndexReader();
for (Document doc : docs) {
// System.out.println("SRC "+doc.get("src"));
System.out.println("TARGET " + doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
}
System.out.println("ANS SIZE: "+docs.size());
// for(int i=0;i<indexer.numDocs();i++)
// {
// Document doc = reader.document(i);
// System.out.println("SRC "+doc.get("src"));
// System.out.println("TARGET "+doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
// }
System.out.println("ANS SIZE: " + docs.size());
// for(int i=0;i<indexer.numDocs();i++)
// {
// Document doc = reader.document(i);
// System.out.println("SRC "+doc.get("src"));
// System.out.println("TARGET "+doc.get("target"));
// System.out.println("EDGE "+doc.get("edgeLabel"));
// }
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment