Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Shyam Upadhyay
event-wikifier
Commits
35b0e7ca
Commit
35b0e7ca
authored
Dec 28, 2014
by
Shyam Upadhyay
Browse files
save
parent
2ce3f0c7
Changes
3
Hide whitespace changes
Inline
Side-by-side
run.sh
View file @
35b0e7ca
...
...
@@ -12,7 +12,7 @@ PACKAGE_PREFIX="edu.illinois.cs.cogcomp"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.eventlinking.evaluation.SoftEvaluation"
MAIN
=
"
$PACKAGE_PREFIX
.wiki.parsing.indexing.citation.eventlinking.Experiments"
MAIN
=
"
$PACKAGE_PREFIX
.wiki.parsing.indexing.WikiLinkStructureIndexer"
MAIN
=
"
$PACKAGE_PREFIX
.wiki.parsing.indexing.WikiEventOrigins"
#
MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.WikiEventOrigins"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.WikiRedirectClusters"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.utils.RedirectClustersIndexer"
#MAIN="$PACKAGE_PREFIX.wiki.parsing.indexing.citation.CitationIndexer"
...
...
src/main/java/edu/illinois/cs/cogcomp/wiki/parsing/indexing/IndexWikiLinkStructure.java
View file @
35b0e7ca
...
...
@@ -16,6 +16,7 @@ import org.apache.lucene.document.TextField;
import
edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell
;
import
edu.illinois.cs.cogcomp.edison.sentences.Constituent
;
import
edu.illinois.cs.cogcomp.edison.sentences.Sentence
;
import
edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView
;
import
edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation
;
import
edu.illinois.cs.cogcomp.edison.sentences.ViewNames
;
...
...
@@ -26,13 +27,13 @@ import edu.illinois.cs.cogcomp.wikifier.utils.lucene.Lucene;
public
class
IndexWikiLinkStructure
{
public
static
String
indexPath
=
"/shared/bronte/upadhya3/data/WikiLinkStructureIndex"
;
public
static
String
indexPath
=
"/shared/bronte/upadhya3/data/WikiLinkStructureIndex
2
"
;
// 1 had no sentences
// public static String indexPath = "testIndex";
public
static
void
index
()
throws
IOException
{
String
dumpPath
;
// the REAL stuff
dumpPath
=
"/shared/bronte/cheng88/wikidump/enwiki-latest-pages-articles2014Jan.xml.bz2"
;
// for test driving purposes
// dumpPath ="enwiki-latest-pages-articles1.xml-p000000010p000010000.bz2";
String
bz2Filename
=
dumpPath
;
...
...
@@ -71,17 +72,21 @@ public class IndexWikiLinkStructure {
List
<
Document
>
ans
=
new
ArrayList
<>();
SpanLabelView
wiki
=
(
SpanLabelView
)
ta
.
getView
(
ViewNames
.
WIKIFIER
);
for
(
Constituent
cons
:
wiki
.
getConstituents
())
{
// System.out.println(cons.getSurfaceString() + " ----> "
// + cons.getLabel());
String
target
=
cons
.
getLabel
();
String
src
=
page
.
getTitle
();
String
edgeLabel
=
cons
.
getSurfaceString
();
Sentence
sentence
=
ta
.
getSentence
(
cons
.
getSentenceId
());
// we will index the edges
if
(
src
!=
null
&&
target
!=
null
&&
edgeLabel
!=
null
)
{
Document
doc
=
new
Document
();
doc
.
add
(
new
StringField
(
"src"
,
src
,
Store
.
YES
));
doc
.
add
(
new
StringField
(
"target"
,
target
,
Store
.
YES
));
doc
.
add
(
new
StringField
(
"edgeLabel"
,
edgeLabel
,
Store
.
YES
));
doc
.
add
(
new
StringField
(
"sentence"
,
sentence
.
getText
(),
Store
.
YES
));
ans
.
add
(
doc
);
}
}
...
...
src/main/java/edu/illinois/cs/cogcomp/wiki/parsing/indexing/WikiLinkStructureIndexer.java
View file @
35b0e7ca
...
...
@@ -134,9 +134,11 @@ public class WikiLinkStructureIndexer {
List
<
Document
>
docs
=
indexer
.
searchTitleInLinks
(
"World_War_II"
);
// 469
// IndexReader reader = indexer.searcher.getIndexReader();
for
(
Document
doc
:
docs
)
{
// System.out.println("SRC "+doc.get("src"));
System
.
out
.
println
(
"~~~~~~~~~~~~~~"
);
System
.
out
.
println
(
"SRC "
+
doc
.
get
(
"src"
));
System
.
out
.
println
(
"TARGET "
+
doc
.
get
(
"target"
));
// System.out.println("EDGE "+doc.get("edgeLabel"));
System
.
out
.
println
(
"EDGE "
+
doc
.
get
(
"edgeLabel"
));
System
.
out
.
println
(
"SENT "
+
doc
.
get
(
"sentence"
));
}
System
.
out
.
println
(
"ANS SIZE: "
+
docs
.
size
());
// for(int i=0;i<indexer.numDocs();i++)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment