Skip to content
Snippets Groups Projects
Commit 39993e18 authored by Christos Christodoulopoulos
Browse files

New config for pipeline

parent 8a088e33
No related branches found
No related tags found
1 merge request: !1 (Shyam)
useStanfordConvention true
......@@ -33,7 +33,7 @@ PreviousTagPatternLevel2 1
AggregateContext 0
AggregateGazetteerMatches 0
PrevTagsForContext 1
PredictionsLevel1 1
PredictionsLevel1 0
# Feature groups
BrownClusterPaths 1
......
# Ontonotes config file
# Required fields
configFilename Ontonotes
pathToModelFile data/Models/Ontonotes
taggingEncodingScheme BILOU
tokenizationScheme DualTokenizationScheme
# Optional fields
beamSize 5
forceNewSentenceOnLineBreaks true
labelTypes TIME LAW GPE NORP LANGUAGE PERCENT FAC PRODUCT ORDINAL LOC PERSON WORK_OF_ART MONEY DATE EVENT QUANTITY ORG CARDINAL
logging false
# debuggingLogPath irrelevant
inferenceMethod GREEDY
normalizeTitleText false
pathToTokenNormalizationData brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt
predictionConfidenceThreshold -1
sortLexicallyFilesInFolders true
thresholdPrediction false
treatAllFilesInFolderAsOneBigDocument false
debug true
# Features
Forms 1
Capitalization 1
WordTypeInformation 1
Affixes 1
PreviousTag1 1
PreviousTag2 1
PreviousTagPatternLevel1 1
PreviousTagPatternLevel2 1
AggregateContext 0
AggregateGazetteerMatches 0
PrevTagsForContext 1
PredictionsLevel1 0
# Feature groups
BrownClusterPaths 1
isLowercaseBrownClusters false false false
pathsToBrownClusters brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt brown-clusters/brownBllipClusters brown-clusters/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt
minWordAppThresholdsForBrownClusters 5 5 5
GazetteersFeatures 1
pathToGazetteersLists ner-ext/KnownLists
WordEmbeddings 0
# pathsToWordEmbeddings WordEmbedding/model-2280000000.LEARNING_RATE=1e-08.EMBEDDING_LEARNING_RATE=1e-07.EMBEDDING_SIZE=50.gz
# embeddingDimensionalities 50
# minWordAppThresholdsForEmbeddings 0
# normalizationConstantsForEmbeddings 1.0
# normalizationMethodsForEmbeddings OVERALL
# isLowercaseWordEmbeddings false
usePos true
useChunker true
useLemmatizer true
useNer true
useStanfordParse true
lemmaCacheFile data/lemmaCache.txt
updateLemmaCacheFile false
maxLemmaCacheEntries 10000
wordnetPath data/WordNet
nerConfigFile config/ner-conll-config.properties
......@@ -5,6 +5,10 @@ curatorPort = 9010
# If set to true, it will force Curator to re-annotate the input
curatorForceUpdate = false
nerConllConfig config/ner-conll-config.properties
nerOntonotesConfig config/ner-ontonotes-config.properties
lemmaConfig config/lemmatizer-config.properties
# If set to true, the output will be a Curator Record instead of a TextAnnotation.
useRecords = false
......@@ -36,7 +40,7 @@ setCacheShutdownHook = true
# Whether to use the Illinois Curator to get the required annotations for training/testing
# If set to false, Illinois NLP pipeline will be used
UseCurator = true
UseCurator = false
# The host and port of Curator. If UseCurator is false, make sure PipelineConfigFile is set
CuratorHost = trollope.cs.illinois.edu
CuratorPort = 9010
......@@ -48,7 +52,7 @@ PipelineConfigFile = config/pipeline-config.properties
# The parser used to extract constituents and syntactic features
# Options are: Charniak, Berkeley, Stanford
# NB: Only Stanford can be used in standalone mode.
DefaultParser = Charniak
DefaultParser = Stanford
WordNetConfig = jwnl_properties.xml
......
#mvn -q dependency:copy-dependencies
#mvn -q compile
# module load sun-jdk/1.8.0
CP="./config/:./target/classes/:./target/dependency/*"
OPTIONS="-Xss40m -ea -cp $CP"
PACKAGE_PREFIX="edu.illinois.cs.cogcomp"
MAIN="$PACKAGE_PREFIX.srl.Main"
time nice java $OPTIONS $MAIN $CONFIG_STR $*
#!/bin/bash
mvn compile
mvn clean compile
mvn -q dependency:copy-dependencies
CP=target/classes:config:target/dependency/*
......
#!/bin/bash
mvn compile
mvn clean compile
mvn -q dependency:copy-dependencies
CP=target/classes:config:target/dependency/*
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment