diff --git a/config/lemmatizer-config.properties b/config/lemmatizer-config.properties new file mode 100644 index 0000000000000000000000000000000000000000..a8db3f65fb528f9b0b0be735948c0db40ab7e016 --- /dev/null +++ b/config/lemmatizer-config.properties @@ -0,0 +1 @@ +useStanfordConvention true diff --git a/config/ner-conll-config.properties b/config/ner-conll-config.properties index 979a36489104f2723dfa9713be03f1e56e8093dd..e77c4ce656b9a1d6a99529814f121f9f3a2df83d 100644 --- a/config/ner-conll-config.properties +++ b/config/ner-conll-config.properties @@ -33,7 +33,7 @@ PreviousTagPatternLevel2 1 AggregateContext 0 AggregateGazetteerMatches 0 PrevTagsForContext 1 -PredictionsLevel1 1 +PredictionsLevel1 0 # Feature groups BrownClusterPaths 1 diff --git a/config/ner-ontonotes-config.properties b/config/ner-ontonotes-config.properties new file mode 100644 index 0000000000000000000000000000000000000000..dbb5ad10f69ce7a8ad90eb801f6239b8a67a5754 --- /dev/null +++ b/config/ner-ontonotes-config.properties @@ -0,0 +1,53 @@ +# Ontonotes config file + +# Required fields +configFilename Ontonotes +pathToModelFile data/Models/Ontonotes +taggingEncodingScheme BILOU +tokenizationScheme DualTokenizationScheme + +# Optional fields +beamSize 5 +forceNewSentenceOnLineBreaks true +labelTypes TIME LAW GPE NORP LANGUAGE PERCENT FAC PRODUCT ORDINAL LOC PERSON WORK_OF_ART MONEY DATE EVENT QUANTITY ORG CARDINAL +logging false +# debuggingLogPath irrelevant +inferenceMethod GREEDY +normalizeTitleText false +pathToTokenNormalizationData brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt +predictionConfidenceThreshold -1 +sortLexicallyFilesInFolders true +thresholdPrediction false +treatAllFilesInFolderAsOneBigDocument false +debug true + +# Features +Forms 1 +Capitalization 1 +WordTypeInformation 1 +Affixes 1 +PreviousTag1 1 +PreviousTag2 1 +PreviousTagPatternLevel1 1 +PreviousTagPatternLevel2 1 +AggregateContext 0 +AggregateGazetteerMatches 0 +PrevTagsForContext 1 +PredictionsLevel1 0 + +# Feature groups +BrownClusterPaths 1 +isLowercaseBrownClusters false false false +pathsToBrownClusters brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt brown-clusters/brownBllipClusters brown-clusters/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt +minWordAppThresholdsForBrownClusters 5 5 5 + +GazetteersFeatures 1 +pathToGazetteersLists ner-ext/KnownLists + +WordEmbeddings 0 +# pathsToWordEmbeddings WordEmbedding/model-2280000000.LEARNING_RATE=1e-08.EMBEDDING_LEARNING_RATE=1e-07.EMBEDDING_SIZE=50.gz +# embeddingDimensionalities 50 +# minWordAppThresholdsForEmbeddings 0 +# normalizationConstantsForEmbeddings 1.0 +# normalizationMethodsForEmbeddings OVERALL +# isLowercaseWordEmbeddings false diff --git a/config/pipeline-config.properties b/config/pipeline-config.properties deleted file mode 100644 index 9f110cfc7aa0859316cb40bb1c7bcf7548ca05ba..0000000000000000000000000000000000000000 --- a/config/pipeline-config.properties +++ /dev/null @@ -1,10 +0,0 @@ -usePos true -useChunker true -useLemmatizer true -useNer true -useStanfordParse true -lemmaCacheFile data/lemmaCache.txt -updateLemmaCacheFile false -maxLemmaCacheEntries 10000 -wordnetPath data/WordNet -nerConfigFile config/ner-conll-config.properties diff --git a/config/srl-config.properties b/config/srl-config.properties index 492333614cd60d3c21f5f9dd4e5bc6e29416fec6..ac94f90a7e8f298ef6f3c9d0f0a2dd00a1cc4be4 100644 --- a/config/srl-config.properties +++ b/config/srl-config.properties @@ -5,6 +5,10 @@ curatorPort = 9010 # If set to true, it will force Curator to re-annotate the input curatorForceUpdate = false +nerConllConfig config/ner-conll-config.properties +nerOntonotesConfig config/ner-ontonotes-config.properties +lemmaConfig config/lemmatizer-config.properties + # If set to true, the output will be a Curator Record instead of a TextAnnotation. useRecords = false @@ -36,7 +40,7 @@ setCacheShutdownHook = true # Whether to use the Illinois Curator to get the required annotations for training/testing # If set to false, Illinois NLP pipeline will be used -UseCurator = true +UseCurator = false # The URL and host of Curator. If UseCurator is false, make sure you have pipeline config file set CuratorHost = trollope.cs.illinois.edu CuratorPort = 9010 @@ -48,7 +52,7 @@ PipelineConfigFile = config/pipeline-config.properties # The parser used to extract constituents and syntactic features # Options are: Charniak, Berkeley, Stanford # NB: Only Stanford can be used in standalone mode. -DefaultParser = Charniak +DefaultParser = Stanford WordNetConfig = jwnl_properties.xml diff --git a/run.sh b/run.sh deleted file mode 100755 index 290b4fb0641528f1e36900cf589174132482c35e..0000000000000000000000000000000000000000 --- a/run.sh +++ /dev/null @@ -1,11 +0,0 @@ -#mvn -q dependency:copy-dependencies -#mvn -q compile -# module load sun-jdk/1.8.0 -CP="./config/:./target/classes/:./target/dependency/*" - -OPTIONS="-Xss40m -ea -cp $CP" -PACKAGE_PREFIX="edu.illinois.cs.cogcomp" - -MAIN="$PACKAGE_PREFIX.srl.Main" - -time nice java $OPTIONS $MAIN $CONFIG_STR $* diff --git a/scripts/run-interactive.sh b/scripts/run-interactive.sh index 543c241d90d47b6a1eafee60cd934854ca622539..33c7a6fa6e7cd2fac9d1e71d5deb394d8ac0846b 100755 --- a/scripts/run-interactive.sh +++ b/scripts/run-interactive.sh @@ -1,5 +1,5 @@ #!/bin/bash -mvn compile +mvn clean compile mvn -q dependency:copy-dependencies CP=target/classes:config:target/dependency/* diff --git a/scripts/run.sh b/scripts/run.sh index bf1b4d923a4a4fefdfd6112a947c5ab4ee1b2388..500342b5b0a82d798d2db0046656f41c610b9fbb 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -1,5 +1,5 @@ #!/bin/bash -mvn compile +mvn clean compile mvn -q dependency:copy-dependencies CP=target/classes:config:target/dependency/*