From 39993e18b5afcb80f570d7fc71fe116b074d18fb Mon Sep 17 00:00:00 2001
From: Christos Christodoulopoulos <christod@illinois.edu>
Date: Wed, 30 Sep 2015 11:47:35 -0500
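Subject: [PATCH] New config for pipeline

Replace the single pipeline config file with per-component config files
and switch the SRL defaults to standalone (non-Curator) operation.

* Add config/lemmatizer-config.properties (useStanfordConvention true).
* Add config/ner-ontonotes-config.properties (Ontonotes NER model, 18
  label types).
* Set PredictionsLevel1 to 0 in config/ner-conll-config.properties.
* Delete config/pipeline-config.properties; config/srl-config.properties
  gains the keys nerConllConfig, nerOntonotesConfig and lemmaConfig,
  which point at the per-component files.
* In config/srl-config.properties set UseCurator = false and
  DefaultParser = Stanford (the file's own comment says only Stanford
  can be used in standalone mode).
* Delete the top-level run.sh; scripts/run.sh and
  scripts/run-interactive.sh now run "mvn clean compile" instead of
  "mvn compile".

Note: config/srl-config.properties still contains
"PipelineConfigFile = config/pipeline-config.properties", which refers
to the file deleted by this patch; that line is left unchanged here.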
---
 config/lemmatizer-config.properties    |  1 +
 config/ner-conll-config.properties     |  2 +-
 config/ner-ontonotes-config.properties | 53 ++++++++++++++++++++++++++
 config/pipeline-config.properties      | 10 -----
 config/srl-config.properties           |  8 +++-
 run.sh                                 | 11 ------
 scripts/run-interactive.sh             |  2 +-
 scripts/run.sh                         |  2 +-
 8 files changed, 63 insertions(+), 26 deletions(-)
 create mode 100644 config/lemmatizer-config.properties
 create mode 100644 config/ner-ontonotes-config.properties
 delete mode 100644 config/pipeline-config.properties
 delete mode 100755 run.sh

diff --git a/config/lemmatizer-config.properties b/config/lemmatizer-config.properties
new file mode 100644
index 0000000..a8db3f6
--- /dev/null
+++ b/config/lemmatizer-config.properties
@@ -0,0 +1 @@
+useStanfordConvention   true
diff --git a/config/ner-conll-config.properties b/config/ner-conll-config.properties
index 979a364..e77c4ce 100644
--- a/config/ner-conll-config.properties
+++ b/config/ner-conll-config.properties
@@ -33,7 +33,7 @@ PreviousTagPatternLevel2	        	1
 AggregateContext						0
 AggregateGazetteerMatches	        	0
 PrevTagsForContext		        		1
-PredictionsLevel1						1
+PredictionsLevel1						0
 
 # Feature groups
 BrownClusterPaths						1
diff --git a/config/ner-ontonotes-config.properties b/config/ner-ontonotes-config.properties
new file mode 100644
index 0000000..dbb5ad1
--- /dev/null
+++ b/config/ner-ontonotes-config.properties
@@ -0,0 +1,53 @@
+# Ontonotes config file
+
+# Required fields
+configFilename                          Ontonotes
+pathToModelFile			        		data/Models/Ontonotes
+taggingEncodingScheme		        	BILOU
+tokenizationScheme						DualTokenizationScheme
+
+# Optional fields
+beamSize								5
+forceNewSentenceOnLineBreaks	        true
+labelTypes                              TIME	LAW	GPE	NORP	LANGUAGE	PERCENT	FAC	PRODUCT	ORDINAL	LOC	PERSON	WORK_OF_ART	MONEY	DATE	EVENT	QUANTITY	ORG	CARDINAL
+logging									false
+# debuggingLogPath						irrelevant
+inferenceMethod			        		GREEDY
+normalizeTitleText	                	false
+pathToTokenNormalizationData	        brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt
+predictionConfidenceThreshold           -1
+sortLexicallyFilesInFolders             true
+thresholdPrediction						false
+treatAllFilesInFolderAsOneBigDocument	false
+debug                                   true
+
+# Features
+Forms									1
+Capitalization							1
+WordTypeInformation		        		1
+Affixes									1
+PreviousTag1							1
+PreviousTag2							1
+PreviousTagPatternLevel1	        	1
+PreviousTagPatternLevel2                1
+AggregateContext						0
+AggregateGazetteerMatches	        	0
+PrevTagsForContext                      1
+PredictionsLevel1						0
+
+# Feature groups
+BrownClusterPaths						1
+isLowercaseBrownClusters	        	false	false	false
+pathsToBrownClusters		        	brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt	brown-clusters/brownBllipClusters	brown-clusters/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt
+minWordAppThresholdsForBrownClusters	5	5	5
+
+GazetteersFeatures						1
+pathToGazetteersLists					ner-ext/KnownLists
+
+WordEmbeddings                          0
+# pathsToWordEmbeddings					WordEmbedding/model-2280000000.LEARNING_RATE=1e-08.EMBEDDING_LEARNING_RATE=1e-07.EMBEDDING_SIZE=50.gz
+# embeddingDimensionalities             50
+# minWordAppThresholdsForEmbeddings		0
+# normalizationConstantsForEmbeddings   1.0
+# normalizationMethodsForEmbeddings		OVERALL
+# isLowercaseWordEmbeddings	        	false
diff --git a/config/pipeline-config.properties b/config/pipeline-config.properties
deleted file mode 100644
index 9f110cf..0000000
--- a/config/pipeline-config.properties
+++ /dev/null
@@ -1,10 +0,0 @@
-usePos	true
-useChunker	true
-useLemmatizer	true
-useNer	true
-useStanfordParse true
-lemmaCacheFile  data/lemmaCache.txt
-updateLemmaCacheFile    false
-maxLemmaCacheEntries    10000
-wordnetPath data/WordNet
-nerConfigFile   config/ner-conll-config.properties
diff --git a/config/srl-config.properties b/config/srl-config.properties
index 4923336..ac94f90 100644
--- a/config/srl-config.properties
+++ b/config/srl-config.properties
@@ -5,6 +5,10 @@ curatorPort = 9010
 # If set to true, it will force Curator to re-annotate the input
 curatorForceUpdate = false
 
+nerConllConfig   config/ner-conll-config.properties
+nerOntonotesConfig  config/ner-ontonotes-config.properties
+lemmaConfig config/lemmatizer-config.properties
+
 # If set to true, the output will be a Curator Record instead of a TextAnnotation.
 useRecords = false
 
@@ -36,7 +40,7 @@ setCacheShutdownHook = true
 
 # Whether to use the Illinois Curator to get the required annotations for training/testing
 # If set to false, Illinois NLP pipeline will be used
-UseCurator = true
+UseCurator = false
 # The URL and host of Curator. If UseCurator is false, make sure you have pipeline config file set
 CuratorHost = trollope.cs.illinois.edu
 CuratorPort = 9010
@@ -48,7 +52,7 @@ PipelineConfigFile = config/pipeline-config.properties
 # The parser used to extract constituents and syntactic features
 # Options are: Charniak, Berkeley, Stanford
 # NB: Only Stanford can be used in standalone mode.
-DefaultParser = Charniak
+DefaultParser = Stanford
 
 WordNetConfig = jwnl_properties.xml
 
diff --git a/run.sh b/run.sh
deleted file mode 100755
index 290b4fb..0000000
--- a/run.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#mvn -q dependency:copy-dependencies
-#mvn -q compile
-# module load sun-jdk/1.8.0
-CP="./config/:./target/classes/:./target/dependency/*"
-
-OPTIONS="-Xss40m -ea -cp $CP"
-PACKAGE_PREFIX="edu.illinois.cs.cogcomp"
-
-MAIN="$PACKAGE_PREFIX.srl.Main"
-
-time nice java $OPTIONS $MAIN $CONFIG_STR $*
diff --git a/scripts/run-interactive.sh b/scripts/run-interactive.sh
index 543c241..33c7a6f 100755
--- a/scripts/run-interactive.sh
+++ b/scripts/run-interactive.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-mvn compile 
+mvn clean compile
 mvn -q dependency:copy-dependencies
 
 CP=target/classes:config:target/dependency/*
diff --git a/scripts/run.sh b/scripts/run.sh
index bf1b4d9..500342b 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-mvn compile 
+mvn clean compile
 mvn -q dependency:copy-dependencies
 
 CP=target/classes:config:target/dependency/*
-- 
GitLab