Commit 7e2c3e79 authored by Christos Christodoulopoulos

Massive cleaning up

Using new AnnotatorService
parent ae4e3cee
1 merge request: !1 Shyam
Showing with 197 additions and 379 deletions
Version 5.1.4
Switched entirely to illinois-sl for structured prediction (removed JLIS traces)
Using the latest AnnotatorService from illinois-core-utilities for both Curator & pipeline annotation
Major cleaning up
Version 5.1
Added JUnit tests
Removed unnecessary dependencies
......
# illinois-srl: Semantic Role Labeler
### Running
#### Interactive mode
#### As `Annotator`
### Training
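Training is driven by the command-line entry points in `Main` (see the `expt` and `train` diffs further down). A minimal programmatic sketch using the new two-argument `expt` signature from this commit; the class name `TrainVerbSrl` is illustrative, and `Main` is assumed to live in the same package as `Constants`:

```java
import edu.illinois.cs.cogcomp.srl.Main;

public class TrainVerbSrl {
    public static void main(String[] args) throws Exception {
        // Full training & testing sequence for verb SRL; "true" caches the datasets first.
        // Command-line equivalent: expt Verb cacheDatasets=true
        Main.expt("Verb", "true");
    }
}
```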
# {L2LossSSVM, StructuredPerceptron}
LEARNING_MODEL = L2LossSSVM
# {DCDSolver, ParallelDCDSolver, DEMIParallelDCDSolver};
L2_LOSS_SSVM_SOLVER_TYPE = ParallelDCDSolver
NUMBER_OF_THREADS = 8
C_FOR_STRUCTURE = 1.0
TRAINMINI = false
TRAINMINI_SIZE = 1000
STOP_CONDITION = 0.1
CHECK_INFERENCE_OPT = false
MAX_NUM_ITER = 250
PROGRESS_REPORT_ITER = 10
INNER_STOP_CONDITION = 0.1
MAX_ITER_INNER = 250
MAX_ITER_INNER_FINAL = 2500
TOTAL_NUMBER_FEATURE = -1
CLEAN_CACHE = true
CLEAN_CACHE_ITER = 5
LEARNING_RATE = 0.01
DECAY_LEARNING_RATE = false
# {L2LossSSVM, StructuredPerceptron}
LEARNING_MODEL = StructuredPerceptron
NUMBER_OF_THREADS = 1
CHECK_INFERENCE_OPT = false
MAX_NUM_ITER = 10
TOTAL_NUMBER_FEATURE = -1
LEARNING_RATE = 0.01
DECAY_LEARNING_RATE = false
NUMBER_OF_FEATURE_BITS = 26
# Available learning models: {L2LossSSVM, StructuredPerceptron}
LEARNING_MODEL = L2LossSSVM
# Available solver types: {DCDSolver, ParallelDCDSolver, DEMIParallelDCDSolver}
L2_LOSS_SSVM_SOLVER_TYPE = ParallelDCDSolver
NUMBER_OF_THREADS = 8
# Regularization parameter
C_FOR_STRUCTURE = 1.0
# Mini-batch for 'warm' start
TRAINMINI = true
TRAINMINI_SIZE = 10000
# Suppress optimality check
CHECK_INFERENCE_OPT = false
# Number of training rounds
MAX_NUM_ITER = 100
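These settings are consumed by illinois-sl's `SLParameters`; the training code later in this commit loads them via `params.loadConfigFile(properties.getLearnerConfig())`. A minimal sketch (the config file path is illustrative):

```java
import edu.illinois.cs.cogcomp.sl.core.SLParameters;

public class LoadLearnerConfig {
    public static void main(String[] args) throws Exception {
        SLParameters params = new SLParameters();
        // Populates LEARNING_MODEL, L2_LOSS_SSVM_SOLVER_TYPE, NUMBER_OF_THREADS, etc.
        params.loadConfigFile("config/learner.properties");
        // Individual fields can still be overridden afterwards, as Main.train does:
        params.C_FOR_STRUCTURE = 1.0f;
    }
}
```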
useStanfordConvention true
# CoNLL config file
# Required fields
configFilename finalSystemBILOU
pathToModelFile data/Models/CoNLL
taggingEncodingScheme BILOU
tokenizationScheme DualTokenizationScheme
# Optional fields
beamSize 5
forceNewSentenceOnLineBreaks true
labelTypes PER ORG LOC MISC
logging false
# debuggingLogPath irrelevant
inferenceMethod GREEDY
normalizeTitleText false
pathToTokenNormalizationData brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt
predictionConfidenceThreshold -1
sortLexicallyFilesInFolders true
thresholdPrediction false
treatAllFilesInFolderAsOneBigDocument true
debug true
# Features
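# (a value of 1 enables the feature, 0 disables it)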
Forms 1
Capitalization 1
WordTypeInformation 1
Affixes 1
PreviousTag1 1
PreviousTag2 1
PreviousTagPatternLevel1 1
PreviousTagPatternLevel2 1
AggregateContext 0
AggregateGazetteerMatches 0
PrevTagsForContext 1
PredictionsLevel1 0
# Feature groups
BrownClusterPaths 1
isLowercaseBrownClusters false false false
pathsToBrownClusters brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt brown-clusters/brownBllipClusters brown-clusters/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt
minWordAppThresholdsForBrownClusters 5 5 5
GazetteersFeatures 1
pathToGazetteersLists ner-ext/KnownLists
WordEmbeddings 0
# pathsToWordEmbeddings WordEmbedding/model-2280000000.LEARNING_RATE=1e-08.EMBEDDING_LEARNING_RATE=1e-07.EMBEDDING_SIZE=50.gz
# embeddingDimensionalities 50
# minWordAppThresholdsForEmbeddings 0
# normalizationConstantsForEmbeddings 1.0
# normalizationMethodsForEmbeddings OVERALL
# isLowercaseWordEmbeddings false
# Ontonotes config file
# Required fields
configFilename Ontonotes
pathToModelFile data/Models/Ontonotes
taggingEncodingScheme BILOU
tokenizationScheme DualTokenizationScheme
# Optional fields
beamSize 5
forceNewSentenceOnLineBreaks true
labelTypes TIME LAW GPE NORP LANGUAGE PERCENT FAC PRODUCT ORDINAL LOC PERSON WORK_OF_ART MONEY DATE EVENT QUANTITY ORG CARDINAL
logging false
# debuggingLogPath irrelevant
inferenceMethod GREEDY
normalizeTitleText false
pathToTokenNormalizationData brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt
predictionConfidenceThreshold -1
sortLexicallyFilesInFolders true
thresholdPrediction false
treatAllFilesInFolderAsOneBigDocument false
debug true
# Features
Forms 1
Capitalization 1
WordTypeInformation 1
Affixes 1
PreviousTag1 1
PreviousTag2 1
PreviousTagPatternLevel1 1
PreviousTagPatternLevel2 1
AggregateContext 0
AggregateGazetteerMatches 0
PrevTagsForContext 1
PredictionsLevel1 0
# Feature groups
BrownClusterPaths 1
isLowercaseBrownClusters false false false
pathsToBrownClusters brown-clusters/brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt brown-clusters/brownBllipClusters brown-clusters/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt
minWordAppThresholdsForBrownClusters 5 5 5
GazetteersFeatures 1
pathToGazetteersLists ner-ext/KnownLists
WordEmbeddings 0
# pathsToWordEmbeddings WordEmbedding/model-2280000000.LEARNING_RATE=1e-08.EMBEDDING_LEARNING_RATE=1e-07.EMBEDDING_SIZE=50.gz
# embeddingDimensionalities 50
# minWordAppThresholdsForEmbeddings 0
# normalizationConstantsForEmbeddings 1.0
# normalizationMethodsForEmbeddings OVERALL
# isLowercaseWordEmbeddings false
# Use ResourceManager to read these properties
curatorHost = trollope.cs.illinois.edu
curatorPort = 9010
# If set to true, it will force Curator to re-annotate the input
curatorForceUpdate = false
nerConllConfig config/ner-conll-config.properties
nerOntonotesConfig config/ner-ontonotes-config.properties
lemmaConfig config/lemmatizer-config.properties
# If set to true, the output will be a Curator Record instead of a TextAnnotation.
useRecords = false
# If set to true, the input text will be assumed to be pre-tokenized
respectTokenization = false
# A comma-separated list of views to add (see ViewNames for a complete list of views).
viewsToAdd = POS,SHALLOW_PARSE,LEMMA,COREF,NER_CONLL,PARSE_STANFORD
# Use this option to output the annotated Record/TextAnnotation as plain text (instead of serialized)
outputToText = true
# Force the Curator client to overwrite the generated output files
forceUpdateOutputFile = true
## Properties used by the AnnotatorService to control caching behaviour
cacheDirectory = annotation-cache
throwExceptionIfNotCached = false
cacheHeapSizeInMegabytes = 100
cacheDiskSizeInMegabytes = 200
# sets system property to close cache when VM shuts down
setCacheShutdownHook = true
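As the first comment in this file says, these properties are read through a `ResourceManager` (illinois-core-utilities). A minimal sketch, assuming the 1.3.x package layout; the file name is illustrative:

```java
import edu.illinois.cs.cogcomp.core.utilities.ResourceManager;

public class ReadAnnotationConfig {
    public static void main(String[] args) throws Exception {
        // File name is illustrative; point it at the properties file shown above.
        ResourceManager rm = new ResourceManager("config/annotation-config.properties");
        String host = rm.getString("curatorHost");
        int port = rm.getInt("curatorPort");
        String[] views = rm.getString("viewsToAdd").split(",");
        System.out.println("Curator at " + host + ":" + port + ", adding " + views.length + " views");
    }
}
```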
#############################################
##
## Illinois SRL Configuration
@@ -41,13 +7,6 @@ setCacheShutdownHook = true
# Whether to use the Illinois Curator to get the required annotations for training/testing
# If set to false, Illinois NLP pipeline will be used
UseCurator = false
# The host and port of Curator. If UseCurator is false, make sure you have the pipeline config file set
CuratorHost = trollope.cs.illinois.edu
CuratorPort = 9010
# The file containing the configuration for the Illinois NLP pipeline
PipelineConfigFile = config/pipeline-config.properties
# The parser used to extract constituents and syntactic features
# Options are: Charniak, Berkeley, Stanford
@@ -56,6 +15,9 @@ DefaultParser = Stanford
WordNetConfig = jwnl_properties.xml
# The configuration for the Structured learner
LearnerConfig = config/learner.properties
### Training corpora directories ###
# This is the directory of the merged (mrg) WSJ files
PennTreebankHome = /shared/corpora/corporaWeb/treebanks/eng/pennTreebank/treebank-3/parsed/mrg/wsj/
@@ -64,13 +26,10 @@ NombankHome = /shared/corpora/corporaWeb/treebanks/eng/nombank/
# The directory of the sentence and pre-extracted features database (~5G of space required)
# Not used during test/working with pre-trained models
# TODO Change this when done
CacheDirectory = /shared/bronte/upadhya3/illinoisSRL/cache
CacheDirectory = cache
ModelsDirectory = /shared/bronte/upadhya3/illinoisSRL/models
ModelsDirectory = models
# Directory to output gold and predicted files for manual comparison
# Comment out for no output
OutputDirectory = srl-out
MaxInferenceRounds = 200
\ No newline at end of file
OutputDirectory = srl-out
\ No newline at end of file
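These top-level settings are wrapped by the `SRLProperties` singleton whose diff appears further down (this commit drops the Curator host/port getters and adds `getLearnerConfig`). A minimal sketch, assuming `SRLProperties` lives in `edu.illinois.cs.cogcomp.srl` alongside `Constants`; the config path is illustrative:

```java
import edu.illinois.cs.cogcomp.srl.SRLProperties;

public class ShowSrlConfig {
    public static void main(String[] args) throws Exception {
        // initialize() first tries the file system, then the classpath.
        SRLProperties.initialize("config/srl-config.properties");
        SRLProperties props = SRLProperties.getInstance();
        System.out.println("Learner config: " + props.getLearnerConfig());
        System.out.println("Default parser: " + props.getDefaultParser());
    }
}
```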
@@ -29,33 +29,31 @@
<artifactId>illinois-srl</artifactId>
<classifier>models-verb-stanford</classifier>
<version>5.1.4</version>
<scope>test</scope>
</dependency>
<!-- <dependency> -->
<!-- <groupId>edu.illinois.cs.cogcomp</groupId> -->
<!-- <artifactId>illinois-srl</artifactId> -->
<!-- <classifier>models-nom-stanford</classifier> -->
<!-- <version>5.1.4</version> -->
<!-- <scope>test</scope> -->
<!-- </dependency> -->
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-srl</artifactId>
<classifier>models-nom-stanford</classifier>
<version>5.1.4</version>
</dependency>
<!--The Illinois pipeline can be used instead -->
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-nlp-pipeline</artifactId>
<version>0.1.8-lemmatizer</version>
<version>0.1.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>1.2.18</version>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-caching-curator</artifactId>
<version>2.1.1</version>
<artifactId>illinois-curator</artifactId>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>com.gurobi</groupId>
@@ -106,6 +104,13 @@
<artifactId>snowball</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.7</version>
<optional>true</optional>
</dependency>
</dependencies>
@@ -148,12 +153,27 @@
</extensions>
</build>
<reporting>
<excludeDefaults>true</excludeDefaults>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.3</version>
</plugin>
</plugins>
</reporting>
<distributionManagement>
<repository>
<id>CogcompSoftware</id>
<name>CogcompSoftware</name>
<url>scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo</url>
</repository>
<site>
<id>CogcompSoftwareDoc</id>
<url>scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/software/doc/${project.artifactId}</url>
</site>
</distributionManagement>
</project>
#!/bin/bash -e
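# Bundles the trained Verb and Nom models for each parser into classifier jars under target/.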
VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v 'INFO'`
tmpdir=tmp-Srl-verb-$RANDOM
rm -rdf ${tmpdir}
mkdir -p ${tmpdir}/models
for parser in STANFORD CHARNIAK; do
cp ./models/Verb*${parser}* ${tmpdir}/models
cd ${tmpdir}
rm -rdf ../target/illinois-srl-models-verb-${parser}-${VERSION}.jar
jar cf ../target/illinois-srl-models-verb-${parser}-${VERSION}.jar models
cd ..
rm ${tmpdir}/models/*
done
rm -rdf ${tmpdir}
tmpdir=tmp-Srl-nom-$RANDOM
rm -rdf ${tmpdir}
mkdir -p ${tmpdir}/models
for parser in STANFORD CHARNIAK; do
cp ./models/Nom*${parser}* ${tmpdir}/models
cd ${tmpdir}
rm -rdf ../target/illinois-srl-models-nom-${parser}-${VERSION}.jar
jar cf ../target/illinois-srl-models-nom-${parser}-${VERSION}.jar models
cd ..
rm ${tmpdir}/models/*
done
rm -rdf ${tmpdir}
\ No newline at end of file
@@ -2,6 +2,51 @@
VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v 'INFO'`
tmpdir=tmp-Srl-verb-$RANDOM
rm -rdf ${tmpdir}
mkdir -p ${tmpdir}/models
for parser in STANFORD CHARNIAK; do
if [ ! -e "./models/Verb.Classifier.PARSE_${parser}.lex" ]; then
echo "$parser Verb models not found"
continue
fi
cp ./models/Verb*${parser}* ${tmpdir}/models
cd ${tmpdir}
rm -rdf ../target/illinois-srl-models-verb-${parser}-${VERSION}.jar
jar cf ../target/illinois-srl-models-verb-${parser}-${VERSION}.jar models
cd ..
rm ${tmpdir}/models/*
done
rm -rdf ${tmpdir}
tmpdir=tmp-Srl-nom-$RANDOM
rm -rdf ${tmpdir}
mkdir -p ${tmpdir}/models
for parser in STANFORD CHARNIAK; do
if [ ! -e "./models/Nom.Classifier.PARSE_${parser}.lex" ]; then
echo "$parser Nom models not found"
continue
fi
cp ./models/Nom*${parser}* ${tmpdir}/models
cd ${tmpdir}
rm -rdf ../target/illinois-srl-models-nom-${parser}-${VERSION}.jar
jar cf ../target/illinois-srl-models-nom-${parser}-${VERSION}.jar models
cd ..
rm ${tmpdir}/models/*
done
rm -rdf ${tmpdir}
echo "Compiled models to jars"
if [ -e "target/illinois-srl-models-nom-CHARNIAK-${VERSION}.jar" ]; then
echo "Deploying illinois-srl-models-nom-CHARNIAK-${VERSION}.jar"
mvn deploy:deploy-file \
-Dfile=target/illinois-srl-models-nom-CHARNIAK-${VERSION}.jar \
@@ -12,7 +57,9 @@ mvn deploy:deploy-file \
-Dpackaging=jar \
-Durl=scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo \
-DrepositoryId=CogcompSoftware
fi
if [ -e "target/illinois-srl-models-nom-STANFORD-${VERSION}.jar" ]; then
echo "Deploying illinois-srl-models-nom-STANFORD-${VERSION}.jar"
mvn deploy:deploy-file \
-Dfile=target/illinois-srl-models-nom-STANFORD-${VERSION}.jar \
@@ -23,7 +70,9 @@ mvn deploy:deploy-file \
-Dpackaging=jar \
-Durl=scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo \
-DrepositoryId=CogcompSoftware
fi
if [ -e "target/illinois-srl-models-verb-CHARNIAK-${VERSION}.jar" ]; then
echo "Deploying illinois-srl-models-verb-CHARNIAK-${VERSION}.jar"
mvn deploy:deploy-file \
-Dfile=target/illinois-srl-models-verb-CHARNIAK-${VERSION}.jar \
@@ -34,7 +83,9 @@ mvn deploy:deploy-file \
-Dpackaging=jar \
-Durl=scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo \
-DrepositoryId=CogcompSoftware
fi
if [ -e "target/illinois-srl-models-verb-STANFORD-${VERSION}.jar" ]; then
echo "Deploying illinois-srl-models-verb-STANFORD-${VERSION}.jar"
mvn deploy:deploy-file \
-Dfile=target/illinois-srl-models-verb-STANFORD-${VERSION}.jar \
@@ -44,4 +95,5 @@ mvn deploy:deploy-file \
-Dclassifier=models-verb-stanford \
-Dpackaging=jar \
-Durl=scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo \
-DrepositoryId=CogcompSoftware
\ No newline at end of file
-DrepositoryId=CogcompSoftware
fi
\ No newline at end of file
#!/bin/bash
# mvn clean compile
# mvn -q dependency:copy-dependencies
mvn clean compile
mvn -q dependency:copy-dependencies
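# Classpath: compiled classes, config files, and the copied dependency jars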
CP=target/classes:config:target/dependency/*
......
@@ -70,46 +70,36 @@ public class ColumnFormatWriter {
printFormatted(tr, out, pav.getTextAnnotation());
}
private void printFormatted(String[][] columns, PrintWriter out,
        TextAnnotation ta) {
    // System.out.println(columns.length);
    // System.out.println(columns[0].length);
private void printFormatted(String[][] columns, PrintWriter out, TextAnnotation ta) {
    // leftOfStar: length of everything before the asterisk.
    // rightOfStar: length of asterisk and what comes after.
    int[] leftOfStar = new int[columns[0].length];
    int[] rightOfStart = new int[columns[0].length];
    for (int row = 0; row < columns.length; row++) {
        // System.out.println(row);
        // System.out.println(Arrays.asList(columns[row]));
        String[] rowData = columns[row];
    for (String[] rowData : columns) {
        for (int col = 0; col < rowData.length; col++) {
            String word = rowData[col];
            int starPos = word.indexOf("*");
            int lenLeft, lenRight;
            if (starPos < 0) {
                lenLeft = word.length();
                lenRight = -1;
            } else {
                lenLeft = starPos + 1;
                lenRight = word.length() - starPos + 1;
            }
            if (leftOfStar[col] < lenLeft)
                leftOfStar[col] = lenLeft;
            if (rightOfStart[col] < lenRight)
                rightOfStart[col] = lenRight;
        }
    }
// System.out.println("here");
@@ -124,13 +114,6 @@ public class ColumnFormatWriter {
out.print(rowData[0]);
// if (rowData.length == 1)
// {
// // System.out.println(rowData[0] + "\t" + row);
// out.println();
// continue;
// }
// print the spaces
for (int spCount = rowData[0].length(); spCount < leftOfStar[0]; spCount++)
out.print(" ");
@@ -181,10 +164,6 @@
}
}
/**
* @param columns
* @return
*/
private String[][] transpose(List<String[]> columns, int size) {
String[][] output = new String[size][];
@@ -204,12 +183,8 @@
/**
* Return a table. Number of rows = number of words; the number of columns
* depends on how many predicate-argument relations we have
*
* @param ta
* @return
*/
private String[][] transformToColumns(TextAnnotation ta) {
List<String[]> columns = new ArrayList<>();
// first the words
@@ -245,10 +220,6 @@
return transpose(columns, ta.size());
}
/**
* @param ta
* @return
*/
private static String[] getNEData(TextAnnotation ta) {
if (!ta.hasView(ViewNames.NER_CONLL)) {
@@ -260,7 +231,7 @@
return chunk;
}
SpanLabelView nerView = (SpanLabelView) ta.getView(ViewNames.NER);
SpanLabelView nerView = (SpanLabelView) ta.getView(ViewNames.NER_CONLL);
List<Constituent> nerConstituents = nerView.getConstituents();
@@ -304,13 +275,11 @@
return chunk;
}
SpanLabelView chunkView = (SpanLabelView) ta
.getView(ViewNames.SHALLOW_PARSE);
SpanLabelView chunkView = (SpanLabelView) ta.getView(ViewNames.SHALLOW_PARSE);
List<Constituent> chunkConstituents = chunkView.getConstituents();
Collections.sort(chunkConstituents,
TextAnnotationUtilities.constituentStartComparator);
Collections.sort(chunkConstituents, TextAnnotationUtilities.constituentStartComparator);
Map<Integer, String> cc = new HashMap<>();
for (Constituent c : chunkConstituents) {
@@ -337,16 +306,11 @@
return chunk;
}
/**
* @param columns
* @param ta
*/
private void addPredicateArgs(List<String[]> columns, TextAnnotation ta) {
PredicateArgumentView predArgView = null;
if (ta.hasView(predicateArgumentViewName))
predArgView = (PredicateArgumentView) ta
.getView(predicateArgumentViewName);
predArgView = (PredicateArgumentView) ta.getView(predicateArgumentViewName);
convertPredicateArgView(ta, predArgView, columns, true);
@@ -359,15 +323,15 @@
if (pav != null)
predicates = pav.getPredicates();
Collections.sort(predicates,
TextAnnotationUtilities.constituentStartComparator);
Collections.sort(predicates, TextAnnotationUtilities.constituentStartComparator);
int size = ta.size();
addPredicateInfo(columns, predicates, size, addSense);
for (Constituent predicate : predicates) {
assert pav != null;
List<Relation> args = pav.getArguments(predicate);
String[] paInfo = addPredicateArgInfo(predicate, args, size);
@@ -375,21 +339,14 @@
}
}
/**
* @param columns
* @param predicates
* @param size
*/
private void addPredicateInfo(List<String[]> columns,
List<Constituent> predicates, int size, boolean addSense) {
Map<Integer, String> senseMap = new HashMap<>();
Map<Integer, String> lemmaMap = new HashMap<>();
for (Constituent c : predicates) {
senseMap.put(c.getStartSpan(),
c.getAttribute(CoNLLColumnFormatReader.SenseIdentifer));
lemmaMap.put(c.getStartSpan(),
c.getAttribute(CoNLLColumnFormatReader.LemmaIdentifier));
senseMap.put(c.getStartSpan(), c.getAttribute(CoNLLColumnFormatReader.SenseIdentifer));
lemmaMap.put(c.getStartSpan(), c.getAttribute(CoNLLColumnFormatReader.LemmaIdentifier));
}
String[] sense = new String[size];
@@ -406,19 +363,11 @@
}
}
// System.out.println(Arrays.asList(sense));
// System.out.println(Arrays.asList(lemma));
if (addSense)
columns.add(sense);
columns.add(lemma);
}
/**
* @param predicate
* @param args
* @param size
* @return
*/
private String[] addPredicateArgInfo(Constituent predicate,
List<Relation> args, int size) {
Map<Integer, String> paInfo = new HashMap<>();
@@ -430,8 +379,7 @@
argPredicate = argPredicate.replaceAll("ARG", "A");
argPredicate = argPredicate.replaceAll("Support", "SUP");
for (int i = r.getTarget().getStartSpan(); i < r.getTarget()
.getEndSpan(); i++) {
for (int i = r.getTarget().getStartSpan(); i < r.getTarget().getEndSpan(); i++) {
paInfo.put(i, "*");
if (i == r.getTarget().getStartSpan())
paInfo.put(i, "(" + argPredicate + paInfo.get(i));
@@ -453,27 +401,18 @@
return paColumn;
}
/**
* @param ta
* @return
*/
private String[] getParse(TextAnnotation ta) {
String[] parse = new String[ta.size()];
for (int sentenceId = 0; sentenceId < ta.getNumberOfSentences(); sentenceId++) {
Tree<String> tree = ParseHelper.getParseTree(parseViewName, ta,
sentenceId);
Tree<String> tree = ParseHelper.getParseTree(parseViewName, ta, sentenceId);
Sentence sentence = ta.getSentence(sentenceId);
tree = ParseUtils.snipNullNodes(tree);
tree = ParseUtils.stripFunctionTags(tree);
// tree = tree.getChild(0);
// System.out.println("tree is :" + tree);
String[] treeLines = tree.toString().split("\n");
if (treeLines.length != sentence.size()) {
......
@@ -5,7 +5,7 @@ package edu.illinois.cs.cogcomp.srl;
* TODO Change this before shipping
*/
public class Constants {
public final static String systemVersion = "5.1";
public final static String systemVersion = "5.4.1";
public final static String systemName = "illinoisSRL";
......
@@ -97,24 +97,30 @@
@CommandDescription(description = "Performs the full training & testing sequence for all SRL types",
usage = "expt [Verb | Nom] cacheDatasets=[true | false]")
public static void expt(String srlType, String cacheDatasets, String learnerConfig) throws Exception {
public static void expt(String srlType, String cacheDatasets) throws Exception {
// Step 1: Cache all the datasets we're going to use
if (Boolean.parseBoolean(cacheDatasets)) cacheDatasets();
// Step 2: Iterate between pre-extracting all the features needed for training and training
// We don't need to train a predicate classifier for Verb
if (SRLType.valueOf(srlType) == SRLType.Nom) {
preExtract(srlType, "Predicate");
train(srlType, "Predicate");
}
preExtract(srlType, "Sense");
train(srlType, "Sense",learnerConfig);
train(srlType, "Sense");
preExtract(srlType, "Identifier");
train(srlType, "Identifier",learnerConfig);
train(srlType, "Identifier");
tuneIdentifier(srlType);
preExtract(srlType, "Classifier");
train(srlType, "Classifier",learnerConfig);
train(srlType, "Classifier");
// Step 3: Evaluate
evaluate(srlType);
System.out.println("All Done!");
}
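// Note: the learnerConfig command-line argument is gone; train() now reads the
// learner configuration from SRLProperties.getLearnerConfig() (see further down).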
@CommandDescription(description = "Reads and caches all the datasets", usage = "cacheDatasets")
@@ -185,7 +191,7 @@
Counter<String> addedViews = new Counter<>();
log.info("Initializing pre-processor");
TextPreProcessor.initialize(configFile, true);
TextPreProcessor.initialize(configFile);
int count = 0;
while (dataset.hasNext()) {
@@ -229,10 +235,20 @@
public static SRLManager getManager(SRLType srlType, boolean trainingMode) throws Exception {
String viewName;
if (defaultParser == null) defaultParser = SRLProperties.getInstance().getDefaultParser();
if (defaultParser.equals("Charniak")) viewName = ViewNames.PARSE_CHARNIAK;
else if (defaultParser.equals("Berkeley")) viewName = ViewNames.PARSE_BERKELEY;
else if (defaultParser.equals("Stanford")) viewName = ViewNames.PARSE_STANFORD;
else viewName = defaultParser;
switch (defaultParser) {
case "Charniak":
viewName = ViewNames.PARSE_CHARNIAK;
break;
case "Berkeley":
viewName = ViewNames.PARSE_BERKELEY;
break;
case "Stanford":
viewName = ViewNames.PARSE_STANFORD;
break;
default:
viewName = defaultParser;
break;
}
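// Note: the default branch above passes a custom parse view name through unchanged.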
if (srlType == SRLType.Verb)
return new VerbSRLManager(trainingMode, viewName);
@@ -275,7 +291,6 @@
String allDataCacheFile = properties.getFeatureCacheFile(srlType,
modelToExtract, featureSet, defaultParser, dataset);
System.out.println("reading feature cache from " + allDataCacheFile);
FeatureVectorCacheFile featureCache = preExtract(numConsumers, manager,
modelToExtract, dataset, allDataCacheFile, false);
@@ -307,9 +322,6 @@
if (IOUtils.exists(cacheFile2)) {
log.warn("Old pruned cache file found. Not doing anything...");
return;
// log.warn("Old pruned cache file found. Deleting...");
// IOUtils.rm(cacheFile2);
// log.info("Done");
}
log.info("Pruning features. Saving pruned features to {}", cacheFile2);
@@ -358,7 +370,7 @@
@CommandDescription(description = "Trains a specific model and SRL type",
usage = "train [Verb | Nom] [Predicate | Sense | Identifier | Classifier]")
public static void train(String srlType_, String model_, String learnerConfig) throws Exception {
public static void train(String srlType_, String model_) throws Exception {
SRLType srlType = SRLType.valueOf(srlType_);
SRLManager manager = getManager(srlType, true);
@@ -392,7 +404,7 @@
log.info("Setting up solver, learning may take time if you have too many instances in SLProblem ....");
SLParameters params = new SLParameters();
params.loadConfigFile(learnerConfig);
params.loadConfigFile(properties.getLearnerConfig());
params.C_FOR_STRUCTURE = (float) c;
SRLMulticlassInference infSolver = new SRLMulticlassInference(manager, model);
@@ -504,12 +516,10 @@
count++;
if (count % 1000 == 0) {
long end = System.currentTimeMillis();
log.info(count + " sentences done. Took "
+ (end - start) + "ms, F1 so far = "
+ tester.getAverageF1());
log.info(count + " sentences done. Took " + (end - start) + "ms, " +
"F1 so far = " + tester.getAverageF1());
}
}
System.exit(-1);
long end = System.currentTimeMillis();
System.out.println(count + " sentences done. Took " + (end - start) + "ms");
......
@@ -19,25 +19,18 @@
private static final Logger log = LoggerFactory.getLogger(SRLProperties.class);
private static SRLProperties theInstance;
private PropertiesConfiguration config;
private final String curatorHost;
private final int curatorPort, maxInferenceRounds;
private final String wordNetFile;
private SRLProperties(URL url) throws ConfigurationException {
config = new PropertiesConfiguration(url);
curatorHost = config.getString("CuratorHost", "");
curatorPort = config.getInt("CuratorPort", -1);
this.wordNetFile = config.getString("WordNetConfig");
if (config.containsKey("LoadWordNetConfigFromClassPath")
&& config.getBoolean("LoadWordNetConfigFromClassPath")) {
WordNetManager.loadConfigAsClasspathResource(true);
}
maxInferenceRounds = config.getInt("MaxInferenceRounds");
}
public static void initialize(String configFile) throws Exception {
// first try to load the file from the file system
@@ -156,19 +149,7 @@
return Constants.systemVersion;
}
public String getPipelineConfigFile() {
return config.getString("PipelineConfigFile");
}
public String getCuratorHost() {
return curatorHost;
}
public int getCuratorPort() {
return curatorPort;
}
public int getMaxInferenceRounds() {
return maxInferenceRounds;
}
public String getLearnerConfig() {
return this.config.getString("LearnerConfig");
}
}
@@ -95,7 +95,7 @@
properties = SRLProperties.getInstance();
log.info("Initializing pre-processor");
TextPreProcessor.initialize(configFile, false);
TextPreProcessor.initialize(configFile);
log.info("Creating {} manager", srlType);
manager = Main.getManager(SRLType.valueOf(srlType), false);
......
@@ -3,7 +3,6 @@ package edu.illinois.cs.cogcomp.srl.caches;
import edu.illinois.cs.cogcomp.core.datastructures.Pair;
import edu.illinois.cs.cogcomp.sl.core.SLProblem;
import edu.illinois.cs.cogcomp.sl.util.IFeatureVector;
import edu.illinois.cs.cogcomp.sl.util.SparseFeatureVector;
import edu.illinois.cs.cogcomp.srl.core.Models;
import edu.illinois.cs.cogcomp.srl.core.SRLManager;
import edu.illinois.cs.cogcomp.srl.jlis.SRLMulticlassInstance;
@@ -86,7 +85,7 @@ public class FeatureVectorCacheFile implements Closeable,
String features = parts[2];
SRLMulticlassInstance x = new SRLMulticlassInstance(model, lemma, features);
SRLMulticlassLabel y = new SRLMulticlassLabel(x, label, model, manager);
SRLMulticlassLabel y = new SRLMulticlassLabel(label, model, manager);
return new Pair<>(x, y);
} catch (Exception e) {
......
@@ -33,13 +33,11 @@ public abstract class AbstractPredicateDetector {
List<Constituent> list = new ArrayList<>();
for (int i = 0; i < ta.size(); i++) {
Option<String> opt = getLemma(ta, i);
if (opt.isPresent()) {
Constituent c = new Constituent("", "", ta, i, i + 1);
c.addAttribute(CoNLLColumnFormatReader.LemmaIdentifier,
opt.get());
c.addAttribute(CoNLLColumnFormatReader.LemmaIdentifier, opt.get());
list.add(c);
}
}
......