######## This is the default tagger.properties file
######## This file is used for training and testing only,
######## The configuration for tagging is directly tuned in the descriptor "HmmTagger.xml"

########### ONLY FOR TRAINING #####################################################

####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS:
####### can be specified either as an absolute or as a relative path
####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt
FILE =

########################## BOTH FOR TRAINING AND EVALUATION ################################

######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH
######## You can either uncomment one of them, if you want to replace given models with your own one,
#MODEL_FILE = resources/german/TuebaModel.dat
MODEL_FILE = resources/english/BrownModel.dat
######## or specify a completely different name
# MODEL_FILE =

######## If mapping of tags is desired, uncomment the following
DO_MAPPING = true

####### EXAMPLES OF MAPPING CLASSES
## Basic mapping for the Brown corpus (nltk distribution) tagset: to get 93 tags out of 473
MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown
## Basic mapping for STTS tagset: from 54 tags onto the basic ca. 15 classes plus punctuation
#MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba
## If you implement your own mapping, you should specify here in the same manner as above a java-path to the class
#MAPPING =

######## If corpus is in a different format and cannot be read with the provided READERS,
######## you should specify here a java-path to the class (see examples below)
#CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader
CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader
#CORPUS_READER =

################# ONLY FOR EVALUATION ###############################

######### GOLD STANDARD CORPUS FILE:
######### can be specified as an absolute or as a relative path
## e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt
GOLD_STANDARD =

######### Here we specify whether one intends to test a bi- or a trigram model (default is a trigram model)
N=3