Coverage Report - org.apache.commons.codec.language.bm.Rule
 
Classes in this File Line Coverage Branch Coverage Complexity
Rule
92%
155/167
90%
98/108
3.275
Rule$1
100%
2/2
N/A
3.275
Rule$10
100%
4/4
N/A
3.275
Rule$2
33%
3/9
N/A
3.275
Rule$3
100%
2/2
100%
2/2
3.275
Rule$4
100%
2/2
N/A
3.275
Rule$5
100%
2/2
N/A
3.275
Rule$6
100%
2/2
N/A
3.275
Rule$7
100%
2/2
100%
4/4
3.275
Rule$8
100%
2/2
100%
4/4
3.275
Rule$9
100%
2/2
100%
4/4
3.275
Rule$Phoneme
100%
11/11
N/A
3.275
Rule$Phoneme$1
100%
10/10
100%
8/8
3.275
Rule$PhonemeExpr
N/A
N/A
3.275
Rule$PhonemeList
100%
5/5
N/A
3.275
Rule$RPattern
N/A
N/A
3.275
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.codec.language.bm;
 19  
 
 20  
 import java.io.InputStream;
 21  
 import java.util.ArrayList;
 22  
 import java.util.Arrays;
 23  
 import java.util.Collections;
 24  
 import java.util.Comparator;
 25  
 import java.util.EnumMap;
 26  
 import java.util.HashMap;
 27  
 import java.util.HashSet;
 28  
 import java.util.List;
 29  
 import java.util.Map;
 30  
 import java.util.Scanner;
 31  
 import java.util.Set;
 32  
 import java.util.regex.Matcher;
 33  
 import java.util.regex.Pattern;
 34  
 
 35  
 /**
 36  
  * A phoneme rule.
 37  
  * <p>
 38  
  * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
 39  
  * and a logical flag indicating if all languages must be in play. A rule matches if:
 40  
  * <ul>
 41  
  * <li>the pattern matches at the current position</li>
 42  
  * <li>the string up until the beginning of the pattern matches the left context</li>
 43  
  * <li>the string from the end of the pattern matches the right context</li>
 44  
  * <li>logical is ALL and all languages are in scope; or</li>
 45  
  * <li>logical is any other value and at least one language is in scope</li>
 46  
  * </ul>
 47  
  * <p>
 48  
  * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
 49  
  * to explicitly construct their own.
 50  
  * <p>
 51  
  * Rules are immutable and thread-safe.
 52  
  * <p>
 53  
  * <b>Rules resources</b>
 54  
  * <p>
 55  
  * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
 56  
  * named following the pattern:
 57  
  * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote>
 58  
  * <p>
 59  
  * The format of these resources is the following:
 60  
  * <ul>
 61  
  * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
 62  
  * will be interpreted as:
 63  
  * <ol>
 64  
  * <li>pattern</li>
 65  
  * <li>left context</li>
 66  
  * <li>right context</li>
 67  
  * <li>phoneme</li>
 68  
  * </ol>
 69  
  * </li>
 70  
  * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded
 71  
  * as a comment.</li>
 72  
  * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. This will skip
 73  
  * all content until a line ending in '*' and '/' is found.</li>
 74  
  * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 75  
  * </ul>
 76  
  *
 77  
  * @since 1.6
 78  
  * @version $Id$
 79  
  */
 80  106145
 public class Rule {
 81  
 
 82  4876451
     public static final class Phoneme implements PhonemeExpr {
 83  92232
         public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() {
 84  
             @Override
 85  
             public int compare(final Phoneme o1, final Phoneme o2) {
 86  1220580
                 for (int i = 0; i < o1.phonemeText.length(); i++) {
 87  1216151
                     if (i >= o2.phonemeText.length()) {
 88  720
                         return +1;
 89  
                     }
 90  1215431
                     final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i);
 91  1215431
                     if (c != 0) {
 92  87082
                         return c;
 93  
                     }
 94  
                 }
 95  
 
 96  4429
                 if (o1.phonemeText.length() < o2.phonemeText.length()) {
 97  1020
                     return -1;
 98  
                 }
 99  
 
 100  3409
                 return 0;
 101  
             }
 102  
         };
 103  
 
 104  
         private final CharSequence phonemeText;
 105  
         private final Languages.LanguageSet languages;
 106  
 
 107  542556
         public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
 108  542556
             this.phonemeText = phonemeText;
 109  542556
             this.languages = languages;
 110  542556
         }
 111  
 
 112  
         public Phoneme append(final CharSequence str) {
 113  152227
             return new Phoneme(this.phonemeText.toString() + str.toString(), this.languages);
 114  
         }
 115  
 
 116  
         public Languages.LanguageSet getLanguages() {
 117  316698
             return this.languages;
 118  
         }
 119  
 
 120  
         @Override
 121  
         public Iterable<Phoneme> getPhonemes() {
 122  37869
             return Collections.singleton(this);
 123  
         }
 124  
 
 125  
         public CharSequence getPhonemeText() {
 126  220364
             return this.phonemeText;
 127  
         }
 128  
 
 129  
         public Phoneme join(final Phoneme right) {
 130  175837
             return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(),
 131  
                                this.languages.restrictTo(right.languages));
 132  
         }
 133  
     }
 134  
 
 135  
     public interface PhonemeExpr {
 136  
         Iterable<Phoneme> getPhonemes();
 137  
     }
 138  
 
 139  63608
     public static final class PhonemeList implements PhonemeExpr {
 140  
         private final List<Phoneme> phonemes;
 141  
 
 142  1438
         public PhonemeList(final List<Phoneme> phonemes) {
 143  1438
             this.phonemes = phonemes;
 144  1438
         }
 145  
 
 146  
         @Override
 147  
         public List<Phoneme> getPhonemes() {
 148  63608
             return this.phonemes;
 149  
         }
 150  
     }
 151  
 
 152  
     /**
 153  
      * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
 154  
      */
 155  
     public static interface RPattern {
 156  
         boolean isMatch(CharSequence input);
 157  
     }
 158  
 
 159  1
     public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() {
 160  
         @Override
 161  
         public boolean isMatch(final CharSequence input) {
 162  66010
             return true;
 163  
         }
 164  
     };
 165  
 
 166  
     /** The literal name "ALL". NOTE(review): not referenced elsewhere in this file; confirm its role against callers. */
     public static final String ALL = "ALL";
 167  
 
 168  
     /** The quote character that {@link #stripQuotes(String)} removes from the start and end of a token. */
     private static final String DOUBLE_QUOTE = "\"";
 169  
 
 170  
     /** Prefix that marks a line of a rules resource as an include directive rather than a rule. */
     private static final String HASH_INCLUDE = "#include";
 171  
 
 172  1
     private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES =
 173  
             new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>(NameType.class);
 174  
 
 175  
     static {
 176  4
         for (final NameType s : NameType.values()) {
 177  3
             final Map<RuleType, Map<String, List<Rule>>> rts =
 178  
                     new EnumMap<RuleType, Map<String, List<Rule>>>(RuleType.class);
 179  
 
 180  12
             for (final RuleType rt : RuleType.values()) {
 181  9
                 final Map<String, List<Rule>> rs = new HashMap<String, List<Rule>>();
 182  
 
 183  9
                 final Languages ls = Languages.getInstance(s);
 184  9
                 for (final String l : ls.getLanguages()) {
 185  
                     try {
 186  108
                         rs.put(l, parseRules(createScanner(s, rt, l), createResourceName(s, rt, l)));
 187  0
                     } catch (final IllegalStateException e) {
 188  0
                         throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e);
 189  108
                     }
 190  
                 }
 191  9
                 if (!rt.equals(RuleType.RULES)) {
 192  6
                     rs.put("common", parseRules(createScanner(s, rt, "common"), createResourceName(s, rt, "common")));
 193  
                 }
 194  
 
 195  9
                 rts.put(rt, Collections.unmodifiableMap(rs));
 196  
             }
 197  
 
 198  3
             RULES.put(s, Collections.unmodifiableMap(rts));
 199  
         }
 200  1
     }
 201  
 
 202  
     private static boolean contains(final CharSequence chars, final char input) {
 203  312230
         for (int i = 0; i < chars.length(); i++) {
 204  270802
             if (chars.charAt(i) == input) {
 205  10453
                 return true;
 206  
             }
 207  
         }
 208  41428
         return false;
 209  
     }
 210  
 
 211  
     private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) {
 212  228
         return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt",
 213  
                              nameType.getName(), rt.getName(), lang);
 214  
     }
 215  
 
 216  
     private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
 217  114
         final String resName = createResourceName(nameType, rt, lang);
 218  114
         final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
 219  
 
 220  114
         if (rulesIS == null) {
 221  0
             throw new IllegalArgumentException("Unable to load resource: " + resName);
 222  
         }
 223  
 
 224  114
         return new Scanner(rulesIS, ResourceConstants.ENCODING);
 225  
     }
 226  
 
 227  
     private static Scanner createScanner(final String lang) {
 228  35
         final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang);
 229  35
         final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
 230  
 
 231  35
         if (rulesIS == null) {
 232  0
             throw new IllegalArgumentException("Unable to load resource: " + resName);
 233  
         }
 234  
 
 235  35
         return new Scanner(rulesIS, ResourceConstants.ENCODING);
 236  
     }
 237  
 
 238  
     private static boolean endsWith(final CharSequence input, final CharSequence suffix) {
 239  894
         if (suffix.length() > input.length()) {
 240  124
             return false;
 241  
         }
 242  783
         for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) {
 243  770
             if (input.charAt(i) != suffix.charAt(j)) {
 244  757
                 return false;
 245  
             }
 246  
         }
 247  13
         return true;
 248  
     }
 249  
 
 250  
     /**
 251  
      * Gets rules for a combination of name type, rule type and languages.
 252  
      *
 253  
      * @param nameType
 254  
      *            the NameType to consider
 255  
      * @param rt
 256  
      *            the RuleType to consider
 257  
      * @param langs
 258  
      *            the set of languages to consider
 259  
      * @return a list of Rules that apply
 260  
      */
 261  
     public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
 262  
                                          final Languages.LanguageSet langs) {
 263  134336
         return langs.isSingleton() ? getInstance(nameType, rt, langs.getAny()) :
 264  
                                      getInstance(nameType, rt, Languages.ANY);
 265  
     }
 266  
 
 267  
     /**
 268  
      * Gets rules for a combination of name type, rule type and a single language.
 269  
      *
 270  
      * @param nameType
 271  
      *            the NameType to consider
 272  
      * @param rt
 273  
      *            the RuleType to consider
 274  
      * @param lang
 275  
      *            the language to consider
 276  
      * @return a list rules for a combination of name type, rule type and a single language.
 277  
      */
 278  
     public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
 279  201505
         final List<Rule> rules = RULES.get(nameType).get(rt).get(lang);
 280  
 
 281  201505
         if (rules == null) {
 282  1
             throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
 283  
                                                nameType.getName(), rt.getName(), lang));
 284  
         }
 285  
 
 286  201504
         return rules;
 287  
     }
 288  
 
 289  
     private static Phoneme parsePhoneme(final String ph) {
 290  6388
         final int open = ph.indexOf("[");
 291  6388
         if (open >= 0) {
 292  597
             if (!ph.endsWith("]")) {
 293  0
                 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
 294  
             }
 295  597
             final String before = ph.substring(0, open);
 296  597
             final String in = ph.substring(open + 1, ph.length() - 1);
 297  597
             final Set<String> langs = new HashSet<String>(Arrays.asList(in.split("[+]")));
 298  
 
 299  597
             return new Phoneme(before, Languages.LanguageSet.from(langs));
 300  
         } else {
 301  5791
             return new Phoneme(ph, Languages.ANY_LANGUAGE);
 302  
         }
 303  
     }
 304  
 
 305  
     private static PhonemeExpr parsePhonemeExpr(final String ph) {
 306  4507
         if (ph.startsWith("(")) { // we have a bracketed list of options
 307  1438
             if (!ph.endsWith(")")) {
 308  0
                 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
 309  
             }
 310  
 
 311  1438
             final List<Phoneme> phs = new ArrayList<Phoneme>();
 312  1438
             final String body = ph.substring(1, ph.length() - 1);
 313  4757
             for (final String part : body.split("[|]")) {
 314  3319
                 phs.add(parsePhoneme(part));
 315  
             }
 316  1438
             if (body.startsWith("|") || body.endsWith("|")) {
 317  48
                 phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
 318  
             }
 319  
 
 320  1438
             return new PhonemeList(phs);
 321  
         } else {
 322  3069
             return parsePhoneme(ph);
 323  
         }
 324  
     }
 325  
 
 326  
     private static List<Rule> parseRules(final Scanner scanner, final String location) {
 327  149
         final List<Rule> lines = new ArrayList<Rule>();
 328  149
         int currentLine = 0;
 329  
 
 330  149
         boolean inMultilineComment = false;
 331  8245
         while (scanner.hasNextLine()) {
 332  8096
             currentLine++;
 333  8096
             final String rawLine = scanner.nextLine();
 334  8096
             String line = rawLine;
 335  
 
 336  8096
             if (inMultilineComment) {
 337  2235
                 if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
 338  149
                     inMultilineComment = false;
 339  
                 }
 340  
             } else {
 341  5861
                 if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
 342  149
                     inMultilineComment = true;
 343  
                 } else {
 344  
                     // discard comments
 345  5712
                     final int cmtI = line.indexOf(ResourceConstants.CMT);
 346  5712
                     if (cmtI >= 0) {
 347  857
                         line = line.substring(0, cmtI);
 348  
                     }
 349  
 
 350  
                     // trim leading-trailing whitespace
 351  5712
                     line = line.trim();
 352  
 
 353  5712
                     if (line.length() == 0) {
 354  1170
                         continue; // empty lines can be safely skipped
 355  
                     }
 356  
 
 357  4542
                     if (line.startsWith(HASH_INCLUDE)) {
 358  
                         // include statement
 359  35
                         final String incl = line.substring(HASH_INCLUDE.length()).trim();
 360  35
                         if (incl.contains(" ")) {
 361  0
                             throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
 362  
                                                                location);
 363  
                         } else {
 364  35
                             lines.addAll(parseRules(createScanner(incl), location + "->" + incl));
 365  
                         }
 366  35
                     } else {
 367  
                         // rule
 368  4507
                         final String[] parts = line.split("\\s+");
 369  4507
                         if (parts.length != 4) {
 370  0
                             throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
 371  
                                                                " parts: " + rawLine + " in " + location);
 372  
                         } else {
 373  
                             try {
 374  4507
                                 final String pat = stripQuotes(parts[0]);
 375  4507
                                 final String lCon = stripQuotes(parts[1]);
 376  4507
                                 final String rCon = stripQuotes(parts[2]);
 377  4507
                                 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
 378  4507
                                 final int cLine = currentLine;
 379  4507
                                 final Rule r = new Rule(pat, lCon, rCon, ph) {
 380  4507
                                     private final int myLine = cLine;
 381  4507
                                     private final String loc = location;
 382  
 
 383  
                                     @Override
 384  
                                     public String toString() {
 385  0
                                         final StringBuilder sb = new StringBuilder();
 386  0
                                         sb.append("Rule");
 387  0
                                         sb.append("{line=").append(myLine);
 388  0
                                         sb.append(", loc='").append(loc).append('\'');
 389  0
                                         sb.append('}');
 390  0
                                         return sb.toString();
 391  
                                     }
 392  
                                 };
 393  4507
                                 lines.add(r);
 394  0
                             } catch (final IllegalArgumentException e) {
 395  0
                                 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
 396  
                                                                 location, e);
 397  4507
                             }
 398  
                         }
 399  
                     }
 400  
                 }
 401  
             }
 402  6926
         }
 403  
 
 404  149
         return lines;
 405  
     }
 406  
 
 407  
     /**
 408  
      * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case.
 409  
      *
 410  
      * @param regex
 411  
      *            the regular expression to compile
 412  
      * @return an RPattern that will match this regex
 413  
      */
 414  
     private static RPattern pattern(final String regex) {
 415  9016
         final boolean startsWith = regex.startsWith("^");
 416  9016
         final boolean endsWith = regex.endsWith("$");
 417  9016
         final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
 418  9016
         final boolean boxes = content.contains("[");
 419  
 
 420  9016
         if (!boxes) {
 421  8046
             if (startsWith && endsWith) {
 422  
                 // exact match
 423  633
                 if (content.length() == 0) {
 424  
                     // empty
 425  610
                     return new RPattern() {
 426  
                         @Override
 427  
                         public boolean isMatch(final CharSequence input) {
 428  34729
                             return input.length() == 0;
 429  
                         }
 430  
                     };
 431  
                 } else {
 432  23
                     return new RPattern() {
 433  
                         @Override
 434  
                         public boolean isMatch(final CharSequence input) {
 435  6720
                             return input.equals(content);
 436  
                         }
 437  
                     };
 438  
                 }
 439  7413
             } else if ((startsWith || endsWith) && content.length() == 0) {
 440  
                 // matches every string
 441  7191
                 return ALL_STRINGS_RMATCHER;
 442  222
             } else if (startsWith) {
 443  
                 // matches from start
 444  188
                 return new RPattern() {
 445  
                     @Override
 446  
                     public boolean isMatch(final CharSequence input) {
 447  53370
                         return startsWith(input, content);
 448  
                     }
 449  
                 };
 450  34
             } else if (endsWith) {
 451  
                 // matches from start
 452  34
                 return new RPattern() {
 453  
                     @Override
 454  
                     public boolean isMatch(final CharSequence input) {
 455  894
                         return endsWith(input, content);
 456  
                     }
 457  
                 };
 458  
             }
 459  
         } else {
 460  970
             final boolean startsWithBox = content.startsWith("[");
 461  970
             final boolean endsWithBox = content.endsWith("]");
 462  
 
 463  970
             if (startsWithBox && endsWithBox) {
 464  946
                 String boxContent = content.substring(1, content.length() - 1);
 465  946
                 if (!boxContent.contains("[")) {
 466  
                     // box containing alternatives
 467  933
                     final boolean negate = boxContent.startsWith("^");
 468  933
                     if (negate) {
 469  28
                         boxContent = boxContent.substring(1);
 470  
                     }
 471  933
                     final String bContent = boxContent;
 472  933
                     final boolean shouldMatch = !negate;
 473  
 
 474  933
                     if (startsWith && endsWith) {
 475  
                         // exact match
 476  55
                         return new RPattern() {
 477  
                             @Override
 478  
                             public boolean isMatch(final CharSequence input) {
 479  15896
                                 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
 480  
                             }
 481  
                         };
 482  878
                     } else if (startsWith) {
 483  
                         // first char
 484  650
                         return new RPattern() {
 485  
                             @Override
 486  
                             public boolean isMatch(final CharSequence input) {
 487  43307
                                 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
 488  
                             }
 489  
                         };
 490  228
                     } else if (endsWith) {
 491  
                         // last char
 492  228
                         return new RPattern() {
 493  
                             @Override
 494  
                             public boolean isMatch(final CharSequence input) {
 495  13986
                                 return input.length() > 0 &&
 496  
                                        contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
 497  
                             }
 498  
                         };
 499  
                     }
 500  
                 }
 501  
             }
 502  
         }
 503  
 
 504  37
         return new RPattern() {
 505  37
             Pattern pattern = Pattern.compile(regex);
 506  
 
 507  
             @Override
 508  
             public boolean isMatch(final CharSequence input) {
 509  16196
                 final Matcher matcher = pattern.matcher(input);
 510  16196
                 return matcher.find();
 511  
             }
 512  
         };
 513  
     }
 514  
 
 515  
     private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
 516  53370
         if (prefix.length() > input.length()) {
 517  4830
             return false;
 518  
         }
 519  50012
         for (int i = 0; i < prefix.length(); i++) {
 520  49856
             if (input.charAt(i) != prefix.charAt(i)) {
 521  48384
                 return false;
 522  
             }
 523  
         }
 524  156
         return true;
 525  
     }
 526  
 
 527  
     private static String stripQuotes(String str) {
 528  18028
         if (str.startsWith(DOUBLE_QUOTE)) {
 529  18028
             str = str.substring(1);
 530  
         }
 531  
 
 532  18028
         if (str.endsWith(DOUBLE_QUOTE)) {
 533  18019
             str = str.substring(0, str.length() - 1);
 534  
         }
 535  
 
 536  18028
         return str;
 537  
     }
 538  
 
 539  
     /** Matcher for the left context; built in the constructor from the left-context regex with "$" appended. */
     private final RPattern lContext;
 540  
 
 541  
     /** Literal text that must be present at the tested position of the input. */
     private final String pattern;
 542  
 
 543  
     /** Phoneme expression associated with this rule; returned by {@link #getPhoneme()}. */
     private final PhonemeExpr phoneme;
 544  
 
 545  
     /** Matcher for the right context; built in the constructor from the right-context regex with "^" prepended. */
     private final RPattern rContext;
 546  
 
 547  
     /**
 548  
      * Creates a new rule.
 549  
      *
 550  
      * @param pattern
 551  
      *            the pattern
 552  
      * @param lContext
 553  
      *            the left context
 554  
      * @param rContext
 555  
      *            the right context
 556  
      * @param phoneme
 557  
      *            the resulting phoneme
 558  
      */
 559  4508
     public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
 560  4508
         this.pattern = pattern;
 561  4508
         this.lContext = pattern(lContext + "$");
 562  4508
         this.rContext = pattern("^" + rContext);
 563  4508
         this.phoneme = phoneme;
 564  4508
     }
 565  
 
 566  
     /**
 567  
      * Gets the left context. This is a regular expression that must match to the left of the pattern.
 568  
      *
 569  
      * @return the left context Pattern
 570  
      */
 571  
     public RPattern getLContext() {
 572  0
         return this.lContext;
 573  
     }
 574  
 
 575  
     /**
 576  
      * Gets the pattern. This is a string-literal that must exactly match.
 577  
      *
 578  
      * @return the pattern
 579  
      */
 580  
     public String getPattern() {
 581  32427496
         return this.pattern;
 582  
     }
 583  
 
 584  
     /**
 585  
      * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
 586  
      *
 587  
      * @return the phoneme
 588  
      */
 589  
     public PhonemeExpr getPhoneme() {
 590  31736
         return this.phoneme;
 591  
     }
 592  
 
 593  
     /**
 594  
      * Gets the right context. This is a regular expression that must match to the right of the pattern.
 595  
      *
 596  
      * @return the right context Pattern
 597  
      */
 598  
     public RPattern getRContext() {
 599  0
         return this.rContext;
 600  
     }
 601  
 
 602  
     /**
 603  
      * Decides if the pattern and context match the input starting at a position. It is a match if the
 604  
      * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and
 605  
      * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>.
 606  
      *
 607  
      * @param input
 608  
      *            the input String
 609  
      * @param i
 610  
      *            the int position within the input
 611  
      * @return true if the pattern and left/right context match, false otherwise
 612  
      */
 613  
     public boolean patternAndContextMatches(final CharSequence input, final int i) {
 614  32427497
         if (i < 0) {
 615  1
             throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
 616  
         }
 617  
 
 618  32427496
         final int patternLength = this.pattern.length();
 619  32427496
         final int ipl = i + patternLength;
 620  
 
 621  32427496
         if (ipl > input.length()) {
 622  
             // not enough room for the pattern to match
 623  13169700
             return false;
 624  
         }
 625  
 
 626  
         // evaluate the pattern, left context and right context
 627  
         // fail early if any of the evaluations is not successful
 628  19257796
         if (!input.subSequence(i, ipl).equals(this.pattern)) {
 629  19056307
             return false;
 630  201489
         } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) {
 631  151870
             return false;
 632  
         }
 633  49619
         return this.lContext.isMatch(input.subSequence(0, i));
 634  
     }
 635  
 }