View Javadoc
1   package org.apache.maven.shared.utils;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Locale;
23  
24  import org.apache.commons.lang3.StringEscapeUtils;
25  import org.junit.AfterClass;
26  import org.junit.Assert;
27  import org.junit.ComparisonFailure;
28  import org.junit.Test;
29  
30  /**
31   * Test case for character case changes, to precisely point the situations when character case comparison doesn't
32   * give intuitive result, or why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()}
33   * (platform locale dependent, with sometimes unexpected results)
34   * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
35   * 
36   * @author Hervé Boutemy
37   * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
38   */
39  public class CaseTest
40      extends Assert
41  {
42      private final static Locale LOCALE_TURKISH = new Locale( "tr" );
43  
44      /** common ASCII 'i' */
45      private final static char DOTTED_i = '\u0069';
46  
47      /** common ASCII 'I' */
48      private final static char DOTLESS_I = '\u0049';
49  
50      /** turkish dotless i = ı */
51      private final static char DOTLESS_i = '\u0131';
52  
53      /** turkish dotted I = İ */
54      private final static char DOTTED_I = '\u0130';
55  
56      /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
57      private final static char COMBINING_DOT_ABOVE = '\u0307';
58  
59      private final static Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();
60  
61      @AfterClass
62      public static void restoreDefaultLocale()
63      {
64          Locale.setDefault( SAVED_DEFAULT_LOCALE );
65      }
66  
67      /**
68       * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
69       * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
70       */
71      @Test
72      public void testTurkishI()
73      {
74          // check common i and I
75          assertEquals( "common lowercase i should have a dot", 'i', DOTTED_i );
76          assertEquals( "common uppercase I should not have a dot", 'I', DOTLESS_I );
77  
78          final String iIıİ = "iIıİ";
79  
80          // check source encoding doesn't wreck havoc */
81          assertUnicodeEquals( "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i
82              + DOTTED_I );
83  
84          // check toUpperCase and toLowerCase difference with turkish and english locales
85          assertUnicodeEquals( "'iIıİ'.toUpperCase('tr')=='İIIİ'", "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
86                               iIıİ.toUpperCase( LOCALE_TURKISH ) );
87          assertUnicodeEquals( "'iIıİ'.toLowerCase('tr')=='iııi'", "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
88                               iIıİ.toLowerCase( LOCALE_TURKISH ) );
89          assertUnicodeEquals( "'iIıİ'.toUpperCase('en')=='IIIİ'", "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
90                               iIıİ.toUpperCase( Locale.ENGLISH ) );
91          String lower = iIıİ.toLowerCase( Locale.ENGLISH ); // on some platforms, ends with extra COMBINED DOT ABOVE
92          assertUnicodeEquals( "'iIıİ'.toLowerCase('en')=='iiıi'", "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i
93              + ( lower.length() > 4 ? COMBINING_DOT_ABOVE : "" ), lower );
94  
95          // check equalsIgnoreCase() , which has no locale
96          for ( int i = 0; i < iIıİ.length(); i++ )
97          {
98              char currentI = iIıİ.charAt( i );
99  
100             StringBuilder sb = new StringBuilder( iIıİ.length() );
101             for ( int j = 0; j < iIıİ.length(); j++ )
102             {
103                 sb.append( currentI );
104             }
105             String current = sb.toString();
106 
107             assertTrue( "'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase( iIıİ ) );
108         }
109     }
110 
111     /**
112      * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
113      * @param message
114      * @param expected
115      * @param actual
116      */
117     private void assertUnicodeEquals( String message, String expected, String actual )
118     {
119         if ( expected.equals( actual ) )
120         {
121             return;
122         }
123 
124         throw new ComparisonFailure( message, StringEscapeUtils.escapeJava( expected ),
125                                      StringEscapeUtils.escapeJava( actual ) );
126     }
127 
128     /**
129      * Test case change on all ascii characters with every available locale, to check that turkish i is the only
130      * exception on these characters.
131      */
132     @Test
133     public void testAsciiAvailableLocales()
134     {
135         final String lower = "abcdefghijklmnopqrstuvwxyz";
136         final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
137 
138         for ( Locale locale : Locale.getAvailableLocales() )
139         {
140             // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
141             Locale.setDefault( locale );
142             assertEquals( lower.toUpperCase(), lower.toUpperCase( locale ) );
143             assertEquals( upper.toLowerCase(), upper.toLowerCase( locale ) );
144 
145             // check result
146             String expectedToUpperCase = upper;
147             String expectedToLowerCase = lower;
148             if ( LOCALE_TURKISH.getLanguage().equals( locale.getLanguage() ) )
149             {
150                 expectedToUpperCase = upper.replace( DOTLESS_I, DOTTED_I );
151                 expectedToLowerCase = lower.replace( DOTTED_i, DOTLESS_i );
152             }
153 
154             assertEquals( "'" + lower + "'.toUpperCase('" + locale.toString() + "')", expectedToUpperCase,
155                           lower.toUpperCase( locale ) );
156             assertEquals( "'" + upper + "'.toLowerCase('" + locale.toString() + "')", expectedToLowerCase,
157                           upper.toLowerCase( locale ) );
158 
159             // check that toLowerCase on lower and toUpperCase on upper don't cause harm
160             assertEquals( "'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase( locale ) );
161             assertEquals( "'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase( locale ) );
162 
163             // check equalsIgnoreCase
164             assertTrue( "'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase( lower ) );
165             assertTrue( "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
166                         upper.equalsIgnoreCase( expectedToLowerCase ) );
167             assertTrue( "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
168                         expectedToUpperCase.equalsIgnoreCase( lower ) );
169         }
170     }
171 }