CaseTest

package org.apache.maven.shared.utils;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.Locale;

import org.apache.commons.lang3.StringEscapeUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.ComparisonFailure;
import org.junit.Test;

/**
 * Test case for character case changes, pinpointing the situations where character case comparison doesn't
 * give an intuitive result, and showing why one should avoid {@link String#toUpperCase()} and
 * {@link String#toLowerCase()} (platform locale dependent, with sometimes unexpected results)
 * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
 *
 * @author Hervé Boutemy
 * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
 */
public class CaseTest
    extends Assert
{
    private static final Locale LOCALE_TURKISH = new Locale( "tr" );

    /**
     * Azerbaijani: Unicode SpecialCasing (and therefore the JDK) gives it the same dotted/dotless i rules as Turkish.
     */
    private static final Locale LOCALE_AZERBAIJANI = new Locale( "az" );

    /** common ASCII 'i' */
    private static final char DOTTED_i = '\u0069';

    /** common ASCII 'I' */
    private static final char DOTLESS_I = '\u0049';

    /** turkish dotless i = ı */
    private static final char DOTLESS_i = '\u0131';

    /** turkish dotted I = İ */
    private static final char DOTTED_I = '\u0130';

    /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
    private static final char COMBINING_DOT_ABOVE = '\u0307';

    /** Default locale captured at class load time, restored once all tests of this class have run. */
    private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

    private static final String ASCII_LOWER = "abcdefghijklmnopqrstuvwxyz";

    private static final String ASCII_UPPER = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /**
     * Safety net: puts back the default locale that was active when this class was loaded.
     */
    @AfterClass
    public static void restoreDefaultLocale()
    {
        Locale.setDefault( SAVED_DEFAULT_LOCALE );
    }

    /**
     * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
     * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
     */
    @Test
    public void testTurkishI()
    {
        // check common i and I
        assertEquals( "common lowercase i should have a dot", 'i', DOTTED_i );
        assertEquals( "common uppercase I should not have a dot", 'I', DOTLESS_I );

        final String allIs = "iIıİ";

        // check source encoding doesn't wreak havoc
        assertUnicodeEquals( "misc i directly in (UTF-8) source", allIs,
                             "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I );

        // check toUpperCase and toLowerCase difference with turkish and english locales
        assertUnicodeEquals( "'iIıİ'.toUpperCase('tr')=='İIIİ'", "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                             allIs.toUpperCase( LOCALE_TURKISH ) );
        assertUnicodeEquals( "'iIıİ'.toLowerCase('tr')=='iııi'", "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
                             allIs.toLowerCase( LOCALE_TURKISH ) );
        assertUnicodeEquals( "'iIıİ'.toUpperCase('en')=='IIIİ'", "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                             allIs.toUpperCase( Locale.ENGLISH ) );

        // on some platforms, the lower-cased İ is 'i' followed by an extra COMBINING DOT ABOVE: accept both forms
        String lower = allIs.toLowerCase( Locale.ENGLISH );
        String expectedLower = "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i;
        if ( lower.length() > 4 )
        {
            expectedLower += COMBINING_DOT_ABOVE;
        }
        assertUnicodeEquals( "'iIıİ'.toLowerCase('en')=='iiıi'", expectedLower, lower );

        // check equalsIgnoreCase() , which has no locale: each of the 4 characters, repeated 4 times,
        // must be considered equal to the mixed string
        for ( int i = 0; i < allIs.length(); i++ )
        {
            char currentI = allIs.charAt( i );

            StringBuilder sb = new StringBuilder( allIs.length() );
            for ( int j = 0; j < allIs.length(); j++ )
            {
                sb.append( currentI );
            }
            String current = sb.toString();

            assertTrue( "'" + current + "'.equalsIgnoreCase('" + allIs + "')", current.equalsIgnoreCase( allIs ) );
        }
    }

    /**
     * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
     * @param message the failure message reported when both strings differ
     * @param expected the expected value (must not be <code>null</code>)
     * @param actual the value to check
     * @throws ComparisonFailure with both values Java-escaped when <code>expected</code> doesn't equal
     *             <code>actual</code>
     */
    private static void assertUnicodeEquals( String message, String expected, String actual )
    {
        if ( expected.equals( actual ) )
        {
            return;
        }

        throw new ComparisonFailure( message, StringEscapeUtils.escapeJava( expected ),
                                     StringEscapeUtils.escapeJava( actual ) );
    }

    /**
     * Tells whether the locale's language follows the Turkic dotted/dotless i case rules.
     * @param locale the locale to check
     * @return <code>true</code> for Turkish and Azerbaijani locales
     */
    private static boolean usesDottedAndDotlessI( Locale locale )
    {
        String language = locale.getLanguage();
        return LOCALE_TURKISH.getLanguage().equals( language )
            || LOCALE_AZERBAIJANI.getLanguage().equals( language );
    }

    /**
     * Test case change on all ascii characters with every available locale, to check that turkish i (shared with
     * Azerbaijani) is the only exception on these characters.
     */
    @Test
    public void testAsciiAvailableLocales()
    {
        // the default locale is changed for every tested locale: make sure it is put back even on failure,
        // so that the other tests don't run with an arbitrary default locale
        final Locale previousDefault = Locale.getDefault();
        try
        {
            for ( Locale locale : Locale.getAvailableLocales() )
            {
                checkAsciiCaseChanges( locale );
            }
        }
        finally
        {
            Locale.setDefault( previousDefault );
        }
    }

    /**
     * Checks upper/lower case changes of the ASCII alphabet for one locale. Sets the default locale to the given
     * locale as a side effect.
     * @param locale the locale to check
     */
    private void checkAsciiCaseChanges( Locale locale )
    {
        final String lower = ASCII_LOWER;
        final String upper = ASCII_UPPER;

        // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
        Locale.setDefault( locale );
        assertEquals( "toUpperCase() with default locale '" + locale + "'", lower.toUpperCase(),
                      lower.toUpperCase( locale ) );
        assertEquals( "toLowerCase() with default locale '" + locale + "'", upper.toLowerCase(),
                      upper.toLowerCase( locale ) );

        // check result
        String expectedToUpperCase = upper;
        String expectedToLowerCase = lower;
        if ( usesDottedAndDotlessI( locale ) )
        {
            expectedToUpperCase = upper.replace( DOTLESS_I, DOTTED_I );
            expectedToLowerCase = lower.replace( DOTTED_i, DOTLESS_i );
        }

        assertEquals( "'" + lower + "'.toUpperCase('" + locale + "')", expectedToUpperCase,
                      lower.toUpperCase( locale ) );
        assertEquals( "'" + upper + "'.toLowerCase('" + locale + "')", expectedToLowerCase,
                      upper.toLowerCase( locale ) );

        // check that toLowerCase on lower and toUpperCase on upper don't cause harm
        assertEquals( "'" + lower + "'.toLowerCase('" + locale + "')", lower, lower.toLowerCase( locale ) );
        assertEquals( "'" + upper + "'.toUpperCase('" + locale + "')", upper, upper.toUpperCase( locale ) );

        // check equalsIgnoreCase
        assertTrue( "'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase( lower ) );
        assertTrue( "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
                    upper.equalsIgnoreCase( expectedToLowerCase ) );
        assertTrue( "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
                    expectedToUpperCase.equalsIgnoreCase( lower ) );
    }
}