/*************************************************************************** * * 22.locale.ctype.narrow.cpp - tests exercising the narrow() and widen() * member functions of the ctype facet * * $Id$ * *************************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * Copyright 2001-2008 Rogue Wave Software, Inc. * **************************************************************************/ // DESCRIPTION: The test iterates over a subset of locales installed // on a machine, calling the C character classification functions and // their C++ counterpart(s), comparing the results of the calls against // one another. #include #if defined __linux__ // on Linux define _XOPEN_SOURCE to get CODESET defined in # define _XOPEN_SOURCE 500 /* Single Unix conformance */ // bring __int32_t into scope (otherwise fails to compile) # include #endif // __linux__ // see Onyx PR #28150 #if defined (__SUNPRO_CC) && __SUNPRO_CC <= 0x540 # include #endif // defined (__SUNPRO_CC) && __SUNPRO_CC <= 0x540 #include #include #include #include #include #include #include #include // for WEOF, btowc(), wctob() #include // for iswxxx() #if !defined (_WIN32) # if !defined (LC_MESSAGES) # define LC_MESSAGES _RWSTD_LC_MESSAGES # endif // LC_MESSAGES # include #endif // _WIN32 #include #include // for SLASH #include // for rw_locale_query() /**************************************************************************/ // the root of the locale directory (RWSTD_LOCALE_ROOT) // not set here to avoid Solaris 7 putenv() bug (PR #30017) const char* locale_root; #define NLOOPS 25 #define MAX_STR_SIZE 16 #define LOCALES "{{en-US,de-DE,fr-FR,es-ES}-*-{ISO-8859-*,UTF-8,CP125?}," \ "{ja-JP-*-{EUC-JP,SHIFT_JIS,UTF-8,CP125?}}," \ "{zh-CN-*-{GB*,UTF-8,CP125?}}," \ "{ru-RU-*-{KOI*,UTF-8,CP125?}}}" const char* locale_list = 0; #define BEGIN_LOCALE_LOOP(num, locname, loop_cntrl) \ if (!locale_list) locale_list = rw_locale_query (LC_CTYPE, LOCALES); \ for (const char* locname = locale_list; \ locname && *locname; locname += std::strlen (locname) + 1) { \ _TRY { \ const std::locale loc (locname); \ const std::ctype &ctc = \ _STD_USE_FACET (std::ctype, loc); \ _RWSTD_UNUSED (ctc); \ const std::ctype &ctp = \ _STD_USE_FACET (std::ctype, loc); \ for (int loop_cntrl = 0; loop_cntrl < int (num); loop_cntrl++) #define END_LOCALE_LOOP(locname) \ } \ _CATCH (...) { \ rw_assert (0, 0, __LINE__, \ "locale (\"%s\") threw an exception", locname); \ } \ } // for notational convenience typedef unsigned char UChar; #define ALPHA std::ctype_base::alpha #define UPPER std::ctype_base::upper #define LOWER std::ctype_base::lower #define DIGIT std::ctype_base::digit #define SPACE std::ctype_base::space #define CNTRL std::ctype_base::cntrl #define PUNCT std::ctype_base::punct #define XDIGIT std::ctype_base::xdigit #define GRAPH std::ctype_base::graph #define PRINT std::ctype_base::print // wrapper functions for the c library char and wchar_t functions std::ctype_base::mask libc_mask (int mask, char ch, const char *locname) { char curlocname [256]; if (locname) { std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return std::ctype_base::mask (); } const int c = UChar (ch); int result = 0; if (mask & ALPHA && (std::isalpha)(c)) result |= ALPHA; if (mask & CNTRL && (std::iscntrl)(c)) result |= CNTRL; if (mask & DIGIT && (std::isdigit)(c)) result |= DIGIT; if (mask & GRAPH && (std::isgraph)(c)) result |= GRAPH; if (mask & LOWER && (std::islower)(c)) result |= LOWER; if (mask & PRINT && (std::isprint)(c)) result |= PRINT; if (mask & PUNCT && (std::ispunct)(c)) result |= PUNCT; if (mask & SPACE && (std::isspace)(c)) result |= SPACE; if (mask & UPPER && (std::isupper)(c)) result |= UPPER; if (mask & XDIGIT && (std::isxdigit)(c)) result |= XDIGIT; if (locname) std::setlocale (LC_CTYPE, curlocname); return std::ctype_base::mask (result); } inline bool libc_is (std::ctype_base::mask mask, char ch, const char *locname) { const std::ctype_base::mask m = libc_mask (mask, ch, locname); return 0 != (m & mask); } const char* narrow (char *dst, const char *src) { if (src == dst || !src || !dst) return src; std::memcpy (dst, src, std::strlen (src) + 1); return dst; } const char* widen (char *dst, const char *src) { if (src == dst || !src || !dst) return src; std::memcpy (dst, src, std::strlen (src) + 1); return dst; } char widen (char, char ch, const char*) { return ch; } char narrow (char ch, const char*) { return ch; } // cond1() verifies condition [1] in test_narrow_widen() // below using libc functions bool cond1 (std::ctype_base::mask mask, char ch, const char *locname) { char curlocname [256]; std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return false; #ifdef __SUNPRO_CC // working around a SunPro bug (PR #28150) using std::wint_t; #endif // __SUNPRO_CC #ifndef _RWSTD_NO_BTOWC const std::wint_t wc = std::btowc (UChar (ch)); #elif !defined (_RWSTD_NO_MBSTOWCS) wchar_t tmp; const std::wint_t wc = 1 == std::mbstowcs (&tmp, &ch, 1) ? tmp : WEOF; #else const std::wint_t wc = WEOF; #endif // _RWSTD_NO_BTOWC, _RWSTD_NO_MBSTOWCS const bool result = WEOF == wc || libc_is (mask, ch, 0) || !libc_is (mask, wchar_t (wc), 0); std::setlocale (LC_CTYPE, curlocname); return result; } // cond3() overloads verify condition [3] in test_narrow_widen() // below using libc functions bool cond3 (std::ctype_base::mask, char, const char*) { return true; } #ifndef _RWSTD_NO_WCHAR_T const char* narrow (char *dst, const wchar_t *src) { static char buf [4096]; if (!src) return 0; if (!dst) dst = buf; std::size_t len = std::wcslen (src); _RWSTD_ASSERT (len < sizeof buf); len = std::wcstombs (dst, src, sizeof buf / sizeof *buf); if (std::size_t (-1) == len) *dst = 0; return dst; } const wchar_t* widen (wchar_t *dst, const char *src) { static wchar_t buf [4096]; if (!src) return 0; if (!dst) dst = buf; std::size_t len = std::strlen (src); _RWSTD_ASSERT (len < sizeof buf /sizeof *buf); len = std::mbstowcs (dst, src, sizeof buf / sizeof *buf); if (std::size_t (-1) == len) *dst = 0; return dst; } std::ctype_base::mask libc_mask (int mask, wchar_t ch, const char *locname) { char curlocname [256]; if (locname) { std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return std::ctype_base::mask (); } int result = 0; if (mask & ALPHA && (std::iswalpha)(ch)) result |= ALPHA; if (mask & CNTRL && (std::iswcntrl)(ch)) result |= CNTRL; if (mask & DIGIT && (std::iswdigit)(ch)) result |= DIGIT; if (mask & GRAPH && (std::iswgraph)(ch)) result |= GRAPH; if (mask & LOWER && (std::iswlower)(ch)) result |= LOWER; if (mask & PRINT && (std::iswprint)(ch)) result |= PRINT; if (mask & PUNCT && (std::iswpunct)(ch)) result |= PUNCT; if (mask & SPACE && (std::iswspace)(ch)) result |= SPACE; if (mask & UPPER && (std::iswupper)(ch)) result |= UPPER; if (mask & XDIGIT && (std::iswxdigit)(ch)) result |= XDIGIT; if (locname) std::setlocale (LC_CTYPE, curlocname); return std::ctype_base::mask (result); } bool libc_is (std::ctype_base::mask mask, wchar_t ch, const char *locname) { const std::ctype_base::mask m = libc_mask (mask, ch, locname); return 0 != (m & mask); } wchar_t widen (wchar_t, char ch, const char *locname) { char curlocname [256]; std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return UChar (ch); wchar_t result; #ifndef _RWSTD_NO_BTOWC result = std::btowc (UChar (ch)); #elif !defined (_RWSTD_NO_MBTOWC) if (1 != std::mbtowc (&result, &ch, 1)) result = wchar_t (WEOF); #else result = UChar (ch); #endif // _RWSTD_NO_BTOWC, _RWSTD_NO_MBTOWC if (locname) std::setlocale (LC_CTYPE, curlocname); return result; } char narrow (wchar_t ch, const char *locname) { char curlocname [256]; std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return UChar (ch); char result [MB_LEN_MAX]; #ifndef _RWSTD_NO_WCTOB result [0] = std::wctob (ch); #elif !defined (_RWSTD_NO_WCTOMB) if (1 != std::wctomb (result, ch)) result [0] = '\377'; #else result [0] = char (ch); #endif // _RWSTD_NO_WCTOB, _RWSTD_NO_WCTOMB if (locname) std::setlocale (LC_CTYPE, curlocname); return result [0]; } bool cond3 (std::ctype_base::mask mask, wchar_t ch, const char *locname) { char curlocname [256]; std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0)); if (0 == std::setlocale (LC_CTYPE, locname)) return false; #ifndef _RWSTD_NO_WCTOB const int byte = std::wctob (ch); #elif !defined (_RWSTD_NO_WCTOMB) char buf [MB_LEN_MAX]; const int byte = 1 == std::wctomb (buf, ch) ? buf [0] : EOF; #else const int byte = EOF; #endif // _RWSTD_NO_WCTOB, _RWSTD_NO_WCTOMB const bool result = EOF == byte || !libc_is (mask, char (byte), 0) || libc_is (mask, ch, 0); std::setlocale (LC_CTYPE, curlocname); return result; } #endif // _RWSTD_NO_WCHAR_T /**************************************************************************/ template void test_narrow_widen (charT, const char *cname) { // 22.2.1.1.2, p11 requires that the conditions below hold for all // facets ctc and ct whose types are ctype and ctype, // respectively: // [1] (ctc.is (M, c) || !ct.is (M, ctc.do_widen (c))) holds for // all narrow characters c // i.e., narrow characters that are NOT members of a certain // category may not belong to the same category when widened // Note: this implies that some sort of code conversion may // be necessary in order to implement a conforming do_widen() // 22.2.1.1.2, p13 requires that: // [2] (ct.do_widen (ct.do_narrow (c, dfault)) == c) holds unless // (ct.do_narrow (c, dfault) == dfault) holds // [3] (ct.is (M, c) || !ctc.is (M, ct.do_narrow (c, dfault))) holds // unless (ct.do_narrow(c, dfault) == dfault) holds // // C99: each of the iswxxx() functions returns true for each // wide character that corresponds (as if by a call to the // wctob() function) to a single-byte character for which the // corresponding character classification function from 7.4.1 // returns true, except that the iswgraph() and iswpunct() // functions may differ with respect to wide characters other // than L' ' that are both printing and white-space wide // characters. // // [4] (ct.do_narrow (c, default) - '0') evaluates to the digit // value of the character for all c for which ct.is(digit, c) // returns true rw_info (0, 0, __LINE__, "std::ctype<%s>::narrow(%1$s), widen(char)", cname); rw_info (0, 0, __LINE__, "std::ctype<%s>::narrow(const %1$s*, const %1$s*, char*), " "widen(const char*, const char*, %1$s*)", cname); #define STR(x) #x // verify condition [1] above; if it fails, verify that // the same condition also fails to hold when using the // corresponding libc functions #define COND1(what) \ if (!(ctc.is (what, c) || !ctp.is (what, ctp.widen (c)))) { \ rw_assert (!cond1 (what, c, locname), 0, __LINE__, \ "ctype::is (" STR (what) ", %{#lc})" \ " || !ctype<%1$s>::is (" STR (what) ", " \ "ctype<%s>::widen (%{#lc}) = %{#lc})" \ " returned false in locale(%#s)", \ c, cname, c, ctp.widen (c), locname); \ } else (void)0 // verify condition [3] above; if it fails, verify that // the same condition also fails to hold when using the // corresponding libc functions #define COND3(what) \ if ( ctp.narrow (ch, dfault) != dfault \ && !(ctp.is (what, ch) || !ctc.is (what, ctp.narrow (ch, dfault)))) { \ rw_assert (!cond3 (what, ch, locname), 0, __LINE__, \ "ctype<%s>::is (" STR (what) ", %{#lc})" \ " || !ctype::is (" STR (what) ", " \ "ctype<%1$s>::narrow (%{#lc}, %{#c}) = %{#lc})" \ " returned false in locale(%#s)", cname, ch, \ ch, dfault, ctp.narrow (ch, '\0'), \ locname); \ } else (void)0 char c_locname [256]; std::strcpy (c_locname, std::setlocale (LC_ALL, 0)); BEGIN_LOCALE_LOOP (UCHAR_MAX, locname, i) { #if defined (_RWSTD_OS_SUNOS) && _RWSTD_OS_MAJOR == 5 && _RWSTD_OS_MINOR <= 10 // avoid a libc SIGSEGV in mbtowc() in zh_HK and zh_TW // locales encoded using the BIG5 codeset (see bug #603) if ( 0 == std::strncmp ("zh_HK.BIG5", locname, 10) || 0 == std::strncmp ("zh_TW.BIG5", locname, 10)) continue; #endif // SunOS < 5.10 { // verify that the global C locale stays unchanged const char* const curlocname = std::setlocale (LC_ALL, 0); rw_assert (!std::strcmp (c_locname, curlocname), 0, __LINE__, "setlocale(LC_ALL, 0) == \"%s\", got \"%s\"", c_locname, curlocname); } if (0 == i) rw_info (0, 0, __LINE__, "std::ctype<%s> in locale(%#s)", cname, locname); const char c = char (i); const charT ch = charT (i); // verify that condition [1] holds COND1 (ALPHA); COND1 (CNTRL); COND1 (DIGIT); COND1 (GRAPH); COND1 (LOWER); COND1 (PRINT); COND1 (PUNCT); COND1 (SPACE); COND1 (UPPER); COND1 (XDIGIT); // verify that condition [2] holds char dfault = c ? '\0' : '\1'; const charT ret = ctp.widen (ctp.narrow (ch, dfault)); if (ret != charT (dfault) && ret != ch) { rw_assert (ch != widen (ch, narrow (ch, locname), locname), 0, __LINE__, "ctype<%s>::widen (ctype<%1$s>::narrow " "(%{#lc}, %{#c})) == %{#c}; got %{#c} " "in locale (%#s)", cname, ch, dfault, ch, ret, locname); } // finally verify that condition [3] holds COND3 (ALPHA); COND3 (CNTRL); COND3 (DIGIT); COND3 (GRAPH); COND3 (LOWER); COND3 (PRINT); COND3 (PUNCT); COND3 (SPACE); COND3 (UPPER); COND3 (XDIGIT); // now perform a relitively simple sanity check on the 3-argument // overloads of narrow() and widen(). Make sure that the 3-argument // overloads return the same value that the other overload produces // Only do this the first time through the locale list. if (i == 0) { // arrays of all tested narrow and wide characters charT wide_chars [UCHAR_MAX + 1]; char narrow_chars [UCHAR_MAX + 1]; charT narrow_in [UCHAR_MAX + 1]; char widen_in [UCHAR_MAX + 1]; // zero out the last element to allow printing wide_chars [UCHAR_MAX] = charT (); narrow_chars [UCHAR_MAX] = char (); narrow_in [UCHAR_MAX] = charT (); widen_in [UCHAR_MAX] = char (); // set the `dfault' character to something unlikely // but other than '\0' dfault = '\377'; for (unsigned j = 0; j <= UCHAR_MAX; j++) { wide_chars [j] = ctp.widen (char (j)); narrow_chars [j] = ctp.narrow (wide_chars [j], dfault); narrow_in [j] = ctp.widen (char (j)); widen_in [j] = char (j); } charT widen_out [UCHAR_MAX + 1]; char narrow_out [UCHAR_MAX + 1]; widen_out [UCHAR_MAX] = charT (); narrow_out [UCHAR_MAX] = char (); // narrow source buffer into the destination // and compare with expected values ctp.narrow (narrow_in, narrow_in + UCHAR_MAX + 1, dfault, narrow_out); bool success = !std::memcmp (narrow_chars, narrow_out, sizeof narrow_chars); rw_assert (success, 0, __LINE__, "ctype<%s>::narrow (%{*.*Ac}\", ... , %{#c}) " "== %{.*Ac}, got %{.*Ac} in locale (%#s)", cname, int (sizeof *narrow_in), UCHAR_MAX, narrow_in, dfault, UCHAR_MAX, narrow_chars, UCHAR_MAX, narrow_out, locname); // widen source buffer into the destination // and compare with expected values ctp.widen (widen_in, widen_in + UCHAR_MAX + 1, widen_out); success = !std::memcmp (wide_chars, widen_out, sizeof wide_chars); rw_assert (success, 0, __LINE__, "ctype<%s>::widen (%{.*Ac}, ...) == " "%{*.*Ac}, got %{*.*Ac} in locale (%#s)", cname, UCHAR_MAX, widen_in, int (sizeof *wide_chars), UCHAR_MAX, wide_chars, int (sizeof *wide_chars), UCHAR_MAX, widen_out, locname); } } END_LOCALE_LOOP (locname); } /**************************************************************************/ // exercise the behavior of the libc-based C++ locale implementation template void test_libc (charT, const char *cname) { test_narrow_widen (charT (), cname); } /**************************************************************************/ // exercise the behavior of our own C++ locale implementation template void test_libstd (charT, const char *cname) { rw_warn (0, 0, __LINE__, "stdcxx implementation of std::ctype<%s> not exercised", cname); } /**************************************************************************/ template void run_test (charT, const char *cname) { if (0) { // do a compile time only test on use_facet and has_facet _STD_HAS_FACET (std::ctype_byname, std::locale ()); _STD_USE_FACET (std::ctype_byname, std::locale ()); } // exercise the behavior of the libc-based C++ locale implementation test_libc (charT (), cname); // exercise the behavior of our own C++ locale implementation test_libstd (charT (), cname); } /**************************************************************************/ static int run_test (int, char**) { // set the global locale_list pointer to point to the array // of NUL-separated locale names set on the command line via // the --locales=... option, if specified, or to 0 (in which // case we'll generate our own list) locale_list = rw_opt_locales; run_test (char (), "char"); #ifndef _RWSTD_NO_WCHAR_T run_test (wchar_t (), "wchar_t"); #endif // _RWSTD_NO_WCHAR_T return 0; } /**************************************************************************/ int main (int argc, char *argv[]) { return rw_test (argc, argv, __FILE__, "lib.category.ctype", "narrow and widen", run_test, "|-locales= ", &rw_opt_setlocales, (void*)0 /* sentinel */); }