/*!
 * @file utf.c
 *
 * @brief Manipulate UTF-8 CONSTANT_Utf8_info character strings.
 *
 * There are three character string types in this program:
 * null-terminated @link #rchar (rchar)@endlink strings
 * @e ala 'C' language, UTF-8
 * @link #CONSTANT_Utf8_info (CONSTANT_Utf8_info)@endlink strings,
 * and Unicode @link #jchar (jchar)[]@endlink strings.
 *
 * Convert one or more UTF-8 (jbyte) bytes to and from Unicode (jchar)
 * characters, plus related functions, like comparison and string
 * length.
 *
 * Why are these functions called @b utf_XXX() instead of @b utf8_XXX()?
 * Originally, they were called @b utf8_XXX(), but when the JDK 1.5
 * class file spec, section 4, was reviewed (after working with the
 * 1.2/1.4 versions), it was discovered that certain other @b UTF-xx
 * formats were also provided in the spec, even if not accurately
 * defined.  (Due to errors in the revised class file specification,
 * the 21-bit UTF characters (6 bytes) will not be implemented until a
 * definitive correction is located.  However, in anticipation of this
 * correction, the functions are now named utf_XXX() without respect to
 * character bit width.)  Notice, however, that the spec, section 4,
 * defines a CONSTANT_Utf8 and a CONSTANT_Utf8_info.  Therefore, these
 * designations will remain in the code unless changed in the spec.
 *
 *
 * @section Control
 *
 * \$URL: https://svn.apache.org/path/name/utf.c $ \$Id: utf.c 0 09/28/2005 dlydick $
 *
 * Copyright 2005 The Apache Software Foundation
 * or its licensors, as applicable.
 *
 * Licensed under the Apache License, Version 2.0 ("the License");
 * you may not use this file except in compliance with the License.
00039 * You may obtain a copy of the License at 00040 * 00041 * http://www.apache.org/licenses/LICENSE-2.0 00042 * 00043 * Unless required by applicable law or agreed to in writing, 00044 * software distributed under the License is distributed on an 00045 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 00046 * either express or implied. 00047 * 00048 * See the License for the specific language governing permissions 00049 * and limitations under the License. 00050 * 00051 * @version \$LastChangedRevision: 0 $ 00052 * 00053 * @date \$LastChangedDate: 09/28/2005 $ 00054 * 00055 * @author \$LastChangedBy: dlydick $ 00056 * Original code contributed by Daniel Lydick on 09/28/2005. 00057 * 00058 * @section Reference 00059 * 00060 */ 00061 00062 #include "arch.h" 00063 ARCH_COPYRIGHT_APACHE(utf, c, "$URL: https://svn.apache.org/path/name/utf.c $ $Id: utf.c 0 09/28/2005 dlydick $"); 00064 00065 00066 #include <string.h> 00067 00068 #include "jvmcfg.h" 00069 #include "cfmacros.h" 00070 #include "classfile.h" 00071 #include "nts.h" 00072 #include "util.h" 00073 00074 00075 /*! 00076 * Store a Unicode @c @b ? when invalid UTF state found, 00077 * adj return code 00078 */ 00079 #define MAP_INVALID_UTF8_TO_QUESTION_MARK *outbfr++ = (jchar) '?'; \ 00080 inbfr++ 00081 00082 /*! Detect NUL character and quit when found */ 00083 #define RETURN_IF_NUL_BYTE if (UTF8_FORBIDDEN_ZERO == *inbfr) \ 00084 {return(charcnvcount); } 00085 00086 /*! 00087 * @brief Convert UTF8 buffer into Unicode buffer. 00088 * 00089 * 00090 * @param[in] utf_inbfr UTF string structure 00091 * 00092 * @param[out] outbfr Buffer for resulting Unicode character string 00093 * 00094 * 00095 * @returns Two returns, one a buffer, the other a count: 00096 * 00097 * *outbfr Unicode version of @b utf_inbfr string in @b outbfr 00098 * 00099 * charcnvcount (Return value of function) Number of Unicode 00100 * characters in @b outbfr. 
This will only be the 00101 * same as @b length when ALL UTF characters are 00102 * ASCII. It will otherwise be less than that. 00103 * 00104 * SPEC AMBIGUITY: In case of invalid characters, a Unicode 00105 * @c @b ? is inserted and processing continues. In this way, 00106 * the result string will still be invalid, but at least it will be 00107 * proper Unicode. This may prove more than is necessary, but the 00108 * spec says nothing at all about this matter. Since the NUL character 00109 * may not appear in UTF-8, if a buffer is terminated by a NUL in the 00110 * first @c @b utf_inbfr->length bytes, termination will be 00111 * assumed. If a @link #UTF8_FORBIDDEN_MIN UTF8_FORBIDDEN_xxx@endlink 00112 * character is read, it is converted to a Unicode @c @b ? also. 00113 * 00114 */ 00115 00116 jshort utf_utf2unicode(CONSTANT_Utf8_info *utf_inbfr, jchar *outbfr) 00117 { 00118 jshort charcnvcount; 00119 00120 jubyte *inbfr = (jubyte *) utf_inbfr->bytes; 00121 00122 for (charcnvcount = 0; 00123 charcnvcount < utf_inbfr->length; 00124 charcnvcount++) 00125 { 00126 RETURN_IF_NUL_BYTE; 00127 if (UTF8_SINGLE_MAX >= *inbfr) 00128 { 00129 /* Process one-byte form */ 00130 *outbfr++ = (jchar) *inbfr++; 00131 } 00132 else 00133 { 00134 /* Process two-byte form */ 00135 if (UTF8_TRIPLE_FIRST_VAL > *inbfr) 00136 { 00137 if (UTF8_DOUBLE_FIRST_VAL > *inbfr) 00138 { 00139 MAP_INVALID_UTF8_TO_QUESTION_MARK; 00140 continue; 00141 } 00142 00143 /* Store top half of Unicode character */ 00144 *outbfr = (jchar) 00145 (((*inbfr++) & UTF8_DOUBLE_FIRST_MASK0) 00146 << UTF8_DOUBLE_FIRST_SHIFT); 00147 00148 /* Abort if next byte is NUL */ 00149 RETURN_IF_NUL_BYTE; 00150 00151 if ((UTF8_DOUBLE_SECOND_VAL | UTF8_DOUBLE_SECOND_MASK0) 00152 < *inbfr) 00153 { 00154 /* 00155 * Map invalid forms to @c @b ? 
and 00156 * move to next char 00157 */ 00158 MAP_INVALID_UTF8_TO_QUESTION_MARK; 00159 continue; 00160 } 00161 00162 /* Store bottom half of Unicode character */ 00163 *outbfr++ |= (jchar) 00164 ((*inbfr++) & UTF8_DOUBLE_SECOND_MASK0); 00165 } 00166 else 00167 { 00168 /* Process three-byte form */ 00169 if ((UTF8_TRIPLE_FIRST_VAL | UTF8_TRIPLE_FIRST_MASK0) 00170 < *inbfr) 00171 { 00172 /* This also considers UTF8_FORBIDDEN_MIN/MAX 00173 bytes */ 00174 MAP_INVALID_UTF8_TO_QUESTION_MARK; 00175 continue; 00176 } 00177 00178 /* Store top third of Unicode character */ 00179 *outbfr = (jchar) 00180 (((*inbfr++) & UTF8_TRIPLE_FIRST_MASK0) 00181 << UTF8_TRIPLE_FIRST_SHIFT); 00182 00183 /* Abort if next byte is NUL */ 00184 RETURN_IF_NUL_BYTE; 00185 00186 if ((UTF8_TRIPLE_SECOND_VAL | UTF8_TRIPLE_SECOND_MASK0) 00187 < *inbfr) 00188 { 00189 /* 00190 * Map invalid forms to @c @b ? and 00191 * move to next char 00192 */ 00193 MAP_INVALID_UTF8_TO_QUESTION_MARK; 00194 continue; 00195 } 00196 00197 /* Store middle third of Unicode character */ 00198 *outbfr |= (jchar) 00199 (((*inbfr++) & UTF8_TRIPLE_SECOND_MASK0) 00200 << UTF8_TRIPLE_SECOND_SHIFT); 00201 00202 /* Abort if next byte is NUL */ 00203 RETURN_IF_NUL_BYTE; 00204 00205 if ((UTF8_TRIPLE_THIRD_VAL | UTF8_TRIPLE_THIRD_MASK0) 00206 < *inbfr) 00207 { 00208 /* 00209 * Map invalid forms to @c @b ? and 00210 * move to next char 00211 */ 00212 MAP_INVALID_UTF8_TO_QUESTION_MARK; 00213 continue; 00214 } 00215 00216 /* Store bottom third of Unicode character */ 00217 *outbfr++ |= (jchar) 00218 ((*inbfr++) & UTF8_TRIPLE_THIRD_MASK0); 00219 } 00220 } 00221 00222 } /* for (i) */ 00223 00224 /* Done. Return number of characters processed */ 00225 return(charcnvcount); 00226 00227 } /* END of utf_utf2unicode() */ 00228 00229 00230 /*! 00231 * @brief Convert a UTF string from a (CONSTANT_Utf8_info *) into a 00232 * null-terminated string by allocating heap and copying the UTF data. 
00233 * 00234 * When done with result, perform HEAP_FREE_DATA(result). 00235 * 00236 * @param src Pointer to UTF string, most likely from constant pool 00237 * 00238 * @returns Null-terminated string in heap or 00239 * @link #rnull rnull@endlink if heap alloc error. 00240 * 00241 */ 00242 00243 rchar *utf_utf2prchar(CONSTANT_Utf8_info *src) 00244 { 00245 /* Allocate heap for UTF data plus NUL byte */ 00246 rchar *rc = HEAP_GET_DATA(sizeof(rchar) + src->length, rfalse); 00247 00248 /* Copy to heap area */ 00249 memcpy(rc, &src->bytes[0], src->length); 00250 00251 /* Append NUL character */ 00252 rc[src->length] = '\0'; 00253 00254 /* Produce result */ 00255 return(rc); 00256 00257 } /* END of utf_utf2prchar() */ 00258 00259 00260 /*! 00261 * @brief Compare two strings of any length, and potentially neither 00262 * null-terminated, that is, could be a UTF string. 00263 * 00264 * If strings are of equal length, this function is equivalent 00265 * to @c @b strcmp(3). If not of equal length, result is like 00266 * comparing @c @b n bytes of @c @b strncmp(3), where non-equal 00267 * result is returned, but if equal result, it is like 00268 * @c @b n+1, where the final byte is a @c @b \\0 (NUL) 00269 * character, so longer string's @c @b n+1 character 00270 * is reported, either as positive value (@b s1 longer) or as 00271 * negative value (@b s2 longer). 00272 * 00273 * This function should be used on ALL string comparisons that 00274 * potentially involve lack of NUL termination, namely, @e anything 00275 * to do with UTF strings of any sort. It is recommended also for 00276 * any null-terminated string just so all string comparisons work 00277 * @e exactly alike, no matter whether (rchar *) or UTF, whether of 00278 * equal length or not. 00279 * 00280 * @param s1 (rchar *) to first string 00281 * 00282 * @param l1 Length of string @b s1, regardless of any 00283 * null termination being present or absent 00284 * in @b s1. 
00285 * 00286 * @param s2 (rchar *) to second string 00287 * 00288 * @param l2 length of string @b s2, regardless of any 00289 * null termination being present or absent 00290 * in @b s2. 00291 * 00292 * @returns lexicographical difference of <b><code>s1 - s2</code></b>. 00293 * Notice that the (rchar) data is implicitly unsigned 00294 * (although the actual signage is left to the compiler), 00295 * while the (jbyte) result is explicitly signed, due to the 00296 * arithmetic nature of the calculation. 00297 * 00298 */ 00299 static jbyte s1_s2_strncmp(u1 *s1, int l1, u1 *s2, int l2) 00300 { 00301 /* Compare shortest common run length */ 00302 int cmplen = (l1 < l2) ? l1 : l2; 00303 jbyte rc = strncmp(s1, s2, cmplen); 00304 00305 /* 00306 * THIS LOGIC IS THE SAME AS FOR unicode_strncmp(), BUT 00307 * OPERATES ON (jchar) instead of (rchar) 00308 */ 00309 00310 /* Return from several permutations of strlen */ 00311 if (l1 == l2) 00312 { 00313 return(rc); 00314 } 00315 else 00316 if (l1 > l2) 00317 { 00318 /* 00319 * If a difference existed, return it, else use 00320 * the last character of @b s1 as character minus 00321 * NUL byte (or zero), which equals character. 00322 */ 00323 if (0 != rc) 00324 { 00325 return(rc); 00326 } 00327 00328 /* 00329 * First character of @b s1 past length of @b s2 00330 */ 00331 return((jbyte) s1[l2]); 00332 } 00333 else 00334 { 00335 /* If a difference existed, return it, else use end of @b s2 */ 00336 /* 00337 * If a difference existed, return it, else use 00338 * the last character of @b s1 as NUL byte (or zero) 00339 * minus character, which equals negative of character. 00340 */ 00341 if (0 != rc) 00342 { 00343 return(rc); 00344 } 00345 00346 /* First character of @b s2 past length of @b s1 */ 00347 return((jbyte) (0 - s2[l1])); 00348 } 00349 } /* END of s1_s2_strncmp() */ 00350 00351 00352 /*! 
00353 * @brief Compare two UTF strings from constant_pool, @b s1 minus @b s2 00354 * 00355 * @param s1 First of two UTF strings to compare 00356 * 00357 * @param s2 Second of two UTF strings to compare 00358 * 00359 * @returns lexicographical value of first difference in strings, 00360 * else 0. 00361 * 00362 */ 00363 jbyte utf_utf_strcmp(CONSTANT_Utf8_info *s1, CONSTANT_Utf8_info *s2) 00364 { 00365 /* Perform unified comparison of both UTF strings */ 00366 return(s1_s2_strncmp(s1->bytes, s1->length, s2->bytes, s2->length)); 00367 00368 } /* END of utf_utf_strcmp() */ 00369 00370 00371 /*! 00372 * @brief Compare contents of null-terminated string to contents of 00373 * a UTF string from a class file structure. 00374 * 00375 * @param s1 Null-terminated string name 00376 * 00377 * @param pcfs2 ClassFile where UTF string is found 00378 * 00379 * @param cpidx2 Index in @b pcfs2 constant_pool of UTF string 00380 * 00381 * 00382 * @returns lexicographical value of first difference in strings, 00383 * else 0. 00384 * 00385 */ 00386 jbyte utf_prchar_pcfs_strcmp(rchar *s1, 00387 ClassFile *pcfs2, 00388 jvm_constant_pool_index cpidx2) 00389 { 00390 int l1 = strlen(s1); 00391 00392 u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, cpidx2); 00393 00394 int l2 = CP_THIS_STRLEN(pcfs2, cpidx2); 00395 00396 /* Perform unified comparison of null-terminated vs UTF string */ 00397 return(s1_s2_strncmp(s1, l1, s2, l2)); 00398 00399 } /* END of utf_prchar_pcfs_strcmp() */ 00400 00401 00402 /*! 00403 * @brief Compare contents of UTF string to contents of a UTF string 00404 * from a class file structure. 00405 * 00406 * @param s1 UTF string name 00407 * 00408 * @param pcfs2 ClassFile where UTF string is found 00409 * 00410 * @param cpidx2 Index in @b pcfs2 constant_pool of UTF string 00411 * 00412 * 00413 * @returns lexicographical value of first difference in strings, 00414 * else 0. 
00415 * 00416 */ 00417 jbyte utf_pcfs_strcmp(CONSTANT_Utf8_info *s1, 00418 ClassFile *pcfs2, 00419 jvm_constant_pool_index cpidx2) 00420 { 00421 u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, cpidx2); 00422 00423 int l2 = CP_THIS_STRLEN(pcfs2, cpidx2); 00424 00425 /* Perform unified comparison of null-terminated vs UTF string */ 00426 return(s1_s2_strncmp(s1->bytes, s1->length, s2, l2)); 00427 00428 } /* END of utf_pcfs_strcmp() */ 00429 00430 00431 /*! 00432 * @brief Common generic comparison, all parameters regularized. 00433 * 00434 * Compare a UTF or null-terminated string containing a 00435 * formatted or unformatted class name with an @e unformatted UTF 00436 * string from constant_pool. 00437 * Compare @b s1 minus @b s2, but skipping, where applicable, 00438 * the @b s1 initial BASETYPE_CHAR_L and the terminating 00439 * BASETYPE_CHAR_L_TERM, plus any array dimension modifiers. The second 00440 * string is specified by a constant_pool index. Notice that there 00441 * are @e NO formatted class string names in the (CONSTANT_Class_info) 00442 * entries of the constant_pool because such would be redundant. (Such 00443 * entries @e are the @e formal definition of the class.) 00444 * 00445 * 00446 * @param s1 UTF string pointer to u1 array of characters. 00447 * 00448 * @param l1 length of @b s1. 00449 * 00450 * @param pcfs2 ClassFile structure containing second string 00451 * (containing an @e unformatted class name) 00452 * 00453 * @param cpidx2 constant_pool index of CONSTANT_Class_info entry 00454 * whose name will be compared (by getting its 00455 * @link CONSTANT_Class_info#name_index name_index@endlink 00456 * and the UTF string name of it) 00457 * 00458 * 00459 * @returns lexicographical value of first difference in strings, 00460 * else 0. 
00461 * 00462 */ 00463 static jbyte utf_common_classname_strcmp(u1 *s1, 00464 int l1, 00465 ClassFile *pcfs2, 00466 jvm_constant_pool_index cpidx2) 00467 { 00468 CONSTANT_Class_info *pci = PTR_CP_ENTRY_CLASS(pcfs2, cpidx2); 00469 00470 u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, pci->name_index); 00471 int l2 = CP_THIS_STRLEN(pcfs2, pci->name_index); 00472 00473 if (rtrue == nts_prchar_isclassformatted(s1)) 00474 { 00475 s1++; /* Point PAST the BASETYPE_CHAR_L character */ 00476 l1--; 00477 00478 u1 *ps1end = strchr(s1, BASETYPE_CHAR_L_TERM); 00479 00480 /* Should @e always be @link #rtrue rtrue@endlink */ 00481 if (rnull != ps1end) 00482 { 00483 l1 = ps1end - (u1 *) s1; /* Adjust for terminator */ 00484 } 00485 } 00486 00487 00488 /* 00489 * Perform unified comparison of (possibly) null-terminated 00490 * vs UTF string 00491 */ 00492 return(s1_s2_strncmp(s1, l1, s2, l2)); 00493 00494 } /* END of utf_common_classname_strcmp() */ 00495 00496 00497 /*! 00498 * @brief Compare a null-terminated string containing a 00499 * formatted or unformatted class name with an @e unformatted UTF 00500 * string from constant_pool. 00501 * 00502 * 00503 * @param s1 Null-terminated string to compare, containing 00504 * formatted @e or unformatted class name 00505 * (utf_prchar_classname_strcmp() only). 00506 * 00507 * @param pcfs2 ClassFile structure containing second string 00508 * (containing an @e unformatted class name) 00509 * 00510 * @param cpidx2 constant_pool index of CONSTANT_Class_info entry 00511 * whose name will be compared (by getting its 00512 * @link CONSTANT_Class_info#name_index name_index@endlink 00513 * and the UTF string name of it) 00514 * 00515 * 00516 * @returns lexicographical value of first difference in strings, 00517 * else 0. 
00518 * 00519 */ 00520 jbyte utf_prchar_classname_strcmp(rchar *s1, 00521 ClassFile *pcfs2, 00522 jvm_constant_pool_index cpidx2) 00523 { 00524 return(utf_common_classname_strcmp((u1 *) s1, 00525 strlen(s1), 00526 pcfs2, 00527 cpidx2)); 00528 00529 } /* END of utf_prchar_classname_strcmp() */ 00530 00531 00532 /*! 00533 * @brief Compare a UTF string containing a 00534 * formatted or unformatted class name with an @e unformatted UTF 00535 * string from constant_pool. 00536 * 00537 * 00538 * @param s1 UTF string to compare, containing formatted @e or 00539 * unformatted class name. 00540 * 00541 * @param pcfs2 ClassFile structure containing second string 00542 * (containing an @e unformatted class name) 00543 * 00544 * @param cpidx2 constant_pool index of CONSTANT_Class_info entry 00545 * whose name will be compared (by getting its 00546 * @link CONSTANT_Class_info#name_index name_index@endlink 00547 * and the UTF string name of it) 00548 * 00549 * 00550 * @returns lexicographical value of first difference in strings, 00551 * else 0. 00552 * 00553 */ 00554 jbyte utf_classname_strcmp(CONSTANT_Utf8_info *s1, 00555 ClassFile *pcfs2, 00556 jvm_constant_pool_index cpidx2) 00557 { 00558 return(utf_common_classname_strcmp(s1->bytes, 00559 s1->length, 00560 pcfs2, 00561 cpidx2)); 00562 00563 } /* END of utf_classname_strcmp() */ 00564 00565 00566 /*! 00567 * @brief Report the number of array dimensions prefixing a Java type 00568 * string. 00569 * 00570 * No overflow condition is reported since it is assumed that @b inbfr 00571 * is formatted with correct length. Notice that because this logic 00572 * checks @e only for array specifiers and does not care about the rest 00573 * of the string, it may be used to evaluate field descriptions, which 00574 * will not contain any class formatting information. 
00575 * 00576 * If there is even a @e remote possibility that more than 00577 * CONSTANT_MAX_ARRAY_DIMS dimensions will be found, compare 00578 * the result of this function with the result of utf_isarray(). 00579 * If there is a discrepancy, then there was an overflow here. 00580 * Properly formatted class files will @e never contain code with 00581 * this condition. 00582 * 00583 * @note This function is identical to nts_get_arraydims() except 00584 * that it works on (CONSTANT_Utf8_info *) instead of (rchar *). 00585 * 00586 * 00587 * @param inbfr CONSTANT_Utf8_info string. 00588 * 00589 * 00590 * @returns Number of array dimensions in string. For example, 00591 * this string contains three array dimensions: 00592 * 00593 * @c @b [[[Lsome/path/name/filename; 00594 * 00595 * If more than CONSTANT_MAX_ARRAY_DIMS are located, the 00596 * result is zero-- no other error is reported. 00597 * 00598 */ 00599 00600 jvm_array_dim utf_get_utf_arraydims(CONSTANT_Utf8_info *inbfr) 00601 { 00602 /* Make return code wider than max to check overflow */ 00603 u4 rc = 0; 00604 00605 /* Start scanning at beginning of string */ 00606 u1 *pclsname = (u1 *) &inbfr->bytes[0]; 00607 00608 /* Keep scanning until no more array specifications are found */ 00609 while (BASETYPE_CHAR_ARRAY == *pclsname++) 00610 { 00611 rc++; 00612 } 00613 00614 /* Check overflow, return default if so, else number of dimensions*/ 00615 if (CONSTANT_MAX_ARRAY_DIMS < rc) 00616 { 00617 return(LOCAL_CONSTANT_NO_ARRAY_DIMS); 00618 } 00619 else 00620 { 00621 /* Perform narrowing conversion into proper type for max */ 00622 return((jvm_array_dim) rc); 00623 } 00624 00625 } /* END of utf_get_utf_arraydims() */ 00626 00627 00628 /*! 00629 * @brief Test whether or not a Java type string is an array or not. 00630 * 00631 * 00632 * @param inbfr CONSTANT_Utf8_info string. 00633 * 00634 * 00635 * @returns @link #rtrue rtrue@endlink if this is an array 00636 * specfication, else @link #rfalse rfalse@endlink. 
00637 * 00638 */ 00639 00640 rboolean utf_isarray(CONSTANT_Utf8_info *inbfr) 00641 { 00642 return((BASETYPE_CHAR_ARRAY == (u1)inbfr->bytes[0]) ? rtrue : rfalse); 00643 00644 } /* END of utf_isarray() */ 00645 00646 00647 /*! 00648 * @brief Convert and an un-formatted class name UTF string (of the 00649 * type @c @b ClassName and not of type 00650 * @c @b [[[LClassName) from a (CONSTANT_Utf8_info *) into 00651 * a null-terminated string with Java class formatting items. Result 00652 * is delivered in a heap-allocated buffer. When done with result, 00653 * perform HEAP_FREE_DATA(result) to return that buffer to the heap. 00654 * 00655 * This function @e will work on formatted class names 00656 * @c @b [[[LClassName; and the difference is benign, 00657 * but that is not its purpose. 00658 * 00659 * @param src Pointer to UTF string, most likely from constant pool 00660 * 00661 * @returns Null-terminated string @c @b LClasSName; in heap 00662 * or @link #rnull rnull@endlink if heap alloc error. 00663 * 00664 */ 00665 00666 rchar *utf_utf2prchar_classname(CONSTANT_Utf8_info *src) 00667 { 00668 /* Retrieve string from UTF data first */ 00669 rchar *pstr = utf_utf2prchar(src); 00670 00671 if (rnull == pstr) 00672 { 00673 return(pstr); 00674 } 00675 00676 /* Allocate heap for formatted version */ 00677 00678 rchar *rc = HEAP_GET_DATA(sizeof(rchar) + /* Type specifier */ 00679 sizeof(rchar) + /* Type spec terminator */ 00680 sizeof(rchar) + /* NUL character */ 00681 src->length, /* data */ 00682 rfalse); 00683 00684 int pstrlen = strlen(pstr); 00685 rboolean isfmt = nts_prchar_isclassformatted(pstr); 00686 00687 if (rtrue == isfmt) 00688 { 00689 /* 00690 * Copy entire string plus NUL character into heap area, 00691 * ignoring excess allocation when formatting is @e added 00692 * to string. 
00693 */ 00694 memcpy(&rc[0], pstr, pstrlen); 00695 rc[pstrlen] = '\0'; 00696 } 00697 else 00698 { 00699 /* Initial formatting */ 00700 rc[0] = BASETYPE_CHAR_L; 00701 00702 /* Copy to heap area */ 00703 memcpy(&rc[1], pstr, pstrlen); 00704 00705 /* Append end formatting and NUL character */ 00706 rc[1 + pstrlen] = BASETYPE_CHAR_L_TERM; 00707 rc[2 + pstrlen] = '\0'; 00708 } 00709 00710 HEAP_FREE_DATA(pstr); 00711 00712 00713 /* Produce result */ 00714 return(rc); 00715 00716 } /* END of utf_utf2prchar_classname() */ 00717 00718 00719 /*! 00720 * @brief Verify if a UTF string contains class formatting or not. 00721 * 00722 * 00723 * @param src Pointer to UTF string, most likely from constant pool 00724 * 00725 * 00726 * @returns @link #rtrue rtrue@endlink if string is formtted as 00727 * @c @b LClasSName; but 00728 * @link #rfalse rfalse@endlink otherwise, may also have 00729 * array descriptor prefixed, thus @c @b [[LClassName; 00730 * 00731 * 00732 * @note This function works just like nts_prchar_isclassformatted() 00733 * except that it works on (CONSTANT_Utf8_info) strings rather 00734 * than on (rchar *) strings. 00735 */ 00736 00737 rboolean utf_utf_isclassformatted(CONSTANT_Utf8_info *src) 00738 { 00739 jvm_utf_string_index utfidx; 00740 rboolean rc = rfalse; 00741 00742 /* Chk array or class specifier. If neither, cannot be formatted */ 00743 switch (src->bytes[0]) 00744 { 00745 case BASETYPE_CHAR_ARRAY: 00746 case BASETYPE_CHAR_L: 00747 break; 00748 default: 00749 return(rfalse); 00750 } 00751 00752 00753 /* 00754 * Now assume a potentially formatted string. 00755 * Check for termination byte next. If not present, 00756 * nothing else matters and string cannot be formatted. 
00757 */ 00758 u1 *pbytes = src->bytes; 00759 00760 for (utfidx = 0; utfidx < src->length; utfidx++) 00761 { 00762 if (BASETYPE_CHAR_L_TERM == pbytes[utfidx]) 00763 { 00764 rc = rtrue; 00765 break; 00766 } 00767 } 00768 00769 /* If not terminated, then cannot be class formatted */ 00770 if (rfalse == rc) 00771 { 00772 return(rc); 00773 } 00774 00775 /* Check initial formatting, including array spec */ 00776 jvm_array_dim arraydims = utf_get_utf_arraydims(src); 00777 00778 /* If any array specs, look immediately past them for class spec */ 00779 if (BASETYPE_CHAR_L == pbytes[arraydims]) 00780 { 00781 return(rtrue); 00782 } 00783 else 00784 { 00785 return(rfalse); 00786 } 00787 00788 } /* END of utf_utf_isclassformatted() */ 00789 00790 00791 /*! 00792 * 00793 * @brief Strip a UTF string of any class formatting it contains 00794 * and return result in a heap-allocated buffer. 00795 * 00796 * When done with this result, perform HEAP_DATA_FREE(result) to 00797 * return buffer to heap. 00798 * 00799 * 00800 * @param inbfr Pointer to UTF string that is potentially formatted 00801 * as @c @b LClassName; and which may also have 00802 * array descriptor prefixed, thus 00803 * @c @b [[LClassName; . This will 00804 * typically be an entry from the constant_pool. 00805 * 00806 * 00807 * @returns heap-allocated buffer containing @c @b ClassName 00808 * with no formatting, regardless of input formatting or 00809 * lack thereof. 00810 * 00811 * 00812 * @note This function works just like 00813 * nts_prchar2prchar_unformatted_classname() except that 00814 * it takes a (CONSTANT_Utf8_info) string rather 00815 * than a (rchar *) string and returns a (CONSTANT_Utf8_info *). 
00816 * 00817 */ 00818 00819 cp_info_dup *utf_utf2utf_unformatted_classname(cp_info_dup *inbfr) 00820 { 00821 rchar *pstr = utf_utf2prchar(PTR_THIS_CP_Utf8(inbfr)); 00822 00823 rchar *punf = nts_prchar2prchar_unformatted_classname(pstr); 00824 00825 HEAP_FREE_DATA(pstr); 00826 00827 cp_info_dup *rc = nts_prchar2utf(punf); 00828 00829 HEAP_FREE_DATA(punf); 00830 00831 return(rc); 00832 00833 } /* END of utf_utf2utf_unformatted_classname() */ 00834 00835 00836 /* EOF */ 00837