00001 /*! 00002 * @file nts.c 00003 * 00004 * @brief Manipulate null-terminated (@link #rchar rchar@endlink) 00005 * character strings. 00006 * 00007 * There are three character string types in this program: 00008 * null-terminated @link #rchar (rchar)@endlink strings 00009 * @e ala 'C' language, UTF-8 00010 * @link #CONSTANT_Utf8_info (CONSTANT_Utf8_info)@endlink strings, 00011 * and Unicode @link #jchar (jchar)[]@endlink strings. 00012 * 00013 * 00014 * @section Control 00015 * 00016 * \$URL: https://svn.apache.org/path/name/nts.c $ \$Id: nts.c 0 09/28/2005 dlydick $ 00017 * 00018 * Copyright 2005 The Apache Software Foundation 00019 * or its licensors, as applicable. 00020 * 00021 * Licensed under the Apache License, Version 2.0 ("the License"); 00022 * you may not use this file except in compliance with the License. 00023 * You may obtain a copy of the License at 00024 * 00025 * http://www.apache.org/licenses/LICENSE-2.0 00026 * 00027 * Unless required by applicable law or agreed to in writing, 00028 * software distributed under the License is distributed on an 00029 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 00030 * either express or implied. 00031 * 00032 * See the License for the specific language governing permissions 00033 * and limitations under the License. 00034 * 00035 * @version \$LastChangedRevision: 0 $ 00036 * 00037 * @date \$LastChangedDate: 09/28/2005 $ 00038 * 00039 * @author \$LastChangedBy: dlydick $ 00040 * Original code contributed by Daniel Lydick on 09/28/2005. 00041 * 00042 * @section Reference 00043 * 00044 */ 00045 00046 #include "arch.h" 00047 ARCH_COPYRIGHT_APACHE(nts, c, "$URL: https://svn.apache.org/path/name/nts.c $ $Id: nts.c 0 09/28/2005 dlydick $"); 00048 00049 00050 #include <string.h> 00051 00052 #include "jvmcfg.h" 00053 #include "cfmacros.h" 00054 #include "classfile.h" 00055 #include "nts.h" 00056 00057 00058 /*! 00059 * @brief Convert null-terminated string buffer into UTF8 buffer. 00060 * 00061 * 00062 * @param inbfr String (rchar *) string 00063 * 00064 * 00065 * @returns UTF8 structure containing length and rchar bfr (plus tag), 00066 * but return in (cp_info_dup) for full proper word alignment. 00067 * When done with the data, call HEAP_FREE_DATA() on it. 00068 * 00069 * @c @b rc->bytes UTF8 version of @b inbfr string 00070 * 00071 * @c @b rc->length Number of UTF8 bytes in 00072 * @c @b rc->bytes 00073 */ 00074 00075 cp_info_dup *nts_prchar2utf(rchar *inbfr) 00076 { 00077 jshort len = strlen(inbfr); 00078 00079 /* 00080 * Allocate enough heap space for output string, but within the 00081 * context of the output result type. The size calculation 00082 * replaces generic (cp_info) with specifc (CONSTANT_Utf8_info) 00083 * info, adjusting for the amount of string data to be stored 00084 * into the result. 00085 */ 00086 cp_info_dup *rc = HEAP_GET_DATA(sizeof(cp_info_dup) - 00087 sizeof(cp_info) + 00088 sizeof(CONSTANT_Utf8_info) - 00089 sizeof(u1) + 00090 len, 00091 rfalse); 00092 00093 /* Move (rchar *) string into (CONSTANT_Utf8_info) */ 00094 CONSTANT_Utf8_info *pcpui = PTR_THIS_CP_Utf8(rc); 00095 pcpui->tag = CONSTANT_Utf8; 00096 pcpui->length = len; 00097 00098 memcpy((jubyte *) pcpui->bytes, inbfr, len); 00099 00100 rc->empty[0] = FILL_INFO_DUP0; 00101 rc->empty[1] = FILL_INFO_DUP1; 00102 rc->empty[2] = FILL_INFO_DUP2; 00103 00104 return(rc); 00105 00106 } /* END of nts_prchar2utf() */ 00107 00108 00109 /*! 00110 * @brief Convert null-terminated string into Unicode buffer. 00111 * 00112 * 00113 * @param[in] inbfr Null-terminated string 00114 * 00115 * @param[out] outbfr Buffer for resulting Unicode character string. 00116 * This buffer will need to be the same size in 00117 * Unicode (jchar) characters as @b inbfr is in 00118 * native characters (rchar) since the 00119 * conversion is simply putting the ASCII 00120 * into the LS byte of the Unicode character. 00121 * 00122 * 00123 * @returns Two returns, one a buffer, the other a count: 00124 * 00125 * *outbfr Unicode version of @b inbfr string in @b outbfr 00126 * 00127 * charcnvcount (Return value of function) Number of Unicode 00128 * characters in @b outbfr. 00129 * 00130 */ 00131 00132 jshort nts_prchar2unicode(rchar *inbfr, jchar *outbfr) 00133 { 00134 jshort charcnvcount; 00135 00136 jchar inbfrcnv; 00137 00138 jshort len = strlen(inbfr); 00139 00140 00141 for (charcnvcount = 0; charcnvcount < len; charcnvcount++) 00142 { 00143 /* Put ASCII into LS byte of output */ 00144 inbfrcnv = 0; 00145 inbfrcnv |= inbfr[charcnvcount]; 00146 00147 outbfr[charcnvcount] = inbfrcnv; 00148 00149 } 00150 00151 /* Done. Return number of characters processed */ 00152 return(charcnvcount); 00153 00154 } /* END of nts_prchar2unicode() */ 00155 00156 00157 00158 00159 /*! 00160 * @brief Format a string buffer into UTF8 buffer with Java class 00161 * information, including number of array dimensions. 00162 * 00163 * 00164 * @param inbfr String (rchar *) string 00165 * 00166 * @param arraydims Number of array dimensions 00167 * 00168 * 00169 * @returns UTF8 structure containing length and rchar bfr (plus tag), 00170 * but return in (cp_info_dup) for full proper word alignment. 00171 * When done with the data, call HEAP_FREE_DATA() on it. 00172 * With @b inbfr of @c @b some/path/name/filename, 00173 * the result will be, with 3 array dimensions: 00174 * 00175 * @verbatim 00176 00177 [[[Lsome/path/name/filename;\0 00178 00179 @endverbatim 00180 * 00181 * The string then has a @c @b \\0 NUL character 00182 * appended to it for strfn() convenience, but this is 00183 * not reported in the UTF8 string length. 00184 * 00185 * 00186 * @c @b rc->bytes UTF8 version of @b inbfr string 00187 * 00188 * @c @b rc->length Number of UTF8 bytes in 00189 * @c @b rc->bytes 00190 * 00191 */ 00192 00193 cp_info_dup *nts_prchar2utf_classname(rchar *inbfr, 00194 jvm_array_dim arraydims) 00195 { 00196 jshort inbfrlen = strlen(inbfr); 00197 00198 /* 00199 * Allocate enough heap space for output string, but within the 00200 * context of the output result type. The size calculation 00201 * replaces generic (cp_info) with specifc (CONSTANT_Utf8_info) 00202 * info, adjusting for the amount of string data to be stored 00203 * into the result, as adjusted for Java class name formatting. 00204 */ 00205 00206 /* This calculation follows the above description of text format */ 00207 int fmtlen = arraydims + /* Bracket characters */ 00208 sizeof(u1) + /* Type specifier */ 00209 inbfrlen + /* Data */ 00210 sizeof(u1) + /* Type terminator */ 00211 sizeof(u1); /* NUL character */ 00212 00213 cp_info_dup *rc = 00214 HEAP_GET_DATA(sizeof(cp_info_dup) - /* Enclosing structure */ 00215 sizeof(cp_info) + /* Basic type */ 00216 sizeof(CONSTANT_Utf8_info) - /* UTF8 type */ 00217 sizeof(u1) + /* Data place holder */ 00218 fmtlen, /* UTF8 data area */ 00219 rfalse); 00220 00221 /* Move (rchar *) string into (CONSTANT_Utf8_info) */ 00222 CONSTANT_Utf8_info *pcpui = PTR_THIS_CP_Utf8(rc); 00223 pcpui->tag = CONSTANT_Utf8; 00224 pcpui->length = fmtlen - sizeof(u1);/*Adjust out trailing \0 rchar*/ 00225 00226 /* Format array dimensions, Java type name, class name, terminator*/ 00227 jvm_utf_string_index utfidx = 0; 00228 for (utfidx = 0; utfidx < arraydims; utfidx++) 00229 { 00230 pcpui->bytes[utfidx] = BASETYPE_CHAR_ARRAY; 00231 } 00232 00233 pcpui->bytes[utfidx] = BASETYPE_CHAR_L; 00234 utfidx++; 00235 00236 memcpy((jubyte *) &pcpui->bytes[utfidx], inbfr, inbfrlen); 00237 00238 rc->empty[0] = FILL_INFO_DUP0; 00239 rc->empty[1] = FILL_INFO_DUP1; 00240 rc->empty[2] = FILL_INFO_DUP2; 00241 00242 pcpui->bytes[utfidx + inbfrlen] = BASETYPE_CHAR_L_TERM; 00243 pcpui->bytes[utfidx + inbfrlen + 1] = '\0'; 00244 00245 return(rc); 00246 00247 } /* END of nts_prchar2utf_classname() */ 00248 00249 00250 /*! 00251 * @brief Report the number of array dimensions prefixing a Java type 00252 * string. 00253 * 00254 * No overflow condition is reported since it is assumed that @b inbfr 00255 * is a valid (rchar *) string. Notice that because this logic checks 00256 * @e only for array specifiers and does not care about the rest of the 00257 * string, it may be used to evaluate field descriptions, which will 00258 * not contain any class formatting information. 00259 * 00260 * If there is even a @e remote possibility that more than 00261 * CONSTANT_MAX_ARRAY_DIMS dimensions will be found, compare the 00262 * result of this function with the result of nts_prchar_isarray(). 00263 * If there is a discrepancy, then there was an overflow here. 00264 * Properly formatted class files will @e never contain code with 00265 * this condition. 00266 * 00267 * @note This function is identical to nts_get_prchararraydims() 00268 * except that it works on (rchar *) instead of 00269 * (CONSTANT_Utf8_info *). 00270 * 00271 * 00272 * @param inbfr (rchar *) string. 00273 * 00274 * 00275 * @returns Number of array dimensions in string. For example, 00276 * this string contains three array dimensions: 00277 * 00278 * @verbatim 00279 00280 [[[Lsome/path/name/filename; 00281 00282 @endverbatim 00283 * 00284 * The string does @e not have a @c @b \\0 NUL 00285 * character appended in this instance. If more than 00286 * CONSTANT_MAX_ARRAY_DIMS are located, the 00287 * result is zero-- no other error is reported. 00288 * 00289 */ 00290 00291 jvm_array_dim nts_get_prchar_arraydims(rchar *inbfr) 00292 { 00293 /* Make return code wider than max to check overflow */ 00294 u4 rc = 0; 00295 00296 /* Start scanning at beginning of string */ 00297 u1 *pclsname = (u1 *) inbfr; 00298 00299 /* Keep scanning until no more array specifications are found */ 00300 while (BASETYPE_CHAR_ARRAY == *pclsname++) 00301 { 00302 rc++; 00303 } 00304 00305 /* Check overflow, return default if so, else number of dimensions*/ 00306 if (CONSTANT_MAX_ARRAY_DIMS < rc) 00307 { 00308 return(LOCAL_CONSTANT_NO_ARRAY_DIMS); 00309 } 00310 else 00311 { 00312 /* Perform narrowing conversion into proper type for max */ 00313 return((jvm_array_dim) rc); 00314 } 00315 00316 } /* END of nts_get_prchar_arraydims() */ 00317 00318 00319 /*! 00320 * @brief Test whether or not a Java type string is an array or not. 00321 * 00322 * 00323 * @param inbfr (rchar *) string. 00324 * 00325 * 00326 * @returns @link #rtrue rtrue@endlink if this is an array 00327 * specfication, else @link #rfalse rfalse@endlink. 00328 * 00329 */ 00330 00331 rboolean nts_prchar_isarray(rchar *inbfr) 00332 { 00333 return((BASETYPE_CHAR_ARRAY == (u1) inbfr[0]) ? rtrue : rfalse); 00334 00335 } /* END of nts_prchar_isarray() */ 00336 00337 00338 /*! 00339 * @brief Verify if a null-terminated string contains PRIMATIVE 00340 * formatting or not. May be prefixed with array specifiers. 00341 * Everything after the base type character is ignored. 00342 * 00343 * 00344 * @param src Pointer to null-terminated string. 00345 * 00346 * 00347 * @returns @link #rtrue rtrue@endlink if string is formtted as 00348 * @c @b LClassName; but 00349 * @link #rfalse rfalse@endlink otherwise, may also have 00350 * array descriptor prefixed, 00351 * thus @c @b [[LClassName; 00352 * 00353 * @link #rtrue rtrue@endlink if string is formatted as 00354 * @c @b \@ (where @c @b \@ is any 00355 * @link #BASETYPE_CHAR_B BASETYPE_CHAR_x@endlink character), 00356 * @link #rfalse rfalse@endlink otherwise. May also have 00357 * array descriptor prefixed, 00358 * thus @c @b [[\@, eg, @c @b [[I or @c @b [[[[D 00359 * 00360 */ 00361 00362 rboolean nts_prchar_isprimativeformatted(rchar *src) 00363 { 00364 jvm_array_dim arraydims = nts_get_prchar_arraydims(src); 00365 00366 /* 00367 * Chk if @e any primative base type, 00368 * but NOT class (the @c @b L fmt) 00369 */ 00370 switch (src[arraydims]) 00371 { 00372 case BASETYPE_CHAR_B: 00373 case BASETYPE_CHAR_C: 00374 case BASETYPE_CHAR_D: 00375 case BASETYPE_CHAR_F: 00376 case BASETYPE_CHAR_I: 00377 case BASETYPE_CHAR_J: 00378 case BASETYPE_CHAR_S: 00379 case BASETYPE_CHAR_Z: 00380 return(rtrue); 00381 00382 default: 00383 return(rfalse); 00384 } 00385 00386 } /* END of nts_prchar_isprimativeformatted() */ 00387 00388 00389 /*! 00390 * @brief Verify if a null-terminated string contains CLASS formatting 00391 * or not. 00392 * 00393 * 00394 * @param src Pointer to null-terminated string. 00395 * 00396 * 00397 * @returns @link #rtrue rtrue@endlink if string is formatted as 00398 * @c @b LClasSName; but 00399 * @link #rfalse rfalse@endlink otherwise. May also have 00400 * array descriptor prefixed, thus @c @b [[LClassName; 00401 * 00402 * 00403 * @note This function works just like utf_isclassformatted() except 00404 * that it works on (rchar *) strings rather than 00405 * on (CONSTANT_Utf8_info) strings. 00406 */ 00407 00408 rboolean nts_prchar_isclassformatted(rchar *src) 00409 { 00410 u2 idx; 00411 rint rc = rfalse; 00412 00413 /* Chk array or class specifier. If neither, cannot be formatted */ 00414 switch (src[0]) 00415 { 00416 case BASETYPE_CHAR_ARRAY: 00417 case BASETYPE_CHAR_L: 00418 break; 00419 default: 00420 return(rfalse); 00421 } 00422 00423 /* 00424 * Now assume a potentially formatted string. 00425 * Check for termination byte next. If not present, 00426 * nothing else matters and string cannot be formatted. 00427 */ 00428 u1 *pbytes = src; 00429 int len = strlen(src); 00430 00431 for (idx = 0; idx < len; idx++) 00432 { 00433 if (BASETYPE_CHAR_L_TERM == pbytes[idx]) 00434 { 00435 rc = rtrue; 00436 break; 00437 } 00438 } 00439 00440 /* If not terminated, then cannot be class formatted */ 00441 if (rfalse == rc) 00442 { 00443 return(rc); 00444 } 00445 00446 /* Check initial formatting, including array spec */ 00447 jvm_array_dim arraydims = nts_get_prchar_arraydims(src); 00448 00449 /* If any array specs, look immediately past them for class spec */ 00450 if (BASETYPE_CHAR_L == pbytes[arraydims]) 00451 { 00452 return(rtrue); 00453 } 00454 else 00455 { 00456 return(rfalse); 00457 } 00458 00459 } /* END of nts_prchar_isclassformatted() */ 00460 00461 00462 /*! 00463 * @brief Strip a null-terminated string of any class formatting it 00464 * contains and return result in a heap-allocated buffer. When done 00465 * with this result, perform HEAP_DATA_FREE(result) to return buffer 00466 * to heap. 00467 * 00468 * 00469 * @param inbfr Pointer to null-terminated string that is potentially 00470 * formatted as @c @b LClassName; and which may 00471 * also have array descriptor prefixed, thus 00472 * @c @b [[LClassName; 00473 * 00474 * 00475 * @returns heap-allocated buffer containing @c @b ClassName 00476 * with no formatting, regardless of input formatting or lack 00477 * thereof. 00478 * 00479 * 00480 * @note This function works just like 00481 * utf_utf2utf_unformatted_classname() 00482 * except that it takes a (rchar *) string 00483 * rather than a (CONSTANT_Utf8_info) string 00484 * and returns a (rchar *). 00485 * 00486 */ 00487 00488 rchar *nts_prchar2prchar_unformatted_classname(rchar *inbfr) 00489 { 00490 int inbfrlen = strlen(inbfr); 00491 rint isfmt = nts_prchar_isclassformatted(inbfr); 00492 jvm_array_dim arraydims = nts_get_prchar_arraydims(inbfr); 00493 rchar *psemi; 00494 int allocsize; 00495 int startposn; 00496 00497 if (rtrue == isfmt) 00498 { 00499 psemi = strchr(inbfr, BASETYPE_CHAR_L_TERM); 00500 psemi--; 00501 00502 allocsize = inbfrlen - /* Input data size */ 00503 arraydims - /* Array specifiers */ 00504 sizeof(u1) - /* Type specifier */ 00505 sizeof(u1) + /* Type terminator */ 00506 sizeof(u1); /* NUL terminator */ 00507 00508 startposn = arraydims + sizeof(u1); /* Skip array & type */ 00509 } 00510 else 00511 { 00512 psemi = (rchar *) rnull; 00513 allocsize = inbfrlen + /* Input data size */ 00514 sizeof(u1); /* NUL terminator */ 00515 00516 startposn = 0; /* Copy the whole string */ 00517 } 00518 00519 rchar *rc = HEAP_GET_DATA(allocsize, rfalse); 00520 00521 /* Extract input class name from input buffer, add null char */ 00522 memcpy(rc, &inbfr[startposn], allocsize); 00523 rc[allocsize - sizeof(u1)] = '\0'; 00524 00525 return(rc); 00526 00527 } /* END of nts_prchar2prchar_unformatted_classname() */ 00528 00529 00530 /* EOF */ 00531