Main Page | Namespace List | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

utf.c

Go to the documentation of this file.
00001 /*!
00002  * @file utf.c
00003  *
00004  * @brief Manipulate UTF-8 CONSTANT_Utf8_info character strings.
00005  *
00006  * There are three character string types in this program:
00007  * null-terminated @link #rchar (rchar)@endlink strings
00008  * @e ala 'C' language, UTF-8
00009  * @link #CONSTANT_Utf8_info (CONSTANT_Utf8_info)@endlink strings,
00010  * and Unicode @link #jchar (jchar)[]@endlink strings.
00011  *
00012  * Convert one or more UTF-8 (jbyte) bytes to and from Unicode (jchar)
00013  * characters, plus related functions, like comparison and string
00014  * length.
00015  *
00016  * Why are these functions called @b utf_XXX() instead of @b utf8_XXX()?
00017  * Originally, they were called such, but when the JDK 1.5 class file
00018  * spec, section 4, was reviewed (after working with the 1.2/1.4
00019  * versions), it was discovered that certain other @b UTF-xx formats
00020  * were also provided in the spec, even if not accurately defined.
00021  * (Due to errors in the revised class file specification, the 21-bit
00022  * UTF characters (6 bytes) will not be implemented until a definitive
00023  * correction is located.  However, in anticipation of this correction,
00024  * the functions are now named utf_XXX() without respect to character
00025  * bit width.)  Notice, however, that the spec, section 4, defines a
00026  * CONSTANT_Utf8 and a CONSTANT_Utf8_info.  Therefore, these
00027  * designations will remain in the code unless changed in the spec.
00028  *
00029  *
00030  * @section Control
00031  *
00032  * \$URL: https://svn.apache.org/path/name/utf.c $ \$Id: utf.c 0 09/28/2005 dlydick $
00033  *
00034  * Copyright 2005 The Apache Software Foundation
00035  * or its licensors, as applicable.
00036  *
00037  * Licensed under the Apache License, Version 2.0 ("the License");
00038  * you may not use this file except in compliance with the License.
00039  * You may obtain a copy of the License at
00040  *
00041  *     http://www.apache.org/licenses/LICENSE-2.0
00042  *
00043  * Unless required by applicable law or agreed to in writing,
00044  * software distributed under the License is distributed on an
00045  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
00046  * either express or implied.
00047  *
00048  * See the License for the specific language governing permissions
00049  * and limitations under the License.
00050  *
00051  * @version \$LastChangedRevision: 0 $
00052  *
00053  * @date \$LastChangedDate: 09/28/2005 $
00054  *
00055  * @author \$LastChangedBy: dlydick $
00056  *         Original code contributed by Daniel Lydick on 09/28/2005.
00057  *
00058  * @section Reference
00059  *
00060  */
00061 
00062 #include "arch.h"
00063 ARCH_COPYRIGHT_APACHE(utf, c, "$URL: https://svn.apache.org/path/name/utf.c $ $Id: utf.c 0 09/28/2005 dlydick $");
00064 
00065 
00066 #include <string.h>
00067 
00068 #include "jvmcfg.h" 
00069 #include "cfmacros.h"
00070 #include "classfile.h"
00071 #include "nts.h"
00072 #include "util.h"
00073 
00074 
/*!
 * @brief Store a Unicode @c @b ? in the output buffer when an invalid
 * UTF-8 byte is found, then step past the offending input byte.
 *
 * Expects the local variables @b outbfr and @b inbfr to be in scope;
 * each one is advanced by one element.
 *
 * The body is wrapped in <b><code>do { } while (0)</code></b> so that
 * both statements stay together when the macro is used as the body of
 * an unbraced @c @b if or @c @b else.  Invoke it as a statement,
 * followed by a semicolon.
 */
#define MAP_INVALID_UTF8_TO_QUESTION_MARK \
    do                                    \
    {                                     \
        *outbfr++ = (jchar) '?';          \
        inbfr++;                          \
    } while (0)

/*!
 * @brief Detect a NUL byte at the current input position and return
 * from the enclosing function when one is found.
 *
 * Expects the local variables @b inbfr and @b charcnvcount to be in
 * scope.  The enclosing function returns @b charcnvcount.
 *
 * Wrapped in <b><code>do { } while (0)</code></b> so that an
 * @c @b else following the invocation cannot bind to the macro's
 * internal @c @b if.
 */
#define RETURN_IF_NUL_BYTE                 \
    do                                     \
    {                                      \
        if (UTF8_FORBIDDEN_ZERO == *inbfr) \
        {                                  \
            return(charcnvcount);          \
        }                                  \
    } while (0)
00085 
00086 /*!
00087  * @brief Convert UTF8 buffer into Unicode buffer.
00088  *
00089  *
00090  * @param[in]  utf_inbfr  UTF string structure
00091  *
00092  * @param[out] outbfr     Buffer for resulting Unicode character string
00093  *
00094  *
00095  * @returns  Two returns, one a buffer, the other a count:
00096  *
00097  *    *outbfr        Unicode version of @b utf_inbfr string in @b outbfr
00098  *
00099  *    charcnvcount   (Return value of function)  Number of Unicode
00100  *                     characters in @b outbfr.  This will only be the
00101  *                     same as @b length when ALL UTF characters are
00102  *                     ASCII.  It will otherwise be less than that.
00103  *
00104  * SPEC AMBIGUITY:  In case of invalid characters, a Unicode
00105  * @c @b ? is inserted and processing continues.  In this way,
00106  * the result string will still be invalid, but at least it will be
00107  * proper Unicode.  This may prove more than is necessary, but the
00108  * spec says nothing at all about this matter.  Since the NUL character
00109  * may not appear in UTF-8, if a buffer is terminated by a NUL in the
00110  * first @c @b utf_inbfr->length bytes, termination will be
00111  * assumed.  If a @link #UTF8_FORBIDDEN_MIN UTF8_FORBIDDEN_xxx@endlink
00112  * character is read, it is converted to a Unicode @c @b ? also.
00113  *
00114  */
00115 
00116 jshort utf_utf2unicode(CONSTANT_Utf8_info *utf_inbfr, jchar *outbfr)
00117 {
00118     jshort charcnvcount;
00119 
00120     jubyte *inbfr = (jubyte *) utf_inbfr->bytes;
00121 
00122     for (charcnvcount = 0;
00123          charcnvcount < utf_inbfr->length;
00124          charcnvcount++)
00125     {
00126         RETURN_IF_NUL_BYTE;
00127         if (UTF8_SINGLE_MAX >= *inbfr)
00128         {
00129             /* Process one-byte form */
00130             *outbfr++ = (jchar) *inbfr++;
00131         }
00132         else
00133         {
00134             /* Process two-byte form */
00135             if (UTF8_TRIPLE_FIRST_VAL > *inbfr)
00136             {
00137                 if (UTF8_DOUBLE_FIRST_VAL > *inbfr)
00138                 {
00139                     MAP_INVALID_UTF8_TO_QUESTION_MARK;
00140                     continue;
00141                 }
00142 
00143                 /* Store top half of Unicode character */
00144                 *outbfr = (jchar)
00145                           (((*inbfr++) & UTF8_DOUBLE_FIRST_MASK0)
00146                             << UTF8_DOUBLE_FIRST_SHIFT);
00147 
00148                 /* Abort if next byte is NUL */
00149                 RETURN_IF_NUL_BYTE;
00150 
00151                 if ((UTF8_DOUBLE_SECOND_VAL | UTF8_DOUBLE_SECOND_MASK0)
00152                     < *inbfr)
00153                 {
00154                     /*
00155                      * Map invalid forms to @c @b ? and
00156                      * move to next char 
00157                      */
00158                     MAP_INVALID_UTF8_TO_QUESTION_MARK;
00159                     continue;
00160                 }
00161 
00162                 /* Store bottom half of Unicode character */
00163                 *outbfr++ |= (jchar)
00164                              ((*inbfr++) & UTF8_DOUBLE_SECOND_MASK0);
00165             }
00166             else
00167             {
00168                 /* Process three-byte form */
00169                 if ((UTF8_TRIPLE_FIRST_VAL | UTF8_TRIPLE_FIRST_MASK0)
00170                        < *inbfr)
00171                 {
00172                     /* This also considers UTF8_FORBIDDEN_MIN/MAX
00173                          bytes */
00174                     MAP_INVALID_UTF8_TO_QUESTION_MARK;
00175                     continue;
00176                 }
00177 
00178                 /* Store top third of Unicode character */
00179                 *outbfr = (jchar)
00180                           (((*inbfr++) & UTF8_TRIPLE_FIRST_MASK0)
00181                             << UTF8_TRIPLE_FIRST_SHIFT);
00182 
00183                 /* Abort if next byte is NUL */
00184                 RETURN_IF_NUL_BYTE;
00185 
00186                 if ((UTF8_TRIPLE_SECOND_VAL | UTF8_TRIPLE_SECOND_MASK0)
00187                     < *inbfr)
00188                 {
00189                     /*
00190                      * Map invalid forms to @c @b ? and
00191                      * move to next char 
00192                      */
00193                     MAP_INVALID_UTF8_TO_QUESTION_MARK;
00194                     continue;
00195                 }
00196 
00197                 /* Store middle third of Unicode character */
00198                 *outbfr |= (jchar)
00199                            (((*inbfr++) & UTF8_TRIPLE_SECOND_MASK0)
00200                              << UTF8_TRIPLE_SECOND_SHIFT);
00201 
00202                 /* Abort if next byte is NUL */
00203                 RETURN_IF_NUL_BYTE;
00204 
00205                 if ((UTF8_TRIPLE_THIRD_VAL | UTF8_TRIPLE_THIRD_MASK0)
00206                     < *inbfr)
00207                 {
00208                     /*
00209                      * Map invalid forms to @c @b ? and
00210                      * move to next char 
00211                      */
00212                     MAP_INVALID_UTF8_TO_QUESTION_MARK;
00213                     continue;
00214                 }
00215 
00216                 /* Store bottom third of Unicode character */
00217                 *outbfr++ |= (jchar)
00218                              ((*inbfr++) & UTF8_TRIPLE_THIRD_MASK0);
00219             }
00220         }
00221 
00222     } /* for (i) */
00223 
00224     /* Done.  Return number of characters processed */
00225     return(charcnvcount);
00226 
00227 } /* END of utf_utf2unicode() */
00228 
00229 
00230 /*!
00231  * @brief Convert a UTF string from a (CONSTANT_Utf8_info *) into a
00232  * null-terminated string by allocating heap and copying the UTF data.
00233  *
00234  * When done with result, perform HEAP_FREE_DATA(result).
00235  *
00236  * @param   src   Pointer to UTF string, most likely from constant pool
00237  *
00238  * @returns Null-terminated string in heap or
00239  *          @link #rnull rnull@endlink if heap alloc error.
00240  *
00241  */
00242 
00243 rchar *utf_utf2prchar(CONSTANT_Utf8_info *src)
00244 {
00245     /* Allocate heap for UTF data plus NUL byte */
00246     rchar *rc = HEAP_GET_DATA(sizeof(rchar) + src->length, rfalse);
00247 
00248     /* Copy to heap area */
00249     memcpy(rc, &src->bytes[0], src->length);
00250 
00251     /* Append NUL character */
00252     rc[src->length] = '\0';
00253 
00254     /* Produce result */
00255     return(rc);
00256 
00257 } /* END of utf_utf2prchar() */
00258 
00259 
00260 /*!
00261  * @brief Compare two strings of any length, and potentially neither
00262  * null-terminated, that is, could be a UTF string.
00263  *
00264  * If strings are of equal length, this function is equivalent
00265  * to @c @b strcmp(3).  If not of equal length, result is like
00266  * comparing @c @b n bytes of @c @b strncmp(3), where non-equal
00267  * result is returned, but if equal result, it is like
00268  * @c @b n+1, where the final byte is a @c @b \\0 (NUL)
00269  * character, so longer string's @c @b n+1 character
00270  * is reported, either as positive value (@b s1 longer) or as
00271  * negative value (@b s2 longer).
00272  *
00273  * This function should be used on ALL string comparisons that
00274  * potentially involve lack of NUL termination, namely, @e anything
00275  * to do with UTF strings of any sort.  It is recommended also for
00276  * any null-terminated string just so all string comparisons work
00277  * @e exactly alike, no matter whether (rchar *) or UTF, whether of
00278  * equal length or not.
00279  *
00280  * @param  s1        (rchar *) to first string
00281  *
00282  * @param  l1        Length of string @b s1, regardless of any
00283  *                     null termination being present or absent
00284  *                    in @b s1.
00285  *
00286  * @param  s2        (rchar *) to second string
00287  *
00288  * @param  l2        length of string @b s2, regardless of any
00289  *                     null termination being present or absent
00290  *                     in @b s2.
00291  *
00292  * @returns lexicographical difference of <b><code>s1 - s2</code></b>.
00293  *          Notice that the (rchar) data is implicitly unsigned
00294  *          (although the actual signage is left to the compiler),
00295  *          while the (jbyte) result is explicitly signed, due to the
00296  *          arithmetic nature of the calculation.
00297  *
00298  */
00299 static jbyte s1_s2_strncmp(u1 *s1, int l1, u1 *s2, int l2)
00300 {
00301     /* Compare shortest common run length */
00302     int cmplen = (l1 < l2) ? l1 : l2;
00303     jbyte rc = strncmp(s1, s2, cmplen);
00304 
00305     /*
00306      * THIS LOGIC IS THE SAME AS FOR unicode_strncmp(), BUT
00307      * OPERATES ON (jchar) instead of (rchar)
00308      */
00309 
00310     /* Return from several permutations of strlen */
00311     if (l1 == l2)
00312     {
00313         return(rc);
00314     }
00315     else
00316     if (l1 > l2)
00317     {
00318         /*
00319          * If a difference existed, return it, else use
00320          * the last character of @b s1 as character minus
00321          * NUL byte (or zero), which equals character.
00322          */
00323         if (0 != rc)
00324         {
00325             return(rc);
00326         }
00327 
00328         /*
00329          * First character of @b s1 past length of @b s2 
00330          */
00331         return((jbyte) s1[l2]);
00332     }
00333     else
00334     {
00335         /* If a difference existed, return it, else use end of @b s2 */
00336         /*
00337          * If a difference existed, return it, else use
00338          * the last character of @b s1 as NUL byte (or zero)
00339          * minus character, which equals negative of character.
00340          */
00341         if (0 != rc)
00342         {
00343             return(rc);
00344         }
00345 
00346         /* First character of @b s2 past length of @b s1 */
00347         return((jbyte) (0 - s2[l1]));
00348     }
00349 } /* END of s1_s2_strncmp() */
00350 
00351 
00352 /*!
00353  * @brief Compare two UTF strings from constant_pool, @b s1 minus @b s2
00354  *
00355  * @param s1   First of two UTF strings to compare
00356  *
00357  * @param s2   Second of two UTF strings to compare
00358  *
00359  * @returns lexicographical value of first difference in strings,
00360  *          else 0.
00361  *
00362  */
00363 jbyte utf_utf_strcmp(CONSTANT_Utf8_info *s1, CONSTANT_Utf8_info *s2)
00364 {
00365     /* Perform unified comparison of both UTF strings */
00366     return(s1_s2_strncmp(s1->bytes, s1->length, s2->bytes, s2->length));
00367 
00368 } /* END of utf_utf_strcmp() */
00369 
00370 
00371 /*!
00372  * @brief Compare contents of null-terminated string to contents of
00373  * a UTF string from a class file structure.
00374  *
00375  * @param  s1     Null-terminated string name
00376  *
00377  * @param  pcfs2  ClassFile where UTF string is found
00378  *
00379  * @param  cpidx2 Index in @b pcfs2 constant_pool of UTF string
00380  *
00381  *
00382  * @returns lexicographical value of first difference in strings,
00383  *          else 0.
00384  *
00385  */
00386 jbyte utf_prchar_pcfs_strcmp(rchar                   *s1,
00387                              ClassFile               *pcfs2,
00388                              jvm_constant_pool_index  cpidx2)
00389 {
00390     int l1 = strlen(s1);
00391 
00392     u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, cpidx2);
00393 
00394     int l2 = CP_THIS_STRLEN(pcfs2, cpidx2);
00395 
00396     /* Perform unified comparison of null-terminated vs UTF string */
00397     return(s1_s2_strncmp(s1, l1, s2, l2));
00398 
00399 } /* END of utf_prchar_pcfs_strcmp() */
00400 
00401 
00402 /*!
00403  * @brief Compare contents of UTF string to contents of a UTF string
00404  * from a class file structure.
00405  *
00406  * @param  s1     UTF string name
00407  *
00408  * @param  pcfs2  ClassFile where UTF string is found
00409  *
00410  * @param  cpidx2 Index in @b pcfs2 constant_pool of UTF string
00411  *
00412  *
00413  * @returns lexicographical value of first difference in strings,
00414  *          else 0.
00415  *
00416  */
00417 jbyte utf_pcfs_strcmp(CONSTANT_Utf8_info      *s1,
00418                       ClassFile               *pcfs2,
00419                       jvm_constant_pool_index  cpidx2)
00420 {
00421     u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, cpidx2);
00422 
00423     int l2 = CP_THIS_STRLEN(pcfs2, cpidx2);
00424 
00425     /* Perform unified comparison of null-terminated vs UTF string */
00426     return(s1_s2_strncmp(s1->bytes, s1->length, s2, l2));
00427 
00428 } /* END of utf_pcfs_strcmp() */
00429 
00430 
00431 /*!
00432  * @brief Common generic comparison, all parameters regularized.
00433  *
00434  * Compare a UTF or null-terminated string containing a
00435  * formatted or unformatted class name with an @e unformatted UTF
00436  * string from constant_pool.
00437  * Compare @b s1 minus @b s2, but skipping, where applicable,
00438  * the @b s1 initial BASETYPE_CHAR_L and the terminating
00439  * BASETYPE_CHAR_L_TERM, plus any array dimension modifiers.  The second
00440  * string is specified by a constant_pool index.  Notice that there
00441  * are @e NO formatted class string names in the (CONSTANT_Class_info)
00442  * entries of the constant_pool because such would be redundant.  (Such
00443  * entries @e are the @e formal definition of the class.)
00444  *
00445  *
00446  * @param s1     UTF string pointer to u1 array of characters.
00447  *
00448  * @param l1     length of @b s1.
00449  *
00450  * @param pcfs2  ClassFile structure containing second string
00451  *               (containing an @e unformatted class name)
00452  *
00453  * @param cpidx2 constant_pool index of CONSTANT_Class_info entry
00454  *               whose name will be compared (by getting its
00455  *               @link CONSTANT_Class_info#name_index name_index@endlink
00456  *               and the UTF string name of it)
00457  *
00458  *
00459  * @returns lexicographical value of first difference in strings,
00460  *          else 0.
00461  *
00462  */
00463 static jbyte utf_common_classname_strcmp(u1                      *s1,
00464                                          int                      l1,
00465                                          ClassFile               *pcfs2,
00466                                          jvm_constant_pool_index cpidx2)
00467 {
00468     CONSTANT_Class_info *pci = PTR_CP_ENTRY_CLASS(pcfs2, cpidx2);
00469 
00470     u1 *s2 = PTR_CP_THIS_STRNAME(pcfs2, pci->name_index);
00471     int l2 = CP_THIS_STRLEN(pcfs2, pci->name_index);
00472 
00473     if (rtrue == nts_prchar_isclassformatted(s1))
00474     {
00475         s1++; /* Point PAST the BASETYPE_CHAR_L character */
00476         l1--;
00477 
00478         u1 *ps1end = strchr(s1, BASETYPE_CHAR_L_TERM);
00479 
00480         /* Should @e always be @link #rtrue rtrue@endlink */
00481         if (rnull != ps1end)
00482         {
00483             l1 = ps1end - (u1 *) s1; /* Adjust for terminator */
00484         }
00485     }
00486 
00487 
00488     /*
00489      * Perform unified comparison of (possibly) null-terminated
00490      * vs UTF string
00491      */
00492     return(s1_s2_strncmp(s1, l1, s2, l2));
00493 
00494 } /* END of utf_common_classname_strcmp() */
00495 
00496 
00497 /*!
00498  * @brief Compare a null-terminated string containing a
00499  * formatted or unformatted class name with an @e unformatted UTF
00500  * string from constant_pool.
00501  *
00502  *
00503  * @param s1     Null-terminated string to compare, containing
00504  *               formatted @e or unformatted class name
00505  *               (utf_prchar_classname_strcmp() only).
00506  *
00507  * @param pcfs2  ClassFile structure containing second string
00508  *               (containing an @e unformatted class name)
00509  *
00510  * @param cpidx2 constant_pool index of CONSTANT_Class_info entry
00511  *               whose name will be compared (by getting its
00512  *               @link CONSTANT_Class_info#name_index name_index@endlink
00513  *               and the UTF string name of it)
00514  *
00515  *
00516  * @returns lexicographical value of first difference in strings,
00517  *          else 0.
00518  *
00519  */
00520 jbyte utf_prchar_classname_strcmp(rchar                   *s1,
00521                                   ClassFile               *pcfs2,
00522                                   jvm_constant_pool_index  cpidx2)
00523 {
00524     return(utf_common_classname_strcmp((u1 *) s1,
00525                                        strlen(s1),
00526                                        pcfs2,
00527                                        cpidx2));
00528 
00529 } /* END of utf_prchar_classname_strcmp() */
00530 
00531 
00532 /*!
00533  * @brief Compare a UTF string containing a
00534  * formatted or unformatted class name with an @e unformatted UTF
00535  * string from constant_pool.
00536  *
00537  *
00538  * @param s1     UTF string to compare, containing formatted @e or
00539  *               unformatted class name.
00540  *
00541  * @param pcfs2  ClassFile structure containing second string
00542  *               (containing an @e unformatted class name)
00543  *
00544  * @param cpidx2 constant_pool index of CONSTANT_Class_info entry
00545  *               whose name will be compared (by getting its
00546  *               @link CONSTANT_Class_info#name_index name_index@endlink
00547  *               and the UTF string name of it)
00548  *
00549  *
00550  * @returns lexicographical value of first difference in strings,
00551  *          else 0.
00552  *
00553  */
00554 jbyte utf_classname_strcmp(CONSTANT_Utf8_info      *s1,
00555                            ClassFile               *pcfs2,
00556                            jvm_constant_pool_index  cpidx2)
00557 {
00558     return(utf_common_classname_strcmp(s1->bytes,
00559                                        s1->length,
00560                                        pcfs2,
00561                                        cpidx2));
00562 
00563 } /* END of utf_classname_strcmp() */
00564 
00565 
00566 /*!
00567  * @brief Report the number of array dimensions prefixing a Java type
00568  * string.
00569  *
00570  * No overflow condition is reported since it is assumed that @b inbfr
00571  * is formatted with correct length.  Notice that because this logic
00572  * checks @e only for array specifiers and does not care about the rest
00573  * of the string, it may be used to evaluate field descriptions, which
00574  * will not contain any class formatting information.
00575  *
00576  * If there is even a @e remote possibility that more than
00577  * CONSTANT_MAX_ARRAY_DIMS dimensions will be found, compare
00578  * the result of this function with the result of utf_isarray().
00579  * If there is a discrepancy, then there was an overflow here.
00580  * Properly formatted class files will @e never contain code with
00581  * this condition.
00582  *
00583  * @note  This function is identical to nts_get_arraydims() except
00584  *        that it works on (CONSTANT_Utf8_info *) instead of (rchar *).
00585  *
00586  *
00587  * @param    inbfr   CONSTANT_Utf8_info string.
00588  *
00589  *
00590  * @returns  Number of array dimensions in string.  For example,
00591  *           this string contains three array dimensions:
00592  *
00593  *               @c @b [[[Lsome/path/name/filename;
00594  *
00595  *           If more than CONSTANT_MAX_ARRAY_DIMS are located, the
00596  *           result is zero-- no other error is reported.
00597  *
00598  */
00599 
00600 jvm_array_dim utf_get_utf_arraydims(CONSTANT_Utf8_info *inbfr)
00601 {
00602     /* Make return code wider than max to check overflow */
00603     u4 rc = 0;
00604 
00605     /* Start scanning at beginning of string */
00606     u1 *pclsname = (u1 *) &inbfr->bytes[0];
00607 
00608     /* Keep scanning until no more array specifications are found */
00609     while (BASETYPE_CHAR_ARRAY == *pclsname++)
00610     {
00611         rc++; 
00612     }
00613 
00614     /* Check overflow, return default if so, else number of dimensions*/
00615     if (CONSTANT_MAX_ARRAY_DIMS < rc)
00616     {
00617         return(LOCAL_CONSTANT_NO_ARRAY_DIMS);
00618     }
00619     else
00620     {
00621         /* Perform narrowing conversion into proper type for max */
00622         return((jvm_array_dim) rc);
00623     }
00624 
00625 } /* END of utf_get_utf_arraydims() */
00626 
00627 
00628 /*!
00629  * @brief Test whether or not a Java type string is an array or not.
00630  *
00631  *
00632  * @param    inbfr   CONSTANT_Utf8_info string.
00633  *
00634  *
00635  * @returns  @link #rtrue rtrue@endlink if this is an array
00636  *           specfication, else @link #rfalse rfalse@endlink.
00637  *
00638  */
00639 
00640 rboolean utf_isarray(CONSTANT_Utf8_info *inbfr)
00641 {
00642   return((BASETYPE_CHAR_ARRAY == (u1)inbfr->bytes[0]) ? rtrue : rfalse);
00643 
00644 } /* END of utf_isarray() */
00645 
00646 
00647 /*!
00648  * @brief Convert and an un-formatted class name UTF string (of the
00649  * type @c @b ClassName and not of type
00650  * @c @b [[[LClassName) from a (CONSTANT_Utf8_info *) into
00651  * a null-terminated string with Java class formatting items.  Result
00652  * is delivered in a heap-allocated buffer.  When done with result,
00653  * perform HEAP_FREE_DATA(result) to return that buffer to the heap.
00654  *
00655  * This function @e will work on formatted class names
00656  * @c @b [[[LClassName; and the difference is benign,
00657  * but that is not its purpose.
00658  *
00659  * @param  src   Pointer to UTF string, most likely from constant pool
00660  *
00661  * @returns Null-terminated string @c @b LClasSName; in heap
00662  *          or @link #rnull rnull@endlink if heap alloc error.
00663  *
00664  */
00665 
00666 rchar *utf_utf2prchar_classname(CONSTANT_Utf8_info *src)
00667 {
00668     /* Retrieve string from UTF data first */
00669     rchar *pstr = utf_utf2prchar(src);
00670 
00671     if (rnull == pstr)
00672     {
00673         return(pstr);
00674     }
00675 
00676     /* Allocate heap for formatted version */
00677 
00678     rchar *rc = HEAP_GET_DATA(sizeof(rchar) + /* Type specifier */
00679                               sizeof(rchar) + /* Type spec terminator */
00680                               sizeof(rchar) + /* NUL character */
00681                               src->length,    /* data */
00682                              rfalse);
00683 
00684     int pstrlen = strlen(pstr);
00685     rboolean isfmt  = nts_prchar_isclassformatted(pstr);
00686 
00687     if (rtrue == isfmt)
00688     {
00689         /*
00690          * Copy entire string plus NUL character into heap area,
00691          * ignoring excess allocation when formatting is @e added
00692          * to string.
00693          */
00694         memcpy(&rc[0], pstr, pstrlen);
00695         rc[pstrlen] = '\0';
00696     }
00697     else
00698     {
00699         /* Initial formatting */
00700         rc[0] = BASETYPE_CHAR_L;
00701 
00702         /* Copy to heap area */
00703         memcpy(&rc[1], pstr, pstrlen);
00704 
00705         /* Append end formatting and NUL character */
00706         rc[1 + pstrlen] = BASETYPE_CHAR_L_TERM;
00707         rc[2 + pstrlen] = '\0';
00708     }
00709 
00710     HEAP_FREE_DATA(pstr);
00711 
00712 
00713     /* Produce result */
00714     return(rc);
00715 
00716 } /* END of utf_utf2prchar_classname() */
00717 
00718 
00719 /*!
00720  * @brief Verify if a UTF string contains class formatting or not.
00721  *
00722  *
00723  * @param  src   Pointer to UTF string, most likely from constant pool
00724  *
00725  *
00726  * @returns @link #rtrue rtrue@endlink if string is formtted as
00727  *          @c @b LClasSName; but
00728  *          @link #rfalse rfalse@endlink otherwise, may also have
00729  *          array descriptor prefixed, thus @c @b [[LClassName;
00730  *
00731  *
00732  * @note  This function works just like nts_prchar_isclassformatted()
00733  *        except that it works on (CONSTANT_Utf8_info) strings rather
00734  *        than on (rchar *) strings.
00735  */
00736 
00737 rboolean utf_utf_isclassformatted(CONSTANT_Utf8_info *src)
00738 {
00739     jvm_utf_string_index utfidx;
00740     rboolean rc = rfalse;
00741 
00742     /* Chk array or class specifier.  If neither, cannot be formatted */
00743     switch (src->bytes[0])
00744     {
00745         case BASETYPE_CHAR_ARRAY:
00746         case BASETYPE_CHAR_L:
00747             break;
00748         default:
00749             return(rfalse);
00750     }
00751 
00752 
00753     /*
00754      * Now assume a potentially formatted string.
00755      * Check for termination byte next.  If not present,
00756      * nothing else matters and string cannot be formatted.
00757      */
00758     u1 *pbytes = src->bytes;
00759 
00760     for (utfidx = 0; utfidx < src->length; utfidx++)
00761     {
00762         if (BASETYPE_CHAR_L_TERM == pbytes[utfidx])
00763         {
00764             rc = rtrue;
00765             break;
00766         }
00767     }
00768 
00769     /* If not terminated, then cannot be class formatted */
00770     if (rfalse == rc)
00771     {
00772         return(rc);
00773     }
00774 
00775     /* Check initial formatting, including array spec */
00776     jvm_array_dim arraydims = utf_get_utf_arraydims(src);
00777 
00778     /* If any array specs, look immediately past them for class spec */
00779     if (BASETYPE_CHAR_L == pbytes[arraydims])
00780     {
00781         return(rtrue);
00782     }
00783     else
00784     {
00785         return(rfalse);
00786     }
00787 
00788 } /* END of utf_utf_isclassformatted() */
00789 
00790 
00791 /*!
00792  *
00793  * @brief Strip a UTF string of any class formatting it contains
00794  * and return result in a heap-allocated buffer.
00795  *
00796  * When done with this result, perform HEAP_DATA_FREE(result) to
00797  * return buffer to heap.
00798  *
00799  *
00800  * @param  inbfr   Pointer to UTF string that is potentially formatted
00801  *                 as @c @b LClassName; and which may also have
00802  *                 array descriptor prefixed, thus
00803  *                 @c @b [[LClassName; .  This will
00804  *                 typically be an entry from the constant_pool.
00805  *
00806  *
00807  * @returns heap-allocated buffer containing @c @b ClassName
00808  *          with no formatting, regardless of input formatting or
00809  *          lack thereof.
00810  *
00811  *
00812  * @note  This function works just like
00813  *        nts_prchar2prchar_unformatted_classname() except that
00814  *        it takes a (CONSTANT_Utf8_info) string rather
00815  *        than a (rchar *) string and returns a (CONSTANT_Utf8_info *).
00816  *
00817  */
00818 
00819 cp_info_dup *utf_utf2utf_unformatted_classname(cp_info_dup *inbfr)
00820 {
00821     rchar *pstr = utf_utf2prchar(PTR_THIS_CP_Utf8(inbfr));
00822 
00823     rchar *punf = nts_prchar2prchar_unformatted_classname(pstr);
00824 
00825     HEAP_FREE_DATA(pstr);
00826 
00827     cp_info_dup *rc = nts_prchar2utf(punf);
00828 
00829     HEAP_FREE_DATA(punf);
00830 
00831     return(rc);
00832 
00833 } /* END of utf_utf2utf_unformatted_classname() */
00834 
00835 
00836 /* EOF */
00837 

Generated on Fri Sep 30 18:49:09 2005 by  doxygen 1.4.4