Main Page | Namespace List | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

nts.c

Go to the documentation of this file.
00001 /*!
00002  * @file nts.c
00003  *
00004  * @brief Manipulate null-terminated (@link #rchar rchar@endlink)
00005  * character strings.
00006  *
00007  * There are three character string types in this program:
00008  * null-terminated @link #rchar (rchar)@endlink strings
00009  * @e ala 'C' language, UTF-8
00010  * @link #CONSTANT_Utf8_info (CONSTANT_Utf8_info)@endlink strings,
00011  * and Unicode @link #jchar (jchar)[]@endlink strings.
00012  *
00013  *
00014  * @section Control
00015  *
00016  * \$URL: https://svn.apache.org/path/name/nts.c $ \$Id: nts.c 0 09/28/2005 dlydick $
00017  *
00018  * Copyright 2005 The Apache Software Foundation
00019  * or its licensors, as applicable.
00020  *
00021  * Licensed under the Apache License, Version 2.0 ("the License");
00022  * you may not use this file except in compliance with the License.
00023  * You may obtain a copy of the License at
00024  *
00025  *     http://www.apache.org/licenses/LICENSE-2.0
00026  *
00027  * Unless required by applicable law or agreed to in writing,
00028  * software distributed under the License is distributed on an
00029  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
00030  * either express or implied.
00031  *
00032  * See the License for the specific language governing permissions
00033  * and limitations under the License.
00034  *
00035  * @version \$LastChangedRevision: 0 $
00036  *
00037  * @date \$LastChangedDate: 09/28/2005 $
00038  *
00039  * @author \$LastChangedBy: dlydick $
00040  *         Original code contributed by Daniel Lydick on 09/28/2005.
00041  *
00042  * @section Reference
00043  *
00044  */
00045 
00046 #include "arch.h"
00047 ARCH_COPYRIGHT_APACHE(nts, c, "$URL: https://svn.apache.org/path/name/nts.c $ $Id: nts.c 0 09/28/2005 dlydick $");
00048 
00049 
00050 #include <string.h>
00051 
00052 #include "jvmcfg.h" 
00053 #include "cfmacros.h"
00054 #include "classfile.h"
00055 #include "nts.h"
00056 
00057 
00058 /*!
00059  * @brief Convert null-terminated string buffer into UTF8 buffer.
00060  *
00061  *
00062  * @param    inbfr   String (rchar *) string
00063  *
00064  *
00065  * @returns  UTF8 structure containing length and rchar bfr (plus tag),
00066  *           but return in (cp_info_dup) for full proper word alignment.
00067  *           When done with the data, call HEAP_FREE_DATA() on it.
00068  *
00069  *    @c @b rc->bytes      UTF8 version of @b inbfr string
00070  *
00071  *    @c @b rc->length     Number of UTF8 bytes in
00072  *                          @c @b rc->bytes
00073  */
00074 
00075 cp_info_dup *nts_prchar2utf(rchar *inbfr)
00076 {
00077     jshort len = strlen(inbfr);
00078 
00079     /*
00080      * Allocate enough heap space for output string, but within the
00081      * context of the output result type.  The size calculation
00082      * replaces generic (cp_info) with specifc (CONSTANT_Utf8_info)
00083      * info, adjusting for the amount of string data to be stored
00084      * into the result.
00085      */
00086     cp_info_dup *rc = HEAP_GET_DATA(sizeof(cp_info_dup) -
00087                                         sizeof(cp_info) +
00088                                         sizeof(CONSTANT_Utf8_info) -
00089                                         sizeof(u1) +
00090                                         len,
00091                                     rfalse);
00092 
00093     /* Move (rchar *) string into (CONSTANT_Utf8_info) */
00094     CONSTANT_Utf8_info *pcpui = PTR_THIS_CP_Utf8(rc);
00095     pcpui->tag = CONSTANT_Utf8;
00096     pcpui->length = len;
00097 
00098     memcpy((jubyte *) pcpui->bytes, inbfr, len);
00099 
00100     rc->empty[0] = FILL_INFO_DUP0;
00101     rc->empty[1] = FILL_INFO_DUP1;
00102     rc->empty[2] = FILL_INFO_DUP2;
00103 
00104     return(rc);
00105 
00106 } /* END of nts_prchar2utf() */
00107 
00108 
00109 /*!
00110  * @brief Convert null-terminated string into Unicode buffer.
00111  *
00112  *
00113  * @param[in]  inbfr     Null-terminated string
00114  *
00115  * @param[out] outbfr    Buffer for resulting Unicode character string.
00116  *                       This buffer will need to be the same size in
00117  *                       Unicode (jchar) characters as @b inbfr is in
00118  *                       native characters (rchar) since the
00119  *                       conversion is simply putting the ASCII
00120  *                       into the LS byte of the Unicode character.
00121  *
00122  *
00123  * @returns  Two returns, one a buffer, the other a count:
00124  *
00125  *    *outbfr        Unicode version of @b inbfr string in @b outbfr
00126  *
00127  *    charcnvcount   (Return value of function)  Number of Unicode
00128  *                     characters in @b outbfr.
00129  *
00130  */
00131 
00132 jshort nts_prchar2unicode(rchar *inbfr, jchar *outbfr)
00133 {
00134     jshort charcnvcount;
00135 
00136     jchar inbfrcnv;
00137 
00138     jshort len = strlen(inbfr);
00139 
00140     
00141     for (charcnvcount = 0; charcnvcount < len; charcnvcount++)
00142     {
00143         /* Put ASCII into LS byte of output */
00144         inbfrcnv = 0;
00145         inbfrcnv |= inbfr[charcnvcount];
00146 
00147         outbfr[charcnvcount] = inbfrcnv;
00148 
00149     }
00150 
00151     /* Done.  Return number of characters processed */
00152     return(charcnvcount);
00153 
00154 } /* END of nts_prchar2unicode() */
00155 
00156 
00157 
00158 
00159 /*!
00160  * @brief Format a string buffer into UTF8 buffer with Java class
00161  * information, including number of array dimensions.
00162  *
00163  *
00164  * @param    inbfr     String (rchar *) string
00165  *
00166  * @param    arraydims Number of array dimensions
00167  *
00168  *
00169  * @returns  UTF8 structure containing length and rchar bfr (plus tag),
00170  *           but return in (cp_info_dup) for full proper word alignment.
00171  *           When done with the data, call HEAP_FREE_DATA() on it.
00172  *           With @b inbfr of @c @b some/path/name/filename,
00173  *           the result will be, with 3 array dimensions:
00174  *
00175  * @verbatim
00176 
00177                  [[[Lsome/path/name/filename;\0
00178 
00179    @endverbatim
00180  *
00181  *           The string then has a @c @b \\0 NUL character
00182  *           appended to it for strfn() convenience, but this is
00183  *           not reported in the UTF8 string length.
00184  *
00185  *
00186  *    @c @b rc->bytes     UTF8 version of @b inbfr string
00187  *
00188  *    @c @b rc->length    Number of UTF8 bytes in
00189  *                         @c @b rc->bytes
00190  *
00191  */
00192 
00193 cp_info_dup *nts_prchar2utf_classname(rchar         *inbfr,
00194                                       jvm_array_dim  arraydims)
00195 {
00196     jshort inbfrlen = strlen(inbfr);
00197 
00198     /*
00199      * Allocate enough heap space for output string, but within the
00200      * context of the output result type.  The size calculation
00201      * replaces generic (cp_info) with specifc (CONSTANT_Utf8_info)
00202      * info, adjusting for the amount of string data to be stored
00203      * into the result, as adjusted for Java class name formatting.
00204      */
00205 
00206     /* This calculation follows the above description of text format */
00207     int fmtlen = arraydims +      /* Bracket characters */
00208                  sizeof(u1) +     /* Type specifier */
00209                  inbfrlen +       /* Data */
00210                  sizeof(u1) +     /* Type terminator */
00211                  sizeof(u1);      /* NUL character */
00212 
00213     cp_info_dup *rc =
00214         HEAP_GET_DATA(sizeof(cp_info_dup) -   /* Enclosing structure */
00215                           sizeof(cp_info) +   /* Basic type */
00216                           sizeof(CONSTANT_Utf8_info) - /* UTF8 type */
00217                           sizeof(u1) +        /* Data place holder */
00218                           fmtlen,             /* UTF8 data area */
00219                       rfalse);
00220 
00221     /* Move (rchar *) string into (CONSTANT_Utf8_info) */
00222     CONSTANT_Utf8_info *pcpui = PTR_THIS_CP_Utf8(rc);
00223     pcpui->tag = CONSTANT_Utf8;
00224     pcpui->length = fmtlen - sizeof(u1);/*Adjust out trailing \0 rchar*/
00225 
00226     /* Format array dimensions, Java type name, class name, terminator*/
00227     jvm_utf_string_index utfidx = 0;
00228     for (utfidx = 0; utfidx < arraydims; utfidx++)
00229     {
00230         pcpui->bytes[utfidx] = BASETYPE_CHAR_ARRAY;
00231     }
00232 
00233     pcpui->bytes[utfidx] = BASETYPE_CHAR_L;
00234     utfidx++;
00235 
00236     memcpy((jubyte *) &pcpui->bytes[utfidx], inbfr, inbfrlen);
00237 
00238     rc->empty[0] = FILL_INFO_DUP0;
00239     rc->empty[1] = FILL_INFO_DUP1;
00240     rc->empty[2] = FILL_INFO_DUP2;
00241 
00242     pcpui->bytes[utfidx + inbfrlen]     = BASETYPE_CHAR_L_TERM;
00243     pcpui->bytes[utfidx + inbfrlen + 1] = '\0';
00244 
00245     return(rc);
00246 
00247 } /* END of nts_prchar2utf_classname() */
00248 
00249 
00250 /*!
00251  * @brief Report the number of array dimensions prefixing a Java type
00252  * string.
00253  *
00254  * No overflow condition is reported since it is assumed that @b inbfr
00255  * is a valid (rchar *) string.  Notice that because this logic checks
00256  * @e only for array specifiers and does not care about the rest of the
00257  * string, it may be used to evaluate field descriptions, which will
00258  * not contain any class formatting information.
00259  *
00260  * If there is even a @e remote possibility that more than
00261  * CONSTANT_MAX_ARRAY_DIMS dimensions will be found, compare the
00262  * result of this function with the result of nts_prchar_isarray().
00263  *  If there is a discrepancy, then there was an overflow here.
00264  * Properly formatted class files will @e never contain code with
00265  * this condition.
00266  *
00267  * @note  This function is identical to nts_get_prchararraydims()
00268  *        except that it works on (rchar *) instead of
00269  *        (CONSTANT_Utf8_info *).
00270  *
00271  *
00272  * @param    inbfr   (rchar *) string.
00273  *
00274  *
00275  * @returns  Number of array dimensions in string.  For example,
00276  *           this string contains three array dimensions:
00277  *
00278  * @verbatim
00279 
00280                  [[[Lsome/path/name/filename;
00281 
00282    @endverbatim
00283  *
00284  *           The string does @e not have a @c @b \\0 NUL
00285  *           character appended in this instance.  If more than
00286  *           CONSTANT_MAX_ARRAY_DIMS are located, the
00287  *           result is zero-- no other error is reported.
00288  *
00289  */
00290 
00291 jvm_array_dim nts_get_prchar_arraydims(rchar *inbfr)
00292 {
00293     /* Make return code wider than max to check overflow */
00294     u4 rc = 0;
00295 
00296     /* Start scanning at beginning of string */
00297     u1 *pclsname = (u1 *) inbfr;
00298 
00299     /* Keep scanning until no more array specifications are found */
00300     while (BASETYPE_CHAR_ARRAY == *pclsname++)
00301     {
00302         rc++; 
00303     }
00304 
00305     /* Check overflow, return default if so, else number of dimensions*/
00306     if (CONSTANT_MAX_ARRAY_DIMS < rc)
00307     {
00308         return(LOCAL_CONSTANT_NO_ARRAY_DIMS);
00309     }
00310     else
00311     {
00312         /* Perform narrowing conversion into proper type for max */
00313         return((jvm_array_dim) rc);
00314     }
00315 
00316 } /* END of nts_get_prchar_arraydims() */
00317 
00318 
00319 /*!
00320  * @brief Test whether or not a Java type string is an array or not.
00321  *
00322  *
00323  * @param    inbfr   (rchar *) string.
00324  *
00325  *
00326  * @returns  @link #rtrue rtrue@endlink if this is an array
00327  *           specfication, else @link #rfalse rfalse@endlink.
00328  *
00329  */
00330 
00331 rboolean nts_prchar_isarray(rchar *inbfr)
00332 {
00333     return((BASETYPE_CHAR_ARRAY == (u1) inbfr[0]) ? rtrue : rfalse);
00334 
00335 } /* END of nts_prchar_isarray() */
00336 
00337 
00338 /*!
00339  * @brief Verify if a null-terminated string contains PRIMATIVE
00340  * formatting or not.  May be prefixed with array specifiers.
00341  * Everything after the base type character is ignored.
00342  *
00343  *
00344  * @param  src   Pointer to null-terminated string.
00345  *
00346  *
00347  * @returns @link #rtrue rtrue@endlink if string is formtted as
00348  *          @c @b LClassName; but
00349  *          @link #rfalse rfalse@endlink otherwise, may also have
00350  *          array descriptor prefixed,
00351  *          thus @c @b [[LClassName;
00352  *
00353  *          @link #rtrue rtrue@endlink if string is formatted as
00354  *          @c @b \@ (where @c @b \@ is any
00355  *          @link #BASETYPE_CHAR_B BASETYPE_CHAR_x@endlink character),
00356  *          @link #rfalse rfalse@endlink otherwise.  May also have
00357  *          array descriptor prefixed,
00358  *          thus @c @b [[\@, eg, @c @b [[I or @c @b [[[[D
00359  *
00360  */
00361 
00362 rboolean nts_prchar_isprimativeformatted(rchar *src)
00363 {
00364     jvm_array_dim arraydims = nts_get_prchar_arraydims(src);
00365 
00366     /*
00367      * Chk if @e any primative base type,
00368      * but NOT class (the @c @b L fmt) 
00369      */
00370     switch (src[arraydims])
00371     {
00372         case BASETYPE_CHAR_B:
00373         case BASETYPE_CHAR_C:
00374         case BASETYPE_CHAR_D:
00375         case BASETYPE_CHAR_F:
00376         case BASETYPE_CHAR_I:
00377         case BASETYPE_CHAR_J:
00378         case BASETYPE_CHAR_S:
00379         case BASETYPE_CHAR_Z:
00380             return(rtrue);
00381 
00382         default:
00383             return(rfalse);
00384     }
00385 
00386 } /* END of nts_prchar_isprimativeformatted() */
00387 
00388 
00389 /*!
00390  * @brief Verify if a null-terminated string contains CLASS formatting
00391  * or not.
00392  *
00393  *
00394  * @param  src   Pointer to null-terminated string.
00395  *
00396  *
00397  * @returns @link #rtrue rtrue@endlink if string is formatted as
00398  *          @c @b LClasSName; but
00399  *          @link #rfalse rfalse@endlink otherwise.  May also have
00400  *          array descriptor prefixed, thus @c @b [[LClassName;
00401  *
00402  *
00403  * @note  This function works just like utf_isclassformatted() except
00404  *        that it works on (rchar *) strings rather than
00405  *        on (CONSTANT_Utf8_info) strings.
00406  */
00407 
00408 rboolean nts_prchar_isclassformatted(rchar *src)
00409 {
00410     u2 idx;
00411     rint rc = rfalse;
00412 
00413     /* Chk array or class specifier.  If neither, cannot be formatted */
00414     switch (src[0])
00415     {
00416         case BASETYPE_CHAR_ARRAY:
00417         case BASETYPE_CHAR_L:
00418             break;
00419         default:
00420             return(rfalse);
00421     }
00422 
00423     /*
00424      * Now assume a potentially formatted string.
00425      * Check for termination byte next.  If not present,
00426      * nothing else matters and string cannot be formatted.
00427      */
00428     u1 *pbytes = src;
00429     int len    = strlen(src);
00430 
00431     for (idx = 0; idx < len; idx++)
00432     {
00433         if (BASETYPE_CHAR_L_TERM == pbytes[idx])
00434         {
00435             rc = rtrue;
00436             break;
00437         }
00438     }
00439 
00440     /* If not terminated, then cannot be class formatted */
00441     if (rfalse == rc)
00442     {
00443         return(rc);
00444     }
00445 
00446     /* Check initial formatting, including array spec */
00447     jvm_array_dim arraydims = nts_get_prchar_arraydims(src);
00448 
00449     /* If any array specs, look immediately past them for class spec */
00450     if (BASETYPE_CHAR_L == pbytes[arraydims])
00451     {
00452         return(rtrue);
00453     }
00454     else
00455     {
00456         return(rfalse);
00457     }
00458 
00459 } /* END of nts_prchar_isclassformatted() */
00460 
00461 
00462 /*!
00463  * @brief Strip a null-terminated string of any class formatting it
00464  * contains and return result in a heap-allocated buffer.  When done
00465  * with this result, perform HEAP_DATA_FREE(result) to return buffer
00466  * to heap.
00467  *
00468  *
00469  * @param  inbfr Pointer to null-terminated string that is potentially
00470  *               formatted as @c @b LClassName; and which may
00471  *               also have array descriptor prefixed, thus 
00472  *               @c @b [[LClassName;
00473  *
00474  *
00475  * @returns heap-allocated buffer containing @c @b ClassName
00476  *          with no formatting, regardless of input formatting or lack
00477  *          thereof.
00478  *
00479  *
00480  * @note  This function works just like
00481  *        utf_utf2utf_unformatted_classname()
00482  *        except that it takes a (rchar *) string
00483  *        rather than a (CONSTANT_Utf8_info) string
00484  *        and returns a (rchar *).
00485  *
00486  */
00487 
00488 rchar *nts_prchar2prchar_unformatted_classname(rchar *inbfr)
00489 {
00490     int inbfrlen            = strlen(inbfr);
00491     rint isfmt              = nts_prchar_isclassformatted(inbfr);
00492     jvm_array_dim arraydims = nts_get_prchar_arraydims(inbfr);
00493     rchar *psemi;
00494     int allocsize;
00495     int startposn;
00496 
00497     if (rtrue == isfmt)
00498     {
00499         psemi = strchr(inbfr, BASETYPE_CHAR_L_TERM);
00500         psemi--;
00501 
00502         allocsize = inbfrlen -   /* Input data size */
00503                     arraydims -  /* Array specifiers */
00504                     sizeof(u1) - /* Type specifier */
00505                     sizeof(u1) + /* Type terminator */
00506                     sizeof(u1);  /* NUL terminator */
00507 
00508         startposn = arraydims + sizeof(u1);  /* Skip array & type */
00509     }
00510     else
00511     {
00512         psemi = (rchar *) rnull;
00513         allocsize = inbfrlen +   /* Input data size */
00514                     sizeof(u1);  /* NUL terminator */
00515 
00516         startposn = 0; /* Copy the whole string */
00517     }
00518 
00519     rchar *rc = HEAP_GET_DATA(allocsize, rfalse);
00520 
00521     /* Extract input class name from input buffer, add null char */
00522     memcpy(rc, &inbfr[startposn], allocsize);
00523     rc[allocsize - sizeof(u1)] = '\0';
00524 
00525     return(rc);
00526 
00527 } /* END of nts_prchar2prchar_unformatted_classname() */
00528 
00529 
00530 /* EOF */
00531 

Generated on Fri Sep 30 18:59:32 2005 by  doxygen 1.4.4