/**************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 *************************************************************/

#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

// ParseResult is the return type of the parse...Token() methods below.
#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\0x9'
ASC_VT='\0xb'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\";
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=(locale-dependent decimal separator)
GRPSEP=(locale-dependent group separator)
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

NOTE(review): DEFCHARS is not defined in this list; presumably it stands for
the user-defined characters passed to the parse...Token() methods - confirm.

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position nPos for nCount code points.

        @param aText
            Text to be converted.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The result of the conversion.
     */
    string  toUpper( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position nPos for nCount code points.

        @param aText
            Text to be converted.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The result of the conversion.
     */
    string  toLower( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position nPos for nCount code points.

        @param aText
            Text to be converted.

        @param nPos
            Position in aText where the conversion starts.

        @param nCount
            Number of code points to convert.

        @param aLocale
            The locale passed to the conversion.

        @returns
            The result of the conversion.
     */
    string  toTitle( [in] string aText,
                     [in] long nPos,
                     [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get UnicodeType of character at position nPos.

        @param aText
            Text containing the character.

        @param nPos
            Position of the character in aText.

        @returns
            The UnicodeType of the character.
     */
    short   getType( [in] string aText,
                     [in] long nPos );

    //------------------------------------------------------------------------
    /** Get DirectionProperty of character at position nPos.

        @param aText
            Text containing the character.

        @param nPos
            Position of the character in aText.

        @returns
            The DirectionProperty of the character.
     */
    short   getCharacterDirection( [in] string aText,
                                   [in] long nPos );

    //------------------------------------------------------------------------
    /** Get UnicodeScript of character at position nPos.

        @param aText
            Text containing the character.

        @param nPos
            Position of the character in aText.

        @returns
            The UnicodeScript of the character.
     */
    short   getScript( [in] string aText,
                       [in] long nPos );

    //------------------------------------------------------------------------
    /** Get KCharacterType of character at position nPos.

        @param aText
            Text containing the character.

        @param nPos
            Position of the character in aText.

        @param aLocale
            The locale passed to the character type determination.

        @returns
            The KCharacterType of the character.
     */
    long    getCharacterType( [in] string aText,
                              [in] long nPos,
                              [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated KCharacterTypes of string starting
        at position nPos of length nCount code points.

        @param aText
            Text to be examined.

        @param nPos
            Position in aText where the examination starts.

        @param nCount
            Number of code points to examine.

        @param aLocale
            The locale passed to the character type determination.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long    getStringType( [in] string aText,
                           [in] long nPos,
                           [in] long nCount,
                           [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position nPos.

        A name or identifier must match the KParseTokens criteria
        passed in nStartCharFlags and nContCharFlags and may
        additionally contain characters of
        aUserDefinedCharactersStart and/or
        aUserDefinedCharactersCont.

        @returns
            A filled ParseResult structure. If no unambiguous token
            could be parsed, ParseResult::TokenType will be set to
            0 (zero), other fields will contain the values parsed
            so far.

            If a token may represent either a numeric value or a name
            according to the passed Start/Cont-Flags/Chars, both
            KParseType::ASC_NUMBER (or KParseType::UNI_NUMBER) and
            KParseType::IDENTNAME are set in ParseResult::TokenType.

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of KParseTokens constants determining the allowed
            characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of KParseTokens constants determining the allowed
            characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++

        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        String aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
     */

    ParseResult parseAnyToken(
                        [in] string aText,
                        [in] long nPos,
                        [in] com::sun::star::lang::Locale aLocale,
                        [in] long nStartCharFlags,
                        [in] string aUserDefinedCharactersStart,
                        [in] long nContCharFlags,
                        [in] string aUserDefinedCharactersCont );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type nTokenType
        starting at position nPos.

        Other parameters are the same as in parseAnyToken.
        If the actual token does not match the passed nTokenType
        a ParseResult::TokenType set to 0 (zero) is returned.

        @param nTokenType
            One or more of the KParseType constants.

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            See parseAnyToken.

        @param nStartCharFlags
            See parseAnyToken.

        @param aUserDefinedCharactersStart
            See parseAnyToken.

        @param nContCharFlags
            See parseAnyToken.

        @param aUserDefinedCharactersCont
            See parseAnyToken.

        @returns
            A ParseResult structure; its TokenType is 0 (zero) if the
            actual token does not match nTokenType.

        @example:C++

        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        String aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it is the only token,
        // i.e. nothing else follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
     */

    ParseResult parsePredefinedToken(
                        [in] long nTokenType,
                        [in] string aText,
                        [in] long nPos,
                        [in] com::sun::star::lang::Locale aLocale,
                        [in] long nStartCharFlags,
                        [in] string aUserDefinedCharactersStart,
                        [in] long nContCharFlags,
                        [in] string aUserDefinedCharactersCont );
};

//=============================================================================
}; }; }; };

#endif