/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Low-level helpers for raw byte strings, UTF-8 sequences and base 36
 * number encoding.  Every member of this class is declared `inert`.
 */
inert class Clownfish::Util::StringHelper cnick StrHelp {

    /* A table where the values indicate the number of bytes in a UTF-8
     * sequence implied by the leading UTF-8 byte.
     */
    inert const uint8_t[] UTF8_COUNT;

    /** Return the number of bytes that two strings have in common.
     *
     * NOTE(review): presumably the count is taken from the start of both
     * strings (i.e. the length of the common prefix) -- confirm against
     * the implementation.
     *
     * @param a The first string.
     * @param b The second string.
     * @param a_len Length of <code>a</code>.
     * @param b_len Length of <code>b</code>.
     * @return the number of bytes in common.
     */
    inert int32_t
    overlap(const char *a, const char *b, size_t a_len, size_t b_len);

    /** Encode a NUL-terminated string representation of a value in base 36
     * into buffer.
     *
     * @param value The number to be encoded.
     * @param buffer A buffer at least MAX_BASE36_BYTES bytes long.
     * @return the number of digits encoded (not including the terminating
     * NUL).
     */
    inert uint32_t
    to_base36(uint64_t value, void *buffer);

    /** Return true if the string is valid UTF-8, false otherwise.
     *
     * @param ptr The bytes to check.
     * @param len The number of bytes to check.
     */
    inert bool_t
    utf8_valid(const char *ptr, size_t len);

    /** Return true if the code point qualifies as Unicode whitespace, false
     * otherwise.
     *
     * @param code_point The Unicode code point to test.
     */
    inert bool_t
    is_whitespace(uint32_t code_point);

    /** Encode a Unicode code point to a UTF-8 sequence.
     *
     * @param code_point A legal unicode code point.
     * @param buffer Write buffer which must hold at least 4 bytes (the
     * maximum legal length for a UTF-8 char).
     * @return presumably the number of bytes written to buffer -- TODO
     * confirm against the implementation.
     */
    inert uint32_t
    encode_utf8_char(uint32_t code_point, void *buffer);

    /** Decode a UTF-8 sequence to a Unicode code point.  The input is
     * assumed to be valid UTF-8; utf8_valid() is available for checking
     * untrusted input beforehand.
     *
     * @param utf8 Pointer to the first byte of the sequence.
     * @return the decoded code point.
     */
    inert uint32_t
    decode_utf8_char(const char *utf8);

    /** Return the first non-continuation byte before the supplied pointer.
     * If backtracking progresses beyond the supplied start, return NULL.
     *
     * NOTE(review): <code>start</code> is declared as a non-const char*
     * even though <code>utf8</code> and the return value are const;
     * presumably it is only used as a lower bound -- confirm before
     * tightening the signature.
     *
     * @param utf8 Position to backtrack from.
     * @param start Lower bound for backtracking.
     */
    inert nullable const char*
    back_utf8_char(const char *utf8, char *start);
}

__C__
/** The maximum number of bytes encoded by to_base36(), including the
 * terminating NUL.  A uint64_t needs at most 13 base 36 digits, so 14 bytes
 * leaves room for the terminator.
 */
#define lucy_StrHelp_MAX_BASE36_BYTES 14

/* Unprefixed alias, only available when short names are enabled. */
#ifdef LUCY_USE_SHORT_NAMES
  #define StrHelp_MAX_BASE36_BYTES lucy_StrHelp_MAX_BASE36_BYTES
#endif
__END_C__