/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Suppress a "stoplist" of common words.
 *
 * A "stoplist" is a collection of "stopwords": words which are common enough
 * to be of little value when determining search results.  For example, so
 * many documents in English contain "the", "if", and "maybe" that it may
 * improve both performance and relevance to block them.
 *
 * Before filtering stopwords:
 *
 *     ("i", "am", "the", "walrus")
 *
 * After filtering stopwords:
 *
 *     ("walrus")
 *
 * SnowballStopFilter provides default stoplists for several languages,
 * courtesy of the Snowball project (<http://snowball.tartarus.org>), or you
 * may supply your own.
 *
 *     |-----------------------|
 *     | ISO CODE | LANGUAGE   |
 *     |-----------------------|
 *     | da       | Danish     |
 *     | de       | German     |
 *     | en       | English    |
 *     | es       | Spanish    |
 *     | fi       | Finnish    |
 *     | fr       | French     |
 *     | hu       | Hungarian  |
 *     | it       | Italian    |
 *     | nl       | Dutch      |
 *     | no       | Norwegian  |
 *     | pt       | Portuguese |
 *     | sv       | Swedish    |
 *     | ru       | Russian    |
 *     |-----------------------|
 */
public class Lucy::Analysis::SnowballStopFilter cnick SnowStop
    inherits Lucy::Analysis::Analyzer : dumpable {

    /* Stopwords in effect for this filter; the stopwords are the keys of
     * the hash. */
    Hash *stoplist;

    /* Class-level (inert) word lists, one per ISO code in the table above.
     * NOTE(review): presumably these are the built-in Snowball stoplists
     * that gen_stoplist() draws from; their exact layout (e.g. how the end
     * of each array is marked) is defined in the implementation, not here. */
    inert const uint8_t** snow_da;
    inert const uint8_t** snow_de;
    inert const uint8_t** snow_en;
    inert const uint8_t** snow_es;
    inert const uint8_t** snow_fi;
    inert const uint8_t** snow_fr;
    inert const uint8_t** snow_hu;
    inert const uint8_t** snow_it;
    inert const uint8_t** snow_nl;
    inert const uint8_t** snow_no;
    inert const uint8_t** snow_pt;
    inert const uint8_t** snow_ru;
    inert const uint8_t** snow_sv;

    /** Create a new SnowballStopFilter.  Takes the same arguments as init().
     * The return value is declared `incremented`, i.e. the caller receives
     * its own reference.
     */
    inert incremented SnowballStopFilter*
    new(const CharBuf *language = NULL, Hash *stoplist = NULL);

    /** Initialize a SnowballStopFilter from either a supported language or a
     * caller-supplied stoplist.
     *
     * NOTE(review): both arguments default to NULL; how the two interact
     * (both supplied, or neither) is decided in the implementation and is
     * not visible from this declaration.
     *
     * @param language The ISO code for a supported language.
     * @param stoplist A hash with stopwords as the keys.
     */
    public inert SnowballStopFilter*
    init(SnowballStopFilter *self, const CharBuf *language = NULL,
         Hash *stoplist = NULL);

    /** Return a Hash with the Snowball stoplist for the supplied language.
     *
     * NOTE(review): the result for an unsupported language code is not
     * visible from this declaration -- check the implementation.
     */
    inert incremented Hash*
    gen_stoplist(const CharBuf *language);

    /** Apply the filter to an Inversion.  Per the class description, tokens
     * which match a stopword are suppressed.
     *
     * NOTE(review): the declaration does not show whether the returned
     * Inversion is a new object or the input with an added reference.
     *
     * @param inversion The Inversion to filter.
     */
    public incremented Inversion*
    Transform(SnowballStopFilter *self, Inversion *inversion);

    /** Equality test against another object.
     *
     * NOTE(review): presumably compares the two filters' stoplists; the
     * actual criteria live in the implementation.
     */
    public bool_t
    Equals(SnowballStopFilter *self, Obj *other);

    /** Destructor. */
    public void
    Destroy(SnowballStopFilter *self);
}

/** Hash subclass used by SnowballStopFilter which overrides Make_Key().
 *
 * NOTE(review): going by the name, keys are stored without being cloned;
 * confirm against the implementation before relying on that.
 */
class Lucy::Analysis::SnowballStopFilter::NoCloneHash
    inherits Clownfish::Hash {

    /** Create a new NoCloneHash; takes the same argument as init(). */
    inert incremented NoCloneHash*
    new(uint32_t capacity = 0);

    /** Initialize a NoCloneHash.
     *
     * @param capacity Capacity hint; defaults to 0.
     */
    inert NoCloneHash*
    init(NoCloneHash *self, uint32_t capacity = 0);

    /** Override of the key-creation hook used when an entry is stored.
     *
     * @param key The key object being stored.
     * @param hash_sum Hash sum associated with the key.
     */
    public incremented Obj*
    Make_Key(NoCloneHash *self, Obj *key, int32_t hash_sum);
}