/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Tokenize/modify/filter text.
 *
 * An Analyzer is a filter which processes text, transforming it from one form
 * into another.  For instance, an analyzer might break up a long text into
 * smaller pieces (tokenization), or it might perform case folding to
 * facilitate case-insensitive search (normalization).
 *
 * This class is abstract: Transform() is declared without a default
 * implementation, so concrete subclasses must supply it.
 */
public abstract class Lucy::Analysis::Analyzer
    inherits Clownfish::Obj : dumpable {

    /** Abstract initializer.
     *
     * @return an Analyzer pointer (presumably `self`; confirm against the
     * implementation).
     */
    public inert Analyzer*
    init(Analyzer *self);

    /** Take a single Inversion as input and return an Inversion, either the
     * same one (presumably transformed in some way), or a new one.
     *
     * @param inversion The Inversion to process.
     * @return an Inversion.  The return value is declared `incremented`,
     * which by Clownfish convention means the caller receives its own
     * reference to the result.
     */
    public abstract incremented Inversion*
    Transform(Analyzer *self, Inversion *inversion);

    /** Kick off an analysis chain, creating an Inversion from string input.
     * The default implementation simply creates an initial Inversion with a
     * single Token, then calls Transform(), but occasionally subclasses will
     * provide an optimized implementation which minimizes string copies.
     *
     * @param text The string to analyze.
     * @return an Inversion.  The return value is declared `incremented`,
     * which by Clownfish convention means the caller receives its own
     * reference to the result.
     */
    public incremented Inversion*
    Transform_Text(Analyzer *self, CharBuf *text);

    /** Analyze text and return an array of token texts.
     *
     * @param text The string to analyze.
     * @return a VArray of token texts.  The return value is declared
     * `incremented`, which by Clownfish convention means the caller receives
     * its own reference to the result.
     */
    public incremented VArray*
    Split(Analyzer *self, CharBuf *text);
}