/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/* NOTE(review): the two L<...> links in the class documentation below were
 * restored from bare "L" residue.  The DataReader target follows from the
 * "companion DataReader" wording on Format(); the Architecture target is
 * presumed to be Lucy::Plan::Architecture -- confirm both link targets.
 */

/** Write data to an index.
 *
 * DataWriter is an abstract base class for writing index data, generally in
 * segment-sized chunks.  Each component of an index -- e.g. stored fields,
 * lexicon, postings, deletions -- is represented by a
 * DataWriter/L<DataReader|Lucy::Index::DataReader> pair.
 *
 * Components may be specified per index by subclassing
 * L<Architecture|Lucy::Plan::Architecture>.
 */
public class Lucy::Index::DataWriter inherits Lucy::Object::Obj {

    Snapshot *snapshot;
    Segment *segment;
    PolyReader *polyreader;
    Schema *schema;
    Folder *folder;

    /** Initialize a DataWriter.
     *
     * @param schema A Schema.
     * @param snapshot The Snapshot that will be committed at the end of the
     * indexing session.
     * @param segment The Segment in progress.
     * @param polyreader A PolyReader representing all existing data in the
     * index.  (If the index is brand new, the PolyReader will have no
     * sub-readers).
     */
    public inert DataWriter*
    init(DataWriter *self, Schema *schema, Snapshot *snapshot,
         Segment *segment, PolyReader *polyreader);

    /** Process a document, previously inverted by the supplied inverter.
     *
     * @param inverter An Inverter wrapping an inverted document.
     * @param doc_id Internal number assigned to this document within the
     * segment.
     */
    public abstract void
    Add_Inverted_Doc(DataWriter *self, Inverter *inverter, int32_t doc_id);

    /** Add content from an existing segment into the one currently being
     * written.
     *
     * @param reader The SegReader containing content to add.
     * @param doc_map An array of integers mapping old document ids to
     * new.  Deleted documents are mapped to 0, indicating that they should be
     * skipped.
     */
    public abstract void
    Add_Segment(DataWriter *self, SegReader *reader,
                I32Array *doc_map = NULL);

    /** Remove a segment's data.  The default implementation is a no-op, as
     * all files within the segment directory will be automatically deleted.
     * Subclasses which manage their own files outside of the segment system
     * should override this method and use it as a trigger for cleaning up
     * obsolete data.
     *
     * @param reader The SegReader for the segment whose data is to be
     * removed, which must represent a segment which is part of the current
     * snapshot.
     */
    public void
    Delete_Segment(DataWriter *self, SegReader *reader);

    /** Move content from an existing segment into the one currently being
     * written.
     *
     * The default implementation calls Add_Segment() then Delete_Segment().
     *
     * @param reader The SegReader containing content to merge, which must
     * represent a segment which is part of the current snapshot.
     * @param doc_map An array of integers mapping old document ids to
     * new.  Deleted documents are mapped to 0, indicating that they should be
     * skipped.
     */
    public void
    Merge_Segment(DataWriter *self, SegReader *reader,
                  I32Array *doc_map = NULL);

    /** Complete the segment: close all streams, store metadata, etc.
     */
    public abstract void
    Finish(DataWriter *self);

    /** Arbitrary metadata to be serialized and stored by the Segment.  The
     * default implementation supplies a Hash with a single key-value pair for
     * "format".
     */
    public incremented Hash*
    Metadata(DataWriter *self);

    /** Every writer must specify a file format revision number, which should
     * increment each time the format changes.  Responsibility for revision
     * checking is left to the companion DataReader.
     */
    public abstract int32_t
    Format(DataWriter *self);

    /** Accessor for "snapshot" member var.
     */
    public Snapshot*
    Get_Snapshot(DataWriter *self);

    /** Accessor for "segment" member var.
     */
    public Segment*
    Get_Segment(DataWriter *self);

    /** Accessor for "polyreader" member var.
     */
    public PolyReader*
    Get_PolyReader(DataWriter *self);

    /** Accessor for "schema" member var.
     */
    public Schema*
    Get_Schema(DataWriter *self);

    /** Accessor for "folder" member var.
     */
    public Folder*
    Get_Folder(DataWriter *self);

    /* Destructor. */
    public void
    Destroy(DataWriter *self);
}