/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using StringHelper = Lucene.Net.Util.StringHelper;
using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;

namespace Lucene.Net.Index
{
    /// <summary>
    /// Writes term vectors for one segment into three outputs: <c>tvx</c> (one pair of
    /// tvd/tvf file pointers per document), <c>tvd</c> (per-document field numbers and
    /// field pointer deltas) and <c>tvf</c> (per-field terms, frequencies and optional
    /// positions/offsets).
    /// </summary>
    sealed class TermVectorsWriter : IDisposable
    {
        private readonly IndexOutput tvx;
        private readonly IndexOutput tvd;
        private readonly IndexOutput tvf;
        private readonly FieldInfos fieldInfos;

        // Two scratch buffers used alternately: one holds the UTF-8 bytes of the term being
        // written, the other still holds the previous term so the shared prefix can be computed.
        internal UnicodeUtil.UTF8Result[] utf8Results = new[] { new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result() };

        /// <summary>
        /// Creates the three term vector outputs for <paramref name="segment"/> in
        /// <paramref name="directory"/> and writes the current format marker to each.
        /// </summary>
        /// <param name="directory">Directory in which the outputs are created.</param>
        /// <param name="segment">Segment name; each file is named <c>segment + "." + extension</c>.</param>
        /// <param name="fieldInfos">Used to map field names to field numbers when writing.</param>
        /// <remarks>
        /// If creating or initialising any output fails, the outputs that were already
        /// opened are closed before the exception propagates, so no handle is leaked.
        /// </remarks>
        public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
        {
            bool success = false;
            try
            {
                // Open files for TermVector storage
                tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
                tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
                tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
                tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
                tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
                tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);

                this.fieldInfos = fieldInfos;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    // Best effort cleanup: the exception already in flight is the one the
                    // caller must see, so failures while closing are deliberately dropped.
                    TryClose(tvx);
                    TryClose(tvd);
                    TryClose(tvf);
                }
            }
        }

        /// <summary>
        /// Add a complete document specified by all its term vectors. If the document has no
        /// term vectors, the tvx entry is still written and a field count of zero is written
        /// to tvd.
        /// </summary>
        /// <param name="vectors">
        /// The term vectors of the document, one per field, or <c>null</c> if the document
        /// has no term vectors.
        /// </param>
        /// <exception cref="System.InvalidOperationException">
        /// A vector reported positions (or offsets) for its first term but returned
        /// <c>null</c> for a later term.
        /// </exception>
        /// <exception cref="System.IO.IOException">
        /// Declared by the original documentation; presumably raised by the underlying
        /// outputs when a write fails.
        /// </exception>
        public void AddAllDocVectors(ITermFreqVector[] vectors)
        {
            // Every document gets an index entry pointing at its tvd and tvf start positions.
            tvx.WriteLong(tvd.FilePointer);
            tvx.WriteLong(tvf.FilePointer);

            if (vectors == null)
            {
                tvd.WriteVInt(0);
                return;
            }

            int numFields = vectors.Length;
            tvd.WriteVInt(numFields);

            var fieldPointers = new long[numFields];

            for (int i = 0; i < numFields; i++)
            {
                ITermFreqVector vector = vectors[i];
                fieldPointers[i] = tvf.FilePointer;

                // 1st pass: write field numbers to tvd
                int fieldNumber = fieldInfos.FieldNumber(vector.Field);
                tvd.WriteVInt(fieldNumber);

                int numTerms = vector.Size;
                tvf.WriteVInt(numTerms);

                // Only a TermPositionVector may carry positions and offsets; whether it does
                // is decided from its first term.
                TermPositionVector tpVector = vector as TermPositionVector;
                bool storePositions = false;
                bool storeOffsets = false;
                byte bits = 0;
                if (tpVector != null)
                {
                    storePositions = tpVector.Size > 0 && tpVector.GetTermPositions(0) != null;
                    storeOffsets = tpVector.Size > 0 && tpVector.GetOffsets(0) != null;
                    bits = (byte) ((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte) 0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte) 0));
                }

                tvf.WriteVInt(bits);

                System.String[] terms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();

                // Reset the "previous term" buffer so the first term of this field is
                // compared against an empty term.
                int utf8Upto = 0;
                utf8Results[1].length = 0;

                for (int j = 0; j < numTerms; j++)
                {
                    UnicodeUtil.UTF8Result current = utf8Results[utf8Upto];
                    UnicodeUtil.UTF8Result previous = utf8Results[1 - utf8Upto];

                    UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, current);

                    int start = StringHelper.BytesDifference(previous.result, previous.length, current.result, current.length);
                    int length = current.length - start;
                    tvf.WriteVInt(start); // write shared prefix length
                    tvf.WriteVInt(length); // write delta length
                    tvf.WriteBytes(current.result, start, length); // write delta bytes

                    // Swap buffers: the term just written becomes the previous term.
                    utf8Upto = 1 - utf8Upto;

                    int termFreq = freqs[j];
                    tvf.WriteVInt(termFreq);

                    if (storePositions)
                    {
                        int[] positions = tpVector.GetTermPositions(j);
                        if (positions == null)
                            throw new System.InvalidOperationException("Trying to write positions that are null!");
                        System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                        // use delta encoding for positions
                        int lastPosition = 0;
                        foreach (int position in positions)
                        {
                            tvf.WriteVInt(position - lastPosition);
                            lastPosition = position;
                        }
                    }

                    if (storeOffsets)
                    {
                        TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                        if (offsets == null)
                            throw new System.InvalidOperationException("Trying to write offsets that are null!");
                        System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                        // use delta encoding for offsets
                        int lastEndOffset = 0;
                        foreach (TermVectorOffsetInfo t in offsets)
                        {
                            int startOffset = t.StartOffset;
                            int endOffset = t.EndOffset;
                            tvf.WriteVInt(startOffset - lastEndOffset);
                            tvf.WriteVInt(endOffset - startOffset);
                            lastEndOffset = endOffset;
                        }
                    }
                }
            }

            // 2nd pass: write field pointers to tvd. The first field's pointer is not written
            // here (it is the tvf position already stored in tvx); later ones are deltas.
            if (numFields > 1)
            {
                long lastFieldPointer = fieldPointers[0];
                for (int i = 1; i < numFields; i++)
                {
                    long fieldPointer = fieldPointers[i];
                    tvd.WriteVLong(fieldPointer - lastFieldPointer);
                    lastFieldPointer = fieldPointer;
                }
            }
        }

        /// <summary>
        /// Do a bulk copy of <paramref name="numDocs"/> documents from
        /// <paramref name="reader"/> to our streams. This is used to expedite merging, if
        /// the field numbers are congruent.
        /// </summary>
        /// <param name="reader">Reader whose tvd and tvf streams supply the raw bytes.</param>
        /// <param name="tvdLengths">Number of tvd bytes for each document, indexed by document.</param>
        /// <param name="tvfLengths">Number of tvf bytes for each document, indexed by document.</param>
        /// <param name="numDocs">Number of documents to copy.</param>
        internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
        {
            long tvdPosition = tvd.FilePointer;
            long tvfPosition = tvf.FilePointer;
            long tvdStart = tvdPosition;
            long tvfStart = tvfPosition;

            // Write the index entries first, predicting where each document will start
            // once the raw bytes have been appended.
            for (int i = 0; i < numDocs; i++)
            {
                tvx.WriteLong(tvdPosition);
                tvdPosition += tvdLengths[i];
                tvx.WriteLong(tvfPosition);
                tvfPosition += tvfLengths[i];
            }

            tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
            tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
            System.Diagnostics.Debug.Assert(tvd.FilePointer == tvdPosition);
            System.Diagnostics.Debug.Assert(tvf.FilePointer == tvfPosition);
        }

        /// <summary>
        /// Close all streams. Every stream is attempted even if an earlier one fails; the
        /// first failure is then reported.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// Closing at least one stream failed. The message is that of the first failure and
        /// the original exception is available as <c>InnerException</c>.
        /// </exception>
        public void Dispose()
        {
            // Move to a protected method if class becomes unsealed

            // Make an effort to close all streams we can, but remember and re-throw
            // the first exception encountered in this process. All three calls are made
            // before the results are combined so that no stream is skipped.
            System.IO.IOException tvxFailure = TryClose(tvx);
            System.IO.IOException tvdFailure = TryClose(tvd);
            System.IO.IOException tvfFailure = TryClose(tvf);

            System.IO.IOException keep = tvxFailure ?? tvdFailure ?? tvfFailure;
            if (keep != null)
            {
                // Wrap rather than rethrow so the original stack trace survives as the
                // inner exception while the message stays the real failure reason.
                throw new System.IO.IOException(keep.Message, keep);
            }
        }

        /// <summary>
        /// Closes <paramref name="output"/> if it is not <c>null</c>.
        /// </summary>
        /// <returns>The <see cref="System.IO.IOException"/> thrown while closing, or <c>null</c> if none.</returns>
        private static System.IO.IOException TryClose(IndexOutput output)
        {
            if (output == null)
            {
                return null;
            }

            try
            {
                output.Close();
                return null;
            }
            catch (System.IO.IOException e)
            {
                return e;
            }
        }
    }
}