/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Documents;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
using Similarity = Lucene.Net.Search.Similarity;

namespace Lucene.Net.Index
{
    /// <summary>Helper that builds a test document containing one field of every
    /// interesting flavor: stored/unstored, indexed/unindexed, with and without
    /// term vectors, norms, term frequencies, lazy, binary, and UTF-8 content.
    /// </summary>
    class DocHelper
    {
        public const System.String FIELD_1_TEXT = "field one text";
        public const System.String TEXT_FIELD_1_KEY = "textField1";
        public static Field textField1;

        public const System.String FIELD_2_TEXT = "field field field two text";
        // Fields will be lexicographically sorted. So, the order is: field, text, two
        public static readonly int[] FIELD_2_FREQS = new int[]{3, 1, 1};
        public const System.String TEXT_FIELD_2_KEY = "textField2";
        public static Field textField2;

        public const System.String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
        public const System.String TEXT_FIELD_3_KEY = "textField3";
        public static Field textField3;

        public const System.String KEYWORD_TEXT = "Keyword";
        public const System.String KEYWORD_FIELD_KEY = "keyField";
        public static Field keyField;

        public const System.String NO_NORMS_TEXT = "omitNormsText";
        public const System.String NO_NORMS_KEY = "omitNorms";
        public static Field noNormsField;

        public const System.String NO_TF_TEXT = "analyzed with no tf and positions";
        public const System.String NO_TF_KEY = "omitTermFreqAndPositions";
        public static Field noTFField;

        public const System.String UNINDEXED_FIELD_TEXT = "unindexed field text";
        public const System.String UNINDEXED_FIELD_KEY = "unIndField";
        public static Field unIndField;

        public const System.String UNSTORED_1_FIELD_TEXT = "unstored field text";
        public const System.String UNSTORED_FIELD_1_KEY = "unStoredField1";
        public static Field unStoredField1;

        public const System.String UNSTORED_2_FIELD_TEXT = "unstored field text";
        public const System.String UNSTORED_FIELD_2_KEY = "unStoredField2";
        public static Field unStoredField2;

        public const System.String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
        public static byte[] LAZY_FIELD_BINARY_BYTES;
        public static Field lazyFieldBinary;

        public const System.String LAZY_FIELD_KEY = "lazyField";
        public const System.String LAZY_FIELD_TEXT = "These are some field bytes";
        public static Field lazyField;

        public const System.String LARGE_LAZY_FIELD_KEY = "largeLazyField";
        public static System.String LARGE_LAZY_FIELD_TEXT;
        public static Field largeLazyField;

        // From Issue 509
        public const System.String FIELD_UTF1_TEXT = "field one \u4e00text";
        public const System.String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
        public static Field textUtfField1;

        public const System.String FIELD_UTF2_TEXT = "field field field \u4e00two text";
        // Fields will be lexicographically sorted. So, the order is: field, text, two
        public static readonly int[] FIELD_UTF2_FREQS = new int[]{3, 1, 1};
        public const System.String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
        public static Field textUtfField2;

        public static System.Collections.IDictionary nameValues = null;

        // Ordered list of all the fields. (The original Java version notes that a
        // LinkedHashMap could serve this purpose; an array preserves order here.)
        public static Field[] fields = null;

        // Maps of field name -> IFieldable, grouped by the flags set on each field.
        public static System.Collections.IDictionary all = new System.Collections.Hashtable();
        public static System.Collections.IDictionary indexed = new System.Collections.Hashtable();
        public static System.Collections.IDictionary stored = new System.Collections.Hashtable();
        public static System.Collections.IDictionary unstored = new System.Collections.Hashtable();
        public static System.Collections.IDictionary unindexed = new System.Collections.Hashtable();
        public static System.Collections.IDictionary termvector = new System.Collections.Hashtable();
        public static System.Collections.IDictionary notermvector = new System.Collections.Hashtable();
        public static System.Collections.IDictionary lazy = new System.Collections.Hashtable();
        public static System.Collections.IDictionary noNorms = new System.Collections.Hashtable();
        public static System.Collections.IDictionary noTf = new System.Collections.Hashtable();

        private static void Add(System.Collections.IDictionary map, IFieldable field)
        {
            map[field.Name] = field;
        }

        /// <summary>Adds the fields above to a document.</summary>
        /// <param name="doc">The document to write</param>
        public static void SetupDoc(Document doc)
        {
            for (int i = 0; i < fields.Length; i++)
            {
                doc.Add(fields[i]);
            }
        }

        /// <summary>Writes the document to the directory using a segment
        /// named "test"; returns the SegmentInfo describing the new segment.
        /// </summary>
        /// <param name="dir">The directory to write to</param>
        /// <param name="doc">The document to write</param>
        /// <throws>IOException</throws>
        public static SegmentInfo WriteDoc(Directory dir, Document doc)
        {
            return WriteDoc(dir, new WhitespaceAnalyzer(), Similarity.Default, doc);
        }

        /// <summary>Writes the document to the directory using the analyzer
        /// and the similarity score; returns the SegmentInfo describing the new segment.
        /// </summary>
        /// <param name="dir">The directory to write to</param>
        /// <param name="analyzer">The analyzer to use</param>
        /// <param name="similarity">The Similarity score to use</param>
        /// <param name="doc">The document to write</param>
        /// <throws>IOException</throws>
        public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
        {
            IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
            writer.SetSimilarity(similarity);
            //writer.setUseCompoundFile(false);
            writer.AddDocument(doc);
            writer.Commit();
            SegmentInfo info = writer.NewestSegment();
            writer.Close();
            return info;
        }

        public static int NumFields(Document doc)
        {
            return doc.GetFields().Count;
        }

        static DocHelper()
        {
            textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textField3.OmitNorms = true;
            keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
            noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            noTFField.OmitTermFreqAndPositions = true;
            unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
            unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
            unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

            // Initialize the binary lazy field. (Encoding.UTF8.GetBytes cannot throw
            // IOException in .NET, so the dead try/catch carried over from the Java
            // port has been dropped.)
            LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
            lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);

            // Initialize the large lazy field.
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            for (int i = 0; i < 10000; i++)
            {
                buffer.Append("Lazily loading lengths of language in lieu of laughing ");
            }
            LARGE_LAZY_FIELD_TEXT = buffer.ToString();
            largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);

            // Building lazyFieldBinary and largeLazyField first lets the array be
            // filled in directly, instead of patching its last two slots afterwards.
            fields = new Field[]
            {
                textField1, textField2, textField3,
                keyField, noNormsField, noTFField,
                unIndField, unStoredField1, unStoredField2,
                textUtfField1, textUtfField2,
                lazyField, lazyFieldBinary, largeLazyField
            };

            // Sort each field into the lookup maps according to its flags.
            for (int i = 0; i < fields.Length; i++)
            {
                IFieldable f = fields[i];
                Add(all, f);
                if (f.IsIndexed)
                    Add(indexed, f);
                else
                    Add(unindexed, f);
                if (f.IsTermVectorStored)
                    Add(termvector, f);
                if (f.IsIndexed && !f.IsTermVectorStored)
                    Add(notermvector, f);
                if (f.IsStored)
                    Add(stored, f);
                else
                    Add(unstored, f);
                if (f.OmitNorms)
                    Add(noNorms, f);
                if (f.OmitTermFreqAndPositions)
                    Add(noTf, f);
                if (f.IsLazy)
                    Add(lazy, f);
            }

            nameValues = new System.Collections.Hashtable();
            nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
            nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
            nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
            nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
            nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
            nameValues[NO_TF_KEY] = NO_TF_TEXT;
            nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
            nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
            nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
            nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
            nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
            nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
            nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
            nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
        }
    }
}
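// Example usage, a minimal sketch of how a test might drive this helper
// (assumes Lucene.Net's RAMDirectory; the variable names are illustrative,
// not part of this file):
//
//   Directory dir = new Lucene.Net.Store.RAMDirectory();
//   Document doc = new Document();
//   DocHelper.SetupDoc(doc);                         // populate doc with every test field
//   SegmentInfo info = DocHelper.WriteDoc(dir, doc); // flush it into a one-document segment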