/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Index.Memory.Test
{
    /**
     * Verifies that a Lucene MemoryIndex and a RAMDirectory behave the same way:
     * for randomly generated single-document indexes, every test query must
     * report the same total hit count against both.
     */
    public class MemoryIndexTest : BaseTokenStreamTestCase
    {
        /** Query strings loaded in SetUp; a set, so duplicate lines collapse. */
        private readonly HashSet<string> _queries = new HashSet<string>();

        /** Random source for document generation; assigned in SetUp from NewRandom(). */
        private Random random;

        /** Number of random documents checked by TestRandomQueries. */
        public static int ITERATIONS = 100;

        /** Scratch buffer reused by RandomString between calls. */
        private char[] buffer = new char[20];

        /**
         * Some terms to be indexed, in addition to random words.
         * These terms are commonly used in the queries.
         */
        private static readonly string[] TEST_TERMS =
        {
            "term", "Term", "tErm", "TERM",
            "telm", "stop", "drop", "roll", "phrase", "a", "c", "bar", "blar",
            "gack", "weltbank", "worlbank", "hello", "on", "the", "apache", "Apache",
            "copyright", "Copyright"
        };

        /**
         * Runs the base set-up, loads the queries from the two query files and
         * obtains the random source used by the tests.
         */
        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            _queries.UnionWith(ReadQueries("testqueries.txt"));
            _queries.UnionWith(ReadQueries("testqueries2.txt"));
            random = NewRandom();
        }

        /**
         * Reads a set of queries from a file.
         *
         * The file is read as UTF-8, one query per line. Lines are trimmed;
         * empty lines and lines starting with "#" or "//" are skipped.
         *
         * resource: path of the file to read.
         * Returns the distinct query strings found in the file.
         */
        private IEnumerable<string> ReadQueries(String resource)
        {
            var queries = new HashSet<string>();
            using (var fs = File.Open(resource, FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(fs, Encoding.UTF8))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    line = line.Trim();
                    // Ordinal comparison: the comment markers are plain ASCII and
                    // must not be matched using culture-specific rules.
                    if (line.Length > 0
                        && !line.StartsWith("#", StringComparison.Ordinal)
                        && !line.StartsWith("//", StringComparison.Ordinal))
                    {
                        queries.Add(line);
                    }
                }
            }
            return queries;
        }

        /**
         * Runs the random comparison ITERATIONS times.
         */
        [Test]
        public void TestRandomQueries()
        {
            for (int i = 0; i < ITERATIONS; i++)
            {
                AssertAgainstRAMDirectory();
            }
        }

        /**
         * Builds a randomish document for both RAMDirectory and MemoryIndex,
         * and runs all the queries against it.
         */
        public void AssertAgainstRAMDirectory()
        {
            // up to 250 terms each for fields "foo" and "term"
            string fooText = RandomFieldText();
            string termText = RandomFieldText();

            var ramdir = new RAMDirectory();
            var analyzer = RandomAnalyzer();

            var writer = new IndexWriter(ramdir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                var doc = new Document();
                doc.Add(new Field("foo", fooText, Field.Store.NO, Field.Index.ANALYZED));
                doc.Add(new Field("term", termText, Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            finally
            {
                // Close the writer even when indexing fails so it is not left open.
                writer.Close();
            }

            var memory = new MemoryIndex();
            memory.AddField("foo", fooText, analyzer);
            memory.AddField("term", termText, analyzer);

            AssertAllQueries(memory, ramdir, analyzer);
        }

        /**
         * Builds the text of one field: a random number (0 to 249) of random
         * terms, each preceded by a single space.
         *
         * The term count is drawn once up front. Drawing a fresh bound in the
         * loop condition on every iteration would end the loop as soon as any
         * draw fell at or below the current index, making long fields very rare.
         */
        private string RandomFieldText()
        {
            var text = new StringBuilder();
            int termCount = random.Next(250);
            for (int i = 0; i < termCount; i++)
            {
                text.Append(" ");
                text.Append(RandomTerm());
            }
            return text.ToString();
        }

        /**
         * Runs all queries against both the RAMDirectory and the MemoryIndex,
         * asserting that the total hit counts are the same.
         *
         * memory:   the in-memory index under test.
         * ramdir:   the reference directory holding the same document.
         * analyzer: the analyzer used for indexing; also used to parse the queries.
         */
        public void AssertAllQueries(MemoryIndex memory, RAMDirectory ramdir, Analyzer analyzer)
        {
            var ram = new IndexSearcher(ramdir);
            var mem = memory.CreateSearcher();
            var qp = new QueryParser(Version.LUCENE_CURRENT, "foo", analyzer);
            foreach (String query in _queries)
            {
                // The query is parsed separately for each searcher.
                var ramDocs = ram.Search(qp.Parse(query), 1);
                var memDocs = mem.Search(qp.Parse(query), 1);
                // Include the query text so a failure identifies the offending query.
                Assert.AreEqual(ramDocs.TotalHits, memDocs.TotalHits,
                    "hit count mismatch for query: " + query);
            }
        }

        /**
         * Returns a random analyzer (Simple, Stop, Standard) to analyze the terms.
         */
        private Analyzer RandomAnalyzer()
        {
            switch (random.Next(3))
            {
                case 0:
                    return new SimpleAnalyzer();
                case 1:
                    return new StopAnalyzer(Version.LUCENE_CURRENT);
                default:
                    return new StandardAnalyzer(Version.LUCENE_CURRENT);
            }
        }

        /**
         * Half of the time, returns a random term from TEST_TERMS.
         * The other half of the time, returns a random unicode string.
         */
        private String RandomTerm()
        {
            if (random.Next(2) == 1)
            {
                // return a random TEST_TERM
                return TEST_TERMS[random.Next(TEST_TERMS.Length)];
            }

            // return a random unicode term
            return RandomString();
        }

        /**
         * Returns a random string of 0 to 19 UTF-16 code units. The content is a
         * mix of ASCII, other BMP characters, valid surrogate pairs and
         * deliberately illegal unpaired surrogates.
         */
        private String RandomString()
        {
            int end = random.Next(20);
            if (buffer.Length < 1 + end)
            {
                char[] newBuffer = new char[(int) ((1 + end) * 1.25)];
                Array.Copy(buffer, 0, newBuffer, 0, buffer.Length);
                buffer = newBuffer;
            }

            // Fill every position that is returned. Stopping one short would
            // leave the final char holding stale data from an earlier call.
            for (int i = 0; i < end; i++)
            {
                int t = random.Next(6);
                if (0 == t && i < end - 1)
                {
                    // Make a surrogate pair (only when two positions remain)
                    // High surrogate
                    buffer[i++] = (char) NextInt(0xd800, 0xdc00);
                    // Low surrogate
                    buffer[i] = (char) NextInt(0xdc00, 0xe000);
                }
                else if (t <= 1)
                {
                    buffer[i] = (char) random.Next(0x80);
                }
                else if (2 == t)
                {
                    buffer[i] = (char) NextInt(0x80, 0x800);
                }
                else if (3 == t)
                {
                    buffer[i] = (char) NextInt(0x800, 0xd7ff);
                }
                else if (4 == t)
                {
                    buffer[i] = (char) NextInt(0xe000, 0xffff);
                }
                else if (5 == t)
                {
                    // Illegal unpaired surrogate: pick high or low with equal
                    // probability. Next(2) yields 0 or 1, whereas Next(1) can
                    // only yield 0 and would never select the high branch.
                    if (random.Next(2) == 1)
                    {
                        buffer[i] = (char) NextInt(0xd800, 0xdc00);
                    }
                    else
                    {
                        buffer[i] = (char) NextInt(0xdc00, 0xe000);
                    }
                }
            }
            return new String(buffer, 0, end);
        }

        /**
         * Returns a random integer in the given range;
         * start is inclusive and end is exclusive.
         */
        private int NextInt(int start, int end)
        {
            return start + random.Next(end - start);
        }
    }
}