# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

from unittest import TestCase

from lucene import \
    IndexWriter, Term, RAMDirectory, Document, Field, \
    IndexSearcher, QueryParser, Version

from lia.analysis.AnalyzerUtils import AnalyzerUtils
from lia.analysis.positional.PositionalPorterStopAnalyzer import \
    PositionalPorterStopAnalyzer


class PositionalPorterStopAnalyzerTest(TestCase):
    """Query tests against an index built with PositionalPorterStopAnalyzer.

    Each test runs against a fresh in-memory index (see setUp) holding a
    single document whose "contents" field is the sentence
    "The quick brown fox jumps over the lazy dogs".
    """

    # One analyzer instance shared by indexing and by every query parser.
    porterAnalyzer = PositionalPorterStopAnalyzer()

    def setUp(self):
        """Build a one-document index in a new RAMDirectory."""
        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, self.porterAnalyzer, True,
                             IndexWriter.MaxFieldLength.UNLIMITED)
        try:
            doc = Document()
            doc.add(Field("contents",
                          "The quick brown fox jumps over the lazy dogs",
                          Field.Store.YES, Field.Index.ANALYZED))
            writer.addDocument(doc)
        finally:
            # Always release the writer, even if adding the document fails.
            writer.close()

    def testStems(self):
        """Queries using other word forms must still hit the document."""
        # Read-only searcher, opened the same way as in the other tests.
        searcher = IndexSearcher(self.directory, True)
        try:
            query = QueryParser(Version.LUCENE_CURRENT, "contents",
                                self.porterAnalyzer).parse("laziness")
            topDocs = searcher.search(query, 50)
            self.assertEqual(1, topDocs.totalHits, "lazi")

            query = QueryParser(Version.LUCENE_CURRENT, "contents",
                                self.porterAnalyzer).parse('"fox jumped"')
            topDocs = searcher.search(query, 50)
            self.assertEqual(1, topDocs.totalHits,
                             "jump jumps jumped jumping")
        finally:
            searcher.close()

    def testExactPhrase(self):
        """The phrase "over the lazy" is expected to produce no hits here."""
        searcher = IndexSearcher(self.directory, True)
        try:
            # NOTE(review): this is the only test that pins
            # Version.LUCENE_24 instead of LUCENE_CURRENT. Presumably the
            # zero-hit expectation relies on the older query parser
            # behaviour around the removed stop word - confirm before
            # changing the version constant.
            query = QueryParser(Version.LUCENE_24, "contents",
                                self.porterAnalyzer).parse('"over the lazy"')
            topDocs = searcher.search(query, 50)
            self.assertEqual(0, topDocs.totalHits, "exact match not found!")
        finally:
            searcher.close()

    def testWithSlop(self):
        """With a phrase slop of 1, "over the lazy" must hit the document."""
        searcher = IndexSearcher(self.directory, True)
        try:
            parser = QueryParser(Version.LUCENE_CURRENT, "contents",
                                 self.porterAnalyzer)
            parser.setPhraseSlop(1)

            query = parser.parse('"over the lazy"')
            topDocs = searcher.search(query, 50)
            self.assertEqual(1, topDocs.totalHits, "hole accounted for")
        finally:
            searcher.close()

    @classmethod
    def main(cls):
        """Display the sample sentence's tokens with their positions."""
        text = "The quick brown fox jumps over the lazy dogs"
        AnalyzerUtils.displayTokensWithPositions(cls.porterAnalyzer, text)
        # Parenthesised form emits the same blank line under Python 2 and
        # is also valid Python 3 syntax.
        print('')