# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

import os

from unittest import TestCase
from time import time
from datetime import timedelta

from lucene import \
    IndexWriter, SimpleAnalyzer, Document, Field, System, File, \
    SimpleFSDirectory, RAMDirectory


class FSversusRAMDirectoryTest(TestCase):
    """Time indexing the same documents into a RAMDirectory and an FS one.

    The test only reports the two elapsed times; it does not assert that
    either directory implementation is faster.
    """

    def __init__(self, *args, **kwargs):
        """Build the shared document list once per test-case instance.

        Positional and keyword arguments are forwarded unchanged to
        ``TestCase.__init__``.
        """
        super(FSversusRAMDirectoryTest, self).__init__(*args, **kwargs)
        self.docs = self.loadDocuments(3000, 5)

    def setUp(self):
        """Open a fresh in-memory directory and an on-disk directory.

        The on-disk index lives in ``fs-index`` under the JVM's
        ``java.io.tmpdir`` property ("tmp" is passed as the default).
        """
        fsIndexDir = os.path.join(System.getProperty("java.io.tmpdir", "tmp"),
                                  "fs-index")
        self.ramDir = RAMDirectory()
        self.fsDir = SimpleFSDirectory(File(fsIndexDir))

    def tearDown(self):
        """Close both directories opened by ``setUp``."""
        try:
            self.ramDir.close()
        finally:
            # Close the on-disk directory even if closing the RAM one fails.
            self.fsDir.close()

    def testTiming(self):
        """Index the documents into each directory and print both timings."""
        ramTiming = self.timeIndexWriter(self.ramDir)
        fsTiming = self.timeIndexWriter(self.fsDir)

        # Intentionally not asserted: only the timings are reported.
        #self.assert_(fsTiming > ramTiming)

        # Single-argument print(...) emits the same text under Python 2
        # (print statement) and Python 3 (print function).
        print("RAMDirectory Time: %s" % ramTiming)
        print("FSDirectory Time : %s" % fsTiming)

    def timeIndexWriter(self, dir):
        """Return the elapsed time of ``addDocuments(dir)`` as a timedelta.

        ``dir`` is the directory object handed to ``addDocuments``; the
        name shadows the builtin but is kept for interface compatibility.
        """
        start = time()
        self.addDocuments(dir)

        return timedelta(seconds=time() - start)

    def addDocuments(self, dir):
        """Index every entry of ``self.docs`` into ``dir`` and optimize.

        A new index is created (the ``True`` create flag) with a
        SimpleAnalyzer and unlimited field length.  Each entry is added
        as one document carrying the same text in four fields that differ
        only in their ``Field.Store`` / ``Field.Index`` settings.  The
        writer is closed even when indexing or optimizing raises.
        """
        writer = IndexWriter(dir, SimpleAnalyzer(), True,
                             IndexWriter.MaxFieldLength.UNLIMITED)

        #
        # change to adjust performance of indexing with FSDirectory
        # writer.mergeFactor = writer.mergeFactor
        # writer.maxMergeDocs = writer.maxMergeDocs
        # writer.minMergeDocs = writer.minMergeDocs
        #

        try:
            for word in self.docs:
                doc = Document()
                doc.add(Field("keyword", word,
                              Field.Store.YES, Field.Index.NOT_ANALYZED))
                doc.add(Field("unindexed", word,
                              Field.Store.YES, Field.Index.NO))
                doc.add(Field("unstored", word,
                              Field.Store.NO, Field.Index.ANALYZED))
                doc.add(Field("text", word,
                              Field.Store.YES, Field.Index.ANALYZED))
                writer.addDocument(doc)

            writer.optimize()
        finally:
            # Always release the writer so a failure above does not leak it.
            writer.close()

    def loadDocuments(self, numDocs, wordsPerDoc):
        """Return ``numDocs`` identical strings of ``wordsPerDoc`` words.

        Each string is "Bibamus " repeated ``wordsPerDoc`` times (trailing
        space included).
        """
        return ["Bibamus " * wordsPerDoc] * numDocs