# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

from unittest import TestCase, main
from lucene import *


class TestBinaryDocument(TestCase):
    """Round-trip binary and compressed stored fields through a RAM index."""

    binaryValStored = "this text will be stored as a byte array in the index"
    binaryValCompressed = "this text will be also stored and compressed as a byte array in the index"

    def _indexAndReopen(self, doc):
        """Write *doc* to a fresh RAMDirectory and return a reader on it.

        The directory and the reader are registered with ``addCleanup`` so
        they are closed even when an assertion in the calling test fails.
        Cleanups run last-in first-out, so the reader is closed before the
        directory.
        """
        directory = RAMDirectory()
        self.addCleanup(directory.close)

        writer = IndexWriter(directory,
                             StandardAnalyzer(Version.LUCENE_CURRENT), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            writer.addDocument(doc)
        finally:
            writer.close()

        reader = IndexReader.open(directory, False)
        self.addCleanup(reader.close)

        return reader

    def testBinaryFieldInIndex(self):
        """Store a binary and a string field, read both back, then delete."""
        data = JArray('byte')(bytes(self.binaryValStored, 'utf-8'))
        binaryFldStored = Field("binaryStored", data, Field.Store.YES)
        stringFldStored = Field("stringStored", self.binaryValStored,
                                Field.Store.YES, Field.Index.NO,
                                Field.TermVector.NO)

        # binary fields with store off are not allowed
        with self.assertRaises(JavaError) as raised:
            Field("fail", data, Field.Store.NO)
        self.assertEqual(
            raised.exception.getJavaException().getClass().getName(),
            'java.lang.IllegalArgumentException')

        doc = Document()
        doc.add(binaryFldStored)
        doc.add(stringFldStored)

        # test for field count
        self.assertEqual(2, doc.fields.size())

        # add the doc to a ram index, then open a reader and fetch the
        # document
        reader = self._indexAndReopen(doc)
        docFromReader = reader.document(0)
        self.assertIsNotNone(docFromReader)

        # fetch the binary stored field and compare its content with the
        # original one
        data = docFromReader.getBinaryValue("binaryStored")
        binaryFldStoredTest = data.string_
        self.assertEqual(binaryFldStoredTest, self.binaryValStored)

        # fetch the string field and compare its content with the original
        # one
        stringFldStoredTest = docFromReader.get("stringStored")
        self.assertEqual(stringFldStoredTest, self.binaryValStored)

        # delete the document from index
        reader.deleteDocument(0)
        self.assertEqual(0, reader.numDocs())

    def testCompressionTools(self):
        """Store compressed bytes and a compressed string, then decompress."""
        data = JArray('byte')(bytes(self.binaryValCompressed, 'utf-8'))
        binaryFldCompressed = Field("binaryCompressed",
                                    CompressionTools.compress(data),
                                    Field.Store.YES)
        stringFldCompressed = Field(
            "stringCompressed",
            CompressionTools.compressString(self.binaryValCompressed),
            Field.Store.YES)

        doc = Document()
        doc.add(binaryFldCompressed)
        doc.add(stringFldCompressed)

        # add the doc to a ram index, then open a reader and fetch the
        # document
        reader = self._indexAndReopen(doc)
        docFromReader = reader.document(0)
        self.assertIsNotNone(docFromReader)

        # fetch the binary compressed field and compare its content with
        # the original one
        data = CompressionTools.decompress(
            docFromReader.getBinaryValue("binaryCompressed"))
        binaryFldCompressedTest = data.string_
        self.assertEqual(binaryFldCompressedTest, self.binaryValCompressed)
        self.assertEqual(
            CompressionTools.decompressString(
                docFromReader.getBinaryValue("stringCompressed")),
            self.binaryValCompressed)


if __name__ == '__main__':
    import sys
    import lucene

    lucene.initVM()
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        # Run the suite over and over. unittest's main() finishes by raising
        # SystemExit, so that (and any ordinary error) is swallowed to keep
        # looping; KeyboardInterrupt is left alone so Ctrl-C stops the loop.
        while True:
            try:
                main()
            except (Exception, SystemExit):
                pass
    else:
        main()