# ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== from lia.analysis.AnalyzerUtils import AnalyzerUtils from lucene import Version, \ StopAnalyzer, SimpleAnalyzer, WhitespaceAnalyzer, StandardAnalyzer class AnalyzerDemo(object): examples = ["The quick brown fox jumped over the lazy dogs", "XY&Z Corporation - xyz@example.com"] analyzers = [WhitespaceAnalyzer(), SimpleAnalyzer(), StopAnalyzer(Version.LUCENE_CURRENT), StandardAnalyzer(Version.LUCENE_CURRENT)] def main(cls, argv): # Use the embedded example strings, unless # command line arguments are specified, then use those. strings = cls.examples if len(argv) > 1: strings = argv[1:] for string in strings: cls.analyze(string) def analyze(cls, text): print 'Analyzing "%s"' %(text) for analyzer in cls.analyzers: name = type(analyzer).__name__ print " %s:" %(name), AnalyzerUtils.displayTokens(analyzer, text) print print main = classmethod(main) analyze = classmethod(analyze) if __name__ == "__main__": import sys AnalyzerDemo.main(sys.argv)