# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

from unittest import TestCase, main
from lucene import JArray

from java.io import StringReader
from java.lang import Boolean
from org.apache.lucene.analysis.tokenattributes import \
    OffsetAttribute, CharTermAttribute, TypeAttribute, \
    PositionIncrementAttribute
from org.apache.pylucene.util import PythonAttributeImpl


class BaseTokenStreamTestCase(TestCase):
    """
    some helpers to test Analyzers and TokenStreams
    """

    class CheckClearAttributesAttributeImpl(PythonAttributeImpl):

        def __init__(_self):
            super(PythonAttributeImpl, _self).__init__()
            _self.clearCalled = False

        def getAndResetClearCalled(_self):
            try:
                return _self.clearCalled
            finally:
                _self.clearCalled = False

        def clear(_self):
            _self.clearCalled = True

        def equals(_self, other):
            # the nested class name is not visible from inside its own
            # methods, so qualify it through the enclosing class
            cls = BaseTokenStreamTestCase.CheckClearAttributesAttributeImpl
            return (cls.instance_(other) and
                    cls.cast_(other).clearCalled == _self.clearCalled)

        def hashCode(_self):
            return 76137213 ^ Boolean.valueOf(_self.clearCalled).hashCode()

        def copyTo(_self, target):
            cls = BaseTokenStreamTestCase.CheckClearAttributesAttributeImpl
            cls.cast_(target).clear()

    def _assertTokenStreamContents(self, ts, output,
                                   startOffsets=None, endOffsets=None,
                                   types=None, posIncrements=None,
                                   finalOffset=None):

        #checkClearAtt = ts.addAttribute(PythonAttribute.class_);

        self.assertTrue(output is not None)
        self.assertTrue(ts.hasAttribute(CharTermAttribute.class_),
                        "has no CharTermAttribute")
        termAtt = ts.getAttribute(CharTermAttribute.class_)

        offsetAtt = None
        if (startOffsets is not None or
            endOffsets is not None or
            finalOffset is not None):
            self.assertTrue(ts.hasAttribute(OffsetAttribute.class_),
                            "has no OffsetAttribute")
            offsetAtt = ts.getAttribute(OffsetAttribute.class_)

        typeAtt = None
        if types is not None:
            self.assertTrue(ts.hasAttribute(TypeAttribute.class_),
                            "has no TypeAttribute")
            typeAtt = ts.getAttribute(TypeAttribute.class_)

        posIncrAtt = None
        if posIncrements is not None:
            self.assertTrue(ts.hasAttribute(PositionIncrementAttribute.class_),
                            "has no PositionIncrementAttribute")
            posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class_)

        ts.reset()
        for i in range(len(output)):
            # extra safety to enforce that the state is not preserved,
            # and also assign bogus values
            ts.clearAttributes()
            termAtt.setEmpty().append("bogusTerm")
            if offsetAtt is not None:
                offsetAtt.setOffset(14584724, 24683243)
            if typeAtt is not None:
                typeAtt.setType("bogusType")
            if posIncrAtt is not None:
                posIncrAtt.setPositionIncrement(45987657)

            self.assertTrue(ts.incrementToken(), "token %d exists" %(i))
            self.assertEqual(output[i], termAtt.toString(), "term %d" %(i))
            if startOffsets is not None:
                self.assertEqual(startOffsets[i], offsetAtt.startOffset(),
                                 "startOffset %d" %(i))
            if endOffsets is not None:
                self.assertEqual(endOffsets[i], offsetAtt.endOffset(),
                                 "endOffset %d" %(i))
            if types is not None:
                self.assertEqual(types[i], typeAtt.type(), "type %d" %(i))
            if posIncrements is not None:
                self.assertEqual(posIncrements[i],
                                 posIncrAtt.getPositionIncrement(),
                                 "posIncrement %d" %(i))

        self.assertTrue(not ts.incrementToken(), "end of stream")
        ts.end()
        if finalOffset is not None:
            # end() leaves the final offset in the OffsetAttribute;
            # this mirrors the check in Lucene's Java BaseTokenStreamTestCase
            self.assertEqual(finalOffset, offsetAtt.endOffset(),
                             "finalOffset")
        ts.close()

    def _assertAnalyzesTo(self, a, input, output,
                          startOffsets=None, endOffsets=None,
                          posIncrements=None):

        ts = a.tokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        None, posIncrements)

    def _assertAnalyzesToReuse(self, a, input, output,
                               startOffsets=None, endOffsets=None,
                               types=None, posIncrements=None):

        ts = a.reusableTokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        types, posIncrements)

    # simple utility methods for testing stemmers

    def _checkOneTerm(self, a, input, expected):
        # expected is a single term; wrap it in a list so JArray('string')
        # builds a one-element array rather than an array of the term's
        # individual characters
        self._assertAnalyzesTo(a, input, JArray('string')([expected]))

    def _checkOneTermReuse(self, a, input, expected):
        self._assertAnalyzesToReuse(a, input, JArray('string')([expected]))
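
# --------------------------------------------------------------------
# Minimal usage sketch, not part of the original helpers: a concrete
# test subclasses BaseTokenStreamTestCase and drives an analyzer
# through _assertAnalyzesTo. The WhitespaceAnalyzer import path and
# the Version constant are assumptions that depend on the Lucene
# version PyLucene was built against; adjust them to your build. The
# example class lives inside the __main__ guard so that modules
# importing this base class do not pick it up as a test.

if __name__ == '__main__':
    import lucene
    lucene.initVM()

    # assumed 4.x-style package locations; older builds expose
    # WhitespaceAnalyzer under org.apache.lucene.analysis instead
    from org.apache.lucene.analysis.core import WhitespaceAnalyzer
    from org.apache.lucene.util import Version

    class WhitespaceAnalyzerExample(BaseTokenStreamTestCase):

        def testSimpleTokens(self):
            # "foo" spans offsets [0, 3), "bar" spans [4, 7)
            analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
            self._assertAnalyzesTo(analyzer, "foo bar", ["foo", "bar"],
                                   startOffsets=[0, 4], endOffsets=[3, 7],
                                   posIncrements=[1, 1])

    main()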