/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using System;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.De;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version=Lucene.Net.Util.Version;

namespace Lucene.Net.Analyzers.De
{
    /**
     * Test the German stemmer. The stemming algorithm is known to work less 
     * than perfect, as it doesn't use any word lists with exceptions. We 
     * also check some of the cases where the algorithm is wrong.
     *
     */
    [TestFixture]
    public class TestGermanStemFilter : BaseTokenStreamTestCase
    {
        const string TestFile = @"De\data.txt";
        const string TestFileDin2 = @"De\data_din2.txt";

        [Test]
        public void TestDin1Stemming()
        {
            // read test cases from external file:
            using (var fis = new FileStream(TestFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var breader = new StreamReader(fis, Encoding.GetEncoding("iso-8859-1")))
            {
                while (true)
                {
                    String line = breader.ReadLine();
                    if (line == null)
                        break;
                    line = line.Trim();
                    if (line.StartsWith("#") || string.IsNullOrEmpty(line))
                        continue; // ignore comments and empty lines
                    String[] parts = line.Split(';');
                    //System.out.println(parts[0] + " -- " + parts[1]);
                    Check(parts[0], parts[1], false);
                }
            }
        }

        [Test]
        public void TestDin2Stemming()
        {
            // read test cases from external file(s):
            foreach (var file in new[] { TestFile, TestFileDin2 })
            {
                using (var fis = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (var breader = new StreamReader(fis, Encoding.GetEncoding("iso-8859-1")))
                {
                    string line;
                    while ((line = breader.ReadLine()) != null)
                    {
                        line = line.Trim();
                        if (line.StartsWith("#") || string.IsNullOrEmpty(line))
                            continue; // ignore comments and empty lines

                        var parts = line.Split(';');
                        Check(parts[0], parts[1], true);
                    }
                }
            }
        }

        [Test]
        public void TestReusableTokenStream()
        {
            Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
            CheckReuse(a, "Tisch", "tisch");
            CheckReuse(a, "Tische", "tisch");
            CheckReuse(a, "Tischen", "tisch");
        }

        /**
         * subclass that acts just like whitespace analyzer for testing
         */
        private sealed class GermanSubclassAnalyzer : GermanAnalyzer
        {
            public GermanSubclassAnalyzer(Version matchVersion)
                : base(matchVersion)
            {
            }

            public override TokenStream TokenStream(String fieldName, TextReader reader)
            {
                return new WhitespaceTokenizer(reader);
            }
        }

        [Test]
        public void TestLucene1678BwComp()
        {
            CheckReuse(new GermanSubclassAnalyzer(Version.LUCENE_CURRENT), "Tischen", "Tischen");
        }

        /* 
         * Test that changes to the exclusion table are applied immediately
         * when using reusable token streams.
         */
        [Test]
        public void TestExclusionTableReuse()
        {
            var a = new GermanAnalyzer(Version.LUCENE_CURRENT);
            CheckReuse(a, "tischen", "tisch");
            a.SetStemExclusionTable(new[] { "tischen" });
            CheckReuse(a, "tischen", "tischen");
        }

        private void Check(String input, String expected, bool useDin2)
        {
            CheckOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT, useDin2), input, expected);
        }

        private void CheckReuse(Analyzer a, String input, String expected)
        {
            CheckOneTermReuse(a, input, expected);
        }
    }
}