/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using System.IO;
using Version = Lucene.Net.Util.Version;

/**
 * Analyzer for the Brazilian Portuguese language. Supports an external list of stopwords
 * (words that will not be indexed at all) and an external list of exclusions (words that
 * will not be stemmed, but are still indexed).
 */
namespace Lucene.Net.Analysis.BR
{
    /// <summary>
    /// Analyzer for Brazilian Portuguese text. Input is tokenized with a
    /// <see cref="StandardTokenizer"/> and then passed through
    /// <see cref="LowerCaseFilter"/>, <see cref="StandardFilter"/>,
    /// <see cref="StopFilter"/> and <see cref="BrazilianStemFilter"/>, in that order.
    /// A stop-word set (words dropped by the stop filter) and a stem-exclusion set
    /// (words handed to the stem filter as exclusions) can be supplied by the caller.
    /// </summary>
    public sealed class BrazilianAnalyzer : Analyzer
    {
        //TODO: Make this private in 3.1
        /// <summary>
        /// List of typical Brazilian stopwords.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the list contains "nas" and "pelas" twice and the entry "propios".
        /// These look like typos (possibly for "pela" and "proprios"), but the data is kept
        /// exactly as it was so that analysis output does not change -- confirm before editing.
        /// The array is public and mutable; the default stop set is built from whatever it
        /// contains when <c>DefaultSetHolder</c> is first initialized.
        /// </remarks>
        public static string[] BRAZILIAN_STOP_WORDS = {
            "a", "ainda", "alem", "ambas", "ambos", "antes",
            "ao", "aonde", "aos", "apos", "aquele", "aqueles",
            "as", "assim", "com", "como", "contra", "contudo",
            "cuja", "cujas", "cujo", "cujos", "da", "das", "de",
            "dela", "dele", "deles", "demais", "depois", "desde",
            "desta", "deste", "dispoe", "dispoem", "diversa",
            "diversas", "diversos", "do", "dos", "durante", "e",
            "ela", "elas", "ele", "eles", "em", "entao", "entre",
            "essa", "essas", "esse", "esses", "esta", "estas",
            "este", "estes", "ha", "isso", "isto", "logo", "mais",
            "mas", "mediante", "menos", "mesma", "mesmas", "mesmo",
            "mesmos", "na", "nas", "nao", "nas", "nem", "nesse", "neste",
            "nos", "o", "os", "ou", "outra", "outras", "outro", "outros",
            "pelas", "pelas", "pelo", "pelos", "perante", "pois", "por",
            "porque", "portanto", "proprio", "propios", "quais", "qual",
            "qualquer", "quando", "quanto", "que", "quem", "quer", "se",
            "seja", "sem", "sendo", "seu", "seus", "sob", "sobre", "sua",
            "suas", "tal", "tambem", "teu", "teus", "toda", "todas",
            "todo",
            "todos", "tua", "tuas", "tudo", "um", "uma", "umas", "uns"
        };

        /// <summary>
        /// Returns an unmodifiable instance of the default stop-words set.
        /// </summary>
        /// <returns>The shared set built from <see cref="BRAZILIAN_STOP_WORDS"/>.</returns>
        public static ISet<string> GetDefaultStopSet()
        {
            return DefaultSetHolder.DEFAULT_STOP_SET;
        }

        /// <summary>
        /// Holder whose only job is to build the default stop set once, in its static
        /// field initializer, instead of in the analyzer's own static initialization.
        /// </summary>
        private static class DefaultSetHolder
        {
            // Never reassigned, so it is readonly. The second CharArraySet argument is
            // presumably the ignore-case flag -- TODO confirm against CharArraySet.
            internal static readonly ISet<string> DEFAULT_STOP_SET =
                CharArraySet.UnmodifiableSet(new CharArraySet(BRAZILIAN_STOP_WORDS, false));
        }

        /// <summary>
        /// Contains the stopwords used with the StopFilter. Assigned exactly once, by the
        /// (Version, ISet) constructor that every other constructor chains to.
        /// </summary>
        private readonly ISet<string> stoptable;

        /// <summary>
        /// Lucene compatibility version passed to the tokenizer and the stop filter.
        /// </summary>
        private readonly Version matchVersion;

        /// <summary>
        /// Contains words that should be indexed but not stemmed. Starts out empty and can
        /// be replaced through the constructor or the SetStemExclusionTable overloads.
        /// </summary>
        private ISet<string> excltable = new HashSet<string>();

        /// <summary>
        /// Builds an analyzer with the default stop words (see <see cref="GetDefaultStopSet"/>).
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        public BrazilianAnalyzer(Version matchVersion)
            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
        {
        }

        /// <summary>
        /// Builds an analyzer with the given stop words. The set is copied and wrapped as
        /// unmodifiable, so later changes to the caller's set do not affect this analyzer.
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        /// <param name="stopwords">a stopword set</param>
        public BrazilianAnalyzer(Version matchVersion, ISet<string> stopwords)
        {
            this.matchVersion = matchVersion;
            stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
        }

        /// <summary>
        /// Builds an analyzer with the given stop words and stemming exclusion words.
        /// Both sets are copied and wrapped as unmodifiable.
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        /// <param name="stopwords">a stopword set</param>
        /// <param name="stemExclusionSet">a set of words that must not be stemmed</param>
        public BrazilianAnalyzer(Version matchVersion, ISet<string> stopwords,
                                 ISet<string> stemExclusionSet)
            : this(matchVersion, stopwords)
        {
            excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
        }

        /// <summary>
        /// Builds an analyzer with the given stop words.
        /// Deprecated: use the (Version, ISet) constructor instead.
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        /// <param name="stopwords">the stop words</param>
        public BrazilianAnalyzer(Version matchVersion, params string[] stopwords)
            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
        {
        }

        /// <summary>
        /// Builds an analyzer that uses the keys of the given dictionary as stop words.
        /// Deprecated: use the (Version, ISet) constructor instead.
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        /// <param name="stopwords">dictionary whose keys are the stop words; values are ignored</param>
        public BrazilianAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
            : this(matchVersion, stopwords.Keys.ToArray())
        {
        }

        /// <summary>
        /// Builds an analyzer with the stop words loaded from the given file via
        /// WordlistLoader.GetWordSet.
        /// Deprecated: use the (Version, ISet) constructor instead.
        /// </summary>
        /// <param name="matchVersion">lucene compatibility version</param>
        /// <param name="stopwords">file containing the stop words</param>
        public BrazilianAnalyzer(Version matchVersion, FileInfo stopwords)
            : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
        {
        }

        /// <summary>
        /// Builds an exclusion list from an array of strings.
        /// Deprecated: use the (Version, ISet, ISet) constructor instead.
        /// </summary>
        /// <param name="exclusionlist">words that must not be stemmed</param>
        public void SetStemExclusionTable(params string[] exclusionlist)
        {
            excltable = StopFilter.MakeStopSet(exclusionlist);
            // Drop the cached streams so ReusableTokenStream rebuilds the chain
            // (and therefore the stem filter) with the new exclusion table.
            PreviousTokenStream = null;
        }

        /// <summary>
        /// Builds an exclusion list from the keys of a dictionary.
        /// Deprecated: use the (Version, ISet, ISet) constructor instead.
        /// </summary>
        /// <param name="exclusionlist">dictionary whose keys must not be stemmed; values are ignored</param>
        public void SetStemExclusionTable(IDictionary<string, string> exclusionlist)
        {
            excltable = new HashSet<string>(exclusionlist.Keys);
            // Drop the cached streams so ReusableTokenStream rebuilds the chain
            // (and therefore the stem filter) with the new exclusion table.
            PreviousTokenStream = null;
        }

        /// <summary>
        /// Builds an exclusion list from the words contained in the given file.
        /// Deprecated: use the (Version, ISet, ISet) constructor instead.
        /// </summary>
        /// <param name="exclusionlist">file containing the words that must not be stemmed</param>
        public void SetStemExclusionTable(FileInfo exclusionlist)
        {
            excltable = WordlistLoader.GetWordSet(exclusionlist);
            // Drop the cached streams so ReusableTokenStream rebuilds the chain
            // (and therefore the stem filter) with the new exclusion table.
            PreviousTokenStream = null;
        }

        /// <summary>
        /// Wraps the given token source in this analyzer's filter chain:
        /// LowerCaseFilter, StandardFilter, StopFilter (using the stop table) and
        /// BrazilianStemFilter (using the current exclusion table). Shared by
        /// <c>TokenStream</c> and <c>ReusableTokenStream</c> so both always build the same chain.
        /// </summary>
        /// <param name="source">the tokenizer (or stream) to filter</param>
        /// <returns>the outermost filter of the chain</returns>
        private TokenStream BuildFilterChain(TokenStream source)
        {
            TokenStream chain = new LowerCaseFilter(source);
            chain = new StandardFilter(chain);
            chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                   chain, stoptable);
            chain = new BrazilianStemFilter(chain, excltable);
            return chain;
        }

        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided reader.
        /// </summary>
        /// <param name="fieldName">name of the field being analyzed (not used by this analyzer)</param>
        /// <param name="reader">source of the text to tokenize</param>
        /// <returns>
        /// A TokenStream built from a StandardTokenizer filtered with LowerCaseFilter,
        /// StandardFilter, StopFilter, and BrazilianStemFilter.
        /// </returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            return BuildFilterChain(new StandardTokenizer(matchVersion, reader));
        }

        /// <summary>
        /// Pair of tokenizer and fully wrapped stream cached in PreviousTokenStream so
        /// that ReusableTokenStream can reset the tokenizer instead of rebuilding the chain.
        /// </summary>
        private sealed class SavedStreams
        {
            internal Tokenizer source;
            internal TokenStream result;
        }

        /// <summary>
        /// Returns a (possibly reused) TokenStream which tokenizes all the text in the
        /// provided reader. The chain is built on first use and stored in
        /// PreviousTokenStream; later calls only reset the cached tokenizer onto the new reader.
        /// </summary>
        /// <param name="fieldName">name of the field being analyzed (not used by this analyzer)</param>
        /// <param name="reader">source of the text to tokenize</param>
        /// <returns>
        /// A TokenStream built from a StandardTokenizer filtered with LowerCaseFilter,
        /// StandardFilter, StopFilter, and BrazilianStemFilter.
        /// </returns>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            SavedStreams streams = (SavedStreams) PreviousTokenStream;
            if (streams == null)
            {
                // Nothing cached (first call, or the exclusion table was replaced).
                streams = new SavedStreams();
                streams.source = new StandardTokenizer(matchVersion, reader);
                streams.result = BuildFilterChain(streams.source);
                PreviousTokenStream = streams;
            }
            else
            {
                streams.source.Reset(reader);
            }
            return streams.result;
        }
    }
}