/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections; using System.Linq; using System.Collections.Generic; namespace Lucene.Net.Analysis { /// A simple class that stores Strings as char[]'s in a /// hash table. Note that this is not a general purpose /// class. For example, it cannot remove items from the /// set, nor does it resize its hash table to be smaller, /// etc. It is designed to be quick to test if a char[] /// is in the set without the necessity of converting it /// to a String first. ///

/// Please note: This class implements but /// does not behave like it should in all cases. The generic type is /// , because you can add any object to it, /// that has a string representation. The add methods will use /// and store the result using a /// buffer. The same behaviour have the methods. /// The method returns an IEnumerable. /// For type safety also {@link #stringIterator()} is provided. ///

// TODO: java uses wildcards, .net doesn't have this, easiest way is to // make the entire class generic. Ultimately, though, since this // works with strings, I can't think of a reason not to just declare // this as an ISet. public class CharArraySet : ISet { bool _ReadOnly = false; const int INIT_SIZE = 8; char[][] _Entries; int _Count; bool _IgnoreCase; public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false)); private void Init(int startSize, bool ignoreCase) { this._IgnoreCase = ignoreCase; int size = INIT_SIZE; while (startSize + (startSize >> 2) > size) size <<= 1; _Entries = new char[size][]; } /// Create set with enough capacity to hold startSize /// terms /// public CharArraySet(int startSize, bool ignoreCase) { Init(startSize, ignoreCase); } public CharArraySet(IEnumerable c, bool ignoreCase) { Init(c.Count(), ignoreCase); AddItems(c); } /// Create set from a Collection of char[] or String public CharArraySet(IEnumerable c, bool ignoreCase) { Init(c.Count(), ignoreCase); AddItems(c); } private void AddItems(IEnumerable items) { foreach(var item in items) { Add(item.ToString()); } } /// Create set from entries private CharArraySet(char[][] entries, bool ignoreCase, int count) { this._Entries = entries; this._IgnoreCase = ignoreCase; this._Count = count; } /// true if the len chars of text starting at off /// are in the set /// public virtual bool Contains(char[] text, int off, int len) { return _Entries[GetSlot(text, off, len)] != null; } public virtual bool Contains(string text) { return _Entries[GetSlot(text)] != null; } private int GetSlot(char[] text, int off, int len) { int code = GetHashCode(text, off, len); int pos = code & (_Entries.Length - 1); char[] text2 = _Entries[pos]; if (text2 != null && !Equals(text, off, len, text2)) { int inc = ((code >> 8) + code) | 1; do { code += inc; pos = code & (_Entries.Length - 1); text2 = _Entries[pos]; } while (text2 != null && !Equals(text, off, len, text2)); } return pos; } /// Returns true if the String is in the set private int GetSlot(string text) { int code = GetHashCode(text); int pos = code & (_Entries.Length - 1); char[] text2 = _Entries[pos]; if (text2 != null && !Equals(text, text2)) { int inc = ((code >> 8) + code) | 1; do { code += inc; pos = code & (_Entries.Length - 1); text2 = _Entries[pos]; } while (text2 != null && !Equals(text, text2)); } return pos; } public bool Add(string text) { if (_ReadOnly) throw new NotSupportedException(); return Add(text.ToCharArray()); } /// Add this char[] directly to the set. /// If ignoreCase is true for this Set, the text array will be directly modified. /// The user should never modify this text array after calling this method. /// public bool Add(char[] text) { if (_ReadOnly) throw new NotSupportedException(); if (_IgnoreCase) for (int i = 0; i < text.Length; i++) text[i] = Char.ToLower(text[i]); int slot = GetSlot(text, 0, text.Length); if (_Entries[slot] != null) return false; _Entries[slot] = text; _Count++; if (_Count + (_Count >> 2) > _Entries.Length) { Rehash(); } return true; } private bool Equals(char[] text1, int off, int len, char[] text2) { if (len != text2.Length) return false; if (_IgnoreCase) { for (int i = 0; i < len; i++) { if (char.ToLower(text1[off + i]) != text2[i]) return false; } } else { for (int i = 0; i < len; i++) { if (text1[off + i] != text2[i]) return false; } } return true; } private bool Equals(string text1, char[] text2) { int len = text1.Length; if (len != text2.Length) return false; if (_IgnoreCase) { for (int i = 0; i < len; i++) { if (char.ToLower(text1[i]) != text2[i]) return false; } } else { for (int i = 0; i < len; i++) { if (text1[i] != text2[i]) return false; } } return true; } private void Rehash() { int newSize = 2 * _Entries.Length; char[][] oldEntries = _Entries; _Entries = new char[newSize][]; for (int i = 0; i < oldEntries.Length; i++) { char[] text = oldEntries[i]; if (text != null) { // todo: could be faster... no need to compare strings on collision _Entries[GetSlot(text, 0, text.Length)] = text; } } } private int GetHashCode(char[] text, int offset, int len) { int code = 0; int stop = offset + len; if (_IgnoreCase) { for (int i = offset; i < stop; i++) { code = code * 31 + char.ToLower(text[i]); } } else { for (int i = offset; i < stop; i++) { code = code * 31 + text[i]; } } return code; } private int GetHashCode(string text) { int code = 0; int len = text.Length; if (_IgnoreCase) { for (int i = 0; i < len; i++) { code = code * 31 + char.ToLower(text[i]); } } else { for (int i = 0; i < len; i++) { code = code * 31 + text[i]; } } return code; } public int Count { get { return _Count; } } public bool IsEmpty { get { return _Count == 0; } } public bool Contains(object item) { var text = item as char[]; return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString()); } public bool Add(object item) { return Add(item.ToString()); } void ICollection.Add(string item) { this.Add(item); } /// /// Returns an unmodifiable . This allows to provide /// unmodifiable views of internal sets for "read-only" use /// /// A Set for which the unmodifiable set it returns. /// A new unmodifiable /// ArgumentNullException of the given set is null public static CharArraySet UnmodifiableSet(CharArraySet set) { if(set == null) throw new ArgumentNullException("Given set is null"); if (set == EMPTY_SET) return EMPTY_SET; if (set._ReadOnly) return set; var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true}; return newSet; } /// /// returns a copy of the given set as a . If the given set /// is a the ignoreCase property will be preserved. /// /// A set to copy /// a copy of the given set as a . If the given set /// is a the ignoreCase property will be preserved. public static CharArraySet Copy(ISet set) { if (set == null) throw new ArgumentNullException("set", "Given set is null!"); if (set == EMPTY_SET) return EMPTY_SET; bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase; var arrSet = new CharArraySet(set.Count, ignoreCase); arrSet.AddItems(set); return arrSet; } public void Clear() { throw new NotSupportedException("Remove not supported!"); } public bool IsReadOnly { get { return _ReadOnly; } private set { _ReadOnly = value; } } /// Adds all of the elements in the specified collection to this collection public void UnionWith(IEnumerable other) { if (_ReadOnly) throw new NotSupportedException(); foreach (string s in other) { Add(s.ToCharArray()); } } /// Wrapper that calls UnionWith public void AddAll(IEnumerable coll) { UnionWith(coll); } #region Unneeded methods public void RemoveAll(ICollection c) { throw new NotSupportedException(); } public void RetainAll(ICollection c) { throw new NotSupportedException(); } void ICollection.CopyTo(string[] array, int arrayIndex) { throw new NotSupportedException(); } void ISet.IntersectWith(IEnumerable other) { throw new NotSupportedException(); } void ISet.ExceptWith(IEnumerable other) { throw new NotSupportedException(); } void ISet.SymmetricExceptWith(IEnumerable other) { throw new NotSupportedException(); } bool ISet.IsSubsetOf(IEnumerable other) { throw new NotSupportedException(); } bool ISet.IsSupersetOf(IEnumerable other) { throw new NotSupportedException(); } bool ISet.IsProperSupersetOf(IEnumerable other) { throw new NotSupportedException(); } bool ISet.IsProperSubsetOf(IEnumerable other) { throw new NotSupportedException(); } bool ISet.Overlaps(IEnumerable other) { throw new NotSupportedException(); } bool ISet.SetEquals(IEnumerable other) { throw new NotSupportedException(); } bool ICollection.Remove(string item) { throw new NotSupportedException(); } #endregion /// /// The IEnumerator<String> for this set. Strings are constructed on the fly, /// so use nextCharArray for more efficient access /// public class CharArraySetEnumerator : IEnumerator { readonly CharArraySet _Creator; int pos = -1; char[] cur; protected internal CharArraySetEnumerator(CharArraySet creator) { _Creator = creator; } public bool MoveNext() { cur = null; pos++; while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null) pos++; return cur != null; } /// do not modify the returned char[] public char[] NextCharArray() { return cur; } public string Current { get { return new string(NextCharArray()); } } public void Dispose() { } object IEnumerator.Current { get { return new string(NextCharArray()); } } public void Reset() { throw new NotImplementedException(); } } public IEnumerator StringEnumerator() { return new CharArraySetEnumerator(this); } public IEnumerator GetEnumerator() { return new CharArraySetEnumerator(this); } IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); } } }