package org.apache.lucene.analysis; import java.util.AbstractSet; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.Set; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * A simple class that stores Strings as char[]'s in a * hash table. Note that this is not a general purpose * class. For example, it cannot remove items from the * set, nor does it resize its hash table to be smaller, * etc. It is designed to be quick to test if a char[] * is in the set without the necessity of converting it * to a String first. *

// Closes the class-description comment that is still open on the preceding lines. */
/**
 * A simple set that stores strings as {@code char[]} in an open-addressing
 * hash table. It is not a general purpose class: items cannot be removed and
 * the table never shrinks. It is designed to make it quick to test whether a
 * slice of a {@code char[]} is in the set without first converting it to a
 * {@link String}.
 * <p>
 * <em>Please note:</em> This class implements {@link java.util.Set Set} but
 * does not behave like it should in all cases. The generic type is
 * {@code Set<Object>}, because you can add any object to it that has a string
 * representation. The add methods use {@link Object#toString} and store the
 * result in a {@code char[]} buffer; the {@code contains()} methods behave the
 * same way. {@link #iterator()} returns an iterator whose elements are
 * {@code String}s; for type safety {@link #stringIterator()} is provided too.
 */
public class CharArraySet extends AbstractSet<Object> {
  private final static int INIT_SIZE = 8;

  /** Hash table; its length is always a power of two and unused slots are null. */
  private char[][] entries;
  /** Number of occupied slots in {@link #entries}. */
  private int count;
  /** If true, entries are stored lower-cased and lookups lower-case their input. */
  private final boolean ignoreCase;

  public static final CharArraySet EMPTY_SET =
      CharArraySet.unmodifiableSet(new CharArraySet(0, false));

  /**
   * Creates a set with enough capacity to hold {@code startSize} terms.
   *
   * @param startSize expected number of terms
   * @param ignoreCase true if matching should be case-insensitive
   */
  public CharArraySet(int startSize, boolean ignoreCase) {
    this.ignoreCase = ignoreCase;
    int size = INIT_SIZE;
    // Keep doubling so the table stays a power of two with 25% head room.
    while (startSize + (startSize >> 2) > size) {
      size <<= 1;
    }
    entries = new char[size][];
  }

  /**
   * Creates a set from a Collection of {@code char[]} or String (any other
   * element is added through its {@code toString()} representation).
   *
   * @param c elements to add
   * @param ignoreCase true if matching should be case-insensitive
   */
  public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
    this(c.size(), ignoreCase);
    addAll(c);
  }

  /** Creates a set directly on top of an existing table; the array is not copied. */
  private CharArraySet(char[][] entries, boolean ignoreCase, int count) {
    this.entries = entries;
    this.ignoreCase = ignoreCase;
    this.count = count;
  }

  /**
   * Returns true if the {@code len} chars of {@code text} starting at
   * {@code off} are in the set.
   */
  public boolean contains(char[] text, int off, int len) {
    return entries[getSlot(text, off, len)] != null;
  }

  /** Returns true if the {@code CharSequence} is in the set. */
  public boolean contains(CharSequence cs) {
    return entries[getSlot(cs)] != null;
  }

  /**
   * Returns the index of the slot that holds the given text, or of the empty
   * slot where it would be inserted.
   */
  private int getSlot(char[] text, int off, int len) {
    int code = getHashCode(text, off, len);
    int pos = code & (entries.length - 1);
    char[] text2 = entries[pos];
    if (text2 != null && !equals(text, off, len, text2)) {
      // The probe step is forced odd, so with a power-of-two table every slot is reachable.
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (entries.length - 1);
        text2 = entries[pos];
      } while (text2 != null && !equals(text, off, len, text2));
    }
    return pos;
  }

  /**
   * Returns the index of the slot that holds the given text, or of the empty
   * slot where it would be inserted.
   */
  private int getSlot(CharSequence text) {
    int code = getHashCode(text);
    int pos = code & (entries.length - 1);
    char[] text2 = entries[pos];
    if (text2 != null && !equals(text, text2)) {
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (entries.length - 1);
        text2 = entries[pos];
      } while (text2 != null && !equals(text, text2));
    }
    return pos;
  }

  /** Adds this CharSequence to the set. */
  public boolean add(CharSequence text) {
    return add(text.toString()); // could be more efficient
  }

  /** Adds this String to the set. */
  public boolean add(String text) {
    return add(text.toCharArray());
  }

  /**
   * Adds this {@code char[]} directly to the set.
   * If ignoreCase is true for this set, the text array will be directly
   * modified (lower-cased in place). The user should never modify this text
   * array after calling this method.
   *
   * @return true if the text was not already present
   */
  public boolean add(char[] text) {
    if (ignoreCase) {
      for (int i = 0; i < text.length; i++) {
        text[i] = Character.toLowerCase(text[i]);
      }
    }
    int slot = getSlot(text, 0, text.length);
    if (entries[slot] != null) {
      return false;
    }
    entries[slot] = text;
    count++;

    // Grow once the table is more than 80% full so probing always finds an empty slot.
    if (count + (count >> 2) > entries.length) {
      rehash();
    }

    return true;
  }

  /** Compares a slice of {@code text1} with a stored entry, honoring ignoreCase. */
  private boolean equals(char[] text1, int off, int len, char[] text2) {
    if (len != text2.length) {
      return false;
    }
    if (ignoreCase) {
      // Stored entries are already lower-cased, so only the probe text is folded.
      for (int i = 0; i < len; i++) {
        if (Character.toLowerCase(text1[off + i]) != text2[i]) {
          return false;
        }
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1[off + i] != text2[i]) {
          return false;
        }
      }
    }
    return true;
  }

  /** Compares a CharSequence with a stored entry, honoring ignoreCase. */
  private boolean equals(CharSequence text1, char[] text2) {
    int len = text1.length();
    if (len != text2.length) {
      return false;
    }
    if (ignoreCase) {
      for (int i = 0; i < len; i++) {
        if (Character.toLowerCase(text1.charAt(i)) != text2[i]) {
          return false;
        }
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1.charAt(i) != text2[i]) {
          return false;
        }
      }
    }
    return true;
  }

  /** Doubles the table and re-inserts every existing entry. */
  private void rehash() {
    final int newSize = 2 * entries.length;
    char[][] oldEntries = entries;
    entries = new char[newSize][];

    for (int i = 0; i < oldEntries.length; i++) {
      char[] text = oldEntries[i];
      if (text != null) {
        // todo: could be faster... no need to compare strings on collision
        entries[getSlot(text, 0, text.length)] = text;
      }
    }
  }

  /** Hashes a slice of a char[]; must agree with {@link #getHashCode(CharSequence)}. */
  private int getHashCode(char[] text, int offset, int len) {
    int code = 0;
    final int stop = offset + len;
    if (ignoreCase) {
      for (int i = offset; i < stop; i++) {
        code = code * 31 + Character.toLowerCase(text[i]);
      }
    } else {
      for (int i = offset; i < stop; i++) {
        code = code * 31 + text[i];
      }
    }
    return code;
  }

  /** Hashes a CharSequence; must agree with {@link #getHashCode(char[], int, int)}. */
  private int getHashCode(CharSequence text) {
    int code = 0;
    int len = text.length();
    if (ignoreCase) {
      for (int i = 0; i < len; i++) {
        code = code * 31 + Character.toLowerCase(text.charAt(i));
      }
    } else {
      for (int i = 0; i < len; i++) {
        code = code * 31 + text.charAt(i);
      }
    }
    return code;
  }

  @Override
  public int size() {
    return count;
  }

  @Override
  public boolean isEmpty() {
    return count == 0;
  }

  /**
   * Returns true if the given object is in the set. A {@code char[]} is looked
   * up directly; any other object is looked up by its {@code toString()}.
   *
   * @throws NullPointerException if {@code o} is null
   */
  @Override
  public boolean contains(Object o) {
    if (o instanceof char[]) {
      final char[] text = (char[]) o;
      return contains(text, 0, text.length);
    }
    return contains(o.toString());
  }

  /**
   * Adds the given object to the set. A {@code char[]} is stored directly (see
   * {@link #add(char[])}); any other object is stored by its {@code toString()}.
   *
   * @throws NullPointerException if {@code o} is null
   */
  @Override
  public boolean add(Object o) {
    if (o instanceof char[]) {
      return add((char[]) o);
    }
    return add(o.toString());
  }

  /**
   * Returns an unmodifiable {@link CharArraySet}. This allows to provide
   * unmodifiable views of internal sets for "read-only" use.
   * <p>
   * The returned set is built on the same backing array as the given set
   * rather than on a copy of it.
   *
   * @param set
   *          a set for which the unmodifiable set is returned.
   * @return an unmodifiable {@link CharArraySet}; the argument itself if it is
   *         already unmodifiable.
   * @throws NullPointerException
   *           if the given set is <code>null</code>.
   */
  public static CharArraySet unmodifiableSet(CharArraySet set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    if (set instanceof UnmodifiableCharArraySet) {
      return set;
    }

    /*
     * Instead of delegating calls to the given set copy the low-level values to
     * the unmodifiable Subclass
     */
    return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
  }

  /**
   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
   * is a {@link CharArraySet} the ignoreCase property will be preserved;
   * otherwise the copy is case-sensitive.
   *
   * @param set
   *          a set to copy
   * @return a copy of the given set as a {@link CharArraySet}, or
   *         {@link #EMPTY_SET} itself if that is what was passed in.
   * @throws NullPointerException
   *           if the given set is <code>null</code>.
   */
  public static CharArraySet copy(Set<?> set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    final boolean ignoreCase =
        set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase : false;
    return new CharArraySet(set, ignoreCase);
  }

  /**
   * The Iterator for this set. Strings are constructed on the fly, so
   * use <code>nextCharArray</code> for more efficient access.
   */
  public class CharArraySetIterator implements Iterator<String> {
    int pos = -1;
    char[] next;

    CharArraySetIterator() {
      goNext();
    }

    /** Advances to the next occupied slot; leaves {@code next} null when there is none. */
    private void goNext() {
      next = null;
      pos++;
      while (pos < entries.length && (next = entries[pos]) == null) {
        pos++;
      }
    }

    public boolean hasNext() {
      return next != null;
    }

    /**
     * Returns the next entry without copying it, or null once the iterator is
     * exhausted. Do not modify the returned char[].
     */
    public char[] nextCharArray() {
      char[] ret = next;
      goNext();
      return ret;
    }

    /**
     * Returns the next String, as a Set&lt;String&gt; would...
     * use nextCharArray() for better efficiency.
     *
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    public String next() {
      if (next == null) {
        // Honor the Iterator contract instead of failing inside new String(null).
        throw new java.util.NoSuchElementException();
      }
      return new String(nextCharArray());
    }

    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /** Returns an iterator of newly allocated Strings. */
  public Iterator<String> stringIterator() {
    return new CharArraySetIterator();
  }

  /**
   * Returns an iterator of newly allocated Strings; this method violates the
   * Set interface in that char[] elements come back as Strings.
   */
  @Override
  @SuppressWarnings("unchecked")
  public Iterator<Object> iterator() {
    // Safe: every String is an Object and the iterator never accepts elements.
    return (Iterator) stringIterator();
  }

  /**
   * Efficient unmodifiable {@link CharArraySet}. This implementation does not
   * delegate calls to a given {@link CharArraySet} like
   * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead it passes
   * the internal representation of a {@link CharArraySet} to a super
   * constructor and overrides all mutators.
   */
  private static final class UnmodifiableCharArraySet extends CharArraySet {

    private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase, int count) {
      super(entries, ignoreCase, count);
    }

    @Override
    public boolean add(Object o) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean addAll(Collection<? extends Object> coll) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(char[] text) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(CharSequence text) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(String text) {
      throw new UnsupportedOperationException();
    }
  }
}