/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.IO;
using System.Collections;
namespace Lucene.Net.Analysis
{
/// <summary>
/// Loads a word list (one word per line) into a <see cref="Hashtable"/> whose
/// keys and values are both the trimmed word. A null argument, a missing file,
/// or an <see cref="IOException"/> while reading yields an empty table.
/// </summary>
public class WordlistLoader
{
    /// <summary>
    /// Loads the word table from a file identified by directory and file name.
    /// </summary>
    /// <param name="path">Directory containing the wordlist.</param>
    /// <param name="wordfile">File name of the wordlist.</param>
    /// <returns>
    /// A table of the words in the file; empty if either argument is null,
    /// the file cannot be found, or reading fails with an I/O error.
    /// </returns>
    public static Hashtable GetWordtable( String path, String wordfile )
    {
        if ( path == null || wordfile == null )
        {
            return new Hashtable();
        }
        // Path.Combine picks the platform's separator and does not double it
        // when 'path' already ends with one (the old code hard-coded "\\").
        return GetWordtable( new FileInfo( Path.Combine( path, wordfile ) ) );
    }

    /// <summary>
    /// Loads the word table from a file identified by its complete path.
    /// </summary>
    /// <param name="wordfile">Complete path to the wordlist.</param>
    /// <returns>
    /// A table of the words in the file; empty if the argument is null,
    /// the file cannot be found, or reading fails with an I/O error.
    /// </returns>
    public static Hashtable GetWordtable( String wordfile )
    {
        if ( wordfile == null )
        {
            return new Hashtable();
        }
        return GetWordtable( new FileInfo( wordfile ) );
    }

    /// <summary>
    /// Loads the word table from the given file.
    /// </summary>
    /// <param name="wordfile">File containing the wordlist.</param>
    /// <returns>
    /// A table of the words in the file; empty if the argument is null,
    /// the file cannot be found, or reading fails with an I/O error.
    /// </returns>
    public static Hashtable GetWordtable( FileInfo wordfile )
    {
        if ( wordfile == null )
        {
            return new Hashtable();
        }
        try
        {
            // 'using' closes the file handle as soon as the list is loaded;
            // previously the reader was never closed.
            using ( StreamReader fileReader = new StreamReader( wordfile.FullName ) )
            {
                return GetWordtable( fileReader );
            }
        }
        // Opening the file happens here, outside the reader overload's own
        // try block, so a missing file or directory must be handled here too.
        catch ( IOException )
        {
            return new Hashtable();
        }
    }

    /// <summary>
    /// Reads lines from a reader and adds every line as an entry to a Hashtable
    /// (omitting leading and trailing whitespace). Every line should contain
    /// only one word. The words need to be in lowercase if you make use of an
    /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
    /// The reader is not closed by this method.
    /// </summary>
    /// <param name="reader">Reader containing the wordlist.</param>
    /// <returns>
    /// A Hashtable with the reader's words; empty if the reader is null or an
    /// I/O error occurs while reading.
    /// </returns>
    public static Hashtable GetWordtable( TextReader reader )
    {
        // Consistent with the other overloads: null input gives an empty table.
        if ( reader == null )
        {
            return new Hashtable();
        }

        Hashtable result;
        try
        {
            ArrayList stopWords = new ArrayList();
            String word;
            while ( ( word = reader.ReadLine() ) != null )
            {
                stopWords.Add( word.Trim() );
            }
            result = MakeWordTable( (String[]) stopWords.ToArray( typeof( string ) ), stopWords.Count );
        }
        // On error, use an empty table
        catch ( IOException )
        {
            result = new Hashtable();
        }
        return result;
    }

    /// <summary>
    /// Builds the wordlist table, mapping each word to itself.
    /// </summary>
    /// <param name="words">Words that were read.</param>
    /// <param name="length">Number of entries of <paramref name="words"/> to use.</param>
    /// <returns>A table containing each distinct word once.</returns>
    private static Hashtable MakeWordTable( String[] words, int length )
    {
        Hashtable table = new Hashtable( length );
        for ( int i = 0; i < length; i++ )
        {
            // Indexer assignment instead of Add: Add throws ArgumentException
            // when a word (or a blank line) occurs more than once.
            table[words[i]] = words[i];
        }
        return table;
    }
}
}