/*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using NUnit.Framework;
namespace Lucene.Net.Analysis.RU
{
[TestFixture]
public class TestRussianStem
{
private System.Collections.ArrayList words = new System.Collections.ArrayList();
private System.Collections.ArrayList stems = new System.Collections.ArrayList();
///
///
[TestFixtureSetUp]
protected virtual void SetUp()
{
//base.SetUp();
//System.out.println(new java.util.Date());
System.String str;
System.IO.FileInfo dataDir = new System.IO.FileInfo(SupportClass.AppSettings.Get("dataDir", @".\"));
// open and read words into an array list
System.IO.StreamReader inWords = new System.IO.StreamReader(
new System.IO.StreamReader(
new System.IO.FileStream(
new System.IO.FileInfo(
dataDir.FullName + @"\Analysis\RU\wordsUnicode.txt").FullName,
System.IO.FileMode.Open, System.IO.FileAccess.Read),
System.Text.Encoding.GetEncoding("Unicode")).BaseStream,
new System.IO.StreamReader(
new System.IO.FileStream(
new System.IO.FileInfo(
dataDir.FullName + @"\Analysis\RU\wordsUnicode.txt").FullName,
System.IO.FileMode.Open,
System.IO.FileAccess.Read),
System.Text.Encoding.GetEncoding("Unicode")).CurrentEncoding);
while ((str = inWords.ReadLine()) != null)
{
words.Add(str);
}
inWords.Close();
// open and read stems into an array list
System.IO.StreamReader inStems = new System.IO.StreamReader(
new System.IO.StreamReader(
new System.IO.FileStream(
new System.IO.FileInfo(
dataDir.FullName + @"\Analysis\RU\stemsUnicode.txt").FullName,
System.IO.FileMode.Open,
System.IO.FileAccess.Read),
System.Text.Encoding.GetEncoding("Unicode")).BaseStream,
new System.IO.StreamReader(
new System.IO.FileStream(
new System.IO.FileInfo(
dataDir.FullName + @"\Analysis\RU\stemsUnicode.txt").FullName,
System.IO.FileMode.Open,
System.IO.FileAccess.Read),
System.Text.Encoding.GetEncoding("Unicode")).CurrentEncoding);
while ((str = inStems.ReadLine()) != null)
{
stems.Add(str);
}
inStems.Close();
}
///
///
[TestFixtureTearDown]
protected virtual void TearDown()
{
//base.TearDown();
}
[Test]
public virtual void TestStem()
{
for (int i = 0; i < words.Count; i++)
{
//if ( (i % 100) == 0 ) System.err.println(i);
System.String realStem = RussianStemmer.Stem((System.String) words[i], RussianCharsets.UnicodeRussian);
Assert.AreEqual(stems[i], realStem, "unicode");
}
}
private System.String printChars(System.String output)
{
System.Text.StringBuilder s = new System.Text.StringBuilder();
for (int i = 0; i < output.Length; i++)
{
s.Append(output[i]);
}
return s.ToString();
}
}
}