/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using NUnit.Framework;

namespace Lucene.Net.Analysis.RU
{
    /// <summary>
    /// Runs <c>RussianStemmer.Stem</c> over every line of a words file and
    /// compares each result with the line at the same index of a stems file.
    /// </summary>
    [TestFixture]
    public class TestRussianStem
    {
        // Lines of wordsUnicode.txt, in file order (filled by SetUp).
        private readonly System.Collections.ArrayList words = new System.Collections.ArrayList();

        // Lines of stemsUnicode.txt, in file order (filled by SetUp);
        // stems[i] is compared against the stem computed for words[i].
        private readonly System.Collections.ArrayList stems = new System.Collections.ArrayList();

        /// <summary>
        /// Loads the word list and the expected-stem list from
        /// <c>Analysis\RU\</c> below the directory named by the
        /// <c>dataDir</c> application setting.
        /// </summary>
        [TestFixtureSetUp]
        protected virtual void SetUp()
        {
            // NOTE(review): the second argument is presumably the fallback used
            // when "dataDir" is not configured - confirm against SupportClass.
            System.IO.FileInfo dataDir = new System.IO.FileInfo(SupportClass.AppSettings.Get("dataDir", @".\"));

            ReadLines(dataDir.FullName + @"\Analysis\RU\wordsUnicode.txt", words);
            ReadLines(dataDir.FullName + @"\Analysis\RU\stemsUnicode.txt", stems);
        }

        /// <summary>
        /// Appends every line of the text file at <paramref name="path"/> to
        /// <paramref name="target"/>, decoding it with the "Unicode" encoding.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="target">List that receives one string per line.</param>
        private static void ReadLines(System.String path, System.Collections.ArrayList target)
        {
            System.String fullPath = new System.IO.FileInfo(path).FullName;

            // Exactly one stream is opened per file, and the using blocks close
            // it even when reading throws, so no file handle is left behind.
            using (System.IO.FileStream stream = new System.IO.FileStream(fullPath, System.IO.FileMode.Open, System.IO.FileAccess.Read))
            using (System.IO.StreamReader reader = new System.IO.StreamReader(stream, System.Text.Encoding.GetEncoding("Unicode")))
            {
                System.String line;
                while ((line = reader.ReadLine()) != null)
                {
                    target.Add(line);
                }
            }
        }

        /// <summary>
        /// Fixture tear-down hook; there is nothing to release.
        /// </summary>
        [TestFixtureTearDown]
        protected virtual void TearDown()
        {
        }

        /// <summary>
        /// Stems each loaded word and asserts that the result equals the
        /// expected stem stored at the same index.
        /// </summary>
        [Test]
        public virtual void TestStem()
        {
            // Report a truncated stems file as a test failure instead of an
            // ArgumentOutOfRangeException raised from inside the loop.
            Assert.IsTrue(stems.Count >= words.Count, "stems file has fewer lines than words file");

            for (int i = 0; i < words.Count; i++)
            {
                System.String realStem = RussianStemmer.Stem((System.String) words[i], RussianCharsets.UnicodeRussian);
                Assert.AreEqual(stems[i], realStem, "unicode");
            }
        }

        /// <summary>
        /// Rebuilds <paramref name="output"/> character by character and
        /// returns the resulting string. Not called anywhere in this class.
        /// </summary>
        /// <param name="output">Text to copy.</param>
        /// <returns>A string with the same characters as <paramref name="output"/>.</returns>
        private System.String printChars(System.String output)
        {
            System.Text.StringBuilder s = new System.Text.StringBuilder();
            for (int i = 0; i < output.Length; i++)
            {
                s.Append(output[i]);
            }
            return s.ToString();
        }
    }
}