/[Apache-SVN]/lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java
ViewVC logotype

Diff of /lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java

Parent Directory | Revision Log | View Patch

--- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java	2006/02/11 10:48:34	376965
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java	2006/02/11 10:56:14	376966
@@ -4,18 +4,25 @@
  */
 package org.apache.nutch.parse.msexcel;
 
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
-
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.util.NutchConfiguration;
 
+// JUnit imports
 import junit.framework.TestCase;
 
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
+
+
 /** 
  * Based on Unit tests for MSWordParser by John Xing
  *
@@ -31,31 +38,32 @@ public class TestMSExcelParser extends T
   
   private String[] sampleFiles = {"test.xls"};
 
-  private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! ";
+  private String expectedText = "BitStream test.xls 321654.0 Apache " +
+                                "incubator 1234.0 Doug Cutting 89078.0 " +
+                                "CS 599 Search Engines Spring 2005.0 SBC " +
+                                "1234.0 764893.0 Java NUTCH!! ";
 
   public TestMSExcelParser(String name) { 
-    super(name); 
+    super(name);
   }
 
-  protected void setUp() {}
-
-  protected void tearDown() {}
-
   public void testIt() throws ProtocolException, ParseException {
+
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
+    Configuration conf = NutchConfiguration.create();
+    ParseUtil parser = new ParseUtil(conf);
+    ProtocolFactory factory = new ProtocolFactory(conf);
     for (int i = 0; i < sampleFiles.length; i++) {
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
-      protocol = ProtocolFactory.getProtocol(urlString);
-      content = protocol.getContent(urlString);
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      protocol = factory.getProtocol(urlString);
+      content = protocol.getProtocolOutput(new UTF8(urlString),
+                                           new CrawlDatum()).getContent();
+      parse = parser.parseByParserId("parse-msexcel", content);
 
       assertTrue(parse.getText().equals(expectedText));
     }

 

infrastructure at apache.org
ViewVC Help
Powered by ViewVC 1.1.26