Diff of /lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java
Parent Directory | Revision Log | Patch
--- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java 2006/02/11 10:48:34 376965
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java 2006/02/11 10:56:14 376966
@@ -4,18 +4,25 @@
*/
package org.apache.nutch.parse.msexcel;
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolException;
-
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.util.NutchConfiguration;
+// JUnit imports
import junit.framework.TestCase;
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
+
+
/**
* Based on Unit tests for MSWordParser by John Xing
*
@@ -31,31 +38,32 @@ public class TestMSExcelParser extends T
private String[] sampleFiles = {"test.xls"};
- private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! ";
+ private String expectedText = "BitStream test.xls 321654.0 Apache " +
+ "incubator 1234.0 Doug Cutting 89078.0 " +
+ "CS 599 Search Engines Spring 2005.0 SBC " +
+ "1234.0 764893.0 Java NUTCH!! ";
public TestMSExcelParser(String name) {
- super(name);
+ super(name);
}
- protected void setUp() {}
-
- protected void tearDown() {}
-
public void testIt() throws ProtocolException, ParseException {
+
String urlString;
Protocol protocol;
Content content;
- Parser parser;
Parse parse;
+ Configuration conf = NutchConfiguration.create();
+ ParseUtil parser = new ParseUtil(conf);
+ ProtocolFactory factory = new ProtocolFactory(conf);
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
- protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getContent(urlString);
-
- parser = ParserFactory.getParser(content.getContentType(), urlString);
- parse = parser.getParse(content);
+ protocol = factory.getProtocol(urlString);
+ content = protocol.getProtocolOutput(new UTF8(urlString),
+ new CrawlDatum()).getContent();
+ parse = parser.parseByParserId("parse-msexcel", content);
assertTrue(parse.getText().equals(expectedText));
}