/[Apache-SVN]/lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java
ViewVC logotype

Contents of /lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 823614 - (show annotations)
Fri Oct 9 17:02:32 2009 UTC (6 weeks, 6 days ago) by ab
File size: 2890 byte(s)
NUTCH-758 Set subversion eol-style to "native".
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.nutch.parse.msexcel;
18
19 // JDK imports
20 import java.io.InputStream;
21
22 // Jakarta POI imports
23 import org.apache.poi.hssf.usermodel.HSSFCell;
24 import org.apache.poi.hssf.usermodel.HSSFRow;
25 import org.apache.poi.hssf.usermodel.HSSFSheet;
26 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
27
28 // Nutch imports
29 import org.apache.nutch.parse.ms.MSExtractor;
30
31
32 /**
33 * Excel Text and Properties extractor.
34 *
35 * @author Rohit Kulkarni & Ashish Vaidya
36 * @author Jérôme Charron
37 */
38 class ExcelExtractor extends MSExtractor {
39
40
41 protected String extractText(InputStream input) throws Exception {
42
43 StringBuilder resultText = new StringBuilder();
44 HSSFWorkbook wb = new HSSFWorkbook(input);
45 if (wb == null) {
46 return resultText.toString();
47 }
48
49 HSSFSheet sheet;
50 HSSFRow row;
51 HSSFCell cell;
52 int sNum = 0;
53 int rNum = 0;
54 int cNum = 0;
55
56 sNum = wb.getNumberOfSheets();
57
58 for (int i=0; i<sNum; i++) {
59 if ((sheet = wb.getSheetAt(i)) == null) {
60 continue;
61 }
62 rNum = sheet.getLastRowNum();
63 for (int j=0; j<=rNum; j++) {
64 if ((row = sheet.getRow(j)) == null){
65 continue;
66 }
67 cNum = row.getLastCellNum();
68
69 for (int k=0; k<cNum; k++) {
70 if ((cell = row.getCell((short) k)) != null) {
71 /*if(HSSFDateUtil.isCellDateFormatted(cell) == true) {
72 resultText.append(cell.getDateCellValue().toString())
73 } else
74 */
75 if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
76 resultText.append(cell.getStringCellValue()).append(" ");
77 } else if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
78 double d = cell.getNumericCellValue();
79 resultText.append(d).append(" ");
80 }
81 /* else if(cell.getCellType() == HSSFCell.CELL_TYPE_FORMULA){
82 resultText.append(cell.getCellFormula());
83 }
84 */
85 }
86 }
87 }
88 }
89 return resultText.toString();
90 }
91
92 }

Properties

Name Value
svn:eol-style native
svn:keywords Date Author Id Revision HeadURL

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2