/[Apache-SVN]/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
ViewVC logotype

Contents of /lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java

Parent Directory | Revision Log


Revision 823614 - (show annotations)
Fri Oct 9 17:02:32 2009 UTC (6 weeks ago) by ab
File size: 3491 byte(s)
NUTCH-758 Set subversion eol-style to "native".
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.nutch.indexer;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.conf.Configured;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.mapred.FileOutputFormat;
30 import org.apache.hadoop.mapred.JobClient;
31 import org.apache.hadoop.mapred.JobConf;
32 import org.apache.hadoop.util.StringUtils;
33 import org.apache.hadoop.util.Tool;
34 import org.apache.hadoop.util.ToolRunner;
35 import org.apache.nutch.indexer.lucene.LuceneWriter;
36 import org.apache.nutch.util.NutchConfiguration;
37 import org.apache.nutch.util.NutchJob;
38
39 /** Create indexes for segments. */
40 public class Indexer extends Configured implements Tool {
41
42 public static final String DONE_NAME = "index.done";
43
44 public static final Log LOG = LogFactory.getLog(Indexer.class);
45
46 public Indexer() {
47 super(null);
48 }
49
50 public Indexer(Configuration conf) {
51 super(conf);
52 }
53
54 public void index(Path luceneDir, Path crawlDb,
55 Path linkDb, List<Path> segments)
56 throws IOException {
57 LOG.info("Indexer: starting");
58
59 final JobConf job = new NutchJob(getConf());
60 job.setJobName("index-lucene " + luceneDir);
61
62 IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);
63
64 FileOutputFormat.setOutputPath(job, luceneDir);
65
66 LuceneWriter.addFieldOptions("segment", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job);
67 LuceneWriter.addFieldOptions("digest", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job);
68 LuceneWriter.addFieldOptions("boost", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job);
69
70 NutchIndexWriterFactory.addClassToConf(job, LuceneWriter.class);
71
72 JobClient.runJob(job);
73 LOG.info("Indexer: done");
74 }
75
76 public int run(String[] args) throws Exception {
77 if (args.length < 4) {
78 System.err.println("Usage: Indexer <index> <crawldb> <linkdb> <segment> ...");
79 return -1;
80 }
81
82 final Path luceneDir = new Path(args[0]);
83 final Path crawlDb = new Path(args[1]);
84 final Path linkDb = new Path(args[2]);
85
86 final List<Path> segments = new ArrayList<Path>();
87 for (int i = 3; i < args.length; i++) {
88 segments.add(new Path(args[i]));
89 }
90
91 try {
92 index(luceneDir, crawlDb, linkDb, segments);
93 return 0;
94 } catch (final Exception e) {
95 LOG.fatal("Indexer: " + StringUtils.stringifyException(e));
96 return -1;
97 }
98 }
99
100 public static void main(String[] args) throws Exception {
101 final int res = ToolRunner.run(NutchConfiguration.create(), new Indexer(), args);
102 System.exit(res);
103 }
104 }

Properties

Name Value
svn:eol-style native
svn:keywords Date Author Id Revision HeadURL

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2