1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.cli; |
19 | |
|
20 | |
import org.apache.any23.extractor.ExampleInputOutput; |
21 | |
import org.apache.any23.extractor.ExtractionException; |
22 | |
import org.apache.any23.extractor.ExtractorFactory; |
23 | |
import org.apache.any23.extractor.ExtractorRegistry; |
24 | |
import org.apache.any23.util.LogUtils; |
25 | |
import org.apache.any23.extractor.Extractor; |
26 | |
import org.apache.any23.extractor.Extractor.BlindExtractor; |
27 | |
import org.apache.any23.extractor.Extractor.ContentExtractor; |
28 | |
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; |
29 | |
|
30 | |
import java.io.IOException; |
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
@ToolRunner.Description("Utility for obtaining documentation about metadata extractors.") |
37 | 0 | public class ExtractorDocumentation implements Tool { |
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
public static void main(String[] args) throws ExtractionException, IOException { |
60 | 0 | System.exit( new ExtractorDocumentation().run(args) ); |
61 | 0 | } |
62 | |
|
63 | |
public int run(String[] args) { |
64 | 0 | LogUtils.setDefaultLogging(); |
65 | |
try { |
66 | 0 | if (args.length == 0) { |
67 | 0 | printUsage(); |
68 | 0 | return 1; |
69 | |
} |
70 | |
|
71 | 0 | final String option = args[0]; |
72 | 0 | if ("-list".equals(option)) { |
73 | 0 | if (args.length > 1) { |
74 | 0 | printUsage(); |
75 | 0 | return 2; |
76 | |
} |
77 | 0 | printExtractorList(); |
78 | |
} |
79 | 0 | else if ("-i".equals(option)) { |
80 | 0 | if (args.length > 2) { |
81 | 0 | printUsage(); |
82 | 0 | return 3; |
83 | |
} |
84 | 0 | if (args.length < 2) { |
85 | 0 | printError("Required argument for -i: extractor name"); |
86 | 0 | return 4; |
87 | |
} |
88 | 0 | printExampleInput(args[1]); |
89 | |
} |
90 | 0 | else if ("-o".equals(option)) { |
91 | 0 | if (args.length > 2) { |
92 | 0 | printUsage(); |
93 | 0 | return 5; |
94 | |
} |
95 | 0 | if (args.length < 2) { |
96 | 0 | printError("Required argument for -o: extractor name"); |
97 | 0 | return 6; |
98 | |
} |
99 | 0 | printExampleOutput(args[1]); |
100 | |
} |
101 | 0 | else if ("-all".equals(option)) { |
102 | 0 | if (args.length > 1) { |
103 | 0 | printUsage(); |
104 | 0 | return 7; |
105 | |
} |
106 | 0 | printReport(); |
107 | |
} else { |
108 | 0 | printUsage(); |
109 | |
} |
110 | 0 | } catch (Exception e) { |
111 | 0 | e.printStackTrace(System.err); |
112 | 0 | return 8; |
113 | 0 | } |
114 | 0 | return 0; |
115 | |
} |
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
public void printUsage() { |
121 | 0 | System.out.println("Usage:"); |
122 | 0 | System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -list"); |
123 | 0 | System.out.println(" shows the names of all available extractors"); |
124 | 0 | System.out.println(); |
125 | 0 | System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -i extractor-name"); |
126 | 0 | System.out.println(" shows example input for the given extractor"); |
127 | 0 | System.out.println(); |
128 | 0 | System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -o extractor-name"); |
129 | 0 | System.out.println(" shows example output for the given extractor"); |
130 | 0 | System.out.println(); |
131 | 0 | System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -all"); |
132 | 0 | System.out.println(" shows a report about all available extractors"); |
133 | 0 | System.out.println(); |
134 | 0 | } |
135 | |
|
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
public void printError(String msg) { |
142 | 0 | System.err.println(msg); |
143 | 0 | } |
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
public void printExtractorList() { |
149 | 0 | for(ExtractorFactory factory : ExtractorRegistry.getInstance().getExtractorGroup()) { |
150 | 0 | System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorType())); |
151 | |
} |
152 | 0 | } |
153 | |
|
154 | |
|
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
public void printExampleInput(String extractorName) throws IOException { |
161 | 0 | ExtractorFactory<?> factory = getFactory(extractorName); |
162 | 0 | ExampleInputOutput example = new ExampleInputOutput(factory); |
163 | 0 | String input = example.getExampleInput(); |
164 | 0 | if (input == null) { |
165 | 0 | throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input"); |
166 | |
} |
167 | 0 | System.out.println(input); |
168 | 0 | } |
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
|
175 | |
|
176 | |
|
177 | |
public void printExampleOutput(String extractorName) throws IOException, ExtractionException { |
178 | 0 | ExtractorFactory<?> factory = getFactory(extractorName); |
179 | 0 | ExampleInputOutput example = new ExampleInputOutput(factory); |
180 | 0 | String output = example.getExampleOutput(); |
181 | 0 | if (output == null) { |
182 | 0 | throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output"); |
183 | |
} |
184 | 0 | System.out.println(output); |
185 | 0 | } |
186 | |
|
187 | |
|
188 | |
|
189 | |
|
190 | |
|
191 | |
|
192 | |
|
193 | |
public void printReport() throws IOException, ExtractionException { |
194 | 0 | for (String extractorName : ExtractorRegistry.getInstance().getAllNames()) { |
195 | 0 | ExtractorFactory<?> factory = ExtractorRegistry.getInstance().getFactory(extractorName); |
196 | 0 | ExampleInputOutput example = new ExampleInputOutput(factory); |
197 | 0 | System.out.println("Extractor: " + extractorName); |
198 | 0 | System.out.println("\ttype: " + getType(factory)); |
199 | 0 | System.out.println(); |
200 | 0 | final String exampleInput = example.getExampleInput(); |
201 | 0 | if(exampleInput == null) { |
202 | 0 | System.out.println("(No Example Available)"); |
203 | |
} else { |
204 | 0 | System.out.println("-------- Example Input --------"); |
205 | 0 | System.out.println(exampleInput); |
206 | 0 | System.out.println("-------- Example Output --------"); |
207 | 0 | String output = example.getExampleOutput(); |
208 | 0 | System.out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output); |
209 | |
} |
210 | 0 | System.out.println("================================"); |
211 | 0 | System.out.println(); |
212 | 0 | } |
213 | 0 | } |
214 | |
|
215 | |
private ExtractorFactory<?> getFactory(String name) { |
216 | 0 | if (!ExtractorRegistry.getInstance().isRegisteredName(name)) { |
217 | 0 | throw new IllegalArgumentException("Unknown extractor name: " + name); |
218 | |
} |
219 | 0 | return ExtractorRegistry.getInstance().getFactory(name); |
220 | |
} |
221 | |
|
222 | |
private String getType(ExtractorFactory<?> factory) { |
223 | 0 | Extractor<?> extractor = factory.createExtractor(); |
224 | 0 | if (extractor instanceof BlindExtractor) { |
225 | 0 | return BlindExtractor.class.getSimpleName(); |
226 | |
} |
227 | 0 | if (extractor instanceof TagSoupDOMExtractor) { |
228 | 0 | return TagSoupDOMExtractor.class.getSimpleName(); |
229 | |
} |
230 | 0 | if (extractor instanceof ContentExtractor) { |
231 | 0 | return ContentExtractor.class.getSimpleName(); |
232 | |
} |
233 | 0 | return "?"; |
234 | |
} |
235 | |
|
236 | |
} |