1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.cli; |
19 | |
|
20 | |
import org.apache.any23.configuration.DefaultConfiguration; |
21 | |
import org.apache.any23.http.DefaultHTTPClient; |
22 | |
import org.apache.any23.http.HTTPClient; |
23 | |
import org.apache.any23.http.HTTPClientConfiguration; |
24 | |
import org.apache.any23.mime.MIMEType; |
25 | |
import org.apache.any23.mime.MIMETypeDetector; |
26 | |
import org.apache.any23.mime.TikaMIMETypeDetector; |
27 | |
import org.apache.any23.source.DocumentSource; |
28 | |
import org.apache.any23.source.FileDocumentSource; |
29 | |
import org.apache.any23.source.HTTPDocumentSource; |
30 | |
import org.apache.any23.source.StringDocumentSource; |
31 | |
|
32 | |
import java.io.File; |
33 | |
import java.net.URISyntaxException; |
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
@ToolRunner.Description("MIME Type Detector Tool.") |
43 | 0 | public class MimeDetector implements Tool{ |
44 | |
|
45 | |
public static final String FILE_DOCUMENT_PREFIX = "file://"; |
46 | |
public static final String INLINE_DOCUMENT_PREFIX = "inline://"; |
47 | |
public static final String URL_DOCUMENT_RE = "^https?://.*"; |
48 | |
|
49 | |
public static void main(String[] args) { |
50 | 0 | System.exit( new MimeDetector().run(args) ); |
51 | 0 | } |
52 | |
|
53 | |
@Override |
54 | |
public int run(String[] args) { |
55 | 0 | if(args.length != 1) { |
56 | 0 | System.err.println("USAGE: {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}"); |
57 | 0 | return 1; |
58 | |
} |
59 | |
|
60 | 0 | final String document = args[0]; |
61 | |
try { |
62 | 0 | final DocumentSource documentSource = createDocumentSource(document); |
63 | 0 | final MIMETypeDetector detector = new TikaMIMETypeDetector(); |
64 | 0 | final MIMEType mimeType = detector.guessMIMEType( |
65 | |
documentSource.getDocumentURI(), |
66 | |
documentSource.openInputStream(), |
67 | |
MIMEType.parse(documentSource.getContentType()) |
68 | |
); |
69 | 0 | System.out.println(mimeType); |
70 | 0 | return 0; |
71 | 0 | } catch (Exception e) { |
72 | 0 | System.err.print("Error while detecting MIME Type."); |
73 | 0 | e.printStackTrace(System.err); |
74 | 0 | return 1; |
75 | |
} |
76 | |
} |
77 | |
|
78 | |
private DocumentSource createDocumentSource(String document) throws URISyntaxException { |
79 | 0 | if(document.startsWith(FILE_DOCUMENT_PREFIX)) { |
80 | 0 | return new FileDocumentSource( |
81 | |
new File( |
82 | |
document.substring(FILE_DOCUMENT_PREFIX.length()) |
83 | |
) |
84 | |
); |
85 | |
} |
86 | 0 | if(document.startsWith(INLINE_DOCUMENT_PREFIX)) { |
87 | 0 | return new StringDocumentSource( |
88 | |
document.substring(INLINE_DOCUMENT_PREFIX.length()), |
89 | |
"" |
90 | |
); |
91 | |
} |
92 | 0 | if(document.matches(URL_DOCUMENT_RE)) { |
93 | 0 | final HTTPClient client = new DefaultHTTPClient(); |
94 | |
|
95 | 0 | client.init(new HTTPClientConfiguration() { |
96 | |
public String getUserAgent() { |
97 | 0 | return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default"); |
98 | |
} |
99 | |
public String getAcceptHeader() { |
100 | 0 | return ""; |
101 | |
} |
102 | |
public int getDefaultTimeout() { |
103 | 0 | return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout"); |
104 | |
} |
105 | |
public int getMaxConnections() { |
106 | 0 | return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections"); |
107 | |
} |
108 | |
}); |
109 | 0 | return new HTTPDocumentSource(client, document); |
110 | |
} |
111 | 0 | throw new IllegalArgumentException("Unsupported protocol for document " + document); |
112 | |
} |
113 | |
|
114 | |
} |