1 | |
package org.apache.maven.archetype.common.util; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import org.codehaus.plexus.logging.AbstractLogEnabled; |
23 | |
import org.mozilla.intl.chardet.nsDetector; |
24 | |
import org.mozilla.intl.chardet.nsICharsetDetectionObserver; |
25 | |
import org.mozilla.intl.chardet.nsPSMDetector; |
26 | |
|
27 | |
import java.io.BufferedInputStream; |
28 | |
import java.io.File; |
29 | |
import java.io.FileInputStream; |
30 | |
import java.io.FileNotFoundException; |
31 | |
import java.io.IOException; |
32 | |
import java.io.InputStream; |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | 0 | public class FileCharsetDetector |
38 | |
extends AbstractLogEnabled |
39 | |
{ |
40 | 110 | private String charset = null; |
41 | |
|
42 | 110 | private boolean found = false; |
43 | |
|
44 | |
public FileCharsetDetector( File detectedFile ) |
45 | |
throws FileNotFoundException, IOException |
46 | 110 | { |
47 | 110 | nsDetector det = new nsDetector( nsPSMDetector.ALL ); |
48 | |
|
49 | 110 | det.Init( new nsICharsetDetectionObserver() |
50 | 110 | { |
51 | |
public void Notify( String charset ) |
52 | |
{ |
53 | 0 | FileCharsetDetector.this.charset = charset; |
54 | 0 | FileCharsetDetector.this.found = true; |
55 | 0 | } |
56 | |
} ); |
57 | |
|
58 | 110 | FileInputStream fileInputStream = new FileInputStream( detectedFile ); |
59 | 110 | BufferedInputStream imp = new BufferedInputStream( fileInputStream ); |
60 | |
try |
61 | |
{ |
62 | |
|
63 | 110 | byte[] buf = new byte[1024]; |
64 | |
int len; |
65 | 110 | boolean done = false; |
66 | 110 | boolean isAscii = true; |
67 | |
|
68 | 206 | while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 ) |
69 | |
{ |
70 | |
|
71 | 96 | if ( isAscii ) |
72 | |
{ |
73 | 96 | isAscii = det.isAscii( buf, len ); |
74 | |
} |
75 | |
|
76 | |
|
77 | 96 | if ( !isAscii && !done ) |
78 | |
{ |
79 | 0 | done = det.DoIt( buf, len, false ); |
80 | 0 | found = done; |
81 | |
} |
82 | |
} |
83 | 110 | det.DataEnd(); |
84 | |
|
85 | 110 | if ( !isFound() ) |
86 | |
{ |
87 | 110 | String[] prob = det.getProbableCharsets(); |
88 | |
|
89 | 110 | if ( prob.length > 0 ) |
90 | |
{ |
91 | 110 | charset = prob[0]; |
92 | |
} |
93 | |
} |
94 | |
|
95 | 110 | if ( isAscii ) |
96 | |
{ |
97 | 110 | charset = "ASCII"; |
98 | |
} |
99 | |
} |
100 | |
finally |
101 | |
{ |
102 | 110 | imp.close(); |
103 | 110 | fileInputStream.close(); |
104 | 110 | } |
105 | 110 | } |
106 | |
|
107 | |
|
108 | |
|
109 | |
public FileCharsetDetector( InputStream detectedStream ) |
110 | |
throws FileNotFoundException, IOException |
111 | 0 | { |
112 | 0 | nsDetector det = new nsDetector( nsPSMDetector.ALL ); |
113 | |
|
114 | 0 | det.Init( new nsICharsetDetectionObserver() |
115 | 0 | { |
116 | |
public void Notify( String charset ) |
117 | |
{ |
118 | 0 | FileCharsetDetector.this.charset = charset; |
119 | 0 | FileCharsetDetector.this.found = true; |
120 | 0 | } |
121 | |
} ); |
122 | |
|
123 | 0 | BufferedInputStream imp = new BufferedInputStream( detectedStream ); |
124 | |
|
125 | 0 | byte[] buf = new byte[1024]; |
126 | |
int len; |
127 | 0 | boolean done = false; |
128 | 0 | boolean isAscii = true; |
129 | |
|
130 | 0 | while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 ) |
131 | |
{ |
132 | |
|
133 | 0 | if ( isAscii ) |
134 | |
{ |
135 | 0 | isAscii = det.isAscii( buf, len ); |
136 | |
} |
137 | |
|
138 | |
|
139 | 0 | if ( !isAscii && !done ) |
140 | |
{ |
141 | 0 | done = det.DoIt( buf, len, false ); |
142 | 0 | found = done; |
143 | |
} |
144 | |
} |
145 | 0 | det.DataEnd(); |
146 | |
|
147 | 0 | if ( !isFound() ) |
148 | |
{ |
149 | 0 | String[] prob = det.getProbableCharsets(); |
150 | |
|
151 | 0 | if ( prob.length > 0 ) |
152 | |
{ |
153 | 0 | charset = prob[0]; |
154 | |
} |
155 | |
} |
156 | |
|
157 | 0 | if ( isAscii ) |
158 | |
{ |
159 | 0 | charset = "ASCII"; |
160 | |
} |
161 | 0 | } |
162 | |
|
163 | |
public String getCharset() |
164 | |
{ |
165 | 0 | return charset; |
166 | |
} |
167 | |
|
168 | |
public boolean isFound() |
169 | |
{ |
170 | 220 | return found; |
171 | |
} |
172 | |
} |