1 | |
package org.apache.maven.archetype.common.util; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import org.codehaus.plexus.logging.AbstractLogEnabled; |
23 | |
import org.mozilla.intl.chardet.nsDetector; |
24 | |
import org.mozilla.intl.chardet.nsICharsetDetectionObserver; |
25 | |
import org.mozilla.intl.chardet.nsPSMDetector; |
26 | |
|
27 | |
import java.io.BufferedInputStream; |
28 | |
import java.io.File; |
29 | |
import java.io.FileInputStream; |
30 | |
import java.io.FileNotFoundException; |
31 | |
import java.io.IOException; |
32 | |
import java.io.InputStream; |
33 | |
|
34 | |
|
35 | 0 | public class FileCharsetDetector |
36 | |
extends AbstractLogEnabled |
37 | |
{ |
38 | 55 | private String charset = null; |
39 | |
|
40 | 55 | private boolean found = false; |
41 | |
|
42 | |
public FileCharsetDetector( File detectedFile ) |
43 | |
throws FileNotFoundException, IOException |
44 | 55 | { |
45 | 55 | nsDetector det = new nsDetector( nsPSMDetector.ALL ); |
46 | |
|
47 | 55 | det.Init( |
48 | |
new nsICharsetDetectionObserver() |
49 | |
{ |
50 | 55 | public void Notify( String charset ) |
51 | |
{ |
52 | 0 | FileCharsetDetector.this.charset = charset; |
53 | 0 | FileCharsetDetector.this.found = true; |
54 | 0 | } |
55 | |
} |
56 | |
); |
57 | |
|
58 | 55 | BufferedInputStream imp = new BufferedInputStream( new FileInputStream( detectedFile ) ); |
59 | |
|
60 | 55 | byte[] buf = new byte[1024]; |
61 | |
int len; |
62 | 55 | boolean done = false; |
63 | 55 | boolean isAscii = true; |
64 | |
|
65 | 103 | while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 ) |
66 | |
{ |
67 | |
|
68 | 48 | if ( isAscii ) |
69 | |
{ |
70 | 48 | isAscii = det.isAscii( buf, len ); |
71 | |
} |
72 | |
|
73 | |
|
74 | 48 | if ( !isAscii && !done ) |
75 | |
{ |
76 | 0 | done = det.DoIt( buf, len, false ); |
77 | 0 | found = done; |
78 | |
} |
79 | |
} |
80 | 55 | det.DataEnd(); |
81 | |
|
82 | 55 | if ( !isFound() ) |
83 | |
{ |
84 | 55 | String[] prob = det.getProbableCharsets(); |
85 | |
|
86 | 55 | if ( prob.length > 0 ) |
87 | |
{ |
88 | 55 | charset = prob[0]; |
89 | |
} |
90 | |
} |
91 | |
|
92 | 55 | if ( isAscii ) |
93 | |
{ |
94 | 55 | charset = "ASCII"; |
95 | |
} |
96 | 55 | } |
97 | |
|
98 | |
public FileCharsetDetector( InputStream detectedStream ) |
99 | |
throws FileNotFoundException, IOException |
100 | 0 | { |
101 | 0 | nsDetector det = new nsDetector( nsPSMDetector.ALL ); |
102 | |
|
103 | 0 | det.Init( |
104 | |
new nsICharsetDetectionObserver() |
105 | |
{ |
106 | 0 | public void Notify( String charset ) |
107 | |
{ |
108 | 0 | FileCharsetDetector.this.charset = charset; |
109 | 0 | FileCharsetDetector.this.found = true; |
110 | 0 | } |
111 | |
} |
112 | |
); |
113 | |
|
114 | 0 | BufferedInputStream imp = new BufferedInputStream( detectedStream ); |
115 | |
|
116 | 0 | byte[] buf = new byte[1024]; |
117 | |
int len; |
118 | 0 | boolean done = false; |
119 | 0 | boolean isAscii = true; |
120 | |
|
121 | 0 | while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 ) |
122 | |
{ |
123 | |
|
124 | 0 | if ( isAscii ) |
125 | |
{ |
126 | 0 | isAscii = det.isAscii( buf, len ); |
127 | |
} |
128 | |
|
129 | |
|
130 | 0 | if ( !isAscii && !done ) |
131 | |
{ |
132 | 0 | done = det.DoIt( buf, len, false ); |
133 | 0 | found = done; |
134 | |
} |
135 | |
} |
136 | 0 | det.DataEnd(); |
137 | |
|
138 | 0 | if ( !isFound() ) |
139 | |
{ |
140 | 0 | String[] prob = det.getProbableCharsets(); |
141 | |
|
142 | 0 | if ( prob.length > 0 ) |
143 | |
{ |
144 | 0 | charset = prob[0]; |
145 | |
} |
146 | |
} |
147 | |
|
148 | 0 | if ( isAscii ) |
149 | |
{ |
150 | 0 | charset = "ASCII"; |
151 | |
} |
152 | 0 | } |
153 | |
|
154 | |
public String getCharset() |
155 | |
{ |
156 | 0 | return charset; |
157 | |
} |
158 | |
|
159 | |
public boolean isFound() |
160 | |
{ |
161 | 110 | return found; |
162 | |
} |
163 | |
} |