1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.imaging.formats.jpeg.iptc;
19
20 import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
21 import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
22 import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
23 import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
24 import static org.apache.commons.imaging.common.BinaryFunctions.slice;
25 import static org.apache.commons.imaging.common.BinaryFunctions.startsWith;
26
27 import java.io.ByteArrayInputStream;
28 import java.io.ByteArrayOutputStream;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.nio.ByteOrder;
32 import java.nio.charset.Charset;
33 import java.nio.charset.StandardCharsets;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.Comparator;
37 import java.util.List;
38 import java.util.Objects;
39 import java.util.logging.Level;
40 import java.util.logging.Logger;
41
42 import org.apache.commons.imaging.ImagingConstants;
43 import org.apache.commons.imaging.ImagingException;
44 import org.apache.commons.imaging.ImagingParameters;
45 import org.apache.commons.imaging.common.Allocator;
46 import org.apache.commons.imaging.common.BinaryFileParser;
47 import org.apache.commons.imaging.common.BinaryFunctions;
48 import org.apache.commons.imaging.common.BinaryOutputStream;
49 import org.apache.commons.imaging.common.ByteConversions;
50 import org.apache.commons.imaging.formats.jpeg.JpegConstants;
51 import org.apache.commons.imaging.formats.jpeg.JpegImagingParameters;
52 import org.apache.commons.imaging.internal.Debug;
53
54 public class IptcParser extends BinaryFileParser {
55
56 private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName());
57
58 private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
59
60
61
62
63
64
65
66
67 private static final List<Integer> PHOTOSHOP_IGNORED_BLOCK_TYPE = Arrays.asList(1084, 1085, 1086, 1087);
68
69 private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
70 private static final int ENV_TAG_CODED_CHARACTER_SET = 90;
71 private static final byte[] CHARACTER_ESCAPE_SEQUENCE = { '\u001B', '%', 'G' };
72
/**
 * Constructs a parser whose base-class byte order is big-endian.
 */
public IptcParser() {
    super(ByteOrder.BIG_ENDIAN);
}
76
77 private Charset findCharset(final byte[] codedCharset) {
78 final String codedCharsetString = new String(codedCharset, StandardCharsets.ISO_8859_1);
79 try {
80 if (Charset.isSupported(codedCharsetString)) {
81 return Charset.forName(codedCharsetString);
82 }
83 } catch (final IllegalArgumentException ignored) {
84
85 }
86
87
88 final byte[] codedCharsetNormalized = Allocator.byteArray(codedCharset.length);
89 int j = 0;
90 for (final byte element : codedCharset) {
91 if (element != ' ') {
92 codedCharsetNormalized[j++] = element;
93 }
94 }
95
96 if (Objects.deepEquals(codedCharsetNormalized, CHARACTER_ESCAPE_SEQUENCE)) {
97 return StandardCharsets.UTF_8;
98 }
99 return DEFAULT_CHARSET;
100 }
101
102 public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
103 if (!startsWith(segmentData, JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) {
104 return false;
105 }
106
107 final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
108 return index + 4 <= segmentData.length && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
109 }
110
111 protected List<IptcBlock> parseAllBlocks(final byte[] bytes, final boolean strict) throws ImagingException, IOException {
112 final List<IptcBlock> blocks = new ArrayList<>();
113
114 try (InputStream bis = new ByteArrayInputStream(bytes)) {
115
116
117
118
119 final byte[] idString = readBytes("", bis, JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(), "App13 Segment missing identification string");
120 if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
121 throw new ImagingException("Not a Photoshop App13 Segment");
122 }
123
124
125
126 while (true) {
127 final int imageResourceBlockSignature;
128 try {
129 imageResourceBlockSignature = read4Bytes("", bis, "Image Resource Block missing identification string", APP13_BYTE_ORDER);
130 } catch (final IOException ioEx) {
131 break;
132 }
133 if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
134 throw new ImagingException("Invalid Image Resource Block Signature");
135 }
136
137 final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
138 Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
139
140
141 if (PHOTOSHOP_IGNORED_BLOCK_TYPE.contains(blockType)) {
142 Debug.debug("Skipping blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
143
144
145
146 BinaryFunctions.searchQuad(JpegConstants.CONST_8BIM, bis);
147 continue;
148 }
149
150 final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length");
151 if (blockNameLength > 0) {
152 Debug.debug("blockNameLength: " + blockNameLength + " (0x" + Integer.toHexString(blockNameLength) + ")");
153 }
154 byte[] blockNameBytes;
155 if (blockNameLength == 0) {
156 readByte("Block name bytes", bis, "Image Resource Block has invalid name");
157 blockNameBytes = ImagingConstants.EMPTY_BYTE_ARRAY;
158 } else {
159 try {
160 blockNameBytes = readBytes("", bis, blockNameLength, "Invalid Image Resource Block name");
161 } catch (final IOException ioEx) {
162 if (strict) {
163 throw ioEx;
164 }
165 break;
166 }
167
168 if (blockNameLength % 2 == 0) {
169 readByte("Padding byte", bis, "Image Resource Block missing padding byte");
170 }
171 }
172
173 final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
174 Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");
175
176
177
178
179 if (blockSize > bytes.length) {
180 throw new ImagingException("Invalid Block Size : " + blockSize + " > " + bytes.length);
181 }
182
183 final byte[] blockData;
184 try {
185 blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data");
186 } catch (final IOException ioEx) {
187 if (strict) {
188 throw ioEx;
189 }
190 break;
191 }
192
193 blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));
194
195 if (blockSize % 2 != 0) {
196 readByte("Padding byte", bis, "Image Resource Block missing padding byte");
197 }
198 }
199
200 return blocks;
201 }
202 }
203
204 protected List<IptcRecord> parseIptcBlock(final byte[] bytes) {
205 Charset charset = DEFAULT_CHARSET;
206 final List<IptcRecord> elements = new ArrayList<>();
207
208 int index = 0;
209
210 while (index + 1 < bytes.length) {
211 final int tagMarker = 0xff & bytes[index++];
212 Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");
213
214 if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
215 if (LOGGER.isLoggable(Level.FINE)) {
216 LOGGER.fine("Unexpected record tag marker in IPTC data.");
217 }
218 return elements;
219 }
220
221 final int recordNumber = 0xff & bytes[index++];
222 Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242 final int recordType = 0xff & bytes[index];
243 Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");
244 index++;
245
246 final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
247 index += 2;
248
249 final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE;
250 final int dataFieldCountLength = recordSize & 0x7fff;
251 if (extendedDataset) {
252 Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength);
253 }
254 if (extendedDataset) {
255
256 return elements;
257 }
258
259 final byte[] recordData = slice(bytes, index, recordSize);
260 index += recordSize;
261
262
263
264
265 if (recordNumber == IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER && recordType == ENV_TAG_CODED_CHARACTER_SET) {
266 charset = findCharset(recordData);
267 continue;
268 }
269
270 if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
271 continue;
272 }
273
274 if (recordType == 0) {
275 if (LOGGER.isLoggable(Level.FINE)) {
276 LOGGER.fine("ignore record version record! " + elements.size());
277 }
278
279 continue;
280 }
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305 final String value = new String(recordData, charset);
306
307 final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
308
309
310
311
312
313
314
315
316
317
318
319
320 final IptcRecord element = new IptcRecord(iptcType, value);
321 elements.add(element);
322 }
323
324 return elements;
325 }
326
327 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImagingException, IOException {
328 final List<IptcRecord> records = new ArrayList<>();
329
330 final List<IptcBlock> blocks = parseAllBlocks(bytes, strict);
331
332 for (final IptcBlock block : blocks) {
333
334 if (!block.isIptcBlock()) {
335 continue;
336 }
337
338 records.addAll(parseIptcBlock(block.getBlockData()));
339 }
340
341 return new PhotoshopApp13Data(records, blocks);
342 }
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373 public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final ImagingParameters<JpegImagingParameters> params)
374 throws ImagingException, IOException {
375 final boolean strict = params != null && params.isStrict();
376
377 return parsePhotoshopSegment(bytes, strict);
378 }
379
380 public byte[] writeIptcBlock(List<IptcRecord> elements) throws ImagingException, IOException {
381 Charset charset = DEFAULT_CHARSET;
382 for (final IptcRecord element : elements) {
383 final byte[] recordData = element.getValue().getBytes(charset);
384 if (!new String(recordData, charset).equals(element.getValue())) {
385 charset = StandardCharsets.UTF_8;
386 break;
387 }
388 }
389 final byte[] blockData;
390 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
391 try (BinaryOutputStream bos = BinaryOutputStream.create(baos, getByteOrder())) {
392 if (!charset.equals(DEFAULT_CHARSET)) {
393 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
394 bos.write(IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER);
395 bos.write(ENV_TAG_CODED_CHARACTER_SET);
396 final byte[] codedCharset = CHARACTER_ESCAPE_SEQUENCE;
397 bos.write2Bytes(codedCharset.length);
398 bos.write(codedCharset);
399 }
400
401
402 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
403 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
404 bos.write(IptcTypes.RECORD_VERSION.type);
405
406 bos.write2Bytes(2);
407 bos.write2Bytes(2);
408
409
410 elements = new ArrayList<>(elements);
411
412
413 final Comparator<IptcRecord> comparator = (e1, e2) -> e2.iptcType.getType() - e1.iptcType.getType();
414 elements.sort(comparator);
415
416
417
418 for (final IptcRecord element : elements) {
419 if (element.iptcType == IptcTypes.RECORD_VERSION) {
420 continue;
421 }
422
423 bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
424 bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
425 if (element.iptcType.getType() < 0 || element.iptcType.getType() > 0xff) {
426 throw new ImagingException("Invalid record type: " + element.iptcType.getType());
427 }
428 bos.write(element.iptcType.getType());
429
430 final byte[] recordData = element.getValue().getBytes(charset);
431
432
433
434
435
436 bos.write2Bytes(recordData.length);
437 bos.write(recordData);
438 }
439 }
440
441 return baos.toByteArray();
442 }
443
444 public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data) throws IOException, ImagingException {
445 try (ByteArrayOutputStream os = new ByteArrayOutputStream();
446 BinaryOutputStream bos = BinaryOutputStream.bigEndian(os)) {
447
448 JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);
449
450 final List<IptcBlock> blocks = data.getRawBlocks();
451 for (final IptcBlock block : blocks) {
452 bos.write4Bytes(JpegConstants.CONST_8BIM);
453
454 if (block.getBlockType() < 0 || block.getBlockType() > 0xffff) {
455 throw new ImagingException("Invalid IPTC block type.");
456 }
457 bos.write2Bytes(block.getBlockType());
458
459 final byte[] blockNameBytes = block.getBlockNameBytes();
460 if (blockNameBytes.length > 255) {
461 throw new ImagingException("IPTC block name is too long: " + blockNameBytes.length);
462 }
463 bos.write(blockNameBytes.length);
464 bos.write(blockNameBytes);
465 if (blockNameBytes.length % 2 == 0) {
466 bos.write(0);
467 }
468
469 final byte[] blockData = block.getBlockData();
470 if (blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
471 throw new ImagingException("IPTC block data is too long: " + blockData.length);
472 }
473 bos.write4Bytes(blockData.length);
474 bos.write(blockData);
475 if (blockData.length % 2 == 1) {
476 bos.write(0);
477 }
478 }
479
480 bos.flush();
481 return os.toByteArray();
482 }
483 }
484
485 }