Skip to content

Commit 1f333ee

Browse files
committed
Fix XMP metadata parser so that it can parse unfinished ASCII control character sequences
DEVSIX-9085
1 parent 2abc824 commit 1f333ee

File tree

12 files changed

+68
-67
lines changed

12 files changed

+68
-67
lines changed

‎commons/src/sharpenconfig/java/com/itextpdf/commons/SharpenConfigMapping.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,6 @@ public void applyMappingConfiguration(MappingConfigurator configurator) {
8383
configurator.mapType("java.io.PrintWriter", "iText.Commons.Utils.FormattingStreamWriter");
8484
configurator.mapMethod("java.nio.charset.Charset.forName", "iText.Commons.Utils.EncodingUtil.GetEncoding", false);
8585
configurator.mapField("java.nio.charset.StandardCharsets.ISO_8859_1", "iText.Commons.Utils.EncodingUtil.ISO_8859_1");
86-
configurator.mapType("java.io.PushbackReader", "iText.Commons.Utils.PushbackReader");
87-
configurator.mapType("java.io.FilterReader", "iText.Commons.Utils.FilterReader");
8886
configurator.mapMethod("java.lang.String.valueOf(char[])", "iText.Commons.Utils.JavaUtil.GetStringForChars", false);
8987
configurator.mapMethod("java.lang.String.valueOf(char[],int,int)", "iText.Commons.Utils.JavaUtil.GetStringForChars", false);
9088
configurator.mapMethod("java.lang.String.String(byte[])", "iText.Commons.Utils.JavaUtil.getStringForBytes", false);

‎kernel/src/main/java/com/itextpdf/kernel/xmp/impl/FixASCIIControlsReader.java

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,12 @@
3131
package com.itextpdf.kernel.xmp.impl;
3232

3333
import java.io.IOException;
34-
import java.io.PushbackReader;
3534
import java.io.Reader;
3635

37-
3836
/**
3937
* @since 22.08.2006
4038
*/
41-
public class FixASCIIControlsReader extends PushbackReader
39+
public class FixASCIIControlsReader extends Reader
4240
{
4341
/** */
4442
private static final int STATE_START = 0;
@@ -52,43 +50,41 @@ public class FixASCIIControlsReader extends PushbackReader
5250
private static final int STATE_DIG1 = 4;
5351
/** */
5452
private static final int STATE_ERROR = 5;
55-
/** */
56-
private static final int BUFFER_SIZE = 8;
5753
/** the state of the automaton */
5854
private int state = STATE_START;
5955
/** the result of the escaping sequence */
6056
private int control = 0;
6157
/** count the digits of the sequence */
62-
private int digits = 0;
63-
58+
private int digits = 0;
59+
60+
private Reader in;
61+
6462
/**
65-
* The look-ahead size is 6 at maximum («)
66-
* @see java.io.PushbackReader#PushbackReader(java.io.Reader, int)
63+
* A wrapper xmp reader to handle control characters («)
64+
*
6765
* @param input a Reader
6866
*/
6967
public FixASCIIControlsReader(Reader input)
7068
{
71-
super(input, BUFFER_SIZE);
69+
in = input;
7270
}
7371

74-
7572
/**
7673
* @see java.io.Reader#read(char[], int, int)
7774
*/
7875
public int read(char[] cbuf, int off, int len) throws IOException
7976
{
80-
int readAhead = 0;
8177
int read = 0;
8278
int pos = off;
83-
char[] readAheadBuffer = new char[BUFFER_SIZE];
79+
char[] readAheadBuffer = new char[1];
8480

8581
boolean available = true;
86-
while (available && read < len)
82+
while (available && read < len)
8783
{
88-
available = super.read(readAheadBuffer, readAhead, 1) == 1;
84+
available = in.read(readAheadBuffer, 0, 1) == 1;
8985
if (available)
9086
{
91-
char c = processChar(readAheadBuffer[readAhead]);
87+
char c = processChar(readAheadBuffer[0]);
9288
if (state == STATE_START)
9389
{
9490
// replace control chars with space
@@ -97,34 +93,26 @@ public int read(char[] cbuf, int off, int len) throws IOException
9793
c = ' ';
9894
}
9995
cbuf[pos++] = c;
100-
readAhead = 0;
10196
read++;
10297
}
10398
else if (state == STATE_ERROR)
10499
{
105-
unread(readAheadBuffer, 0, readAhead + 1);
106-
readAhead = 0;
100+
// This is a broken ASCII control character sequence, so just skip it
101+
// If we tried to preserve these characters, the SAX parser would throw later on anyway
107102
}
108-
else
109-
{
110-
readAhead++;
111-
}
112-
}
113-
else if (readAhead > 0)
114-
{
115-
// handles the case when the file ends within an escaped sequence
116-
unread(readAheadBuffer, 0, readAhead);
117-
state = STATE_ERROR;
118-
readAhead = 0;
119-
available = true;
120103
}
121104
}
122105

123-
124-
return read > 0 || available ? read : XMPUtilsImpl.eofReadBytesValue();
106+
return read > 0 || available ? read : XMPUtilsImpl.eofReadBytesValue();
125107
}
126-
127-
108+
109+
/**
110+
* {@inheritDoc}
111+
*/
112+
public void close() throws IOException {
113+
in.close();
114+
}
115+
128116
/**
129117
* Processes numeric escaped chars to find out if they are a control character.
130118
* @param ch a char

‎kernel/src/test/java/com/itextpdf/kernel/pdf/XMPMetadataTest.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ This file is part of the iText (R) project.
5151
@Tag("IntegrationTest")
5252
public class XMPMetadataTest extends ExtendedITextTest{
5353

54-
public static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/kernel/pdf/XmpWriterTest/";
55-
public static final String DESTINATION_FOLDER = TestUtil.getOutputPath() + "/kernel/pdf/XmpWriterTest/";
54+
public static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/kernel/pdf/XMPMetadataTest/";
55+
public static final String DESTINATION_FOLDER = TestUtil.getOutputPath() + "/kernel/pdf/XMPMetadataTest/";
5656

5757
@BeforeAll
5858
public static void beforeClass() {
@@ -419,4 +419,41 @@ public void listParsingTest() {
419419
Assertions.assertThrows(XMPException.class,
420420
() -> XMPMetaFactory.parseFromBuffer(xmp.getBytes(StandardCharsets.UTF_8)));
421421
}
422+
423+
@Test
424+
public void readDocumentWithControlCharactersInXMPMetadata() throws IOException {
425+
String src = SOURCE_FOLDER + "docWithControlCharactersInXmp.pdf";
426+
try (PdfDocument document = new PdfDocument(new PdfReader(src),
427+
new PdfWriter(new ByteArrayOutputStream()), new StampingProperties())) {
428+
Assertions.assertEquals(PdfConformance.PDF_A_3A, document.getConformance());
429+
}
430+
}
431+
432+
@Test
433+
public void readDocumentWithBrokenControlCharactersInXMPMetadata() throws IOException {
434+
String src = SOURCE_FOLDER + "docWithBrokenControlCharactersInXmp.pdf";
435+
try (PdfDocument document = new PdfDocument(new PdfReader(src),
436+
new PdfWriter(new ByteArrayOutputStream()), new StampingProperties())) {
437+
Assertions.assertEquals(PdfConformance.PDF_A_3A, document.getConformance());
438+
}
439+
}
440+
441+
@Test
442+
public void readDocumentWithInvalidConformance() throws IOException {
443+
String src = SOURCE_FOLDER + "docWithInvalidConformance.pdf";
444+
try (PdfDocument document = new PdfDocument(new PdfReader(src),
445+
new PdfWriter(new ByteArrayOutputStream()), new StampingProperties())) {
446+
Assertions.assertEquals(PdfConformance.PDF_NONE_CONFORMANCE, document.getConformance());
447+
}
448+
}
449+
450+
@LogMessages(messages = {@LogMessage(messageTemplate = IoLogMessageConstant.EXCEPTION_WHILE_UPDATING_XMPMETADATA)})
451+
@Test
452+
public void readDocumentWithInvalidXMPMetadata() throws IOException {
453+
String src = SOURCE_FOLDER + "docWithInvalidMetadata.pdf";
454+
try (PdfDocument document = new PdfDocument(new PdfReader(src),
455+
new PdfWriter(new ByteArrayOutputStream()), new StampingProperties())) {
456+
Assertions.assertEquals(PdfConformance.PDF_NONE_CONFORMANCE, document.getConformance());
457+
}
458+
}
422459
}

‎pdfa/src/test/java/com/itextpdf/pdfa/PdfAXmpTest.java

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -142,34 +142,6 @@ public void saveAndReadDocumentWithCanonicalXmpMetadata() throws IOException, XM
142142
}
143143
}
144144

145-
@Test
146-
public void readDocumentWithControlCharactersInXMPMetadata() throws IOException {
147-
String src = sourceFolder + "pdfs/docWithControlCharactersInXmp.pdf";
148-
try (PdfADocument document = new PdfADocument(new PdfReader(src),
149-
new PdfWriter(new java.io.ByteArrayOutputStream()), new StampingProperties())) {
150-
Assertions.assertEquals(PdfConformance.PDF_A_3A, document.getConformance());
151-
}
152-
}
153-
154-
@Test
155-
public void readDocumentWithInvalidConformance() throws IOException {
156-
String src = sourceFolder + "pdfs/docWithInvalidConformance.pdf";
157-
try (PdfDocument document = new PdfDocument(new PdfReader(src),
158-
new PdfWriter(new java.io.ByteArrayOutputStream()), new StampingProperties())) {
159-
Assertions.assertEquals(PdfConformance.PDF_NONE_CONFORMANCE, document.getConformance());
160-
}
161-
}
162-
163-
@LogMessages(messages = {@LogMessage(messageTemplate = IoLogMessageConstant.EXCEPTION_WHILE_UPDATING_XMPMETADATA)})
164-
@Test
165-
public void readDocumentWithInvalidXMPMetadata() throws IOException {
166-
String src = sourceFolder + "pdfs/docWithInvalidMetadata.pdf";
167-
try (PdfDocument document = new PdfDocument(new PdfReader(src),
168-
new PdfWriter(new java.io.ByteArrayOutputStream()), new StampingProperties())) {
169-
Assertions.assertEquals(PdfConformance.PDF_NONE_CONFORMANCE, document.getConformance());
170-
}
171-
}
172-
173145
@Test
174146
public void testPdfUAExtensionMetadata() throws IOException {
175147

‎pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,12 @@
503503
<excludes>
504504
<!-- Contains interfaces for bouncycastle wrappers which can be changed at any time -->
505505
<exclude>com.itextpdf.commons.bouncycastle</exclude>
506+
<!--
507+
Temporary exclusion to be able to clean up the code.
508+
This class is not intended to be used by our users directly.
509+
TODO: DEVSIX-9096 - remove after 9.3.0 release
510+
-->
511+
<exclude>com.itextpdf.kernel.xmp.impl.FixASCIIControlsReader</exclude>
506512
</excludes>
507513
<excludeModules>
508514
<excludeModule>bouncy-castle-adapter</excludeModule>

0 commit comments

Comments
 (0)