Skip to content

Commit 5ec36b8

Browse files
committed
[FIX] Memory leak: RandomAccessRead passed to TrueTypeCollection constructor
[PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance by adding an 'only headers' mode to the TTF parser: * only read tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid') * 'CFF ' and 'head' table parsers finish as soon as they have all the data they need (in 'only headers' mode) * streamline I/O: replace readByte() with read(array), avoid allocating byte[] where possible * NamingTable: use sorted list instead of multilevel HashMap, delay-load Strings * skip checksumming as it is now faster to simply re-parse (gated with "pdfbox.fontcache.skipchecksums" for backward compatibility) [DEV] Breaking change: NameRecord.getString() is now package-private and lazy, renamed to getStringLazy(). [DEV] Breaking change: new abstract method TTFDataStream.getSubReader()
1 parent ee47441 commit 5ec36b8

25 files changed

+957
-272
lines changed

fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public abstract class CFFFont implements FontBoxFont
3535
{
3636
private String fontName;
3737
private CFFCharset charset;
38-
private CFFParser.ByteSource source;
38+
// private CFFParser.ByteSource source;
3939
protected final Map<String, Object> topDict = new LinkedHashMap<>();
4040
protected byte[][] charStrings;
4141
protected byte[][] globalSubrIndex;
@@ -140,25 +140,25 @@ public final List<byte[]> getCharStringBytes()
140140
return Arrays.asList(charStrings);
141141
}
142142

143-
/**
144-
* Sets a byte source to re-read the CFF data in the future.
145-
*/
146-
final void setData(CFFParser.ByteSource source)
147-
{
148-
this.source = source;
149-
}
150-
151-
/**
152-
* Returns the CFF data.
153-
*
154-
* @return the cff data as byte array
155-
*
156-
* @throws IOException if the data could not be read
157-
*/
158-
public byte[] getData() throws IOException
159-
{
160-
return source.getBytes();
161-
}
143+
// /**
144+
// * Sets a byte source to re-read the CFF data in the future.
145+
// */
146+
// final void setData(CFFParser.ByteSource source)
147+
// {
148+
// this.source = source;
149+
// }
150+
//
151+
// /**
152+
// * Returns the CFF data.
153+
// *
154+
// * @return the cff data as byte array
155+
// *
156+
// * @throws IOException if the data could not be read
157+
// */
158+
// public byte[] getData() throws IOException
159+
// {
160+
// return source.getBytes();
161+
// }
162162

163163
/**
164164
* Returns the number of charstrings in the font.

fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import org.apache.commons.logging.Log;
3030
import org.apache.commons.logging.LogFactory;
31+
import org.apache.fontbox.ttf.LoadOnlyHeaders;
3132
import org.apache.pdfbox.io.RandomAccessRead;
3233

3334

@@ -47,7 +48,8 @@ public class CFFParser
4748
private static final String TAG_TTFONLY = "\u0000\u0001\u0000\u0000";
4849

4950
private String[] stringIndex = null;
50-
private ByteSource source;
51+
// private ByteSource source;
52+
private LoadOnlyHeaders loadOnlyHeaders;
5153

5254
// for debugging only
5355
private String debugFontName;
@@ -66,6 +68,11 @@ public interface ByteSource
6668
byte[] getBytes() throws IOException;
6769
}
6870

71+
public void setLoadOnlyHeaders(LoadOnlyHeaders loadOnlyHeaders)
72+
{
73+
this.loadOnlyHeaders = loadOnlyHeaders;
74+
}
75+
6976
/**
7077
* Parse CFF font using byte array, also passing in a byte source for future use.
7178
*
@@ -77,7 +84,7 @@ public interface ByteSource
7784
public List<CFFFont> parse(byte[] bytes, ByteSource source) throws IOException
7885
{
7986
// TODO do we need to store the source data of the font? It isn't used at all
80-
this.source = source;
87+
// this.source = source;
8188
return parse(new DataInputByteArray(bytes));
8289
}
8390

@@ -91,17 +98,10 @@ public List<CFFFont> parse(byte[] bytes, ByteSource source) throws IOException
9198
public List<CFFFont> parse(RandomAccessRead randomAccessRead) throws IOException
9299
{
93100
// TODO do we need to store the source data of the font? It isn't used at all
94-
byte[] bytes = new byte[(int) randomAccessRead.length()];
95-
randomAccessRead.seek(0);
96-
int remainingBytes = bytes.length;
97-
int amountRead;
98-
while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes,
99-
remainingBytes)) > 0)
100-
{
101-
remainingBytes -= amountRead;
102-
}
103101
randomAccessRead.seek(0);
104-
this.source = new CFFBytesource(bytes);
102+
// byte[] bytes = randomAccessRead.readNBytes((int) randomAccessRead.length());
103+
// randomAccessRead.seek(0);
104+
// this.source = new CFFBytesource(bytes);
105105
return parse(new DataInputRandomAccessRead(randomAccessRead));
106106
}
107107

@@ -151,7 +151,7 @@ private List<CFFFont> parse(DataInput input) throws IOException
151151
{
152152
CFFFont font = parseFont(input, nameIndex[i], topDictIndex[i]);
153153
font.setGlobalSubrIndex(globalSubrIndex);
154-
font.setData(source);
154+
// font.setData(source);
155155
fonts.add(font);
156156
}
157157
return fonts;
@@ -492,6 +492,15 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
492492
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
493493

494494
font = cffCIDFont;
495+
if (loadOnlyHeaders != null)
496+
{
497+
loadOnlyHeaders.setOtfROS(
498+
cffCIDFont.getRegistry(),
499+
cffCIDFont.getOrdering(),
500+
cffCIDFont.getSupplement());
501+
// we just read (Registry, Ordering, Supplement) and don't need anything else
502+
return font;
503+
}
495504
}
496505
else
497506
{
@@ -501,6 +510,10 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
501510
// name
502511
debugFontName = name;
503512
font.setName(name);
513+
if (loadOnlyHeaders != null)
514+
{
515+
return font; // not a 'CFFCIDFont' => cannot read properties needed by LoadOnlyHeaders
516+
}
504517

505518
// top dict
506519
font.addValueToTopDict("version", getString(topDict, "version"));

fontbox/src/main/java/org/apache/fontbox/cff/DataInputRandomAccessRead.java

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,7 @@ public byte[] readBytes(int length) throws IOException
170170
{
171171
throw new IOException("length is negative");
172172
}
173-
if (randomAccessRead.length() - randomAccessRead.getPosition() < length)
174-
{
175-
throw new IOException("Premature end of buffer reached");
176-
}
177-
byte[] bytes = new byte[length];
178-
for (int i = 0; i < length; i++)
179-
{
180-
bytes[i] = readByte();
181-
}
182-
return bytes;
173+
return randomAccessRead.readExact(length);
183174
}
184175

185176
@Override

fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.IOException;
2121
import org.apache.fontbox.cff.CFFFont;
2222
import org.apache.fontbox.cff.CFFParser;
23+
import org.apache.pdfbox.io.RandomAccessRead;
2324

2425
/**
2526
* PostScript font program (compact font format).
@@ -48,10 +49,24 @@ public class CFFTable extends TTFTable
4849
@Override
4950
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
5051
{
51-
byte[] bytes = data.read((int)getLength());
52-
5352
CFFParser parser = new CFFParser();
54-
cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
53+
parser.setLoadOnlyHeaders(ttf.getLoadOnlyHeaders());
54+
// assert data.getCurrentPosition() == getOffset();
55+
try (RandomAccessRead subReader = data.getSubReader(getLength()))
56+
{
57+
if (subReader != null)
58+
{
59+
cffFont = parser.parse(subReader).get(0);
60+
data.seek(getOffset() + getLength());
61+
}
62+
else
63+
{
64+
assert ttf.getLoadOnlyHeaders() == null
65+
: "It is inefficient to read whole CFF table to parse only headers, please use RandomAccessReadUncachedDataStream";
66+
byte[] bytes = data.read((int)getLength());
67+
cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
68+
}
69+
}
5570

5671
initialized = true;
5772
}

fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ public class HeaderTable extends TTFTable
7474
@Override
7575
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
7676
{
77+
LoadOnlyHeaders outHeaders = ttf.getLoadOnlyHeaders();
78+
if (outHeaders != null) {
79+
data.skip(44);
80+
macStyle = data.readUnsignedShort();
81+
outHeaders.setHeaderMacStyle(macStyle);
82+
initialized = true;
83+
return;
84+
}
85+
7786
version = data.read32Fixed();
7887
fontRevision = data.read32Fixed();
7988
checkSumAdjustment = data.readUnsignedInt();
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.fontbox.ttf;
18+
19+
import java.io.IOException;
20+
21+
/**
22+
* To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
23+
* this class is used both as a marker to skip
24+
* unused data and as a storage for collected data.
25+
* <p>
26+
* Tables it needs:<ul>
27+
* <li>NamingTable.TAG,
28+
* <li>HeaderTable.TAG,
29+
* <li>OS2WindowsMetricsTable.TAG,
30+
* <li>OTF: CFFTable.TAG
31+
* <li>non-OTF: "gcid"
32+
* </ul>
33+
*
34+
* @author Mykola Bohdiuk
35+
*/
36+
public final class LoadOnlyHeaders
37+
{
38+
static final int BYTES_GCID = 142;
39+
40+
private IOException exception;
41+
private String name;
42+
private Integer headerMacStyle;
43+
private OS2WindowsMetricsTable os2Windows;
44+
private String fontFamily;
45+
private String fontSubFamily;
46+
private byte[] nonOtfGcid142;
47+
//
48+
private boolean isOTFAndPostScript;
49+
private String otfRegistry;
50+
private String otfOrdering;
51+
private int otfSupplement;
52+
53+
public IOException getException()
54+
{
55+
return exception;
56+
}
57+
58+
public String getName()
59+
{
60+
return name;
61+
}
62+
63+
/**
64+
* Returns {@code ttf.getHeader().getMacStyle()}, or {@code null} if there is no HeaderTable.
65+
*/
66+
public Integer getHeaderMacStyle()
67+
{
68+
return headerMacStyle;
69+
}
70+
71+
public OS2WindowsMetricsTable getOS2Windows()
72+
{
73+
return os2Windows;
74+
}
75+
76+
// FontFamily and FontSubfamily are only used for tracing, i.e. when LOGGER(FileSystemFontProvider).isTraceEnabled()
77+
public String getFontFamily()
78+
{
79+
return fontFamily;
80+
}
81+
82+
public String getFontSubFamily()
83+
{
84+
return fontSubFamily;
85+
}
86+
87+
public boolean isOpenTypePostScript()
88+
{
89+
return isOTFAndPostScript;
90+
}
91+
92+
public byte[] getNonOtfTableGCID142()
93+
{
94+
return nonOtfGcid142;
95+
}
96+
97+
public String getOtfRegistry()
98+
{
99+
return otfRegistry;
100+
}
101+
102+
public String getOtfOrdering()
103+
{
104+
return otfOrdering;
105+
}
106+
107+
public int getOtfSupplement()
108+
{
109+
return otfSupplement;
110+
}
111+
112+
void setException(IOException exception)
113+
{
114+
this.exception = exception;
115+
}
116+
117+
void setName(String name)
118+
{
119+
this.name = name;
120+
}
121+
122+
void setHeaderMacStyle(Integer headerMacStyle)
123+
{
124+
this.headerMacStyle = headerMacStyle;
125+
}
126+
127+
void setOs2Windows(OS2WindowsMetricsTable os2Windows)
128+
{
129+
this.os2Windows = os2Windows;
130+
}
131+
132+
void setFontFamily(String fontFamily, String fontSubFamily)
133+
{
134+
this.fontFamily = fontFamily;
135+
this.fontSubFamily = fontSubFamily;
136+
}
137+
138+
void setNonOtfGcid142(byte[] nonOtfGcid142)
139+
{
140+
this.nonOtfGcid142 = nonOtfGcid142;
141+
}
142+
143+
void setIsOTFAndPostScript(boolean isOTFAndPostScript)
144+
{
145+
this.isOTFAndPostScript = isOTFAndPostScript;
146+
}
147+
148+
// public because CFFParser is in a different package
149+
public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement)
150+
{
151+
this.otfRegistry = otfRegistry;
152+
this.otfOrdering = otfOrdering;
153+
this.otfSupplement = otfSupplement;
154+
}
155+
}

fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ void initData( TrueTypeFont ttf, TTFDataStream data ) throws IOException
180180
*
181181
* @return A string for this class.
182182
*/
183+
@Override
183184
public String toString()
184185
{
185186
return
@@ -190,9 +191,10 @@ public String toString()
190191
" " + string;
191192
}
192193
/**
193-
* @return Returns the string.
194+
* Use {@link NamingTable#getString(NameRecord)}
195+
* @return Returns the string, if it was pre-loaded.
194196
*/
195-
public String getString()
197+
String getStringLazy()
196198
{
197199
return string;
198200
}

0 commit comments

Comments
 (0)