TomRoush · TomRoush · Dec 10, 2021 · Dec 10, 2021 · Dec 10, 2021 · Dec 10, 2021
diff --git a/NOTICE.txt b/NOTICE.txt
@@ -17,3 +17,6 @@ Copyright 1997, 1998, 2002, 2007, 2010 Adobe Systems Incorporated.
 
 Includes the Zapf Dingbats Glyph List
 Copyright 2002, 2010 Adobe Systems Incorporated.
+
+Includes the Script Property (Scripts-10.0.0.txt)
+Copyright 2017 Unicode, Inc.
diff --git a/library/src/androidTest/java/com/tom_roush/fontbox/ttf/TTFSubsetterInstrumentationTest.java b/library/src/androidTest/java/com/tom_roush/fontbox/ttf/TTFSubsetterInstrumentationTest.java
@@ -50,7 +50,7 @@ public void setUp() throws Exception
     }
 
     /**
-     * Test of PDFBOX-3757: check that postcript names that are not part of WGL4Names don't get
+     * Test of PDFBOX-3757: check that PostScript names that are not part of WGL4Names don't get
      * shuffled in buildPostTable().
      *
      * @throws java.io.IOException

diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/encryption/TestSymmetricKeyEncryption.java b/library/src/androidTest/java/com/tom_roush/pdfbox/encryption/TestSymmetricKeyEncryption.java
@@ -43,9 +43,12 @@
 import com.tom_roush.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
 import com.tom_roush.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
 import com.tom_roush.pdfbox.pdmodel.encryption.AccessPermission;
+import com.tom_roush.pdfbox.pdmodel.encryption.PDEncryption;
 import com.tom_roush.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
+import com.tom_roush.pdfbox.pdmodel.encryption.StandardSecurityHandler;
 import com.tom_roush.pdfbox.pdmodel.graphics.image.ValidateXImage;
 import com.tom_roush.pdfbox.rendering.PDFRenderer;
+import com.tom_roush.pdfbox.util.Charsets;
 
 import org.junit.Assert;
 
@@ -107,7 +110,7 @@ protected void setUp() throws Exception
      * Test that permissions work as intended: the user psw ("user") is enough
      * to open the PDF with possibly restricted rights, the owner psw ("owner")
      * gives full permissions. The 3 files of this test were created by Maruan
-     * Sayhoun, NOT with PDFBox, but with Adobe Acrobat to ensure "the gold
+     * Sahyoun, NOT with PDFBox, but with Adobe Acrobat to ensure "the gold
      * standard". The restricted permissions prevent printing and text
      * extraction. In the 128 and 256 bit encrypted files, AssembleDocument,
      * ExtractForAccessibility and PrintDegraded are also disabled.
@@ -321,9 +324,25 @@ private PDDocument encrypt(int keyLength, boolean preferAES, int sizePriorToEncr
         encryptedDoc = PDDocument.load(pdfFile, ownerpassword);
         Assert.assertTrue(encryptedDoc.isEncrypted());
         Assert.assertTrue(encryptedDoc.getCurrentAccessPermission().isOwnerPermission());
+
+        // Older encryption revisions allow the user password to be recovered when the owner password is known
+        PDEncryption encryption = encryptedDoc.getEncryption();
+        int revision = encryption.getRevision();
+        if (revision < 5)
+        {
+            StandardSecurityHandler standardSecurityHandler = new StandardSecurityHandler();
+            int keyLengthInBytes = encryption.getVersion() == 1 ? 5 : encryption.getLength() / 8;
+            byte[] computedUserPassword = standardSecurityHandler.getUserPassword(
+                ownerpassword.getBytes(Charsets.ISO_8859_1),
+                encryption.getOwnerKey(),
+                revision,
+                keyLengthInBytes);
+            Assert.assertEquals(userpassword.substring(0, 32), new String(computedUserPassword, Charsets.ISO_8859_1));
+        }
+
         encryptedDoc.close();
 
-        // test with owner password => restricted permissions
+        // test with user password => restricted permissions
         encryptedDoc = PDDocument.load(pdfFile, userpassword);
         Assert.assertTrue(encryptedDoc.isEncrypted());
         Assert.assertFalse(encryptedDoc.getCurrentAccessPermission().isOwnerPermission());

diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/multipdf/PDFMergerUtilityTest.java b/library/src/androidTest/java/com/tom_roush/pdfbox/multipdf/PDFMergerUtilityTest.java
@@ -17,17 +17,30 @@
 
 import android.content.Context;
 import android.graphics.Bitmap;
+import android.util.Log;
 
 import androidx.test.platform.app.InstrumentationRegistry;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
 
 import com.tom_roush.pdfbox.android.PDFBoxResourceLoader;
+import com.tom_roush.pdfbox.cos.COSArray;
+import com.tom_roush.pdfbox.cos.COSBase;
+import com.tom_roush.pdfbox.cos.COSDictionary;
+import com.tom_roush.pdfbox.cos.COSName;
+import com.tom_roush.pdfbox.cos.COSObject;
+import com.tom_roush.pdfbox.io.IOUtils;
 import com.tom_roush.pdfbox.io.MemoryUsageSetting;
 import com.tom_roush.pdfbox.pdmodel.PDDocument;
 import com.tom_roush.pdfbox.pdmodel.PDDocumentCatalog;
 import com.tom_roush.pdfbox.pdmodel.PDPage;
+import com.tom_roush.pdfbox.pdmodel.PDPageTree;
+import com.tom_roush.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement;
+import com.tom_roush.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
 import com.tom_roush.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
 import com.tom_roush.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageFitDestination;
 import com.tom_roush.pdfbox.rendering.PDFRenderer;
@@ -44,6 +57,7 @@ public class PDFMergerUtilityTest extends TestCase
 {
     final String SRCDIR = "pdfbox/input/merge";
     String TARGETTESTDIR;
+    private File TARGETPDFDIR;
     final int DPI = 96;
     private Context testContext;
 
@@ -55,6 +69,8 @@ protected void setUp() throws Exception
         testContext = InstrumentationRegistry.getInstrumentation().getContext();
         PDFBoxResourceLoader.init(testContext);
         TARGETTESTDIR = testContext.getCacheDir() + "/pdfbox-test-output/merge/";
+        TARGETPDFDIR = new File(testContext.getCacheDir(), "pdfs");
+        TARGETPDFDIR.mkdirs();
 
         new File(TARGETTESTDIR).mkdirs();
         if (!new File(TARGETTESTDIR).exists())
@@ -162,6 +178,94 @@ public void testPDFMergerOpenAction() throws IOException
         mergedDoc.close();
     }
 
+    /**
+     * PDFBOX-3999: check that page entries in the structure tree only reference pages from the page
+     * tree, i.e. that no orphan pages exist.
+     * <p>
+     * The source PDF is downloaded into the cache directory on first use. If the download fails,
+     * the test is skipped (it returns without asserting anything).
+     *
+     * @throws IOException if the PDF cannot be loaded, merged, saved or reloaded
+     */
+    public void testStructureTreeMerge() throws IOException
+    {
+        File pdfFile = new File(TARGETPDFDIR, "PDFBOX-3999-GeneralForbearance.pdf");
+
+        if (!pdfFile.exists())
+        {
+            try
+            {
+                Log.i("PdfBox-Android", "PDF not cached, Downloading PDF for PDFMergerUtility.testStructureTreeMerge");
+                InputStream pdfUrlStream = new URL(
+                    "https://issues.apache.org/jira/secure/attachment/12896905/GeneralForbearance.pdf")
+                    .openStream();
+                try
+                {
+                    FileOutputStream pdfOutputStream = new FileOutputStream(pdfFile);
+                    try
+                    {
+                        IOUtils.copy(pdfUrlStream, pdfOutputStream);
+                    }
+                    finally
+                    {
+                        pdfOutputStream.close();
+                    }
+                }
+                finally
+                {
+                    pdfUrlStream.close();
+                }
+            }
+            catch (Exception e)
+            {
+                // Do not leave a truncated file behind: the exists() check above would
+                // otherwise treat it as a valid cached PDF on the next run.
+                pdfFile.delete();
+                Log.w("PdfBox-Android", "Unable to download test PDF. Skipping test PDFMergerUtility.testStructureTreeMerge");
+                return;
+            }
+        }
+
+        File mergedFile = new File(TARGETTESTDIR, "PDFBOX-3999-GovFormPreFlattened-merged.pdf");
+
+        // Append the document to a second copy of itself and save the result.
+        PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
+        PDDocument src = PDDocument.load(pdfFile);
+        try
+        {
+            PDDocument dst = PDDocument.load(pdfFile);
+            try
+            {
+                pdfMergerUtility.appendDocument(dst, src);
+                dst.save(mergedFile);
+            }
+            finally
+            {
+                dst.close();
+            }
+        }
+        finally
+        {
+            src.close();
+        }
+
+        PDDocument doc = PDDocument.load(mergedFile);
+        try
+        {
+            PDPageTree pageTree = doc.getPages();
+
+            // check for orphan pages in the StructTreeRoot/K and StructTreeRoot/ParentTree trees.
+            PDStructureTreeRoot structureTreeRoot = doc.getDocumentCatalog().getStructureTreeRoot();
+            checkElement(pageTree, structureTreeRoot.getParentTree().getCOSObject());
+            checkElement(pageTree, structureTreeRoot.getK());
+        }
+        finally
+        {
+            // Close even when an assertion fails so the document is not leaked.
+            doc.close();
+        }
+    }
+
+    // Each element can be an array, a dictionary or a number.
+    // See PDF specification Table 37 – Entries in a number tree node dictionary
+    // See PDF specification Table 322 – Entries in the structure tree root
+    // example of file with /Kids: 000153.pdf 000208.pdf 000314.pdf 000359.pdf 000671.pdf
+    // from digitalcorpora site
+    private void checkElement(PDPageTree pageTree, COSBase base)
+    {
+        // Arrays: visit every entry, dereferencing indirect objects first.
+        if (base instanceof COSArray)
+        {
+            for (COSBase entry : (COSArray) base)
+            {
+                COSBase resolved = entry instanceof COSObject
+                    ? ((COSObject) entry).getObject()
+                    : entry;
+                checkElement(pageTree, resolved);
+            }
+            return;
+        }
+
+        // Anything that is neither an array nor a dictionary is ignored.
+        if (!(base instanceof COSDictionary))
+        {
+            return;
+        }
+
+        COSDictionary dict = (COSDictionary) base;
+
+        // A dictionary with a /Pg entry is checked against the page tree.
+        if (dict.containsKey(COSName.PG))
+        {
+            checkForPage(pageTree, new PDStructureElement(dict));
+        }
+
+        // /K takes precedence; only without it are /Kids, then /Nums, followed
+        // (the number tree case).
+        if (dict.containsKey(COSName.K))
+        {
+            checkElement(pageTree, dict.getDictionaryObject(COSName.K));
+        }
+        else if (dict.containsKey(COSName.KIDS))
+        {
+            checkElement(pageTree, dict.getDictionaryObject(COSName.KIDS));
+        }
+        else if (dict.containsKey(COSName.NUMS))
+        {
+            checkElement(pageTree, dict.getDictionaryObject(COSName.NUMS));
+        }
+    }
+
     // checks that the result file of a merge has the same rendering as the two
     // source files
     private void checkMergeIdentical(String filename1, String filename2, String mergeFilename,
@@ -228,4 +332,12 @@ private void checkImagesIdentical(Bitmap bim1, Bitmap bim2)
         }
     }
 
+    // Asserts that the page returned by the structure element, if any, is part of the page tree.
+    private void checkForPage(PDPageTree pageTree, PDStructureElement structureElement)
+    {
+        PDPage referencedPage = structureElement.getPage();
+        if (referencedPage == null)
+        {
+            // no page returned for this element, nothing to verify
+            return;
+        }
+        boolean inPageTree = pageTree.indexOf(referencedPage) != -1;
+        assertTrue("Page is not in the page tree", inPageTree);
+    }
 }
diff --git a/...ary/src/androidTest/java/com/tom_roush/pdfbox/pdfparser/TestPDFParserInstrumentation.java b/...ary/src/androidTest/java/com/tom_roush/pdfbox/pdfparser/TestPDFParserInstrumentation.java
@@ -42,16 +42,14 @@ public void testPDFBox3950() throws IOException
     {
         File TARGETPDFDIR = new File(testContext.getCacheDir(), "pdfs");
         TARGETPDFDIR.mkdirs();
-        File pdfFile = new File(testContext.getCacheDir(),
-            "PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf");
+        File pdfFile = new File(TARGETPDFDIR, "PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf");
 
         if (!pdfFile.exists())
         {
             try
             {
                 Log.i("PdfBox-Android", "PDF not cached, Downloading PDF for TestPDFParser.testPDFBox3950");
-                InputStream pdfUrlStream = new URL(
-                    "https://issues.apache.org/jira/secure/attachment/12890042/23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf")
+                InputStream pdfUrlStream = new URL("https://issues.apache.org/jira/secure/attachment/12890042/23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf")
                     .openStream();
                 IOUtils.copy(pdfUrlStream, new FileOutputStream(pdfFile));
             }

diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java
@@ -1,6 +1,4 @@
 /*
- *  Copyright 2011 adam.
- *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,6 +18,7 @@
 package com.tom_roush.pdfbox.pdmodel.font;
 
 import android.content.Context;
+import android.util.Log;
 
 import androidx.test.platform.app.InstrumentationRegistry;
 
@@ -28,8 +27,10 @@
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.net.URISyntaxException;
+import java.net.URL;
 
 import com.tom_roush.fontbox.ttf.TTFParser;
 import com.tom_roush.fontbox.ttf.TrueTypeFont;
@@ -50,17 +51,23 @@
 /**
  *
  * @author adam
+ * @author Tilman Hausherr
  */
 public class PDFontTest
 {
-
+    private File IN_DIR;
+    private File OUT_DIR;
     private Context testContext;
 
     @Before
-    public void setUp() throws IOException
+    public void setUp() throws Exception
     {
         testContext = InstrumentationRegistry.getInstrumentation().getContext();
         PDFBoxResourceLoader.init(testContext);
+        IN_DIR = new File(testContext.getCacheDir(), "fonts");
+        IN_DIR.mkdirs();
+        OUT_DIR = new File(testContext.getCacheDir(), "pdfbox-test-output");
+        OUT_DIR.mkdirs();
     }
 
     /**
@@ -119,6 +126,74 @@ public void testPDFBox3826() throws IOException, URISyntaxException
         ttf2.close();
     }
 
+    /**
+     * PDFBOX-4115: Test ability to create PDF with German umlaut glyphs with a type 1 font.
+     * Test for everything that went wrong before this was fixed.
+     * <p>
+     * The font is downloaded into the cache directory on first use. If the download fails,
+     * the test is skipped (it returns without asserting anything).
+     *
+     * @throws IOException if the PDF cannot be created, saved or read back
+     */
+    @Test
+    public void testPDFBOX4115() throws IOException
+    {
+        File fontFile = new File(IN_DIR, "n019003l.pfb");
+
+        if (!fontFile.exists())
+        {
+            try
+            {
+                Log.i("PdfBox-Android", "Font not cached, Downloading font for PDFontTest.testPDFBOX4115");
+                InputStream fontUrlStream = new URL(
+                    "https://issues.apache.org/jira/secure/attachment/12911053/n019003l.pfb")
+                    .openStream();
+                try
+                {
+                    FileOutputStream fontOutputStream = new FileOutputStream(fontFile);
+                    try
+                    {
+                        IOUtils.copy(fontUrlStream, fontOutputStream);
+                    }
+                    finally
+                    {
+                        fontOutputStream.close();
+                    }
+                }
+                finally
+                {
+                    fontUrlStream.close();
+                }
+            }
+            catch (Exception e)
+            {
+                // Do not leave a truncated file behind: the exists() check above would
+                // otherwise treat it as a valid cached font on the next run.
+                fontFile.delete();
+                Log.w("PdfBox-Android", "Unable to download test font. Skipping test PDFontTest.testPDFBOX4115");
+                return;
+            }
+        }
+
+        File outputFile = new File(OUT_DIR, "FontType1.pdf");
+        String text = "äöüÄÖÜ";
+
+        // Create a one-page PDF showing the umlauts with the embedded Type 1 font.
+        PDDocument doc = new PDDocument();
+        try
+        {
+            PDPage page = new PDPage();
+            PDPageContentStream contentStream = new PDPageContentStream(doc, page);
+            try
+            {
+                PDType1Font font;
+                InputStream fontStream = new FileInputStream(fontFile);
+                try
+                {
+                    font = new PDType1Font(doc, fontStream, WinAnsiEncoding.INSTANCE);
+                }
+                finally
+                {
+                    fontStream.close();
+                }
+
+                contentStream.beginText();
+                contentStream.setFont(font, 10);
+                contentStream.newLineAtOffset(10, 700);
+                contentStream.showText(text);
+                contentStream.endText();
+            }
+            finally
+            {
+                contentStream.close();
+            }
+
+            doc.addPage(page);
+
+            doc.save(outputFile);
+        }
+        finally
+        {
+            doc.close();
+        }
+
+        // Reload the saved file and verify encoding, glyph names, glyph paths and extracted text.
+        doc = PDDocument.load(outputFile);
+        try
+        {
+            PDType1Font font = (PDType1Font) doc.getPage(0).getResources().getFont(COSName.getPDFName("F1"));
+            // expected value first, so a failure message reports the values the right way round
+            Assert.assertEquals(WinAnsiEncoding.INSTANCE, font.getEncoding());
+
+            for (char c : text.toCharArray())
+            {
+                String name = font.getEncoding().getName(c);
+                // the glyph name is expected to be the base letter followed by "dieresis"
+                Assert.assertEquals("dieresis", name.substring(1));
+                Assert.assertFalse(font.getPath(name).isEmpty());
+            }
+
+            PDFTextStripper stripper = new PDFTextStripper();
+            Assert.assertEquals(text, stripper.getText(doc).trim());
+        }
+        finally
+        {
+            // Close even when an assertion fails so the document is not leaked.
+            doc.close();
+        }
+    }
+
     private void testPDFBox3826checkFonts(byte[] byteArray, File fontFile) throws IOException
     {
         PDDocument doc = PDDocument.load(byteArray);