From 0c440fef01b8694a63740ed788ecca7b986fa8f9 Mon Sep 17 00:00:00 2001 From: NSoiffer Date: Tue, 22 Apr 2025 16:38:06 -0700 Subject: [PATCH 1/4] Rebased file on beta branch. Updated adobeAcrobat.py with suggestions as per the PR Updated changes.md as per the PR Fingers crossed I got this right... --- .../NVDAObjects/IAccessible/adobeAcrobat.py | 88 +++++++++++++------ user_docs/en/changes.md | 1 + 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py index cf9da55ec92..68d722b2984 100644 --- a/source/NVDAObjects/IAccessible/adobeAcrobat.py +++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py @@ -119,19 +119,41 @@ def _isEqual(self, other): return self.accID == other.accID return super(AcrobatNode, self)._isEqual(other) - def _getNodeMathMl(self, node): - tag = node.GetTagName() - yield "<%s" % tag - # Output relevant attributes. - if tag == "mfenced": - for attr in "open", "close", "separators": - val = node.GetAttribute(attr, "XML-1.00") + def _getNodeMathMl(self, node) -> str: + """Traverse the MathML tree and return an XML string representing the math""" + + def getMathMLAttributes(element, attrList: list) -> str: + attrValues = "" + for attr in attrList: + val = element.GetAttribute(attr, "NSO") if val: - yield ' %s="%s"' % (attr, val) - yield ">" + attrValues += f' {attr}="{val}"' + return attrValues + + tag = node.GetTagName() + answer = f"<{tag}" + # Output relevant attributes + id = node.GetID() + if id: + answer += f' id="{id}"' + answer += getMathMLAttributes(node, ["intent", "arg"]) + match tag: + case "mi" | "mn" | "mo" | "mtext": + answer += getMathMLAttributes(node, ["mathvariant"]) + case "mfenced": + answer += getMathMLAttributes(node, ["open", "close", "separators"]) + case "menclose": + answer += getMathMLAttributes(node, ["notation", "notationtype"]) + case "annotation-xml" | "annotation": + answer += getMathMLAttributes(node, ["encoding"]) + 
case "ms": + answer += getMathMLAttributes(node, ["open", "close"]) + case _: + pass + answer += ">" val = node.GetValue() if val: - yield val + answer += val else: for childNum in range(node.GetChildCount()): try: @@ -139,41 +161,49 @@ def _getNodeMathMl(self, node): except COMError: continue for sub in self._getNodeMathMl(subNode): - yield sub - yield "" % tag + answer += sub + return answer + f"" def _get_mathMl(self) -> str: """Return the MathML associated with a Formula tag""" + # There are two ways that MathML can be represented in a PDF: + # 1. As a series of nested tags, each with a MathML element as the value. + # 2. As a Formula tag with MathML as the value (comes from MathML in an Associated File) if self.pdDomNode is None: log.debugWarning("_get_mathMl: self.pdDomNode is None!") raise LookupError + + # see if it is MathML tagging is used + for childNum in range(self.pdDomNode.GetChildCount()): + try: + child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement) + except COMError: + log.debugWarning(f"COMError trying to get childNum={childNum}") + continue + if log.isEnabledFor(log.DEBUG): + log.debug(f"\t(PDF) get_mathMl: tag={child.GetTagName()}") + if child.GetTagName() == "math": + answer = "".join(self._getNodeMathMl(child)) + log.debug(f"_get_mathMl (PDF): found tagged MathML = {answer}") + return answer + mathMl = self.pdDomNode.GetValue() if log.isEnabledFor(log.DEBUG): log.debug( ( - f"_get_mathMl: math recognized: {mathMl.startswith('{self.pdDomNode.GetValue()}" + + # not MathML -- fall back to return the contents, which is hopefully alt text, inside an + answer = f"{mathMl}" + log.debug(f"_get_mathMl: didn't find MathML -- returning value as mtext: {answer}") + return answer class RootNode(AcrobatNode): diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index d71714e02e1..d1e9966e476 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -598,6 +598,7 @@ There are many minor bug fixes for applications, 
such as Thunderbird, Adobe Read ### Bug Fixes +* Fixed how math attributes are read in Adobe Reader; previously, this resulted in poor or wrong speech and braille. (#17980) * Windows 11 fixes: * NVDA will once again announce hardware keyboard input suggestions. (#16283, @josephsl) * In Version 24H2 (2024 Update and Windows Server 2025), mouse and touch interaction can be used in quick settings. (#16348, @josephsl) From 85a73358aedac38ca40e21a1ba38e1e814bca55c Mon Sep 17 00:00:00 2001 From: NSoiffer Date: Tue, 22 Apr 2025 23:09:58 -0700 Subject: [PATCH 2/4] Moved `getMathMLAttributes` out from being a nested function to being a static class method. Added a few more comments. --- .../NVDAObjects/IAccessible/adobeAcrobat.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py index 68d722b2984..37c57ab2e8b 100644 --- a/source/NVDAObjects/IAccessible/adobeAcrobat.py +++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py @@ -119,35 +119,40 @@ def _isEqual(self, other): return self.accID == other.accID return super(AcrobatNode, self)._isEqual(other) + @staticmethod + def getMathMLAttributes(node, attrList: list) -> str: + """Get the MathML attributes in 'attrList' for a 'node' (MathML element).""" + attrValues = "" + for attr in attrList: + # "NSO" comes from the PDF spec + val = node.GetAttribute(attr, "NSO") + if val: + attrValues += f' {attr}="{val}"' + return attrValues + def _getNodeMathMl(self, node) -> str: """Traverse the MathML tree and return an XML string representing the math""" - def getMathMLAttributes(element, attrList: list) -> str: - attrValues = "" - for attr in attrList: - val = element.GetAttribute(attr, "NSO") - if val: - attrValues += f' {attr}="{val}"' - return attrValues - tag = node.GetTagName() answer = f"<{tag}" # Output relevant attributes id = node.GetID() if id: answer += f' id="{id}"' - answer += getMathMLAttributes(node, 
["intent", "arg"]) + # The PDF interface lacks a way to get all the attributes, so we have to get specific ones + # The attributes below affect accessibility + answer += AcrobatNode.getMathMLAttributes(node, ["intent", "arg"]) match tag: case "mi" | "mn" | "mo" | "mtext": - answer += getMathMLAttributes(node, ["mathvariant"]) + answer += AcrobatNode.getMathMLAttributes(node, ["mathvariant"]) case "mfenced": - answer += getMathMLAttributes(node, ["open", "close", "separators"]) + answer += AcrobatNode.getMathMLAttributes(node, ["open", "close", "separators"]) case "menclose": - answer += getMathMLAttributes(node, ["notation", "notationtype"]) + answer += AcrobatNode.getMathMLAttributes(node, ["notation", "notationtype"]) case "annotation-xml" | "annotation": - answer += getMathMLAttributes(node, ["encoding"]) + answer += AcrobatNode.getMathMLAttributes(node, ["encoding"]) case "ms": - answer += getMathMLAttributes(node, ["open", "close"]) + answer += AcrobatNode.getMathMLAttributes(node, ["open", "close"]) case _: pass answer += ">" From 3a75019312a676094af4c5121212404157656459 Mon Sep 17 00:00:00 2001 From: Sascha Cowley <16543535+SaschaCowley@users.noreply.github.com> Date: Fri, 2 May 2025 14:21:29 +1000 Subject: [PATCH 3/4] Update source/NVDAObjects/IAccessible/adobeAcrobat.py Co-authored-by: Sean Budd --- source/NVDAObjects/IAccessible/adobeAcrobat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py index 37c57ab2e8b..4d61e2d4c0d 100644 --- a/source/NVDAObjects/IAccessible/adobeAcrobat.py +++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py @@ -183,7 +183,7 @@ def _get_mathMl(self) -> str: try: child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement) except COMError: - log.debugWarning(f"COMError trying to get childNum={childNum}") + log.debugWarning(f"COMError trying to get {childNum=}") continue if log.isEnabledFor(log.DEBUG): 
log.debug(f"\t(PDF) get_mathMl: tag={child.GetTagName()}") From 1577b3e9ae64165b4f4291f82e60f6e220633d48 Mon Sep 17 00:00:00 2001 From: Sean Budd Date: Fri, 2 May 2025 17:31:57 +1000 Subject: [PATCH 4/4] Apply suggestions from code review --- source/NVDAObjects/IAccessible/adobeAcrobat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py index 4d61e2d4c0d..55613202e63 100644 --- a/source/NVDAObjects/IAccessible/adobeAcrobat.py +++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py @@ -120,7 +120,7 @@ def _isEqual(self, other): return super(AcrobatNode, self)._isEqual(other) @staticmethod - def getMathMLAttributes(node, attrList: list) -> str: + def getMathMLAttributes(node: IPDDomElement, attrList: list) -> str: """Get the MathML attributes in 'attrList' for a 'node' (MathML element).""" attrValues = "" for attr in attrList: @@ -130,7 +130,7 @@ def getMathMLAttributes(node, attrList: list) -> str: attrValues += f' {attr}="{val}"' return attrValues - def _getNodeMathMl(self, node) -> str: + def _getNodeMathMl(self, node: IPDDomElement) -> str: """Traverse the MathML tree and return an XML string representing the math""" tag = node.GetTagName()