From 0c440fef01b8694a63740ed788ecca7b986fa8f9 Mon Sep 17 00:00:00 2001
From: NSoiffer <NSoiffer@users.noreply.github.com>
Date: Tue, 22 Apr 2025 16:38:06 -0700
Subject: [PATCH 1/4] Rebased file on beta branch.

Updated adobeAcrobat.py with suggestions as per the PR
Updated changes.md as per the PR

Fingers crossed I got this right...
---
 .../NVDAObjects/IAccessible/adobeAcrobat.py   | 88 +++++++++++++------
 user_docs/en/changes.md                       |  1 +
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py
index cf9da55ec92..68d722b2984 100644
--- a/source/NVDAObjects/IAccessible/adobeAcrobat.py
+++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py
@@ -119,19 +119,41 @@ def _isEqual(self, other):
 			return self.accID == other.accID
 		return super(AcrobatNode, self)._isEqual(other)
 
-	def _getNodeMathMl(self, node):
-		tag = node.GetTagName()
-		yield "<%s" % tag
-		# Output relevant attributes.
-		if tag == "mfenced":
-			for attr in "open", "close", "separators":
-				val = node.GetAttribute(attr, "XML-1.00")
+	def _getNodeMathMl(self, node) -> str:
+		"""Traverse the MathML tree and return an XML string representing the math"""
+
+		def getMathMLAttributes(element, attrList: list) -> str:
+			attrValues = ""
+			for attr in attrList:
+				val = element.GetAttribute(attr, "NSO")
 				if val:
-					yield ' %s="%s"' % (attr, val)
-		yield ">"
+					attrValues += f' {attr}="{val}"'
+			return attrValues
+
+		tag = node.GetTagName()
+		answer = f"<{tag}"
+		# Output relevant attributes
+		id = node.GetID()
+		if id:
+			answer += f' id="{id}"'
+		answer += getMathMLAttributes(node, ["intent", "arg"])
+		match tag:
+			case "mi" | "mn" | "mo" | "mtext":
+				answer += getMathMLAttributes(node, ["mathvariant"])
+			case "mfenced":
+				answer += getMathMLAttributes(node, ["open", "close", "separators"])
+			case "menclose":
+				answer += getMathMLAttributes(node, ["notation", "notationtype"])
+			case "annotation-xml" | "annotation":
+				answer += getMathMLAttributes(node, ["encoding"])
+			case "ms":
+				answer += getMathMLAttributes(node, ["open", "close"])
+			case _:
+				pass
+		answer += ">"
 		val = node.GetValue()
 		if val:
-			yield val
+			answer += val
 		else:
 			for childNum in range(node.GetChildCount()):
 				try:
@@ -139,41 +161,49 @@ def _getNodeMathMl(self, node):
 				except COMError:
 					continue
 				for sub in self._getNodeMathMl(subNode):
-					yield sub
-		yield "</%s>" % tag
+					answer += sub
+		return answer + f"</{tag}>"
 
 	def _get_mathMl(self) -> str:
 		"""Return the MathML associated with a Formula tag"""
+		# There are two ways that MathML can be represented in a PDF:
+		# 1. As a series of nested tags, each with a MathML element as the value.
+		# 2. As a Formula tag with MathML as the value (comes from MathML in an Associated File)
 		if self.pdDomNode is None:
 			log.debugWarning("_get_mathMl: self.pdDomNode is None!")
 			raise LookupError
+
+		# see if MathML tagging is used
+		for childNum in range(self.pdDomNode.GetChildCount()):
+			try:
+				child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement)
+			except COMError:
+				log.debugWarning(f"COMError trying to get childNum={childNum}")
+				continue
+			if log.isEnabledFor(log.DEBUG):
+				log.debug(f"\t(PDF) get_mathMl: tag={child.GetTagName()}")
+			if child.GetTagName() == "math":
+				answer = "".join(self._getNodeMathMl(child))
+				log.debug(f"_get_mathMl (PDF): found tagged MathML = {answer}")
+				return answer
+
 		mathMl = self.pdDomNode.GetValue()
 		if log.isEnabledFor(log.DEBUG):
 			log.debug(
 				(
-					f"_get_mathMl: math recognized: {mathMl.startswith('<math')}, "
+					f"_get_mathMl (PDF): math recognized: {mathMl.startswith('<math')}, "
 					f"child count={self.pdDomNode.GetChildCount()},"
-					f"\n  name='{self.pdDomNode.GetName()}', value='{mathMl}'"
+					f"\n  name='{self.pdDomNode.GetName()}', value found from AF ='{mathMl}'"
 				),
 			)
 		# this test and the replacement doesn't work if someone uses a namespace tag (which they shouldn't, but..)
 		if mathMl.startswith("<math"):
 			return mathMl.replace('xmlns:mml="http://www.w3.org/1998/Math/MathML"', "")
-		# Alternative for tagging: all the sub expressions are tagged -- gather up the MathML
-		for childNum in range(self.pdDomNode.GetChildCount()):
-			try:
-				child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement)
-			except COMError:
-				log.debugWarning(f"COMError trying to get childNum={childNum}")
-				continue
-			if log.isEnabledFor(log.DEBUG):
-				log.debug(f"\tget_mathMl: tag={child.GetTagName()}")
-			if child.GetTagName() == "math":
-				return "".join(self._getNodeMathMl(child))
-		# fall back to return the contents, which is hopefully alt text
-		if log.isEnabledFor(log.DEBUG):
-			log.debug("_get_mathMl: didn't find MathML -- returning value as mtext")
-		return f"<math><mtext>{self.pdDomNode.GetValue()}</mtext></math>"
+
+		# not MathML -- fall back to return the contents, which is hopefully alt text, inside an <mtext>
+		answer = f"<math><mtext>{mathMl}</mtext></math>"
+		log.debug(f"_get_mathMl: didn't find MathML -- returning value as mtext: {answer}")
+		return answer
 
 
 class RootNode(AcrobatNode):
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index d71714e02e1..d1e9966e476 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -598,6 +598,7 @@ There are many minor bug fixes for applications, such as Thunderbird, Adobe Read
 
 ### Bug Fixes
 
+* Fixed the reading of math attributes in Adobe Reader; previously this resulted in poor or wrong speech and braille. (#17980)
 * Windows 11 fixes:
   * NVDA will once again announce hardware keyboard input suggestions. (#16283, @josephsl)
   * In Version 24H2 (2024 Update and Windows Server 2025), mouse and touch interaction can be used in quick settings. (#16348, @josephsl)

From 85a73358aedac38ca40e21a1ba38e1e814bca55c Mon Sep 17 00:00:00 2001
From: NSoiffer <NSoiffer@users.noreply.github.com>
Date: Tue, 22 Apr 2025 23:09:58 -0700
Subject: [PATCH 2/4] Moved `getMathMLAttributes` out from being a nested
 function to being a static class method.

Added a few more comments.
---
 .../NVDAObjects/IAccessible/adobeAcrobat.py   | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py
index 68d722b2984..37c57ab2e8b 100644
--- a/source/NVDAObjects/IAccessible/adobeAcrobat.py
+++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py
@@ -119,35 +119,40 @@ def _isEqual(self, other):
 			return self.accID == other.accID
 		return super(AcrobatNode, self)._isEqual(other)
 
+	@staticmethod
+	def getMathMLAttributes(node, attrList: list) -> str:
+		"""Get the MathML attributes in 'attrList' for a 'node' (MathML element)."""
+		attrValues = ""
+		for attr in attrList:
+			# "NSO" comes from the PDF spec
+			val = node.GetAttribute(attr, "NSO")
+			if val:
+				attrValues += f' {attr}="{val}"'
+		return attrValues
+
 	def _getNodeMathMl(self, node) -> str:
 		"""Traverse the MathML tree and return an XML string representing the math"""
 
-		def getMathMLAttributes(element, attrList: list) -> str:
-			attrValues = ""
-			for attr in attrList:
-				val = element.GetAttribute(attr, "NSO")
-				if val:
-					attrValues += f' {attr}="{val}"'
-			return attrValues
-
 		tag = node.GetTagName()
 		answer = f"<{tag}"
 		# Output relevant attributes
 		id = node.GetID()
 		if id:
 			answer += f' id="{id}"'
-		answer += getMathMLAttributes(node, ["intent", "arg"])
+		# The PDF interface lacks a way to get all the attributes, so we have to get specific ones
+		# The attributes below affect accessibility
+		answer += AcrobatNode.getMathMLAttributes(node, ["intent", "arg"])
 		match tag:
 			case "mi" | "mn" | "mo" | "mtext":
-				answer += getMathMLAttributes(node, ["mathvariant"])
+				answer += AcrobatNode.getMathMLAttributes(node, ["mathvariant"])
 			case "mfenced":
-				answer += getMathMLAttributes(node, ["open", "close", "separators"])
+				answer += AcrobatNode.getMathMLAttributes(node, ["open", "close", "separators"])
 			case "menclose":
-				answer += getMathMLAttributes(node, ["notation", "notationtype"])
+				answer += AcrobatNode.getMathMLAttributes(node, ["notation", "notationtype"])
 			case "annotation-xml" | "annotation":
-				answer += getMathMLAttributes(node, ["encoding"])
+				answer += AcrobatNode.getMathMLAttributes(node, ["encoding"])
 			case "ms":
-				answer += getMathMLAttributes(node, ["open", "close"])
+				answer += AcrobatNode.getMathMLAttributes(node, ["open", "close"])
 			case _:
 				pass
 		answer += ">"

From 3a75019312a676094af4c5121212404157656459 Mon Sep 17 00:00:00 2001
From: Sascha Cowley <16543535+SaschaCowley@users.noreply.github.com>
Date: Fri, 2 May 2025 14:21:29 +1000
Subject: [PATCH 3/4] Update source/NVDAObjects/IAccessible/adobeAcrobat.py

Co-authored-by: Sean Budd <seanbudd123@gmail.com>
---
 source/NVDAObjects/IAccessible/adobeAcrobat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py
index 37c57ab2e8b..4d61e2d4c0d 100644
--- a/source/NVDAObjects/IAccessible/adobeAcrobat.py
+++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py
@@ -183,7 +183,7 @@ def _get_mathMl(self) -> str:
 			try:
 				child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement)
 			except COMError:
-				log.debugWarning(f"COMError trying to get childNum={childNum}")
+				log.debugWarning(f"COMError trying to get {childNum=}")
 				continue
 			if log.isEnabledFor(log.DEBUG):
 				log.debug(f"\t(PDF) get_mathMl: tag={child.GetTagName()}")

From 1577b3e9ae64165b4f4291f82e60f6e220633d48 Mon Sep 17 00:00:00 2001
From: Sean Budd <seanbudd123@gmail.com>
Date: Fri, 2 May 2025 17:31:57 +1000
Subject: [PATCH 4/4] Apply suggestions from code review

---
 source/NVDAObjects/IAccessible/adobeAcrobat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/NVDAObjects/IAccessible/adobeAcrobat.py b/source/NVDAObjects/IAccessible/adobeAcrobat.py
index 4d61e2d4c0d..55613202e63 100644
--- a/source/NVDAObjects/IAccessible/adobeAcrobat.py
+++ b/source/NVDAObjects/IAccessible/adobeAcrobat.py
@@ -120,7 +120,7 @@ def _isEqual(self, other):
 		return super(AcrobatNode, self)._isEqual(other)
 
 	@staticmethod
-	def getMathMLAttributes(node, attrList: list) -> str:
+	def getMathMLAttributes(node: IPDDomElement, attrList: list) -> str:
 		"""Get the MathML attributes in 'attrList' for a 'node' (MathML element)."""
 		attrValues = ""
 		for attr in attrList:
@@ -130,7 +130,7 @@ def getMathMLAttributes(node, attrList: list) -> str:
 				attrValues += f' {attr}="{val}"'
 		return attrValues
 
-	def _getNodeMathMl(self, node) -> str:
+	def _getNodeMathMl(self, node: IPDDomElement) -> str:
 		"""Traverse the MathML tree and return an XML string representing the math"""
 
 		tag = node.GetTagName()