@@ -343,6 +343,177 @@ def grad(self, inp, grads):
         return (g_x,)
 
 
+class TensorMax(COp):
+    """
+    Calculate the max over a given axis or over all axes.
+
+    """
+
+    nin = 1  # tensor
+    nout = 1  # max val
+    E_axis = "invalid axis"
+    params_type = Generic()
+    __props__ = ("axis",)
+    _f16_ok = True
+
+    def __init__(self, axis):
+        assert isinstance(axis, tuple | list)
+        self.axis = tuple(axis)
+
+    def get_params(self, node):
+        return self.axis
+
+    def make_node(self, x):
+        x = as_tensor_variable(x)
+
+        # Keep the original shapes for axes on which we do not perform the max.
+        all_axes = set(self.axis)
+        inputs = [x]
+        out_shape = tuple(s for i, s in enumerate(x.type.shape) if i not in all_axes)
+        outputs = [
+            tensor(dtype=x.type.dtype, shape=out_shape, name="max"),
+        ]
+        return Apply(self, inputs, outputs)
+
+    def prepare_node(self, node, storage_map, compute_map, impl):
+        if len(node.inputs) == 2:
+            raise ValueError(
+                "You are trying to compile a graph with an old Argmax node. Either reoptimize your graph or rebuild it to get the new node format."
+            )
+
+    def perform(self, node, inp, outs):
+        x = inp[0]
+        axes = self.axis
+        (max,) = outs
+        if axes is None:
+            axes = tuple(range(x.ndim))
+        else:
+            axes = tuple(int(ax) for ax in axes)
+        max[0] = _asarray(np.max(x, axes), dtype=node.outputs[0].dtype)
+
+    def c_code(self, node, name, inp, out, sub):
+        if len(self.axis) != 1 and len(self.axis) != node.inputs[0].ndim:
+            raise NotImplementedError(
+                "NumPy C-API can compute max only for 1 axis or for all axes."
+            )
+        x = inp[0]
+        axis = sub["params"]
+        (max,) = out
+        fail = sub["fail"]
+        ret = """
+        #if PY_MAJOR_VERSION >= 3
+            #ifndef PyInt_AS_LONG
+                #define PyInt_AS_LONG PyLong_AS_LONG
+            #endif
+        #endif
+
+        int axis;
+
+        if (PyTuple_GET_SIZE(%(axis)s) == PyArray_NDIM(%(x)s)) {
+            axis = NPY_MAXDIMS;
+        } else if(PyTuple_GET_SIZE(%(axis)s) == 1) {
+            PyObject* axis_object = PyTuple_GET_ITEM(%(axis)s, 0);
+            axis = (int)PyInt_AS_LONG(axis_object);
+            if (axis > PyArray_NDIM(%(x)s)-1 || axis < -PyArray_NDIM(%(x)s)) {
+                PyErr_SetString(PyExc_ValueError,
+                "TensorMax: bad axis argument");
+                %(fail)s
+            }
+        } else {
+            PyErr_SetString(PyExc_NotImplementedError,
+            "TensorMax: NumPy C-API can compute max only for 1 axis or for all axes.");
+            %(fail)s
+        }
+
+        Py_CLEAR(%(max)s);
+
+        %(max)s = (PyArrayObject*)PyArray_Max(%(x)s, axis, NULL);
+        if (%(max)s == NULL) {
+            %(fail)s;
+        }
+        if (!PyArray_CheckExact(%(max)s)) {
+            %(max)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(max)s, NULL, 0, 0, NPY_ARRAY_ENSUREARRAY, NULL);
+            if(%(max)s == NULL){
+                %(fail)s;
+            }
+        }
+        """
+        return ret % locals()
+
+    def c_code_cache_version(self):
+        return (5,)
+
+    def infer_shape(self, fgraph, node, shapes):
+        ishape = shapes[0]
+        rval = tuple(
+            ishape[i]
+            for (i, b) in enumerate(node.inputs[0].type.broadcastable)
+            if i not in self.axis
+        )
+        return [rval]
+
+    def R_op(self, inputs, eval_points):
+        if eval_points[0] is None:
+            return [None]
+
+        if len(self.axis) != 1:
+            raise ValueError("R_op supported for max only for one axis!")
+        if self.axis[0] > 1:
+            raise ValueError("R_op supported for max only when axis is 0 or 1")
+        if inputs[0].ndim != 2:
+            raise ValueError("R_op supported for max only when input is a matrix")
+        # The R_op of max picks the perturbation entry at the argmax position
+        # along the reduced axis.
+        max_pos = Argmax(self.axis)(*inputs)
+        if self.axis[0] == 0:
+            return [eval_points[0][max_pos, arange(eval_points[0].shape[1])]]
+        else:
+            return [eval_points[0][arange(eval_points[0].shape[0]), max_pos]]
+
+    def grad(self, inp, grads):
+        # The strict sense mathematical gradient of the maximum function is
+        # not calculated here because it is not defined at every point where
+        # some coordinates are identical. However, since the latter set has
+        # null Lebesgue measure, the result may be interpreted as a weak gradient.
+
+        # g_max has one dimension less than x, so it must be broadcast back
+        # to x's shape; the DimShuffle pattern below does exactly that.
+        x = inp[0]
+        axis = as_tensor_variable(self.axis)
+        (g_max,) = grads
+
+        g_max_disconnected = isinstance(g_max.type, DisconnectedType)
+
+        # if the op is totally disconnected, so are its inputs
+        if g_max_disconnected:
+            return [DisconnectedType()()]
+
+        if NoneConst.equals(axis):
+            axis_ = list(range(x.ndim))
+        else:
+            axis_ = axis
+        xmax = max(x, axis_)
+
+        # Raise g_max and xmax to the same number of dimensions as the input.
+        pattern = []
+        out_dim = 0
+        if NoneConst.equals(axis):
+            # We are taking the max over all dimensions.
+            axis = None
+        for i in range(x.ndim):
+            if axis is None or i in axis.data:
+                pattern.append("x")
+            else:
+                pattern.append(out_dim)
+                out_dim += 1
+        g_max_pad = DimShuffle(g_max.broadcastable, pattern)(g_max)
+        xmax_pad = DimShuffle(xmax.broadcastable, pattern)(xmax)
+
+        # Set the grad to the correct position.
+        g_x = eq(xmax_pad, x) * g_max_pad
+        return (g_x,)
+
+
 class Argmax(COp):
     """
     Calculate the argmax over a given axis or over all axes.
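For intuition, the weak gradient that `TensorMax.grad` implements can be sketched in plain NumPy. This is an illustrative example, not part of the diff; the helper name `weak_max_grad` is hypothetical:

```python
import numpy as np

def weak_max_grad(x, g_max, axis):
    # Forward max, kept broadcastable against x (the role of xmax_pad).
    xmax = np.max(x, axis=axis, keepdims=True)
    # Broadcast g_max back to x's rank (the role of g_max_pad).
    g_max_pad = np.expand_dims(g_max, axis)
    # eq(xmax_pad, x) * g_max_pad: route the gradient to every position
    # that attains the maximum.
    return (x == xmax) * g_max_pad

x = np.array([[3.0, 1.0], [3.0, 2.0]])
print(weak_max_grad(x, np.ones(2), axis=0))
# [[1. 0.]
#  [1. 1.]]
```

Note that on ties (both 3.0 entries in the first column) every maximal entry receives the full output gradient, which is exactly the behavior of `eq(xmax_pad, x) * g_max_pad` above.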
@@ -357,8 +528,7 @@ class Argmax(COp):
     params_type = ParamsType(c_axis=ps.int64)
 
     def __init__(self, axis):
-        if axis is not None:
-            axis = tuple(axis)
+        assert isinstance(axis, tuple | list)
         self.axis = tuple(axis)
 
     def get_params(self, node):
@@ -395,6 +565,8 @@ def perform(self, node, inp, outs):
         (max_idx,) = outs
         if axes is None:
             axes = tuple(range(x.ndim))
+        else:
+            axes = tuple(int(ax) for ax in axes)
 
         # Numpy does not support multiple axes for argmax
         # Work around
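Since `np.argmax` accepts only a single axis, the workaround referenced here moves the kept axes to the front, flattens all reduced axes into one trailing axis, and takes the argmax over that axis. A self-contained NumPy sketch of the same idea (the helper name `argmax_over_axes` is hypothetical):

```python
import numpy as np

def argmax_over_axes(x, axes):
    # Move the non-reduced axes to the front.
    keep_axes = [i for i in range(x.ndim) if i not in axes]
    transposed = np.transpose(x, keep_axes + list(axes))
    kept_shape = transposed.shape[: len(keep_axes)]
    # Flatten the reduced axes into a single trailing axis. np.prod returns
    # 1.0 for an empty sequence, so cast to int64 to keep reshape happy.
    new_shape = (*kept_shape, np.prod(transposed.shape[len(keep_axes):], dtype="int64"))
    # A single argmax over the flattened axis indexes into the reduced block.
    return np.argmax(transposed.reshape(new_shape), axis=-1).astype("int64")

x = np.arange(24).reshape(2, 3, 4)
print(argmax_over_axes(x, (1, 2)))  # [11 11]: last entry of each 3x4 block
```

The returned indices are offsets into the flattened reduced block, matching what `Argmax.perform` produces.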
@@ -477,7 +649,6 @@ def grad(self, inp, grads):
 
 
 @_vectorize_node.register(Argmax)
-@_vectorize_node.register(MaxAndArgmax)
 def vectorize_argmax_node(op, node, batch_x):
     core_ndim = node.inputs[0].type.ndim
     batch_ndim = batch_x.type.ndim - core_ndim
@@ -600,7 +771,8 @@ def max_and_argmax(a, axis=None, keepdims=False):
     axis = check_and_normalize_axes(a, axis)
     if len(axis) == 0:
         axis = list(range(a.type.ndim))
-    out, argout = MaxAndArgmax(axis)(a)
+    out = TensorMax(axis)(a)
+    argout = Argmax(axis)(a)
 
     if keepdims:
         out = makeKeepDims(a, out, axis)
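With this change, `max_and_argmax` builds two independent nodes (a `TensorMax` and an `Argmax`) instead of one fused `MaxAndArgmax`. A quick usage sketch, assuming this diff targets pytensor's `tensor/math.py` as the surrounding identifiers suggest:

```python
import numpy as np
import pytensor
import pytensor.tensor as pt

x = pt.matrix("x")
mx, amx = pt.max_and_argmax(x, axis=0)  # now two separate graph nodes
f = pytensor.function([x], [mx, amx])
print(f(np.array([[1.0, 5.0], [4.0, 2.0]])))
# [array([4., 5.]), array([1, 0])]
```

Because the two outputs no longer come from a single Apply node, graph rewrites can drop whichever of the pair a compiled function does not actually use.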