@@ -58,11 +58,11 @@ def __jax_array__(self):
         return self.value


-_JAX_VARIABLE_TYPE = JaxVariable
-if config.is_nnx_backend_enabled():
+Variable = JaxVariable
+if config.is_nnx_enabled():
     from flax import nnx

-    class NnxVariable(KerasVariable, nnx.Variable):
+    class NnxVariable(JaxVariable, nnx.Variable):
         def __init__(
             self,
             initializer,
@@ -77,43 +77,12 @@ def __init__(
             mutable=None,
             **nnx_metadata,
         ):
-            # Determine NNX mutability. This needs to be known for
-            # nnx.Variable.__init__.
-            if mutable is None:
-                actual_nnx_mutable = (
-                    trainable  # Keras 'trainable' maps to NNX 'mutable'
-                )
-            else:
-                actual_nnx_mutable = mutable
-
-            # Ensure 'mutable' is in nnx_metadata, but explicit 'mutable'
-            # param takes precedence.
-            if "mutable" in nnx_metadata and mutable is not None:
-                nnx_metadata["mutable"] = actual_nnx_mutable
-            elif "mutable" not in nnx_metadata:
-                nnx_metadata["mutable"] = actual_nnx_mutable
-
+            nnx_metadata["mutable"] = trainable if mutable is None else mutable
             # Initialize nnx.Variable first.
             # Determine the dtype for the placeholder.
-            _placeholder_value = None
-            if shape is not None:
-                if dtype is not None:
-                    _placeholder_value = jnp.zeros(
-                        shape, dtype=standardize_dtype(dtype)
-                    )
-                else:
-                    _placeholder_value = jnp.zeros(
-                        shape, dtype=standardize_dtype(config.floatx())
-                    )
-            else:
-                if dtype is not None:
-                    _placeholder_value = jnp.array(
-                        0.0, dtype=standardize_dtype(dtype)
-                    )
-                else:
-                    _placeholder_value = jnp.array(
-                        0.0, dtype=standardize_dtype(config.floatx())
-                    )
+            _placeholder_value = jnp.zeros(
+                shape or (), dtype=standardize_dtype(dtype)
+            )

             # Call nnx.Variable.__init__ directly.
             nnx.Variable.__init__(
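A quick sanity check (not part of the patch) on why the four placeholder branches above collapse into a single call: the sketch below assumes `standardize_dtype(None)` falls back to `config.floatx()`, as the Keras helper in `keras.src.backend.common.variables` does, and relies on `shape or ()` mapping a missing shape to a scalar shape.

import jax.numpy as jnp
from keras.src.backend.common.variables import standardize_dtype

def make_placeholder(shape=None, dtype=None):
    # standardize_dtype(None) resolves to config.floatx(), covering the two
    # dtype=None branches; `shape or ()` turns shape=None into a scalar
    # shape, covering the two jnp.array(0.0, ...) branches.
    return jnp.zeros(shape or (), dtype=standardize_dtype(dtype))

assert make_placeholder().shape == ()                # scalar, floatx dtype
assert make_placeholder((2, 3), "int32").dtype == "int32"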
@@ -152,10 +121,10 @@ def __getstate__(self):
             # Get the state from KerasVariable (attributes in __dict__)
             # KerasVariable does not have a custom __getstate__, so we mimic
             # default behavior.
-            keras_state = self.__dict__.copy()
+            keras_state = KerasVariable.__getstate__(self)

             # Get the state from nnx.Variable
-            nnx_specific_state = super(KerasVariable, self).__getstate__()
+            nnx_specific_state = nnx.Variable.__getstate__(self)

             # Merge them. Keras state is primary. NNX specific state adds
             # to it.
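Outside the diff, a minimal sketch (toy classes, not the Keras/NNX ones) of the pickling pattern the new `__getstate__` uses: calling each base class explicitly instead of hopping through `super(KerasVariable, self)`, which silently depends on MRO order.

class BaseA:
    def __getstate__(self):
        return {"a_state": dict(self.__dict__)}

class BaseB:
    def __getstate__(self):
        return {"b_state": "raw"}

class Child(BaseA, BaseB):
    def __getstate__(self):
        # Explicit per-base calls make it obvious which state comes from
        # where; super(BaseA, self).__getstate__() only reaches BaseB if it
        # happens to be next in the MRO, which breaks if bases are reordered.
        state = BaseA.__getstate__(self)
        state.update(BaseB.__getstate__(self))
        return state

c = Child()
c.x = 1
print(c.__getstate__())  # {'a_state': {'x': 1}, 'b_state': 'raw'}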
@@ -170,10 +139,6 @@ def __getstate__(self):
                     "_var_metadata"
                 ]

-            # Remove elements that might be problematic or redundant if
-            # nnx.Variable's __getstate__
-            keras_state.pop("raw_value", None)
-
             return keras_state

         def __setstate__(self, state):
@@ -202,38 +167,20 @@ def __setstate__(self, state):

             # Ensure Keras's self._value is also consistent with the
             # restored raw_value
-            object.__setattr__(self, "_value", nnx_raw_value)
+            self._value = nnx_raw_value

             if hasattr(self, "_shape") and self._shape is not None:
                 self._ndim = len(self._shape)
             else:
                 # Fallback if shape isn't immediately available.
                 self._ndim = len(self.raw_value.shape)

-        def _initialize(self, value):
-            # Note that variable.shape is needed by distribution_lib
-            self._shape = self._validate_shape(value.shape)
-            # We can't import the keras/distribution/distribution_lib
-            # due to circular dependency.
-            distribution = global_state.get_global_attribute("distribution")
-            if self._layout is None and distribution is not None:
-                tensor_layout = distribution.get_variable_layout(self)
-                from keras.src.distribution import TensorLayout
-
-                if isinstance(tensor_layout, TensorLayout):
-                    self._layout = tensor_layout.backend_layout
-                else:
-                    self._layout = tensor_layout
-            self._direct_assign(value)
-
         def _direct_assign(self, value):
             # Apply JAX-specific distribution if layout is present
             if self._layout is not None:
-                processed_value = distribution_lib.distribute_variable(
+                value = distribution_lib.distribute_variable(
                     value, self._layout
                 )
-            else:
-                processed_value = value

             # Ensure that nnx.Variable part is initialized
             if not hasattr(self, "_var_metadata"):
@@ -245,48 +192,31 @@ def _direct_assign(self, value):
                 hasattr(self, "_var_metadata")
                 and "on_set_value" in self._var_metadata
             ):
-                final_value = self._var_metadata["on_set_value"](
-                    self, processed_value
-                )
-            else:
-                final_value = processed_value
-
-            # Directly set raw_value. nnx.Variable handles mutable array
-            # updates
-            object.__setattr__(self, "raw_value", final_value)
-
-        def _convert_to_tensor(self, value, dtype=None):
-            return convert_to_tensor(value, dtype=dtype, sparse=False)
-
-        # Overload native accessor.
-        def __jax_array__(self):
-            return self.value
+                value = self._var_metadata["on_set_value"](self, value)

         @property
         def value(self):
             if not hasattr(self, "raw_value"):
-                if not hasattr(self, "_value") or self._value is None:
-                    if self._initializer is not None:
-                        initial_value = self._initializer(
-                            self._shape, dtype=self._dtype
-                        )
-                        return self._maybe_autocast(initial_value)
-                    else:
-                        raise AttributeError(
-                            "Variable is not properly initialized and has"
-                            " no initializer."
-                        )
-                current_value = self._value
-            else:
-                current_value = self.raw_value
-                if (
-                    hasattr(self, "_var_metadata")
-                    and "on_get_value" in self._var_metadata
-                ):
-                    current_value = self._var_metadata["on_get_value"](
-                        self, current_value
+                if self._initializer is not None:
+                    self._initialize(
+                        self._initializer(self.shape, dtype=self.dtype)
+                    )
+                else:
+                    # This implies nnx.Variable didn't set placeholder or init failed.
+                    raise AttributeError(
+                        "Variable is not properly initialized (raw_value missing) "
+                        "and has no initializer."
                     )
+            # Now, self.raw_value must exist. It's the source of truth.
+            current_value = self.raw_value

+            if (
+                hasattr(self, "_var_metadata")
+                and "on_get_value" in self._var_metadata
+            ):
+                current_value = self._var_metadata["on_get_value"](
+                    self, current_value
+                )
             if in_stateless_scope():
                 scope = get_stateless_scope()
                 stateless_value = scope.get_current_value(self)
@@ -298,7 +228,7 @@ def value(self):
         def __hash__(self):
             return id(self)

-    _JAX_VARIABLE_TYPE = NnxVariable
+    Variable = NnxVariable


 def convert_to_tensor(x, dtype=None, sparse=None, ragged=None):
@@ -314,7 +244,7 @@ def convert_to_tensor(x, dtype=None, sparse=None, ragged=None):
         # an existing distributed jax array will raise error.
         return x

-    if isinstance(x, _JAX_VARIABLE_TYPE):
+    if isinstance(x, Variable):
         if dtype is not None and x.dtype != dtype:
             return x.value.astype(dtype)
         return x.value
@@ -598,7 +528,7 @@ def fori_loop(lower, upper, body_fun, init_val):


 def stop_gradient(variable):
-    if isinstance(variable, _JAX_VARIABLE_TYPE):
+    if isinstance(variable, Variable):
         variable = variable.value
     return jax.lax.stop_gradient(variable)
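The renaming hunks above drop the private `_JAX_VARIABLE_TYPE` sentinel and instead rebind the module-level `Variable` name. A toy sketch of that conditional-alias pattern (illustrative names only, not the actual Keras backend code):

class JaxVariable:
    """Plain JAX-backed variable."""

class NnxVariable(JaxVariable):
    """NNX-aware subclass, only used when the NNX backend is enabled."""

nnx_enabled = False  # stand-in for config.is_nnx_enabled()

# One exported name; isinstance checks in convert_to_tensor, stop_gradient,
# etc. work against whichever class was bound at import time.
Variable = NnxVariable if nnx_enabled else JaxVariable

assert isinstance(JaxVariable(), Variable)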