Merge pull request #772 from google:lance-fix1

The tunix Authors · The tunix Authors · commit 29d0c0d01e67 · 2025-11-19T18:41:10.000-08:00
PiperOrigin-RevId: 834528724
diff --git a/.github/workflows/tpu-tests.yml b/.github/workflows/tpu-tests.yml
@@ -37,7 +37,7 @@ jobs:
     runs-on: [linux-x86-ct5lp-224-8tpu]
     environment: testing
     container:
-      image: us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/candidate/tpu:jax0.7.1_rev1
+      image: us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/candidate/tpu:latest
       options: --privileged
       env:
         CLOUD_TPU_ACCELERATOR: v5e-8
@@ -221,8 +221,13 @@ jobs:
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
+          # Reinstall Tunix with prod dependencies
+          pip install -e .[prod] --force-reinstall
+
           # Loading tfds requires tensorflow.
           pip install tensorflow
+
+          export JAX_PLATFORMS=tpu,cpu
           ./tests/sft/sft_tpu_smoke_test.sh
       - name: Run tunix cli tests
         env:
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "google-tunix"
-version = "0.1.3"
+version = "0.1.4"
 authors = [
   { name = "Tunix Developers", email = "tunix-dev@google.com" },
 ]
@@ -31,7 +31,7 @@ dependencies = [
   "omegaconf", # CLI config
   "pylatexenc",  # Eval result parsing
   "python-dotenv",  # Huggingface API key
-  "qwix<=0.1.1",  # Newer version of qwix depends on unreleased flax beyond 0.12.0
+  "qwix",
   "sentencepiece",
   "sympy",  # Eval result parsing
   "tensorflow_datasets",
diff --git a/tests/sft/dpo/orpo_trainer_test.py b/tests/sft/dpo/orpo_trainer_test.py
@@ -21,7 +21,6 @@
 import jax.numpy as jnp
 import numpy as np
 import optax
-from tunix.rl import common
 from tunix.sft.dpo import dpo_trainer as orpo_lib
 from tunix.tests import test_common as tc
 
@@ -231,21 +230,28 @@ def test_orpo_loss_fn(self):
     np.random.seed(0)
     model = tc.ToyTransformer(config=tc.ModelConfig(), rngs=nnx.Rngs(0))
     # Use negative log probs (as they should be in reality)
-    per_token_logps = -np.abs(np.random.normal(2, 1, size=(8, 4)))
+    per_token_logps = -np.abs(np.random.rand(8, 4))
+    completion_mask = np.ones((8, 4))
+    token_logps = (per_token_logps * completion_mask).sum(axis=-1)
+
+    batch_size = token_logps.shape[0]
+    chosen_logps = token_logps[: batch_size // 2]
+    rejected_logps = token_logps[batch_size // 2 :]
+
     train_example = orpo_lib.TrainExample(
         input_ids=jnp.arange(0, 32).reshape(8, 4),
         positions=jnp.ones((8, 4)),
         attention_mask=jnp.ones((8, 4, 4)),
         ref_chosen_logps=None,
         ref_rejected_logps=None,
-        completion_mask=jnp.ones((8, 4)),
+        completion_mask=completion_mask,
         logits_to_keep=4,
     )
 
     with mock.patch.object(
-        common,
-        "get_per_token_logps",
-        return_value=jnp.array(per_token_logps),
+        orpo_lib,
+        "compute_logps",
+        return_value=(jnp.array(chosen_logps), jnp.array(rejected_logps)),
     ):
       loss, aux = orpo_lib.dpo_loss_fn(
           model,