
Commit d340fe4

- A few more tests to check differentiability of optimal potentials.
- kernel_ridge is now only applied if the problem is balanced (it can perturb results in the unbalanced case).
- Clean-up in the transport interface to pass through parameters from epsilon_scheduler.

PiperOrigin-RevId: 386455511
1 parent: 01438ee

7 files changed: +159 −50 lines changed

ott/core/sinkhorn.py

Lines changed: 46 additions & 36 deletions
@@ -241,41 +241,8 @@ def sinkhorn(
   Raises:
     ValueError: If momentum parameter is not set correctly, or to a wrong value.
   """
-  if jit:
-    call_to_sinkhorn = functools.partial(
-        jax.jit, static_argnums=(3, 4, 6, 7, 8, 9) + tuple(range(11, 17)))(
-            _sinkhorn)
-  else:
-    call_to_sinkhorn = _sinkhorn
-  return call_to_sinkhorn(geom, a, b, tau_a, tau_b, threshold, norm_error,
-                          inner_iterations, min_iterations, max_iterations,
-                          momentum, chg_momentum_from, lse_mode,
-                          implicit_differentiation,
-                          linear_solve_kwargs, parallel_dual_updates,
-                          use_danskin, init_dual_a, init_dual_b)
-
 
-def _sinkhorn(
-    geom: geometry.Geometry,
-    a: Optional[jnp.ndarray] = None,
-    b: Optional[jnp.ndarray] = None,
-    tau_a: float = 1.0,
-    tau_b: float = 1.0,
-    threshold: float = 1e-3,
-    norm_error: int = 1,
-    inner_iterations: int = 10,
-    min_iterations: int = 0,
-    max_iterations: int = 2000,
-    momentum: float = 1.0,
-    chg_momentum_from: int = 0,
-    lse_mode: bool = True,
-    implicit_differentiation: bool = True,
-    linear_solve_kwargs: Optional[Mapping[str, Union[Callable, float]]] = None,
-    parallel_dual_updates: bool = False,
-    use_danskin: bool = None,
-    init_dual_a: Optional[jnp.ndarray] = None,
-    init_dual_b: Optional[jnp.ndarray] = None) -> SinkhornOutput:
-  """Checks inputs and forks between implicit/backprop exec of Sinkhorn."""
+  # Start by checking inputs.
   num_a, num_b = geom.shape
   a = jnp.ones((num_a,)) / num_a if a is None else a
   b = jnp.ones((num_b,)) / num_b if b is None else b

@@ -298,11 +265,49 @@ def _sinkhorn(
   # if that was not the error requested by the user.
   norm_error = (norm_error,) if norm_error == 1 else (norm_error, 1)
 
+  if jit:
+    call_to_sinkhorn = functools.partial(
+        jax.jit, static_argnums=(3, 4, 6, 7, 8, 9) + tuple(range(11, 17)))(
+            _sinkhorn)
+  else:
+    call_to_sinkhorn = _sinkhorn
+  return call_to_sinkhorn(geom, a, b, tau_a, tau_b, threshold, norm_error,
+                          inner_iterations, min_iterations, max_iterations,
+                          momentum, chg_momentum_from, lse_mode,
+                          implicit_differentiation,
+                          linear_solve_kwargs, parallel_dual_updates,
+                          use_danskin, init_dual_a, init_dual_b)
+
+
+def _sinkhorn(
+    geom: geometry.Geometry,
+    a: jnp.ndarray,
+    b: jnp.ndarray,
+    tau_a: float,
+    tau_b: float,
+    threshold: float,
+    norm_error: int,
+    inner_iterations: int,
+    min_iterations: int,
+    max_iterations: int,
+    momentum: float,
+    chg_momentum_from: int,
+    lse_mode: bool,
+    implicit_differentiation: bool,
+    linear_solve_kwargs: Mapping[str, Union[Callable, float]],
+    parallel_dual_updates: bool,
+    use_danskin: bool,
+    init_dual_a: jnp.ndarray,
+    init_dual_b: jnp.ndarray) -> SinkhornOutput:
+  """Forks between implicit/backprop exec of Sinkhorn."""
+
   if implicit_differentiation:
     iteration_fun = _sinkhorn_iterations_implicit
   else:
     iteration_fun = _sinkhorn_iterations
 
+  # By default, use Danskin theorem to differentiate
+  # the objective when using implicit_differentiation.
   use_danskin = implicit_differentiation if use_danskin is None else use_danskin
 
   f, g, errors = iteration_fun(tau_a, tau_b, inner_iterations, min_iterations,

@@ -337,6 +342,7 @@ def _sinkhorn(
   converged = jnp.logical_and(
       jnp.sum(errors == -1) > 0,
       jnp.sum(jnp.isnan(errors)) == 0)
+
   return SinkhornOutput(f, g, reg_ot_cost, errors, converged)
 
 
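Note on the refactor above: the `jit` branch now runs after the input checks, so uniform default marginals and the `norm_error` tuple are filled in before `_sinkhorn` is traced, and `_sinkhorn` takes every argument positionally, which is what the `static_argnums` indices refer to. A minimal usage sketch; the point-cloud geometry and random data below are illustrative assumptions, not part of this commit:

import jax
import jax.numpy as jnp
from ott.core import sinkhorn
from ott.geometry import pointcloud

key_x, key_y = jax.random.split(jax.random.PRNGKey(0))
x = jax.random.normal(key_x, (16, 2))   # 16 source points in 2-D
y = jax.random.normal(key_y, (19, 2))   # 19 target points in 2-D
geom = pointcloud.PointCloud(x, y, epsilon=1e-2)

# a and b are omitted: sinkhorn() fills in uniform weights itself,
# before jitting the positional-only _sinkhorn worker.
out = sinkhorn.sinkhorn(geom, threshold=1e-3, lse_mode=True, jit=True)
print(out.reg_ot_cost, out.converged)
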
@@ -845,7 +851,7 @@ def apply_inv_hessian(gr: Tuple[np.ndarray],
     tau_b: float, ratio lam/(lam+eps), ratio of regularizers, second marginal.
     lse_mode: bool, log-sum-exp mode if True, kernel else.
     linear_solver_fun: Callable, should return (solution, ...)
-    ridge_kernel: promotes zero-sum solutions.
+    ridge_kernel: promotes zero-sum solutions. only used if tau_a = tau_b = 1.0
     ridge_identity: handles rank deficient transport matrices (this happens
       typically when rows/cols in cost/kernel matrices are colinear, or,
       equivalently when two points from either measure are close).

@@ -866,8 +872,12 @@ def apply_inv_hessian(gr: Tuple[np.ndarray],
 
   solve_fun = lambda lin_op, b: linear_solver_fun(lin_op, b)[0]
 
-  # Forks on using Schur complement of either A or D, depending on size.
   n, m = geom.shape
+  # Remove ridge on kernel space if problem is balanced.
+  ridge_kernel = jnp.where(tau_a == 1.0 and tau_b == 1.0,
+                           ridge_kernel,
+                           0.0)
+  # Forks on using Schur complement of either A or D, depending on size.
   if n > m:  # if n is bigger, run m x m linear system.
     inv_vjp_ff = lambda z: z / diag_hess_a
     vjp_gg = lambda z: z * diag_hess_b
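
Context for the `ridge_kernel` change: in the balanced case (tau_a = tau_b = 1.0) the dual potentials are only defined up to an additive constant, so the linear system used for implicit differentiation has a degenerate direction and the small kernel-space ridge simply selects the zero-sum solution; in the unbalanced case that degeneracy is absent and the same ridge would bias the solve, hence the `jnp.where` guard. A hedged sketch of passing these regularizers through `linear_solve_kwargs`; the geometry setup and ridge values are illustrative assumptions, not recommendations:

import jax
import jax.numpy as jnp
from ott.core import sinkhorn
from ott.geometry import pointcloud

x = jax.random.normal(jax.random.PRNGKey(0), (32, 3))
y = jax.random.normal(jax.random.PRNGKey(1), (41, 3))
geom = pointcloud.PointCloud(x, y, epsilon=1e-2)
a = jnp.ones((32,)) / 32

def reg_ot_cost(a):
  # Balanced problem (tau_a = tau_b = 1.0): ridge_kernel stays active.
  out = sinkhorn.sinkhorn(
      geom, a=a, tau_a=1.0, tau_b=1.0,
      implicit_differentiation=True,
      linear_solve_kwargs={'ridge_kernel': 1e-4, 'ridge_identity': 1e-4})
  return out.reg_ot_cost

grad_a = jax.grad(reg_ot_cost)(a)  # gradient flows through the implicit solve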

ott/tools/soft_sort.py

Lines changed: 4 additions & 9 deletions
@@ -29,6 +29,7 @@ def transport_for_sort(
     weights: jnp.ndarray,
     target_weights: jnp.ndarray,
     squashing_fun: Callable[[jnp.ndarray], jnp.ndarray] = None,
+    epsilon: float = 1e-2,
     **kwargs) -> jnp.ndarray:
   r"""Solves reg. OT, from inputs to a weighted family of increasing values.
 

@@ -53,21 +54,15 @@ def transport_for_sort(
 
   x = jnp.expand_dims(jnp.squeeze(inputs), axis=1)
   if squashing_fun is None:
-    squashing_fun = lambda z : jax.nn.sigmoid(
-        (z - jnp.mean(z)) / (jnp.std(z) + 1e-10))
+    squashing_fun = lambda z: jax.nn.sigmoid((z - jnp.mean(z)) /
+                                             (jnp.std(z) + 1e-10))
   x = squashing_fun(x)
   a = jnp.squeeze(weights)
   b = jnp.squeeze(target_weights)
   num_targets = b.shape[0]
   y = jnp.linspace(0.0, 1.0, num_targets)[:, jnp.newaxis]
 
-  # When runnning soft-sort, the entries are remapped into the segment [0,1].
-  # For that reason, it makes sense to have a default epsilon value adapted
-  # to that scale. If none is passed, we provide a default of 1e-2.
-  epsilon = kwargs.pop('epsilon', None)
-  scale = kwargs.pop('scale', None)
-  kwargs.update(epsilon=(1e-2 if epsilon is None else epsilon))
-  return transport.Transport(x, y, a=a, b=b, **kwargs)
+  return transport.Transport(x, y, a=a, b=b, epsilon=epsilon, **kwargs)
 
 
 def apply_on_axis(op, inputs, axis, *args, **kwargs):
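
With `epsilon` promoted to an explicit argument (default 1e-2, a scale suited to inputs squashed into [0, 1]), callers override it directly rather than through `**kwargs`, and the old `scale` pop is gone. A small sketch of a direct call; the weight choices below are illustrative assumptions:

import jax
import jax.numpy as jnp
from ott.tools import soft_sort

inputs = jax.random.normal(jax.random.PRNGKey(0), (20,))
weights = jnp.ones((20,)) / 20          # one weight per input value
target_weights = jnp.ones((5,)) / 5     # 5 sorted target positions in [0, 1]

# Returns a transport.Transport solved between the squashed inputs and
# jnp.linspace(0, 1, 5); epsilon overrides the 1e-2 default.
ot = soft_sort.transport_for_sort(
    inputs, weights, target_weights, epsilon=5e-3)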

ott/tools/transport.py

Lines changed: 2 additions & 1 deletion
@@ -58,7 +58,8 @@ def __init__(self, *args, a=None, b=None, **kwargs):
       self.geom = args[0]
     else:
       pc_kw = {}
-      for key in ['epsilon', 'cost_fn', 'power', 'online']:
+      for key in ['epsilon', 'cost_fn', 'power', 'online', 'relative_epsilon',
+                  'target', 'scale', 'init', 'decay']:
         value = kwargs.pop(key, None)
         if value is not None:
           pc_kw[key] = value
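
The widened key list means `Transport` now forwards epsilon-scheduler settings ('relative_epsilon', 'target', 'scale', 'init', 'decay') to the point-cloud geometry it builds, instead of silently dropping them. A sketch of the intended call, assuming these keywords are ultimately consumed by the geometry's epsilon scheduler; the annealing values are illustrative, not recommendations:

import jax
from ott.tools import transport

x = jax.random.normal(jax.random.PRNGKey(0), (50, 4))
y = jax.random.normal(jax.random.PRNGKey(1), (60, 4))

# 'init' and 'decay' describe an annealed regularization: epsilon starts
# near init and decays geometrically toward its target value.
ot = transport.Transport(x, y, epsilon=1e-2, init=1.0, decay=0.99)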

ott/version.py

Lines changed: 1 addition & 1 deletion
@@ -15,4 +15,4 @@
 
 """Current ott version."""
 
-__version__ = "0.1.13"
+__version__ = "0.1.14"

tests/core/sinkhorn_hessian_test.py

Lines changed: 2 additions & 1 deletion
@@ -41,7 +41,8 @@ def test_hessian_sinkhorn(self, lse_mode, tau_a, tau_b, shape, arg):
     eps = 1e-3
     n, m = shape
     # use slightly different parameter to test linear_solve_kwargs
-    linear_solve_kwargs = {'ridge_kernel' : 1.2e-4}
+    linear_solve_kwargs = {'ridge_kernel' : 1.2e-4, 'ridge_identity': .9e-4}
+
     dim = 3
     rngs = jax.random.split(self.rng, 6)
     x = jax.random.uniform(rngs[0], (n, dim))
Lines changed: 102 additions & 0 deletions (new file)
@@ -0,0 +1,102 @@
+# coding=utf-8
+# Copyright 2021 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for the Policy."""
+
+from absl.testing import absltest
+from absl.testing import parameterized
+import jax
+import jax.numpy as jnp
+import jax.test_util
+from ott.tools import transport
+
+
+class SinkhornHessianTest(jax.test_util.JaxTestCase):
+
+  def setUp(self):
+    super().setUp()
+    self.rng = jax.random.PRNGKey(0)
+
+  @parameterized.product(
+      lse_mode=[True, False],
+      tau_a=[1.0, .93],
+      tau_b=[1.0, .91],
+      shape=[(12, 15), (27, 18), (345, 434)],
+      arg=[0, 1])
+  def test_potential_jacobian_sinkhorn(self, lse_mode,
+                                       tau_a, tau_b, shape, arg):
+    """Test Jacobian of optimal potential w.r.t. weights and locations."""
+    n, m = shape
+    dim = 3
+    rngs = jax.random.split(self.rng, 6)
+    x = jax.random.uniform(rngs[0], (n, dim))
+    y = jax.random.uniform(rngs[1], (m, dim))
+    a = jax.random.uniform(rngs[2], (n,)) + .2
+    b = jax.random.uniform(rngs[3], (m,)) + .2
+    a = a / jnp.sum(a)
+    b = b / jnp.sum(b)
+
+    # As expected, lse_mode False has a harder time with small epsilon.
+    epsilon = 0.01 if lse_mode else 0.1
+
+    random_dir = jax.random.uniform(rngs[2], (n,)) / n
+    random_dir = random_dir - jnp.mean(random_dir)
+
+    def loss_from_potential(a, x, implicit):
+      out = transport.Transport(
+          x, y, epsilon=epsilon, a=a, b=b, tau_a=tau_a, tau_b=tau_b,
+          lse_mode=lse_mode, implicit_differentiation=implicit
+      )
+      return jnp.sum(random_dir * out._f)
+
+    delta_a = jax.random.uniform(rngs[4], (n,))
+    delta_a = delta_a - jnp.mean(delta_a)
+    delta_x = jax.random.uniform(rngs[5], (n, dim))
+
+    # Compute implicit gradient
+    loss_imp = jax.jit(jax.value_and_grad(
+        lambda a, x: loss_from_potential(a, x, True), argnums=arg))
+    _, g_imp = loss_imp(a, x)
+    imp_dif = jnp.sum(g_imp * (delta_a if arg == 0 else delta_x))
+    # Compute backprop (unrolling) gradient
+
+    loss_back = jax.jit(jax.grad(
+        lambda a, x: loss_from_potential(a, x, False), argnums=arg))
+    g_back = loss_back(a, x)
+    back_dif = jnp.sum(g_back * (delta_a if arg == 0 else delta_x))
+
+    # Compute finite difference
+    perturb_scale = 1e-4
+    a_p = a + perturb_scale * delta_a if arg == 0 else a
+    x_p = x if arg == 0 else x + perturb_scale * delta_x
+    a_m = a - perturb_scale * delta_a if arg == 0 else a
+    x_m = x if arg == 0 else x - perturb_scale * delta_x
+
+    val_p, _ = loss_imp(a_p, x_p)
+    val_m, _ = loss_imp(a_m, x_m)
+    fin_dif = (val_p - val_m) / (2 * perturb_scale)
+
+    self.assertAllClose(fin_dif, back_dif, atol=1e-2, rtol=1e-2)
+    self.assertAllClose(fin_dif, imp_dif, atol=1e-2, rtol=1e-2)
+
+    # center g_imp, g_back if balanced problem testing gradient w.r.t weights
+    if tau_a == 1.0 and tau_b == 1.0 and arg == 0:
+      g_imp = g_imp - jnp.mean(g_imp)
+      g_back = g_back - jnp.mean(g_back)
+      self.assertAllClose(g_imp, g_back, atol=5e-2, rtol=1e-2)
+
+if __name__ == '__main__':
+  absltest.main()

tests/tools/soft_sort_test.py

Lines changed: 2 additions & 2 deletions
@@ -168,9 +168,9 @@ def loss_fn(logits, implicit=False):
     val_peps = loss_fn(z + eps * delta)
     val_meps = loss_fn(z - eps * delta)
     self.assertAllClose((val_peps - val_meps)/(2 * eps),
-                        jnp.sum(grad_b * delta), atol=0.1, rtol=0.01)
+                        jnp.sum(grad_b * delta), atol=0.001, rtol=0.001)
     self.assertAllClose((val_peps - val_meps)/(2 * eps),
-                        jnp.sum(grad_i * delta), atol=0.1, rtol=0.01)
+                        jnp.sum(grad_i * delta), atol=0.001, rtol=0.001)
 
 
 if __name__ == '__main__':
