TUCKER_ALS: Add tucker_als to validate ttucker implementation.

nickj · ntjohnson1 · commit c5670f403903 · 2023-02-21T11:45:26.000-05:00
diff --git a/pyttb/__init__.py b/pyttb/__init__.py
@@ -19,6 +19,7 @@
 from pyttb.khatrirao import khatrirao
 from pyttb.cp_apr import *
 from pyttb.cp_als import cp_als
+from pyttb.tucker_als import tucker_als
 
 from pyttb.import_data import import_data
 from pyttb.export_data import export_data
diff --git a/pyttb/cp_als.py b/pyttb/cp_als.py
@@ -125,12 +125,12 @@ def cp_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None,
         for n in dimorder:
             if init.factor_matrices[n].shape != (tensor.shape[n], rank):
                 assert False, "Mode {} of the initial guess is the wrong size".format(n)
-    elif init.lower() == 'random':
+    elif isinstance(init, str) and init.lower() == 'random':
         factor_matrices = []
         for n in range(N):
             factor_matrices.append(np.random.uniform(0, 1, (tensor.shape[n], rank)))
         init = ttb.ktensor.from_factor_matrices(factor_matrices)
-    elif init.lower() == 'nvecs':
+    elif isinstance(init, str) and init.lower() == 'nvecs':
         factor_matrices = []
         for n in range(N):
             factor_matrices.append(tensor.nvecs(n, rank))
diff --git a/pyttb/tucker_als.py b/pyttb/tucker_als.py
@@ -0,0 +1,147 @@
+
+from numbers import Real
+import numpy as np
+from pyttb.ttensor import ttensor
+
+def tucker_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None,
+           init='random', printitn=1):
+    """
+    Compute Tucker decomposition with alternating least squares
+
+    Parameters
+    ----------
+    tensor: :class:`pyttb.tensor`
+    rank: int, list[int]
+        Rank of the decomposition(s)
+    stoptol: float
+        Tolerance used for termination - when the change in the fitness function in successive iterations drops
+        below this value, the iterations terminate (default: 1e-4)
+    dimorder: list
+        Order to loop through dimensions (default: [range(tensor.ndims)])
+    maxiters: int
+        Maximum number of iterations (default: 1000)
+    init: str or list[np.ndarray]
+        Initial guess (default: "random")
+
+             * "random": initialize using a :class:`pyttb.ttensor` with values chosen from a Normal distribution with mean 1 and standard deviation 0
+             * "nvecs": initialize factor matrices of a :class:`pyttb.ttensor` using the eigenvectors of the outer product of the matricized input tensor
+             * :class:`pyttb.ttensor`: initialize using a specific :class:`pyttb.ttensor` as input - must be the same shape as the input tensor and have the same rank as the input rank
+
+    printitn: int
+        Number of iterations to perform before printing iteration status - 0 for no status printing (default: 1)
+
+    Returns
+    -------
+    M: :class:`pyttb.ttensor`
+        Resulting ttensor from Tucker-ALS factorization
+    Minit: :class:`pyttb.ttensor`
+        Initial guess
+    output: dict
+        Information about the computation. Dictionary keys:
+
+            * `params` : tuple of (stoptol, maxiters, printitn, dimorder)
+            * `iters`: number of iterations performed
+            * `normresidual`: norm of the difference between the input tensor and ktensor factorization
+            * `fit`: value of the fitness function (fraction of tensor data explained by the model)
+
+    """
+    N = tensor.ndims
+    normX = tensor.norm()
+
+    # TODO: These argument checks look common with CP-ALS factor out
+    if not isinstance(stoptol, Real):
+        raise ValueError(f"stoptol must be a real valued scalar but received: {stoptol}")
+    if not isinstance(maxiters, Real) or maxiters < 0:
+        raise ValueError(f"maxiters must be a non-negative real valued scalar but received: {maxiters}")
+    if not isinstance(printitn, Real):
+        raise ValueError(f"printitn must be a real valued scalar but received: {printitn}")
+
+    if isinstance(rank, Real) or len(rank) == 1:
+        rank = rank * np.ones(N, dtype=int)
+
+    # Set up dimorder if not specified
+    if not dimorder:
+        dimorder = list(range(N))
+    else:
+        if not isinstance(dimorder, list):
+            raise ValueError("Dimorder must be a list")
+        elif tuple(range(N)) != tuple(sorted(dimorder)):
+            raise ValueError("Dimorder must be a list or permutation of range(tensor.ndims)")
+
+    if isinstance(init, list):
+        Uinit = init
+        if len(init) != N:
+            raise ValueError(f"Init needs to be of length tensor.ndim (which was {N}) but only got length {len(init)}.")
+        for n in dimorder[1::]:
+            correct_shape = (tensor.shape[n], rank[n])
+            if Uinit[n].shape != correct_shape:
+                raise ValueError(
+                    f"Init factor {n} had incorrect shape. Expected {correct_shape} but got {Uinit[n].shape}"
+                )
+    elif isinstance(init, str) and init.lower() == 'random':
+        Uinit = [None] * N
+        # Observe that we don't need to calculate an initial guess for the
+        # first index in dimorder because that will be solved for in the first
+        # inner iteration.
+        for n in range(1, N):
+            Uinit[n] = np.random.uniform(0, 1, (tensor.shape[n], rank[n]))
+    elif isinstance(init, str) and init.lower() in ('nvecs', 'eigs'):
+        # Compute an orthonormal basis for the dominant
+        # Rn-dimensional left singular subspace of
+        # X_(n) (0 <= n < N).
+        Uinit = [None] * N
+        for n in dimorder[1::]:
+            print(f" Computing {rank[n]} leading e-vector for factor {n}.\n")
+            Uinit[n] = tensor.nvecs(n, rank[n])
+    else:
+        raise ValueError(f"The selected initialization method is not supported. Provided: {init}")
+
+    # Set up for iterations - initializing U and the fit.
+    U = Uinit.copy()
+    fit = 0
+
+    if printitn > 0:
+        print("\nTucker Alternating Least-Squares:\n")
+
+    # Main loop: Iterate until convergence
+    for iter in range(maxiters):
+        fitold = fit
+
+        # Iterate over all N modes of the tensor
+        for n in dimorder:
+            if n == 0:  # TODO proposal to change ttm to include_dims and exclude_dims to resolve -0 ambiguity
+                dims = np.arange(1, tensor.ndims)
+                Utilde = tensor.ttm(U, dims, True)
+            else:
+                Utilde = tensor.ttm(U, -n, True)
+
+            # Maximize norm(Utilde x_n W') wrt W and
+            # maintain orthonormality of W
+            U[n] = Utilde.nvecs(n, rank[n])
+
+        # Assemble the current approximation
+        core = Utilde.ttm(U, n, True)
+
+        # Compute fit
+        # TODO this abs is missing from MATLAB, but I get negative numbers for trivial examples
+        normresidual = np.sqrt(abs(normX**2 - core.norm()**2))
+        fit = 1 - (normresidual / normX)  # fraction explained by model
+        fitchange = abs(fitold - fit)
+
+        if iter % printitn == 0:
+            print(f" NormX: {normX} Core norm: {core.norm()}")
+            print(f" Iter {iter}: fit = {fit:e} fitdelta = {fitchange:7.1e}\n")
+
+        # Check for convergence
+        if fitchange < stoptol:
+            break
+
+    solution = ttensor.from_data(core, U)
+
+    output = {}
+    output['params'] = (stoptol, maxiters, printitn, dimorder)
+    output['iters'] = iter
+    output['normresidual'] = normresidual
+    output['fit'] = fit
+
+    return solution, Uinit, output
diff --git a/tests/test_tucker_als.py b/tests/test_tucker_als.py
@@ -0,0 +1,89 @@
+import pyttb as ttb
+import numpy as np
+import pytest
+
+
+@pytest.fixture()
+def sample_tensor():
+    data = np.array([[29, 39.], [63., 85.]])
+    shape = (2, 2)
+    params = {'data': data, 'shape': shape}
+    tensorInstance = ttb.tensor().from_data(data, shape)
+    return params, tensorInstance
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_default_init(capsys, sample_tensor):
+    (data, T) = sample_tensor
+    (Solution, Uinit, output) = ttb.tucker_als(T, 2)
+    capsys.readouterr()
+    assert pytest.approx(output['fit'], 1) == 0
+
+    (Solution, Uinit, output) = ttb.tucker_als(T, 2, init=Uinit)
+    capsys.readouterr()
+    assert pytest.approx(output['fit'], 1) == 0
+
+    (Solution, Uinit, output) = ttb.tucker_als(T, 2, init='nvecs')
+    capsys.readouterr()
+    assert pytest.approx(output['fit'], 1) == 0
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_incorrect_init(capsys, sample_tensor):
+    (data, T) = sample_tensor
+
+    non_list = np.array([1])  # TODO: Consider generalizing to iterable
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, init=non_list)
+
+    bad_string = "foo_bar"
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, init=bad_string)
+
+    wrong_length = [np.ones(T.shape)] * T.ndims
+    wrong_length.pop()
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, init=wrong_length)
+
+    wrong_shape = [np.ones(5)] * T.ndims
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, init=wrong_shape)
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_incorrect_steptol(capsys, sample_tensor):
+    (data, T) = sample_tensor
+
+    non_scalar = np.array([1])
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, stoptol=non_scalar)
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_incorrect_maxiters(capsys, sample_tensor):
+    (data, T) = sample_tensor
+
+    negative_value = -1
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, maxiters=negative_value)
+
+    non_scalar = np.array([1])
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, maxiters=non_scalar)
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_incorrect_printitn(capsys, sample_tensor):
+    (data, T) = sample_tensor
+
+    non_scalar = np.array([1])
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, printitn=non_scalar)
+
+@pytest.mark.indevelopment
+def test_tucker_als_tensor_incorrect_dimorder(capsys, sample_tensor):
+    (data, T) = sample_tensor
+
+    non_list = np.array([1])  # TODO: Consider generalizing to iterable
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, dimorder=non_list)
+
+    too_few_dims = list(range(len(T.shape)))
+    too_few_dims.pop()
+    with pytest.raises(ValueError):
+        _ = ttb.tucker_als(T, 2, dimorder=too_few_dims)