From c5670f40390318ba926bf5a6c4b5a2d646a6768e Mon Sep 17 00:00:00 2001 From: nickj Date: Sat, 4 Feb 2023 13:26:06 -0500 Subject: [PATCH] TUCKER_ALS: Add tucker_als to validate ttucker implementation. --- pyttb/__init__.py | 1 + pyttb/cp_als.py | 4 +- pyttb/tucker_als.py | 147 +++++++++++++++++++++++++++++++++++++++ tests/test_tucker_als.py | 89 ++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 pyttb/tucker_als.py create mode 100644 tests/test_tucker_als.py diff --git a/pyttb/__init__.py b/pyttb/__init__.py index f7b47c32..5e943ed9 100644 --- a/pyttb/__init__.py +++ b/pyttb/__init__.py @@ -19,6 +19,7 @@ from pyttb.khatrirao import khatrirao from pyttb.cp_apr import * from pyttb.cp_als import cp_als +from pyttb.tucker_als import tucker_als from pyttb.import_data import import_data from pyttb.export_data import export_data diff --git a/pyttb/cp_als.py b/pyttb/cp_als.py index dcc03924..3792cb6c 100644 --- a/pyttb/cp_als.py +++ b/pyttb/cp_als.py @@ -125,12 +125,12 @@ def cp_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None, for n in dimorder: if init.factor_matrices[n].shape != (tensor.shape[n], rank): assert False, "Mode {} of the initial guess is the wrong size".format(n) - elif init.lower() == 'random': + elif isinstance(init, str) and init.lower() == 'random': factor_matrices = [] for n in range(N): factor_matrices.append(np.random.uniform(0, 1, (tensor.shape[n], rank))) init = ttb.ktensor.from_factor_matrices(factor_matrices) - elif init.lower() == 'nvecs': + elif isinstance(init, str) and init.lower() == 'nvecs': factor_matrices = [] for n in range(N): factor_matrices.append(tensor.nvecs(n, rank)) diff --git a/pyttb/tucker_als.py b/pyttb/tucker_als.py new file mode 100644 index 00000000..afd11231 --- /dev/null +++ b/pyttb/tucker_als.py @@ -0,0 +1,147 @@ + +from numbers import Real +import numpy as np +from pyttb.ttensor import ttensor + +def tucker_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None, + init='random', printitn=1): + """ + Compute Tucker decomposition with alternating least squares + + Parameters + ---------- + tensor: :class:`pyttb.tensor` + rank: int, list[int] + Rank of the decomposition(s) + stoptol: float + Tolerance used for termination - when the change in the fitness function in successive iterations drops + below this value, the iterations terminate (default: 1e-4) + dimorder: list + Order to loop through dimensions (default: [range(tensor.ndims)]) + maxiters: int + Maximum number of iterations (default: 1000) + init: str or list[np.ndarray] + Initial guess (default: "random") + + * "random": initialize using a :class:`pyttb.ttensor` with values chosen from a Normal distribution with mean 1 and standard deviation 0 + * "nvecs": initialize factor matrices of a :class:`pyttb.ttensor` using the eigenvectors of the outer product of the matricized input tensor + * :class:`pyttb.ttensor`: initialize using a specific :class:`pyttb.ttensor` as input - must be the same shape as the input tensor and have the same rank as the input rank + + printitn: int + Number of iterations to perform before printing iteration status - 0 for no status printing (default: 1) + + Returns + ------- + M: :class:`pyttb.ttensor` + Resulting ttensor from Tucker-ALS factorization + Minit: :class:`pyttb.ttensor` + Initial guess + output: dict + Information about the computation. Dictionary keys: + + * `params` : tuple of (stoptol, maxiters, printitn, dimorder) + * `iters`: number of iterations performed + * `normresidual`: norm of the difference between the input tensor and ktensor factorization + * `fit`: value of the fitness function (fraction of tensor data explained by the model) + + """ + N = tensor.ndims + normX = tensor.norm() + + # TODO: These argument checks look common with CP-ALS factor out + if not isinstance(stoptol, Real): + raise ValueError(f"stoptol must be a real valued scalar but received: {stoptol}") + if not isinstance(maxiters, Real) or maxiters < 0: + raise ValueError(f"maxiters must be a non-negative real valued scalar but received: {maxiters}") + if not isinstance(printitn, Real): + raise ValueError(f"printitn must be a real valued scalar but received: {printitn}") + + if isinstance(rank, Real) or len(rank) == 1: + rank = rank * np.ones(N, dtype=int) + + # Set up dimorder if not specified + if not dimorder: + dimorder = list(range(N)) + else: + if not isinstance(dimorder, list): + raise ValueError("Dimorder must be a list") + elif tuple(range(N)) != tuple(sorted(dimorder)): + raise ValueError("Dimorder must be a list or permutation of range(tensor.ndims)") + + if isinstance(init, list): + Uinit = init + if len(init) != N: + raise ValueError(f"Init needs to be of length tensor.ndim (which was {N}) but only got length {len(init)}.") + for n in dimorder[1::]: + correct_shape = (tensor.shape[n], rank[n]) + if Uinit[n].shape != correct_shape: + raise ValueError( + f"Init factor {n} had incorrect shape. Expected {correct_shape} but got {Uinit[n].shape}" + ) + elif isinstance(init, str) and init.lower() == 'random': + Uinit = [None] * N + # Observe that we don't need to calculate an initial guess for the + # first index in dimorder because that will be solved for in the first + # inner iteration. + for n in range(1, N): + Uinit[n] = np.random.uniform(0, 1, (tensor.shape[n], rank[n])) + elif isinstance(init, str) and init.lower() in ('nvecs', 'eigs'): + # Compute an orthonormal basis for the dominant + # Rn-dimensional left singular subspace of + # X_(n) (0 <= n < N). + Uinit = [None] * N + for n in dimorder[1::]: + print(f" Computing {rank[n]} leading e-vector for factor {n}.\n") + Uinit[n] = tensor.nvecs(n, rank[n]) + else: + raise ValueError(f"The selected initialization method is not supported. Provided: {init}") + + # Set up for iterations - initializing U and the fit. + U = Uinit.copy() + fit = 0 + + if printitn > 0: + print("\nTucker Alternating Least-Squares:\n") + + # Main loop: Iterate until convergence + for iter in range(maxiters): + fitold = fit + + # Iterate over all N modes of the tensor + for n in dimorder: + if n == 0: # TODO proposal to change ttm to include_dims and exclude_dims to resolve -0 ambiguity + dims = np.arange(1, tensor.ndims) + Utilde = tensor.ttm(U, dims, True) + else: + Utilde = tensor.ttm(U, -n, True) + + # Maximize norm(Utilde x_n W') wrt W and + # maintain orthonormality of W + U[n] = Utilde.nvecs(n, rank[n]) + + # Assemble the current approximation + core = Utilde.ttm(U, n, True) + + # Compute fit + # TODO this abs is missing from MATLAB, but I get negative numbers for trivial examples + normresidual = np.sqrt(abs(normX**2 - core.norm()**2)) + fit = 1 - (normresidual / normX) # fraction explained by model + fitchange = abs(fitold - fit) + + if iter % printitn == 0: + print(f" NormX: {normX} Core norm: {core.norm()}") + print(f" Iter {iter}: fit = {fit:e} fitdelta = {fitchange:7.1e}\n") + + # Check for convergence + if fitchange < stoptol: + break + + solution = ttensor.from_data(core, U) + + output = {} + output['params'] = (stoptol, maxiters, printitn, dimorder) + output['iters'] = iter + output['normresidual'] = normresidual + output['fit'] = fit + + return solution, Uinit, output diff --git a/tests/test_tucker_als.py b/tests/test_tucker_als.py new file mode 100644 index 00000000..a7142585 --- /dev/null +++ b/tests/test_tucker_als.py @@ -0,0 +1,89 @@ +import pyttb as ttb +import numpy as np +import pytest + + +@pytest.fixture() +def sample_tensor(): + data = np.array([[29, 39.], [63., 85.]]) + shape = (2, 2) + params = {'data': data, 'shape': shape} + tensorInstance = ttb.tensor().from_data(data, shape) + return params, tensorInstance + +@pytest.mark.indevelopment +def test_tucker_als_tensor_default_init(capsys, sample_tensor): + (data, T) = sample_tensor + (Solution, Uinit, output) = ttb.tucker_als(T, 2) + capsys.readouterr() + assert pytest.approx(output['fit'], 1) == 0 + + (Solution, Uinit, output) = ttb.tucker_als(T, 2, init=Uinit) + capsys.readouterr() + assert pytest.approx(output['fit'], 1) == 0 + + (Solution, Uinit, output) = ttb.tucker_als(T, 2, init='nvecs') + capsys.readouterr() + assert pytest.approx(output['fit'], 1) == 0 + +@pytest.mark.indevelopment +def test_tucker_als_tensor_incorrect_init(capsys, sample_tensor): + (data, T) = sample_tensor + + non_list = np.array([1]) # TODO: Consider generalizing to iterable + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, init=non_list) + + bad_string = "foo_bar" + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, init=bad_string) + + wrong_length = [np.ones(T.shape)] * T.ndims + wrong_length.pop() + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, init=wrong_length) + + wrong_shape = [np.ones(5)] * T.ndims + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, init=wrong_shape) + +@pytest.mark.indevelopment +def test_tucker_als_tensor_incorrect_steptol(capsys, sample_tensor): + (data, T) = sample_tensor + + non_scalar = np.array([1]) + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, stoptol=non_scalar) + +@pytest.mark.indevelopment +def test_tucker_als_tensor_incorrect_maxiters(capsys, sample_tensor): + (data, T) = sample_tensor + + negative_value = -1 + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, maxiters=negative_value) + + non_scalar = np.array([1]) + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, maxiters=non_scalar) + +@pytest.mark.indevelopment +def test_tucker_als_tensor_incorrect_printitn(capsys, sample_tensor): + (data, T) = sample_tensor + + non_scalar = np.array([1]) + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, printitn=non_scalar) + +@pytest.mark.indevelopment +def test_tucker_als_tensor_incorrect_dimorder(capsys, sample_tensor): + (data, T) = sample_tensor + + non_list = np.array([1]) # TODO: Consider generalizing to iterable + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, dimorder=non_list) + + too_few_dims = list(range(len(T.shape))) + too_few_dims.pop() + with pytest.raises(ValueError): + _ = ttb.tucker_als(T, 2, dimorder=too_few_dims)