Skip to content

Tucker als #53

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyttb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pyttb.khatrirao import khatrirao
from pyttb.cp_apr import *
from pyttb.cp_als import cp_als
from pyttb.tucker_als import tucker_als

from pyttb.import_data import import_data
from pyttb.export_data import export_data
Expand Down
4 changes: 2 additions & 2 deletions pyttb/cp_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,12 @@ def cp_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None,
for n in dimorder:
if init.factor_matrices[n].shape != (tensor.shape[n], rank):
assert False, "Mode {} of the initial guess is the wrong size".format(n)
elif init.lower() == 'random':
elif isinstance(init, str) and init.lower() == 'random':
factor_matrices = []
for n in range(N):
factor_matrices.append(np.random.uniform(0, 1, (tensor.shape[n], rank)))
init = ttb.ktensor.from_factor_matrices(factor_matrices)
elif init.lower() == 'nvecs':
elif isinstance(init, str) and init.lower() == 'nvecs':
factor_matrices = []
for n in range(N):
factor_matrices.append(tensor.nvecs(n, rank))
Expand Down
147 changes: 147 additions & 0 deletions pyttb/tucker_als.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@

from numbers import Real
import numpy as np
from pyttb.ttensor import ttensor

def tucker_als(tensor, rank, stoptol=1e-4, maxiters=1000, dimorder=None,
init='random', printitn=1):
"""
Compute Tucker decomposition with alternating least squares

Parameters
----------
tensor: :class:`pyttb.tensor`
rank: int, list[int]
Rank of the decomposition(s)
stoptol: float
Tolerance used for termination - when the change in the fitness function in successive iterations drops
below this value, the iterations terminate (default: 1e-4)
dimorder: list
Order to loop through dimensions (default: [range(tensor.ndims)])
maxiters: int
Maximum number of iterations (default: 1000)
init: str or list[np.ndarray]
Initial guess (default: "random")

* "random": initialize using a :class:`pyttb.ttensor` with values chosen from a Normal distribution with mean 1 and standard deviation 0
* "nvecs": initialize factor matrices of a :class:`pyttb.ttensor` using the eigenvectors of the outer product of the matricized input tensor
* :class:`pyttb.ttensor`: initialize using a specific :class:`pyttb.ttensor` as input - must be the same shape as the input tensor and have the same rank as the input rank

printitn: int
Number of iterations to perform before printing iteration status - 0 for no status printing (default: 1)

Returns
-------
M: :class:`pyttb.ttensor`
Resulting ttensor from Tucker-ALS factorization
Minit: :class:`pyttb.ttensor`
Initial guess
output: dict
Information about the computation. Dictionary keys:

* `params` : tuple of (stoptol, maxiters, printitn, dimorder)
* `iters`: number of iterations performed
* `normresidual`: norm of the difference between the input tensor and ktensor factorization
* `fit`: value of the fitness function (fraction of tensor data explained by the model)

"""
N = tensor.ndims
normX = tensor.norm()

# TODO: These argument checks look common with CP-ALS factor out
if not isinstance(stoptol, Real):
raise ValueError(f"stoptol must be a real valued scalar but received: {stoptol}")
if not isinstance(maxiters, Real) or maxiters < 0:
raise ValueError(f"maxiters must be a non-negative real valued scalar but received: {maxiters}")
if not isinstance(printitn, Real):
raise ValueError(f"printitn must be a real valued scalar but received: {printitn}")

if isinstance(rank, Real) or len(rank) == 1:
rank = rank * np.ones(N, dtype=int)

# Set up dimorder if not specified
if not dimorder:
dimorder = list(range(N))
else:
if not isinstance(dimorder, list):
raise ValueError("Dimorder must be a list")
elif tuple(range(N)) != tuple(sorted(dimorder)):
raise ValueError("Dimorder must be a list or permutation of range(tensor.ndims)")

if isinstance(init, list):
Uinit = init
if len(init) != N:
raise ValueError(f"Init needs to be of length tensor.ndim (which was {N}) but only got length {len(init)}.")
for n in dimorder[1::]:
correct_shape = (tensor.shape[n], rank[n])
if Uinit[n].shape != correct_shape:
raise ValueError(
f"Init factor {n} had incorrect shape. Expected {correct_shape} but got {Uinit[n].shape}"
)
elif isinstance(init, str) and init.lower() == 'random':
Uinit = [None] * N
# Observe that we don't need to calculate an initial guess for the
# first index in dimorder because that will be solved for in the first
# inner iteration.
for n in range(1, N):
Uinit[n] = np.random.uniform(0, 1, (tensor.shape[n], rank[n]))
elif isinstance(init, str) and init.lower() in ('nvecs', 'eigs'):
# Compute an orthonormal basis for the dominant
# Rn-dimensional left singular subspace of
# X_(n) (0 <= n < N).
Uinit = [None] * N
for n in dimorder[1::]:
print(f" Computing {rank[n]} leading e-vector for factor {n}.\n")
Uinit[n] = tensor.nvecs(n, rank[n])
else:
raise ValueError(f"The selected initialization method is not supported. Provided: {init}")

# Set up for iterations - initializing U and the fit.
U = Uinit.copy()
fit = 0

if printitn > 0:
print("\nTucker Alternating Least-Squares:\n")

# Main loop: Iterate until convergence
for iter in range(maxiters):
fitold = fit

# Iterate over all N modes of the tensor
for n in dimorder:
if n == 0: # TODO proposal to change ttm to include_dims and exclude_dims to resolve -0 ambiguity
dims = np.arange(1, tensor.ndims)
Utilde = tensor.ttm(U, dims, True)
else:
Utilde = tensor.ttm(U, -n, True)

# Maximize norm(Utilde x_n W') wrt W and
# maintain orthonormality of W
U[n] = Utilde.nvecs(n, rank[n])

# Assemble the current approximation
core = Utilde.ttm(U, n, True)

# Compute fit
# TODO this abs is missing from MATLAB, but I get negative numbers for trivial examples
normresidual = np.sqrt(abs(normX**2 - core.norm()**2))
fit = 1 - (normresidual / normX) # fraction explained by model
fitchange = abs(fitold - fit)

if iter % printitn == 0:
print(f" NormX: {normX} Core norm: {core.norm()}")
print(f" Iter {iter}: fit = {fit:e} fitdelta = {fitchange:7.1e}\n")

# Check for convergence
if fitchange < stoptol:
break

solution = ttensor.from_data(core, U)

output = {}
output['params'] = (stoptol, maxiters, printitn, dimorder)
output['iters'] = iter
output['normresidual'] = normresidual
output['fit'] = fit

return solution, Uinit, output
89 changes: 89 additions & 0 deletions tests/test_tucker_als.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import pyttb as ttb
import numpy as np
import pytest


@pytest.fixture()
def sample_tensor():
data = np.array([[29, 39.], [63., 85.]])
shape = (2, 2)
params = {'data': data, 'shape': shape}
tensorInstance = ttb.tensor().from_data(data, shape)
return params, tensorInstance

@pytest.mark.indevelopment
def test_tucker_als_tensor_default_init(capsys, sample_tensor):
(data, T) = sample_tensor
(Solution, Uinit, output) = ttb.tucker_als(T, 2)
capsys.readouterr()
assert pytest.approx(output['fit'], 1) == 0

(Solution, Uinit, output) = ttb.tucker_als(T, 2, init=Uinit)
capsys.readouterr()
assert pytest.approx(output['fit'], 1) == 0

(Solution, Uinit, output) = ttb.tucker_als(T, 2, init='nvecs')
capsys.readouterr()
assert pytest.approx(output['fit'], 1) == 0

@pytest.mark.indevelopment
def test_tucker_als_tensor_incorrect_init(capsys, sample_tensor):
(data, T) = sample_tensor

non_list = np.array([1]) # TODO: Consider generalizing to iterable
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, init=non_list)

bad_string = "foo_bar"
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, init=bad_string)

wrong_length = [np.ones(T.shape)] * T.ndims
wrong_length.pop()
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, init=wrong_length)

wrong_shape = [np.ones(5)] * T.ndims
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, init=wrong_shape)

@pytest.mark.indevelopment
def test_tucker_als_tensor_incorrect_steptol(capsys, sample_tensor):
(data, T) = sample_tensor

non_scalar = np.array([1])
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, stoptol=non_scalar)

@pytest.mark.indevelopment
def test_tucker_als_tensor_incorrect_maxiters(capsys, sample_tensor):
(data, T) = sample_tensor

negative_value = -1
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, maxiters=negative_value)

non_scalar = np.array([1])
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, maxiters=non_scalar)

@pytest.mark.indevelopment
def test_tucker_als_tensor_incorrect_printitn(capsys, sample_tensor):
(data, T) = sample_tensor

non_scalar = np.array([1])
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, printitn=non_scalar)

@pytest.mark.indevelopment
def test_tucker_als_tensor_incorrect_dimorder(capsys, sample_tensor):
(data, T) = sample_tensor

non_list = np.array([1]) # TODO: Consider generalizing to iterable
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, dimorder=non_list)

too_few_dims = list(range(len(T.shape)))
too_few_dims.pop()
with pytest.raises(ValueError):
_ = ttb.tucker_als(T, 2, dimorder=too_few_dims)