Source code for pyttb.cp_als

"""CP Decomposition via Alternating Least Squares."""

# Copyright 2024 National Technology & Engineering Solutions of Sandia,
# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.

from __future__ import annotations

from typing import Dict, Literal, Optional, Tuple, Union

import numpy as np

import pyttb as ttb
from pyttb.pyttb_utils import OneDArray, parse_one_d


[docs] def cp_als( # noqa: PLR0912,PLR0913,PLR0915 input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor, ttb.sumtensor], rank: int, stoptol: float = 1e-4, maxiters: int = 1000, dimorder: Optional[OneDArray] = None, optdims: Optional[OneDArray] = None, init: Union[Literal["random"], Literal["nvecs"], ttb.ktensor] = "random", printitn: int = 1, fixsigns: bool = True, ) -> Tuple[ttb.ktensor, ttb.ktensor, Dict]: """Compute CP decomposition with alternating least squares. Parameters ---------- input_tensor: Tensor to decompose rank: Rank of the decomposition stoptol: Tolerance used for termination - when the change in the fitness function in successive iterations drops below this value, the iterations terminate dimorder: Order to loop through dimensions (default: [range(tensor.ndims)]) optdims: Whether factor for corresponding mode should be optimized or not. Defaults to optimizing all modes ([range(tensor.ndims)]). maxiters: Maximum number of iterations init: Initial guess (default: "random") * "random": initialize using a :class:`pyttb.ktensor` with values chosen from a Normal distribution with mean 0 and standard deviation 1 * "nvecs": initialize factor matrices of a :class:`pyttb.ktensor` using the eigenvectors of the outer product of the matricized input tensor * :class:`pyttb.ktensor`: initialize using a specific :class:`pyttb.ktensor` as input - must be the same shape as the input tensor and have the same rank as the input rank printitn: Number of iterations to perform before printing iteration status - 0 for no status printing fixsigns: Align the signs of the columns of the factorization to align with the input tensor data Returns ------- M: Resulting ktensor from CP-ALS factorization Minit: Initial guess output: Information about the computation. Dictionary keys: * `params` : tuple of (stoptol, maxiters, printitn, dimorder) * `iters`: number of iterations performed * `normresidual`: norm of the difference between the input tensor and ktensor factorization * `fit`: value of the fitness function (fraction of tensor data explained by the model) Example ------- Random initialization causes slight perturbation in intermediate results. `...` is our place holder for these numeric values. Example using default values ("random" initialization): >>> weights = np.array([1., 2.]) >>> fm0 = np.array([[1., 2.], [3., 4.]]) >>> fm1 = np.array([[5., 6.], [7., 8.]]) >>> K = ttb.ktensor([fm0, fm1], weights) >>> np.random.seed(1) >>> M, Minit, output = ttb.cp_als(K.full(), 2) # doctest: +ELLIPSIS CP_ALS: Iter 0: f = ... f-delta = ... Iter 1: f = ... f-delta = ... Final f = ... >>> print(M) # doctest: +ELLIPSIS ktensor of shape (2, 2) with order F weights=[108.4715... 8.6114...] factor_matrices[0] = [[0.4187... 0.3989...] [0.9080... 0.9169...]] factor_matrices[1] = [[0.6188... 0.2581...] [0.7854... 0.9661...]] >>> print(Minit) # doctest: +ELLIPSIS ktensor of shape (2, 2) with order F weights=[1. 1.] factor_matrices[0] = [[4.1702...e-01 7.2032...e-01] [1.1437...e-04 3.0233...e-01]] factor_matrices[1] = [[0.1467... 0.0923...] [0.1862... 0.3455...]] >>> print(output["params"]) # doctest: +NORMALIZE_WHITESPACE {'stoptol': 0.0001, 'maxiters': 1000, 'dimorder': array([0, 1]),\ 'optdims': array([0, 1]), 'printitn': 1, 'fixsigns': True} Example using "nvecs" initialization: >>> M, Minit, output = ttb.cp_als(K.full(), 2, init="nvecs") # doctest: +ELLIPSIS CP_ALS: Iter 0: f = ... f-delta = ... Iter 1: f = ... f-delta = ... Final f = ... Example using :class:`pyttb.ktensor` initialization: >>> M, Minit, output = ttb.cp_als(K.full(), 2, init=K) # doctest: +ELLIPSIS CP_ALS: Iter 0: f = ... f-delta = ... Iter 1: f = ... f-delta = ... Final f = ... """ # Extract number of dimensions and norm of tensor N = input_tensor.ndims normX = input_tensor.norm() # Set up dimorder if not specified if dimorder is None: dimorder = np.arange(N) else: dimorder = parse_one_d(dimorder) if tuple(range(N)) != tuple(sorted(dimorder)): assert False, "Dimorder must be a list or permutation of range(tensor.ndims)" # Set up optdims if not specified if optdims is None: optdims = np.arange(N) else: optdims = parse_one_d(optdims) # Error checking assert rank > 0, "Number of components requested must be positive" # Set up and error checking on initial guess if isinstance(init, ttb.ktensor): # User provided an initial ktensor; validate it assert init.ndims == N, f"Initial guess does not have {N} modes" assert ( init.ncomponents == rank ), f"Initial guess does not have {rank} components" for n in dimorder: if init.factor_matrices[n].shape != (input_tensor.shape[n], rank): assert False, f"Mode {n} of the initial guess is the wrong size" elif isinstance(init, str) and init.lower() == "random": factor_matrices = [] for n in range(N): factor_matrices.append( np.random.uniform(0, 1, (input_tensor.shape[n], rank)) ) init = ttb.ktensor(factor_matrices) elif isinstance(init, str) and init.lower() == "nvecs": assert not isinstance( input_tensor, ttb.sumtensor ), "Sumtensor doesn't support nvecs" factor_matrices = [] for n in range(N): factor_matrices.append(input_tensor.nvecs(n, rank)) init = ttb.ktensor(factor_matrices) else: assert False, "The selected initialization method is not supported" # Set up for iterates and fit U = init.copy().factor_matrices fit = 0 # Reduce dimorder to only those modes we will optimize dimorder_in = dimorder # save for output dimorder = [int(d) for d in dimorder if d in optdims] # Store the last MTTKRP result to accelerate fitness computation U_mttkrp = np.zeros((input_tensor.shape[dimorder[-1]], rank)) if printitn > 0: print("CP_ALS:") # Main Loop: Iterate until convergence UtU = np.zeros((rank, rank, N)) for n in range(N): UtU[:, :, n] = U[n].T @ U[n] for iteration in range(maxiters): fitold = fit # Iterate over optimized modes in specified order for n in dimorder: # Calculate Unew = X_(n) * khatrirao(all U except n, 'r'). Unew = input_tensor.mttkrp(U, n) # Save the last MTTKRP result for fitness check. if n == dimorder[-1]: U_mttkrp = Unew # Compute the matrix of coefficients for linear system Y = np.prod(UtU, axis=2, where=[i != n for i in range(N)]) # don't try to solve linear system with Y = 0 if (Y == 0).all(): Unew = np.zeros(Unew.shape) else: Unew = np.linalg.solve(Y.T, Unew.T).T # TODO: should we have issparse implemented? I am not sure # when the following will occur # if issparse(Unew): # Unew = full(Unew) # for the case R=1 # Normalize each vector to prevent singularities in coefmatrix if iteration == 0: weights = np.sqrt(sum(Unew**2, 0)) # 2-norm else: weights = np.maximum(np.max(np.abs(Unew), 0), 1) # max-norm # if weights are 0, do not divide if not (weights == 0).all(): Unew = Unew / weights U[n] = Unew UtU[:, :, n] = U[n].T @ U[n] M = ttb.ktensor(U, weights) # This is equivalent to innerprod(X,P). iprod = np.sum( np.sum(M.factor_matrices[dimorder[-1]] * U_mttkrp, 0) * weights, 0 ) if normX == 0: normresidual = M.norm() ** 2 - 2 * iprod fit = normresidual else: # the following input to np.sqrt can be negative due to rounding and # truncation errors, so np.abs is used normresidual = np.sqrt(np.abs(normX**2 + M.norm() ** 2 - 2 * iprod)) fit = 1 - (normresidual / normX) # fraction explained by model fitchange = np.abs(fitold - fit) # Check for convergence if (iteration > 0) and (fitchange < stoptol): flag = 0 else: flag = 1 if (printitn > 0) and ((divmod(iteration, printitn)[1] == 0) or (flag == 0)): print(f" Iter {iteration}: f = {fit:e} f-delta = {fitchange:7.1e}") # Check for convergence if flag == 0: break # Clean up final result # Arrange the final tensor so that the columns are normalized. M.arrange() # Fix the signs if requested if fixsigns: M = M.fixsigns() if printitn > 0: if normX == 0: normresidual = M.norm() ** 2 - 2 * input_tensor.innerprod(M) fit = normresidual else: normresidual = np.sqrt( np.abs(normX**2 + M.norm() ** 2 - 2 * input_tensor.innerprod(M)) ) fit = 1 - (normresidual / normX) # fraction explained by model print(f" Final f = {fit:e}") output = { "params": { "stoptol": stoptol, "maxiters": maxiters, "dimorder": dimorder_in, "optdims": optdims, "printitn": printitn, "fixsigns": fixsigns, }, "iters": iteration, "normresidual": normresidual, "fit": fit, } return M, init, output
if __name__ == "__main__": import doctest # pragma: no cover doctest.testmod() # pragma: no cover