Source code for pyttb.cp_als

"""CP Decomposition via Alternating Least Squares."""

# Copyright 2024 National Technology & Engineering Solutions of Sandia,
# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.

from __future__ import annotations

from typing import Dict, Literal, Optional, Tuple, Union

import numpy as np

import pyttb as ttb
from pyttb.pyttb_utils import OneDArray, parse_one_d



[docs]
def cp_als(  # noqa: PLR0912,PLR0913,PLR0915
    input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor, ttb.sumtensor],
    rank: int,
    stoptol: float = 1e-4,
    maxiters: int = 1000,
    dimorder: Optional[OneDArray] = None,
    optdims: Optional[OneDArray] = None,
    init: Union[Literal["random"], Literal["nvecs"], ttb.ktensor] = "random",
    printitn: int = 1,
    fixsigns: bool = True,
) -> Tuple[ttb.ktensor, ttb.ktensor, Dict]:
    """Compute CP decomposition with alternating least squares.

    Parameters
    ----------
    input_tensor:
        Tensor to decompose
    rank:
        Rank of the decomposition
    stoptol:
        Tolerance used for termination - when the change in the fitness function
        in successive iterations drops below this value, the iterations terminate
    dimorder:
        Order to loop through dimensions (default: [range(tensor.ndims)])
    optdims:
        Whether factor for corresponding mode should be optimized or not.
        Defaults to optimizing all modes ([range(tensor.ndims)]).
    maxiters:
        Maximum number of iterations
    init:
        Initial guess (default: "random")
         * "random": initialize using a :class:`pyttb.ktensor` with values chosen
            from a Normal distribution with mean 0 and standard deviation 1
         * "nvecs": initialize factor matrices of a :class:`pyttb.ktensor` using
            the eigenvectors of the outer product of the matricized input tensor
         * :class:`pyttb.ktensor`: initialize using a specific
            :class:`pyttb.ktensor` as input - must be the same shape as the input
            tensor and have the same rank as the input rank
    printitn:
        Number of iterations to perform before printing iteration status - 0 for
        no status printing
    fixsigns:
        Align the signs of the columns of the factorization to align with the
        input tensor data

    Returns
    -------
    M:
        Resulting ktensor from CP-ALS factorization
    Minit:
        Initial guess
    output:
        Information about the computation. Dictionary keys:
         * `params` : tuple of (stoptol, maxiters, printitn, dimorder)
         * `iters`: number of iterations performed
         * `normresidual`: norm of the difference between the input tensor
            and ktensor factorization
         * `fit`: value of the fitness function (fraction of tensor data
            explained by the model)

    Example
    -------
    Random initialization causes slight perturbation in intermediate results.
    `...` is our place holder for these numeric values.
    Example using default values ("random" initialization):

    >>> weights = np.array([1., 2.])
    >>> fm0 = np.array([[1., 2.], [3., 4.]])
    >>> fm1 = np.array([[5., 6.], [7., 8.]])
    >>> K = ttb.ktensor([fm0, fm1], weights)
    >>> np.random.seed(1)
    >>> M, Minit, output = ttb.cp_als(K.full(), 2) # doctest: +ELLIPSIS
    CP_ALS:
     Iter 0: f = ... f-delta = ...
     Iter 1: f = ... f-delta = ...
     Final f = ...
    >>> print(M) # doctest: +ELLIPSIS
    ktensor of shape (2, 2) with order F
    weights=[108.4715... 8.6114...]
    factor_matrices[0] =
    [[0.4187... 0.3989...]
     [0.9080... 0.9169...]]
    factor_matrices[1] =
    [[0.6188... 0.2581...]
     [0.7854... 0.9661...]]
    >>> print(Minit) # doctest: +ELLIPSIS
    ktensor of shape (2, 2) with order F
    weights=[1. 1.]
    factor_matrices[0] =
    [[4.1702...e-01 7.2032...e-01]
     [1.1437...e-04 3.0233...e-01]]
    factor_matrices[1] =
    [[0.1467... 0.0923...]
     [0.1862... 0.3455...]]
    >>> print(output["params"]) # doctest: +NORMALIZE_WHITESPACE
    {'stoptol': 0.0001, 'maxiters': 1000, 'dimorder': array([0, 1]),\
     'optdims': array([0, 1]), 'printitn': 1, 'fixsigns': True}

    Example using "nvecs" initialization:

    >>> M, Minit, output = ttb.cp_als(K.full(), 2, init="nvecs") # doctest: +ELLIPSIS
    CP_ALS:
     Iter 0: f = ... f-delta = ...
     Iter 1: f = ... f-delta = ...
     Final f = ...

    Example using :class:`pyttb.ktensor` initialization:

    >>> M, Minit, output = ttb.cp_als(K.full(), 2, init=K) # doctest: +ELLIPSIS
    CP_ALS:
     Iter 0: f = ... f-delta = ...
     Iter 1: f = ... f-delta = ...
     Final f = ...
    """
    # Extract number of dimensions and norm of tensor
    N = input_tensor.ndims
    normX = input_tensor.norm()

    # Set up dimorder if not specified
    if dimorder is None:
        dimorder = np.arange(N)
    else:
        dimorder = parse_one_d(dimorder)
    if tuple(range(N)) != tuple(sorted(dimorder)):
        assert False, "Dimorder must be a list or permutation of range(tensor.ndims)"

    # Set up optdims if not specified
    if optdims is None:
        optdims = np.arange(N)
    else:
        optdims = parse_one_d(optdims)

    # Error checking
    assert rank > 0, "Number of components requested must be positive"

    # Set up and error checking on initial guess
    if isinstance(init, ttb.ktensor):
        # User provided an initial ktensor; validate it
        assert init.ndims == N, f"Initial guess does not have {N} modes"
        assert (
            init.ncomponents == rank
        ), f"Initial guess does not have {rank} components"
        for n in dimorder:
            if init.factor_matrices[n].shape != (input_tensor.shape[n], rank):
                assert False, f"Mode {n} of the initial guess is the wrong size"
    elif isinstance(init, str) and init.lower() == "random":
        factor_matrices = []
        for n in range(N):
            factor_matrices.append(
                np.random.uniform(0, 1, (input_tensor.shape[n], rank))
            )
        init = ttb.ktensor(factor_matrices)
    elif isinstance(init, str) and init.lower() == "nvecs":
        assert not isinstance(
            input_tensor, ttb.sumtensor
        ), "Sumtensor doesn't support nvecs"
        factor_matrices = []
        for n in range(N):
            factor_matrices.append(input_tensor.nvecs(n, rank))
        init = ttb.ktensor(factor_matrices)
    else:
        assert False, "The selected initialization method is not supported"

    # Set up for iterates and fit
    U = init.copy().factor_matrices
    fit = 0

    # Reduce dimorder to only those modes we will optimize
    dimorder_in = dimorder  # save for output
    dimorder = [int(d) for d in dimorder if d in optdims]

    # Store the last MTTKRP result to accelerate fitness computation
    U_mttkrp = np.zeros((input_tensor.shape[dimorder[-1]], rank))

    if printitn > 0:
        print("CP_ALS:")

    # Main Loop: Iterate until convergence

    UtU = np.zeros((rank, rank, N))
    for n in range(N):
        UtU[:, :, n] = U[n].T @ U[n]

    for iteration in range(maxiters):
        fitold = fit

        # Iterate over optimized modes in specified order
        for n in dimorder:
            # Calculate Unew = X_(n) * khatrirao(all U except n, 'r').
            Unew = input_tensor.mttkrp(U, n)

            # Save the last MTTKRP result for fitness check.
            if n == dimorder[-1]:
                U_mttkrp = Unew

            # Compute the matrix of coefficients for linear system
            Y = np.prod(UtU, axis=2, where=[i != n for i in range(N)])
            # don't try to solve linear system with Y = 0
            if (Y == 0).all():
                Unew = np.zeros(Unew.shape)
            else:
                Unew = np.linalg.solve(Y.T, Unew.T).T
            # TODO: should we have issparse implemented? I am not sure
            #  when the following will occur
            # if issparse(Unew):
            #    Unew = full(Unew)   # for the case R=1

            # Normalize each vector to prevent singularities in coefmatrix
            if iteration == 0:
                weights = np.sqrt(sum(Unew**2, 0))  # 2-norm
            else:
                weights = np.maximum(np.max(np.abs(Unew), 0), 1)  # max-norm

            # if weights are 0, do not divide
            if not (weights == 0).all():
                Unew = Unew / weights

            U[n] = Unew
            UtU[:, :, n] = U[n].T @ U[n]

        M = ttb.ktensor(U, weights)

        # This is equivalent to innerprod(X,P).
        iprod = np.sum(
            np.sum(M.factor_matrices[dimorder[-1]] * U_mttkrp, 0) * weights, 0
        )
        if normX == 0:
            normresidual = M.norm() ** 2 - 2 * iprod
            fit = normresidual
        else:
            # the following input to np.sqrt can be negative due to rounding and
            # truncation errors, so np.abs is used
            normresidual = np.sqrt(np.abs(normX**2 + M.norm() ** 2 - 2 * iprod))
            fit = 1 - (normresidual / normX)  # fraction explained by model

        fitchange = np.abs(fitold - fit)

        # Check for convergence
        if (iteration > 0) and (fitchange < stoptol):
            flag = 0
        else:
            flag = 1

        if (printitn > 0) and ((divmod(iteration, printitn)[1] == 0) or (flag == 0)):
            print(f" Iter {iteration}: f = {fit:e} f-delta = {fitchange:7.1e}")

        # Check for convergence
        if flag == 0:
            break

    # Clean up final result

    # Arrange the final tensor so that the columns are normalized.
    M.arrange()
    # Fix the signs if requested
    if fixsigns:
        M = M.fixsigns()

    if printitn > 0:
        if normX == 0:
            normresidual = M.norm() ** 2 - 2 * input_tensor.innerprod(M)
            fit = normresidual
        else:
            normresidual = np.sqrt(
                np.abs(normX**2 + M.norm() ** 2 - 2 * input_tensor.innerprod(M))
            )
            fit = 1 - (normresidual / normX)  # fraction explained by model
        print(f" Final f = {fit:e}")

    output = {
        "params": {
            "stoptol": stoptol,
            "maxiters": maxiters,
            "dimorder": dimorder_in,
            "optdims": optdims,
            "printitn": printitn,
            "fixsigns": fixsigns,
        },
        "iters": iteration,
        "normresidual": normresidual,
        "fit": fit,
    }

    return M, init, output



if __name__ == "__main__":
    import doctest  # pragma: no cover

    doctest.testmod()  # pragma: no cover