"""Non-negative CP decomposition with alternating Poisson regression."""
# Copyright 2024 National Technology & Engineering Solutions of Sandia,
# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.
from __future__ import annotations
import time
import warnings
from typing import Dict, Literal, Optional, Tuple, Union, overload
import numpy as np
from numpy_groupies import aggregate as accumarray
import pyttb as ttb
def cp_apr( # noqa: PLR0913
input_tensor: Union[ttb.tensor, ttb.sptensor],
rank: int,
algorithm: Union[Literal["mu"], Literal["pdnr"], Literal["pqnr"]] = "mu",
stoptol: float = 1e-4,
stoptime: float = 1e6,
maxiters: int = 1000,
init: Union[ttb.ktensor, Literal["random"]] = "random",
maxinneriters: int = 10,
epsDivZero: float = 1e-10,
printitn: int = 1,
printinneritn: int = 0,
kappa: float = 0.01,
kappatol: float = 1e-10,
epsActive: float = 1e-8,
mu0: float = 1e-5,
precompinds: bool = True,
inexact: bool = True,
lbfgsMem: int = 3,
) -> Tuple[ttb.ktensor, ttb.ktensor, Dict]:
"""
Compute non-negative CP with alternating Poisson regression.
Parameters
----------
input_tensor: :class:`pyttb.tensor` or :class:`pyttb.sptensor`
rank: int
Rank of the decomposition
algorithm: str
        in {'mu', 'pdnr', 'pqnr'}
stoptol: float
Tolerance on overall KKT violation
stoptime: float
Maximum number of seconds to run
maxiters: int
Maximum number of iterations
init: str or :class:`pyttb.ktensor`
Initial guess
maxinneriters: int
Maximum inner iterations per outer iteration
epsDivZero: float
Safeguard against divide by zero
printitn: int
Print every n outer iterations, 0 for none
printinneritn: int
Print every n inner iterations
    kappa: float
        MU ALGORITHM PARAMETER: Offset to fix complementary slackness
    kappatol: float
        MU ALGORITHM PARAMETER: Tolerance on complementary slackness
epsActive: float
PDNR & PQNR ALGORITHM PARAMETER: Bertsekas tolerance for active set
mu0: float
PDNR ALGORITHM PARAMETER: Initial Damping Parameter
precompinds: bool
PDNR & PQNR ALGORITHM PARAMETER: Precompute sparse tensor indices
inexact: bool
PDNR ALGORITHM PARAMETER: Compute inexact Newton steps
    lbfgsMem: int
        PQNR ALGORITHM PARAMETER: Number of vector pairs to store for L-BFGS
Returns
-------
M: :class:`pyttb.ktensor`
Resulting ktensor from CP APR
Minit: :class:`pyttb.ktensor`
Initial Guess
output: dict
Additional output #TODO document this more appropriately
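
    Examples
    --------
    A minimal sketch on small synthetic data; the shapes, rank, and seed
    below are illustrative only:

    >>> import numpy as np
    >>> np.random.seed(0)
    >>> factors = [np.random.uniform(0, 1, (4, 2)) for _ in range(3)]
    >>> X = ttb.ktensor(factors).full()
    >>> M, Minit, info = cp_apr(X, rank=2, printitn=0)
    >>> M.ncomponents
    2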
"""
# Extract the number of modes in tensor X
N = input_tensor.ndims
assert rank > 0, "Number of components requested must be positive"
# Check that the data is non-negative.
tmp = input_tensor < 0.0
assert (
tmp.nnz == 0
), "Data tensor must be nonnegative for Poisson-based factorization"
# Set up an initial guess for the factor matrices.
if isinstance(init, ttb.ktensor):
# User provided an initial ktensor; validate it
assert init.ndims == N, "Initial guess does not have the right number of modes"
assert (
init.ncomponents == rank
), "Initial guess does not have the right number of components"
for n in range(N):
if init.shape[n] != input_tensor.shape[n]:
assert False, f"Mode {n} of the initial guess is the wrong size"
if np.min(init.factor_matrices[n]) < 0.0:
assert False, f"Initial guess has negative element in mode {n}"
if np.min(init.weights) < 0:
assert False, "Initial guess has a negative ktensor weight"
    elif isinstance(init, str) and init.lower() == "random":
factor_matrices = []
for n in range(N):
factor_matrices.append(
np.random.uniform(0, 1, (input_tensor.shape[n], rank))
)
init = ttb.ktensor(factor_matrices)
else:
raise ValueError(
f"Initial guess supports ktensor or `random` but received {init}"
)
    # Call solver based on the choice of algorithm parameter, passing all the
    # other input parameters
if algorithm.lower() == "mu":
M, output = tt_cp_apr_mu(
input_tensor,
rank,
init,
stoptol,
stoptime,
maxiters,
maxinneriters,
epsDivZero,
printitn,
printinneritn,
kappa,
kappatol,
)
output["algorithm"] = "mu"
output["params"]["algorithm"] = "mu"
elif algorithm.lower() == "pdnr":
M, output = tt_cp_apr_pdnr(
input_tensor,
rank,
init,
stoptol,
stoptime,
maxiters,
maxinneriters,
epsDivZero,
printitn,
printinneritn,
epsActive,
mu0,
precompinds,
inexact,
)
output["algorithm"] = "pdnr"
output["params"]["algorithm"] = "pdnr"
elif algorithm.lower() == "pqnr":
M, output = tt_cp_apr_pqnr(
input_tensor,
rank,
init,
stoptol,
stoptime,
maxiters,
maxinneriters,
epsDivZero,
printitn,
printinneritn,
epsActive,
lbfgsMem,
precompinds,
)
output["algorithm"] = "pqnr"
output["params"]["algorithm"] = "pqnr"
else:
assert False, "{algorithm} is not a supported cp_als algorithm"
return M, init, output
def tt_cp_apr_mu( # noqa: PLR0912,PLR0913,PLR0915
input_tensor: Union[ttb.tensor, ttb.sptensor],
rank: int,
init: ttb.ktensor,
stoptol: float,
stoptime: float,
maxiters: int,
maxinneriters: int,
epsDivZero: float,
printitn: int,
printinneritn: int,
kappa: float,
kappatol: float,
) -> Tuple[ttb.ktensor, Dict]:
"""
Compute nonnegative CP with alternating Poisson regression.
Parameters
----------
input_tensor: :class:`pyttb.tensor` or :class:`pyttb.sptensor`
rank: int
Rank of the decomposition
init: :class:`pyttb.ktensor`
Initial guess
stoptol: float
Tolerance on overall KKT violation
stoptime: float
Maximum number of seconds to run
maxiters: int
Maximum number of iterations
maxinneriters: int
Maximum inner iterations per outer iteration
epsDivZero: float
Safeguard against divide by zero
printitn: int
Print every n outer iterations, 0 for none
printinneritn: int
Print every n inner iterations
    kappa: float
        MU ALGORITHM PARAMETER: Offset to fix complementary slackness
    kappatol: float
        MU ALGORITHM PARAMETER: Tolerance on complementary slackness
Notes
-----
REFERENCE: E. C. Chi and T. G. Kolda. On Tensors, Sparsity, and
Nonnegative Factorizations, arXiv:1112.2414 [math.NA], December 2011,
URL: http://arxiv.org/abs/1112.2414. Submitted for publication.
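    In each inner iteration, the update applied to mode n is the elementwise
    multiplicative step B(n) <- B(n) * Phi(n), where Phi(n) is computed by
    calculate_phi.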
"""
N = input_tensor.ndims
# TODO I vote no duplicate error checking, copy error checking from cp_apr for
# initial guess here if disagree
# Initialize output arrays
# fnEvals = np.zeros((maxiters,))
kktViolations = -np.ones((maxiters,))
# TODO we initialize nInnerIters of size max outer iters?
nInnerIters = np.zeros((maxiters,))
# nzeros = np.zeros((maxiters,))
nViolations = np.zeros((maxiters,))
nTimes = np.zeros((maxiters,))
# Set up for iteration - initializing M and Phi.
M = init.copy()
M.normalize(normtype=1)
Phi = [] # np.zeros((N,))#cell(N,1)
for n in range(N):
# TODO prepopulation Phi instead of append should be faster
Phi.append(np.zeros(M.factor_matrices[n].shape))
kktModeViolations = np.zeros((N,))
if printitn > 0:
print("CP_APR:")
# Start the wall clock timer.
start = time.time()
    # PDN-R and PQN-R benefit from precomputing sparse indices of X for each mode
    # subproblem. However, MU execution time barely changes, so the precompute
    # option is not offered.
# Main loop: Iterate until convergence
for iteration in range(maxiters):
isConverged = True
for n in range(N):
# Make adjustments to entries of M[n] that are violating complementary
# slackness conditions.
# TODO both these zeros were 1 in matlab
if iteration > 0:
V = (Phi[n] > 0) & (M.factor_matrices[n] < kappatol)
if np.any(V):
nViolations[iteration] += 1
M.factor_matrices[n][V > 0] += kappa
# Shift the weight from lambda to mode n
M.redistribute(mode=n)
# Calculate product of all matrices but the n-th
# Sparse case only calculates entries corresponding to nonzeros in X
Pi = calculate_pi(input_tensor, M, rank, n, N)
# Do the multiplicative updates
for i in range(maxinneriters):
# Count the inner iterations
nInnerIters[iteration] += 1
# Calculate matrix for multiplicative update
Phi[n] = calculate_phi(input_tensor, M, rank, n, Pi, epsDivZero)
# Check for convergence
kktModeViolations[n] = np.max(
np.abs(
vectorize_for_mu(np.minimum(M.factor_matrices[n], 1 - Phi[n]))
)
)
if kktModeViolations[n] < stoptol:
break
isConverged = False
# Do the multiplicative update
# TODO cannot update M[n] in this way
M.factor_matrices[n] *= Phi[n]
# Print status
                if printinneritn != 0 and divmod(i, printinneritn)[1] == 0:
                    print(
                        f"\t\tMode = {n}, Inner Iter = {i}, "
                        f"KKT violation = {kktModeViolations[n]}"
                    )
# Shift weight from mode n back to lambda
M.normalize(normtype=1, mode=n)
kktViolations[iteration] = np.max(kktModeViolations)
        if printitn > 0 and divmod(iteration, printitn)[1] == 0:
print(
f"\tIter {iteration}: Inner Its = {nInnerIters[iteration]} "
f"KKT violation = {kktViolations[iteration]}, "
f"nViolations = {nViolations[iteration]}"
)
nTimes[iteration] = time.time() - start
# Check for convergence
if isConverged:
if printitn > 0:
print("Exiting because all subproblems reached KKT tol.")
break
if nTimes[iteration] > stoptime:
if printitn > 0:
print("Exiting because time limit exceeded.")
break
t_stop = time.time() - start
# Clean up final result
M.normalize(sort=True, normtype=1)
obj = tt_loglikelihood(input_tensor, M)
if printitn > 0:
normTensor = input_tensor.norm()
normresidual = np.sqrt(
normTensor**2 + M.norm() ** 2 - 2 * input_tensor.innerprod(M)
)
fit = 1 - (normresidual / normTensor) # fraction explained by model
print("===========================================")
print(f" Final log-likelihood = {obj}")
print(f" Final least squares fit = {fit}")
print(f" Final KKT violation = {kktViolations[iteration]}")
print(f" Total inner iterations = {sum(nInnerIters)}")
print(f" Total execution time = {t_stop} secs")
output = {
"params": {
"stoptol": stoptol,
"stoptime": stoptime,
"maxiters": maxiters,
"maxinneriters": maxinneriters,
"epsDivZero": epsDivZero,
"printitn": printitn,
"printinneritn": printinneritn,
"kappa": kappa,
"kappatol": kappatol,
},
"kktViolations": kktViolations[: iteration + 1],
"nInnerIters": nInnerIters[: iteration + 1],
"nViolations": nViolations[: iteration + 1],
"nTotalIters": np.sum(nInnerIters),
"times": nTimes[: iteration + 1],
"totalTime": t_stop,
"obj": obj,
}
return M, output
def tt_cp_apr_pdnr( # noqa: PLR0912,PLR0913,PLR0915
input_tensor: Union[ttb.tensor, ttb.sptensor],
rank: int,
init: ttb.ktensor,
stoptol: float,
stoptime: float,
maxiters: int,
maxinneriters: int,
epsDivZero: float,
printitn: int,
printinneritn: int,
epsActive: float,
mu0: float,
precompinds: bool,
inexact: bool,
) -> Tuple[ttb.ktensor, Dict]:
"""Compute nonnegative CP with alternating Poisson regression.
Computes an estimate of the best rank-R
CP model of a tensor X using an alternating Poisson regression.
The algorithm solves "row subproblems" in each alternating subproblem,
    using a Hessian of size R x R.
Parameters
----------
input_tensor: Union[:class:`pyttb.tensor`,:class:`pyttb.sptensor`]
rank: int
Rank of the decomposition
init: str or :class:`pyttb.ktensor`
Initial guess
stoptol: float
Tolerance on overall KKT violation
stoptime: float
Maximum number of seconds to run
maxiters: int
Maximum number of iterations
maxinneriters: int
Maximum inner iterations per outer iteration
epsDivZero: float
Safeguard against divide by zero
printitn: int
Print every n outer iterations, 0 for none
printinneritn: int
Print every n inner iterations
epsActive: float
PDNR & PQNR ALGORITHM PARAMETER: Bertsekas tolerance for active set
mu0: float
PDNR ALGORITHM PARAMETER: Initial Damping Parameter
precompinds: bool
PDNR & PQNR ALGORITHM PARAMETER: Precompute sparse tensor indices
inexact: bool
PDNR ALGORITHM PARAMETER: Compute inexact Newton steps
    Returns
    -------
    M: :class:`pyttb.ktensor`
        Resulting ktensor from CP APR
    output: dict
        Additional output # TODO detail return dictionary
Notes
-----
REFERENCE: Samantha Hansen, Todd Plantenga, Tamara G. Kolda.
Newton-Based Optimization for Nonnegative Tensor Factorizations,
arXiv:1304.4964 [math.NA], April 2013,
URL: http://arxiv.org/abs/1304.4964. Submitted for publication.
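    Each row subproblem minimizes the negative row log-likelihood
    f(b) = sum_r b_r - sum_j x_j * log(b * Pi_j) over b >= 0 (see
    tt_loglikelihood_row), taking damped Newton steps restricted to the free
    variables identified by the Bertsekas active-set test.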
"""
# Extract the number of modes in tensor X
N = input_tensor.ndims
# If the initial guess has any rows of all zero elements, then modify so the row
# subproblem is not taking log(0). Values will be restored to zero later if the
# unfolded X for the row has no zeros.
for n in range(N):
rowsum = np.sum(init.factor_matrices[n], axis=1)
tmpIdx = np.where(rowsum == 0)[0]
if tmpIdx.size != 0:
init.factor_matrices[n][tmpIdx, 0] = 1e-8
# Start with the initial guess, normalized using the vector L1 norm
M = init.copy()
M.normalize(normtype=1)
# Sparse tensor flag affects how Pi and Phi are computed.
isSparse = isinstance(input_tensor, ttb.sptensor)
# Initialize output arrays
fnEvals = np.zeros((maxiters, 1))
fnVals = np.zeros((maxiters, 1))
kktViolations = -np.ones((maxiters, 1))
nInnerIters = np.zeros((maxiters, 1))
nzeros = np.zeros((maxiters, 1))
times = np.zeros((maxiters, 1))
if printitn > 0:
print("CP_PDNR (alternating Poisson regression using damped Newton)")
dispLineWarn = printinneritn > 0
# Start the wall clock timer.
start = time.time()
if isinstance(input_tensor, ttb.sptensor) and isSparse and precompinds:
# Precompute sparse index sets for all the row subproblems.
# Takes more memory but can cut execution time significantly in some cases.
if printitn > 0:
print("\tPrecomuting sparse index sets...")
sparseIx = []
for n in range(N):
num_rows = M.factor_matrices[n].shape[0]
row_indices = []
for jj in range(num_rows):
row_indices.append(np.where(input_tensor.subs[:, n] == jj)[0])
sparseIx.append(row_indices)
if printitn > 0:
print("done")
e_vec = np.ones((1, rank))
rowsubprobStopTol = stoptol
# Main loop: iterate until convergence or a max threshold is reached
for iteration in range(maxiters):
isConverged = True
kktModeViolations = np.zeros((N,))
countInnerIters = np.zeros((N,))
# Alternate thru each factor matrix, A_1, A_2, ..., A_N.
for n in range(N):
# Shift the weight from lambda to mode n.
M.redistribute(mode=n)
# calculate khatri-rao product of all matrices but the n-th
if isinstance(input_tensor, ttb.tensor) and isSparse is False:
# Data is not a sparse tensor.
Pi = tt_calcpi_prowsubprob(input_tensor, M, rank, n, N, isSparse)
X_mat = input_tensor.to_tenmat(
np.array([n], order=input_tensor.order), copy=False
).data
num_rows = M.factor_matrices[n].shape[0]
isRowNOTconverged = np.zeros((num_rows,))
# Loop over the row subproblems in mode n.
for jj in range(num_rows):
# Initialize the damped Hessian parameter for the row subproblem.
mu = mu0
# Get data values for row jj of matricized mode n,
if isinstance(input_tensor, ttb.sptensor) and isSparse:
# Data is a sparse tensor
if not precompinds:
sparse_indices = np.where(input_tensor.subs[:, n] == jj)[0]
else:
sparse_indices = sparseIx[n][jj]
if sparse_indices.size == 0:
# The row jj of matricized tensor X in mode n is empty
M.factor_matrices[n][jj, :] = 0
continue
x_row = input_tensor.vals[sparse_indices]
# Calculate just the columns of Pi needed for this row.
Pi = tt_calcpi_prowsubprob(
input_tensor, M, rank, n, N, isSparse, sparse_indices
)
else:
x_row = X_mat[jj, :]
# Get current values of the row subproblem variables.
m_row = M.factor_matrices[n][jj, :]
# Iteratively solve the row subproblem with projected Newton steps.
if inexact and iteration == 1:
innerIterMaximum = 2
else:
innerIterMaximum = maxinneriters
f_new = 0.0
for i in range(innerIterMaximum):
# Calculate the gradient.
                    phi_row, ups_row = calc_partials(
                        isSparse, Pi, epsDivZero, x_row, m_row
                    )
gradM = (e_vec - phi_row).transpose()
# Compute the row subproblem kkt_violation.
# Note experiments in the original paper used:
# kkt_violation = \
# np.norm(np.abs(np.minimum(m_row, gradM.transpose())))
# We now use \| KKT \|_{inf}:
kkt_violation = np.max(
np.abs(np.minimum(m_row, gradM.transpose()[0]))
)
# Report largest row subproblem initial violation
if i == 0 and kkt_violation > kktModeViolations[n]:
kktModeViolations[n] = kkt_violation
if printinneritn > 0 and np.mod(i, printinneritn) == 0:
print(
f"\tMode = {n}, Row = {jj}, InnerIt = {i}",
end="",
)
if i == 0:
print(", RowKKT = {kkt_violation}")
else:
print(f", RowKKT = {kkt_violation}, RowObj = {-f_new}")
# Check for row subproblem convergence.
if kkt_violation < stoptol:
break
# Not converged, so m_row will be modified.
isRowNOTconverged[jj] = 1
# Calculate the search direction
# TODO clean up reshaping gradM to row
search_dir, predicted_red = get_search_dir_pdnr(
Pi, ups_row, rank, gradM.transpose()[0], m_row, mu, epsActive
)
# Perform a projected linesearch and update variables.
# Start from a unit step length, decrease by 1/2,
                    # stop with sufficient decrease of 1.0e-4 or at most 10 steps.
(
m_rowNew,
f_old,
f_unit,
f_new,
num_evals,
) = tt_linesearch_prowsubprob(
search_dir.transpose()[0],
gradM.transpose(),
m_row,
1,
1 / 2,
10,
1.0e-4,
isSparse,
x_row,
Pi,
phi_row,
dispLineWarn,
)
fnEvals[iteration] += num_evals
m_row = m_rowNew
# Update damping parameter mu based on the unit step length, which
# is returned in f_unit
                    actual_red = f_old - f_unit
                    if predicted_red == 0:
                        mu *= 10
                    else:
                        rho = actual_red / -predicted_red
                        if rho < 1 / 4:
                            mu *= 7 / 2
                        elif rho > 3 / 4:
                            mu *= 2 / 7
M.factor_matrices[n][jj, :] = m_row
countInnerIters[n] += i
# Test if all row subproblems have converged, which means that no variables
# in this row were changed.
if np.sum(isRowNOTconverged) != 0:
isConverged = False
# Shift weight from mode n back to lambda.
M.normalize(mode=n, normtype=1)
# Total number of inner iterations for a given outer iteration, totalled
# across all modes and all row subproblems in each mode
nInnerIters[iteration] += countInnerIters[n]
# Save output items for the outer iteration.
num_zero = 0
for n in range(N):
num_zero += np.count_nonzero(M.factor_matrices[n] == 0) # [0].size
nzeros[iteration] = num_zero
kktViolations[iteration] = np.max(kktModeViolations)
if inexact:
rowsubprobStopTol = np.maximum(stoptol, kktViolations[iteration]) / 100.0
# Print outer iteration status.
if printitn > 0 and np.mod(iteration, printitn) == 0:
fnVals[iteration] = -tt_loglikelihood(input_tensor, M)
print(
f"{iteration}. Ttl Inner Its: {nInnerIters[iteration]}, "
f"KKT viol = {kktViolations[iteration]}, obj = {fnVals[iteration]}"
f", nz: {num_zero}"
)
times[iteration] = time.time() - start
# Check for convergence
if isConverged and not inexact:
break
if isConverged and inexact and rowsubprobStopTol <= stoptol:
break
if times[iteration] > stoptime:
print("EXiting because time limit exceeded")
break
t_stop = time.time() - start
# Clean up final result
M.normalize(sort=True, normtype=1)
obj = tt_loglikelihood(input_tensor, M)
if printitn > 0:
normTensor = input_tensor.norm()
normresidual = np.sqrt(
normTensor**2 + M.norm() ** 2 - 2 * input_tensor.innerprod(M)
)
fit = 1 - (normresidual / normTensor) # fraction explained by model
print("===========================================")
print(f" Final log-likelihood = {obj}")
print(f" Final least squares fit = {fit}")
print(f" Final KKT violation = {kktViolations[iteration]}")
print(f" Total inner iterations = {sum(nInnerIters)}")
print(f" Total execution time = {t_stop} secs")
output = {
"params": {
"stoptol": stoptol,
"stoptime": stoptime,
"maxiters": maxiters,
"maxinneriters": maxinneriters,
"epsDivZero": epsDivZero,
"printitn": printitn,
"printinneritn": printinneritn,
"epsActive": epsActive,
"mu0": mu0,
"precompinds": precompinds,
"inexact": inexact,
},
"kktViolations": kktViolations[: iteration + 1],
"obj": obj,
"fnEvals": fnEvals[: iteration + 1],
"fnVals": fnVals[: iteration + 1],
"nInnerIters": nInnerIters[: iteration + 1],
"nZeros": nzeros[: iteration + 1],
"times": times[: iteration + 1],
"totalTime": t_stop,
}
return M, output
def tt_cp_apr_pqnr( # noqa: PLR0912,PLR0913,PLR0915
input_tensor: Union[ttb.tensor, ttb.sptensor],
rank: int,
init: ttb.ktensor,
stoptol: float,
stoptime: float,
maxiters: int,
maxinneriters: int,
epsDivZero: float,
printitn: int,
printinneritn: int,
epsActive: float,
lbfgsMem: int,
precompinds: bool,
) -> Tuple[ttb.ktensor, Dict]:
"""
Compute nonnegative CP with alternating Poisson regression.
    tt_cp_apr_pqnr computes an estimate of the best rank-R
    CP model of a tensor X using an alternating Poisson regression.
    The algorithm solves "row subproblems" in each alternating subproblem,
    using projected quasi-Newton (L-BFGS) steps rather than an explicit Hessian.
The function is typically called by cp_apr.
The model is solved by nonlinear optimization, and the code literally
minimizes the negative of log-likelihood. However, printouts to the
console reverse the sign to show maximization of log-likelihood.
Parameters
----------
input_tensor:
Tensor to decompose
rank: int
Rank of the decomposition
init:
Initial guess
stoptol:
Tolerance on overall KKT violation
stoptime:
Maximum number of seconds to run
maxiters:
Maximum number of iterations
maxinneriters:
Maximum inner iterations per outer iteration
epsDivZero:
Safeguard against divide by zero
printitn:
Print every n outer iterations, 0 for none
printinneritn:
Print every n inner iterations
epsActive:
PDNR & PQNR ALGORITHM PARAMETER: Bertsekas tolerance for active set
lbfgsMem:
Number of vector pairs to store for L-BFGS
precompinds:
Precompute sparse tensor indices
    Returns
    -------
    M: :class:`pyttb.ktensor`
        Resulting ktensor from CP APR
    output: dict
        Additional output # TODO detail return dictionary
Notes
-----
REFERENCE: Samantha Hansen, Todd Plantenga, Tamara G. Kolda.
Newton-Based Optimization for Nonnegative Tensor Factorizations,
arXiv:1304.4964 [math.NA], April 2013,
URL: http://arxiv.org/abs/1304.4964. Submitted for publication.
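    Search directions are produced by get_search_dir_pqnr, which runs an
    L-BFGS two-loop recursion over the most recent lbfgsMem variable and
    gradient difference pairs, zeroing the components of variables fixed by
    the Bertsekas active-set test.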
"""
# TODO first ~100 lines are identical to PDNR, consider abstracting just the
# algorithm portion
# Extract the number of modes in data tensor
N = input_tensor.ndims
# If the initial guess has any rows of all zero elements, then modify so the row
# subproblem is not taking log(0). Values will be restored to zero later if the
# unfolded X for the row has no zeros.
for n in range(N):
rowsum = np.sum(init.factor_matrices[n], axis=1)
tmpIdx = np.where(rowsum == 0)[0]
if tmpIdx.size != 0:
init.factor_matrices[n][tmpIdx, 0] = 1e-8
# Start with the initial guess, normalized using the vector L1 norm
M = init.copy()
M.normalize(normtype=1)
# Sparse tensor flag affects how Pi and Phi are computed.
isSparse = isinstance(input_tensor, ttb.sptensor)
# Initialize output arrays
fnEvals = np.zeros((maxiters, 1))
fnVals = np.zeros((maxiters, 1))
kktViolations = -np.ones((maxiters, 1))
nInnerIters = np.zeros((maxiters, 1))
nzeros = np.zeros((maxiters, 1))
times = np.zeros((maxiters, 1))
if printitn > 0:
print("CP_PQNR (alternating Poisson regression using quasi-Newton)")
dispLineWarn = printinneritn > 0
# Start the wall clock timer.
start = time.time()
if isinstance(input_tensor, ttb.sptensor) and precompinds:
# Precompute sparse index sets for all the row subproblems.
# Takes more memory but can cut execution time significantly in some cases.
if printitn > 0:
print("\tPrecomuting sparse index sets...")
sparseIx = []
for n in range(N):
num_rows = M.factor_matrices[n].shape[0]
row_indices = []
for jj in range(num_rows):
row_indices.append(np.where(input_tensor.subs[:, n] == jj)[0])
sparseIx.append(row_indices)
if printitn > 0:
print("done")
# Main loop: iterate until convergence or a max threshold is reached
for iteration in range(maxiters):
isConverged = True
kktModeViolations = np.zeros((N,))
countInnerIters = np.zeros((N,))
# Alternate thru each factor matrix, A_1, A_2, ..., A_N.
for n in range(N):
# Shift the weight from lambda to mode n.
M.redistribute(mode=n)
# calculate khatri-rao product of all matrices but the n-th
if not isinstance(input_tensor, ttb.sptensor) and not isSparse:
# Data is not a sparse tensor.
Pi = tt_calcpi_prowsubprob(input_tensor, M, rank, n, N, isSparse)
X_mat = input_tensor.to_tenmat(
np.array([n], order=input_tensor.order), copy=False
).data
num_rows = M.factor_matrices[n].shape[0]
isRowNOTconverged = np.zeros((num_rows,))
# Loop over the row subproblems in mode n.
for jj in range(num_rows):
# Get data values for row jj of matricized mode n,
if isinstance(input_tensor, ttb.sptensor) and isSparse:
# Data is a sparse tensor
if not precompinds:
sparse_indices = np.where(input_tensor.subs[:, n] == jj)[0]
else:
sparse_indices = sparseIx[n][jj]
if sparse_indices.size == 0:
# The row jj of matricized tensor X in mode n is empty
M.factor_matrices[n][jj, :] = 0
continue
x_row = input_tensor.vals[sparse_indices]
# Calculate just the columns of Pi needed for this row.
Pi = tt_calcpi_prowsubprob(
input_tensor, M, rank, n, N, isSparse, sparse_indices
)
else:
x_row = X_mat[jj, :]
# Get current values of the row subproblem variables.
m_row = M.factor_matrices[n][jj, :]
# Initialize L-BFGS storage for the row subproblem.
delm = np.zeros((rank, lbfgsMem))
delg = np.zeros((rank, lbfgsMem))
rho = np.zeros((lbfgsMem,))
lbfgsPos = 0
m_rowOLD = np.empty((), dtype=m_row.dtype)
gradOLD = np.empty((), dtype=m_row.dtype)
# Iteratively solve the row subproblem with projected quasi-Newton steps
for i in range(maxinneriters):
# Calculate the gradient.
gradM, phi_row = calc_grad(isSparse, Pi, epsDivZero, x_row, m_row)
if i == 0:
# Note from MATLAB tensortoolbox
# Original cp_aprPQN_row code (and plb_row) does a gradient
# step to prime the L-BFGS approximation. However, it means
# a row subproblem that already converged wastes time
# doing a gradient step before checking KKT conditions.
# TODO: fix in a future release.
m_rowOLD = m_row
gradOLD = gradM
m_row, _, _, f_new, num_evals = tt_linesearch_prowsubprob(
-gradM.transpose(),
gradM.transpose(),
m_rowOLD,
1,
1 / 2,
10,
1e-4,
isSparse,
x_row,
Pi,
phi_row,
dispLineWarn,
)
fnEvals[iteration] += num_evals
gradM, phi_row = calc_grad(
isSparse, Pi, epsDivZero, x_row, m_row
)
# Compute the row subproblem kkt_violation.
# Note experiments in the original paper used:
# kkt_violation = \
# np.norm(np.abs(np.minimum(m_row, gradM.transpose())))
# We now use \| KKT \|_{inf}:
kkt_violation = np.max(np.abs(np.minimum(m_row, gradM)))
# Report largest row subproblem initial violation
if i == 0 and kkt_violation > kktModeViolations[n]:
kktModeViolations[n] = kkt_violation
if printinneritn > 0 and np.mod(i, printinneritn) == 0:
print(
f"\tMode = {n}, Row = {jj}, InnerIt = {i}",
end="",
)
if i == 0:
print(f", RowKKT = {kkt_violation}")
else:
print(f", RowKKT = {kkt_violation}, RowObj = {-f_new}")
# Check for row subproblem convergence.
if kkt_violation < stoptol:
break
# Not converged, so m_row will be modified.
isRowNOTconverged[jj] = 1
# Update the L-BFGS approximation.
tmp_delm: np.ndarray = m_row - m_rowOLD
tmp_delg: np.ndarray = gradM - gradOLD
tmp_delm_dot = tmp_delm.dot(tmp_delg.transpose())
if not np.any(tmp_delm_dot == 0):
tmp_rho = 1 / tmp_delm_dot
delm[:, lbfgsPos] = tmp_delm
delg[:, lbfgsPos] = tmp_delg
rho[lbfgsPos] = tmp_rho
else:
# Rho is required to be positive; if not, then skip the L-BFGS
# update pair. The recommended safeguard for full BFGS is
# Powell damping, but not clear how to damp in 2-loop L-BFGS
if dispLineWarn:
warnings.warn(
"WARNING: skipping L-BFGS update, rho would be "
f"1 / {tmp_delm * tmp_delg}"
)
# Roll back lbfgsPos since it will increment later.
if lbfgsPos == 0:
if rho[lbfgsMem - 1] > 0:
lbfgsPos = lbfgsMem - 1
else:
# Fatal error, should not happen.
assert False, "ERROR: L-BFGS first iterate is bad"
else:
lbfgsPos -= 1
# Calculate search direction
search_dir = get_search_dir_pqnr(
m_row,
gradM,
epsActive,
delm,
delg,
rho,
lbfgsPos,
i,
dispLineWarn,
)
lbfgsPos = np.mod(lbfgsPos, lbfgsMem)
m_rowOLD = m_row
gradOLD = gradM
# Perform a projected linesearch and update variables.
# Start from a unit step length, decrease by 1/2,
                    # stop with sufficient decrease of 1.0e-4 or at most 10 steps.
m_row, _, _, f_new, num_evals = tt_linesearch_prowsubprob(
search_dir.transpose()[0],
gradOLD.transpose(),
m_rowOLD,
1,
1 / 2,
10,
1.0e-4,
isSparse,
x_row,
Pi,
phi_row,
dispLineWarn,
)
fnEvals[iteration] += num_evals
M.factor_matrices[n][jj, :] = m_row
countInnerIters[n] += i
# Test if all row subproblems have converged, which means that no variables
# in this row were changed.
if np.sum(isRowNOTconverged) != 0:
isConverged = False
# Shift weight from mode n back to lambda.
M.normalize(mode=n, normtype=1)
# Total number of inner iterations for a given outer iteration,
# totalled across all modes and all row subproblems in each mode
nInnerIters[iteration] += countInnerIters[n]
# Save output items for the outer iteration.
num_zero = 0
for n in range(N):
num_zero += np.count_nonzero(M.factor_matrices[n] == 0) # [0].size
nzeros[iteration] = num_zero
kktViolations[iteration] = np.max(kktModeViolations)
# Print outer iteration status.
if printitn > 0 and np.mod(iteration, printitn) == 0:
fnVals[iteration] = -tt_loglikelihood(input_tensor, M)
print(
f"{iteration}. Ttl Inner Its: {nInnerIters[iteration]}, KKT viol = "
f"{kktViolations[iteration]}, obj = {fnVals[iteration]}, nz: {num_zero}"
)
times[iteration] = time.time() - start
# Check for convergence
if isConverged:
break
if times[iteration] > stoptime:
print("Exiting because time limit exceeded")
break
t_stop = time.time() - start
# Clean up final result
M.normalize(sort=True, normtype=1)
obj = tt_loglikelihood(input_tensor, M)
if printitn > 0:
normTensor = input_tensor.norm()
normresidual = np.sqrt(
normTensor**2 + M.norm() ** 2 - 2 * input_tensor.innerprod(M)
)
fit = 1 - (normresidual / normTensor) # fraction explained by model
print("===========================================")
print(f" Final log-likelihood = {obj}")
print(f" Final least squares fit = {fit}")
print(f" Final KKT violation = {kktViolations[iteration]}")
print(f" Total inner iterations = {sum(nInnerIters)}")
print(f" Total execution time = {t_stop} secs")
output = {
"params": {
"stoptol": stoptol,
"stoptime": stoptime,
"maxiters": maxiters,
"maxinneriters": maxinneriters,
"epsDivZero": epsDivZero,
"printitn": printitn,
"printinneritn": printinneritn,
"epsActive": epsActive,
"lbfgsMem": lbfgsMem,
"precompinds": precompinds,
},
"kktViolations": kktViolations[: iteration + 1],
"obj": obj,
"fnEvals": fnEvals[: iteration + 1],
"fnVals": fnVals[: iteration + 1],
"nInnerIters": nInnerIters[: iteration + 1],
"nZeros": nzeros[: iteration + 1],
"times": times[: iteration + 1],
"totalTime": t_stop,
}
return M, output
# PDNR helper functions
@overload
def tt_calcpi_prowsubprob(
Data: ttb.sptensor,
Model: ttb.ktensor,
rank: int,
factorIndex: int,
ndims: int,
isSparse: Literal[True],
sparse_indices: np.ndarray,
) -> np.ndarray: ... # pragma: no cover see coveragepy/issues/970
@overload
def tt_calcpi_prowsubprob(
Data: ttb.tensor,
Model: ttb.ktensor,
rank: int,
factorIndex: int,
ndims: int,
isSparse: Literal[False],
) -> np.ndarray: ... # pragma: no cover see coveragepy/issues/970
def tt_calcpi_prowsubprob( # noqa: PLR0913
Data: Union[ttb.sptensor, ttb.tensor],
Model: ttb.ktensor,
rank: int,
factorIndex: int,
ndims: int,
isSparse: bool = False,
sparse_indices: Optional[np.ndarray] = None,
) -> np.ndarray:
"""
Compute Pi for a row subproblem.
Parameters
----------
Data:
Tensor to compute subproblem for.
isSparse:
Flag to determine if sparse subproblem.
Model:
Current decomposition.
rank:
Rank of solution.
factorIndex:
Which factor to solve
ndims:
Number of dimensions
sparse_indices:
Indices of row subproblem nonzero elements
Returns
-------
Pi: :class:`numpy.ndarray`
See Also
--------
    :func:`pyttb.cp_apr.calculate_pi`
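
    Examples
    --------
    A dense sketch (shapes illustrative); for mode 0 of a 3 x 4 x 5 model,
    Pi collects the Khatri-Rao product of the other two factors:

    >>> import numpy as np
    >>> M = ttb.ktensor([np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))])
    >>> Pi = tt_calcpi_prowsubprob(M.full(), M, 2, 0, 3, False)
    >>> Pi.shape
    (20, 2)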
"""
# TODO: this can probably be merged with general calculate pi,
# where default for sparse_indices is slice(None,None,None)
# alternatively, sparse_indices seems redundant with isSparse
if isSparse and sparse_indices is not None and isinstance(Data, ttb.sptensor):
# Data is a sparse tensor. Compute Pi for the row problem specified by
# sparse_indices
num_row_nnz = len(sparse_indices)
Pi = np.ones((num_row_nnz, rank))
for i in np.setdiff1d(np.arange(ndims), factorIndex).astype(int):
Pi *= Model.factor_matrices[i][Data.subs[sparse_indices, i], :]
else:
Pi = ttb.khatrirao(
*(
Model.factor_matrices[:factorIndex]
+ Model.factor_matrices[factorIndex + 1 : ndims + 1]
),
reverse=True,
)
return Pi
def calc_partials(
isSparse: bool,
Pi: np.ndarray,
epsilon: float,
data_row: np.ndarray,
model_row: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
r"""
Compute derivative quantities for a PDNR row subproblem.
Parameters
----------
isSparse:
Flag if sparse subproblem.
Pi:
epsilon:
Prevent division by zero.
data_row:
Row of data for subproblem.
model_row:
Row of model for subproblem.
Returns
-------
phi_row: :class:`numpy.ndarray`
gradient of row subproblem, except for a constant \n
:math:`phi\_row[r] = \sum_{j=1}^{J_n}\frac{x_j\pi_{rj}}{\sum_i^R b_i\pi_{ij}}`
ups_row: :class:`numpy.ndarray`
intermediate quantity (upsilon) used for second derivatives \n
:math:`ups\_row[j] = \frac{x_j}{\left(\sum_i^R b_i\pi_{ij}\right)^2}`
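
    Examples
    --------
    A dense sketch with three data entries and rank two (values are
    illustrative):

    >>> import numpy as np
    >>> Pi = np.ones((3, 2)) / 2
    >>> phi, ups = calc_partials(False, Pi, 1e-10, np.ones(3), np.ones(2))
    >>> phi
    array([1.5, 1.5])
    >>> ups
    array([1., 1., 1.])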
"""
if isSparse:
data_row = data_row.transpose()[0]
v = model_row.dot(Pi.transpose())
w = data_row.transpose() / np.maximum(v, epsilon)
phi_row = w.dot(Pi)
u = v**2
ups_row = data_row.transpose() / np.maximum(u, epsilon)
return phi_row, ups_row
def get_search_dir_pdnr( # noqa: PLR0913
Pi: np.ndarray,
ups_row: np.ndarray,
rank: int,
gradModel: np.ndarray,
model_row: np.ndarray,
mu: float,
epsActSet: float,
) -> Tuple[np.ndarray, np.ndarray]:
"""Compute the search direction using a two-metric projection with damped Hessian.
Parameters
----------
Pi:
ups_row:
intermediate quantity (upsilon) used for second derivatives
rank:
number of variables for the row subproblem
gradModel:
gradient vector for the row subproblem
model_row:
vector of variables for the row subproblem
mu:
damping parameter
epsActSet:
Bertsekas tolerance for active set determination
Returns
-------
search_dir: :class:`numpy.ndarray`
search direction vector
pred_red: :class:`numpy.ndarray`
predicted reduction in quadratic model
"""
search_dir = np.zeros((rank, 1))
    # Projected gradient step onto the nonnegative orthant:
    # P[m - g] = (m - g) * ((m - g) > 0)
    projGradStep = (model_row - gradModel.transpose()) * (
        (model_row - gradModel.transpose()) > 0
    ).astype(float)
wk = np.linalg.norm(model_row - projGradStep)
# Determine active and free variables
num_free = 0
free_indices_tmp = np.zeros((rank,)).astype(int)
for r in range(rank):
if (model_row[r] <= np.minimum(epsActSet, wk)) and (gradModel[r] > 0):
# Variable is not free (belongs to set A or G)
if model_row[r] != 0:
# Variable moves according to the gradient (set G).
search_dir[r] = -gradModel[r]
else:
# Variable is free (set F).
num_free += 1
free_indices_tmp[num_free - 1] = r
free_indices = free_indices_tmp[0:num_free]
# Compute the Hessian for free variables.
Hessian_free = get_hessian(ups_row, Pi, free_indices)
grad_free = -gradModel[free_indices]
# Compute the damped Newton search direction over free variables
# TODO verify this is appropriate representation of matlab's method,
# s.b. because hessian is square, and addition should ensure full rank,
# try.catch handles singular matrix
try:
search_dir[free_indices] = np.linalg.solve(
Hessian_free + (mu * np.eye(num_free)), grad_free
)[:, None]
except np.linalg.LinAlgError:
warnings.warn("CP_APR: Damped Hessian is nearly singular\n")
# TODO: note this may be a typo in matlab see line 1107
search_dir = -gradModel
# Calculate expected reduction in the quadratic model of the objective.
# TODO: double check if left or right multiplication has an speed effect,
# memory layout
q = (
search_dir[free_indices]
.transpose()
.dot(Hessian_free + (mu * np.eye(num_free)))
.dot(search_dir[free_indices])
)
pred_red = (search_dir[free_indices].transpose().dot(gradModel[free_indices])) + (
0.5 * q
)
if pred_red > 0:
warnings.warn("CP_APR: Expected decrease in objective is positive\n")
search_dir = -gradModel
return search_dir, pred_red
def tt_linesearch_prowsubprob( # noqa: PLR0913
direction: np.ndarray,
grad: np.ndarray,
model_old: np.ndarray,
step_len: float,
step_red: float,
max_steps: int,
suff_decr: float,
isSparse: bool,
data_row: np.ndarray,
Pi: np.ndarray,
phi_row: np.ndarray,
display_warning: bool,
) -> Tuple[np.ndarray, float, float, float, int]:
"""Perform a line search on a row subproblem.
Parameters
----------
direction:
search direction
grad:
gradient vector a model_old
model_old:
current variable values
step_len:
initial step length, which is the maximum possible step length
step_red:
step reduction factor (suggest 1/2)
max_steps:
maximum number of steps to try (suggest 10)
suff_decr:
sufficient decrease for convergence (suggest 1.0e-4)
isSparse:
sparsity flag for computing the objective
data_row:
row subproblem data, for computing the objective
Pi:
Pi matrix, for computing the objective
phi_row:
        1 - grad; used for improved accuracy when falling back to the
        multiplicative update
display_warning:
Flag to display warning messages or not
Returns
-------
m_new: :class:`numpy.ndarray`
new (improved) model values
num_evals: int
number of times objective was evaluated
f_old: float
objective value at model_old
f_1: float
objective value at model_old + step_len*direction
f_new: float
objective value at model_new
"""
minDescentTol = 1.0e-7
smallStepTol = 1.0e-7
stepSize = step_len
# Evaluate the current objective value
f_old = -tt_loglikelihood_row(isSparse, data_row, model_old, Pi)
num_evals = 1
count = 1
while count <= max_steps:
# Compute a new step and project it onto the positive orthant.
model_new = model_old + stepSize * direction
model_new *= model_new > 0
# Check that it is a descent direction.
gDotd = np.sum(grad * (model_new - model_old))
if (gDotd > 0) or (np.sum(model_new) < minDescentTol):
# Don't evaluate the objective if not a descent direction
# or if all the elements of model_new are close to zero
f_new = np.inf
if count == 1:
f_1 = f_new
stepSize *= step_red
count += 1
else:
# Evaluate objective function at new iterate
f_new = -tt_loglikelihood_row(isSparse, data_row, model_new, Pi)
num_evals += 1
if count == 1:
f_1 = f_new
# Check for sufficient decrease.
if f_new <= (f_old + suff_decr * gDotd):
break
stepSize *= step_red
count += 1
if np.isinf(f_1):
        # Unit step failed; return a value that yields an actual reduction of 0
f_1 = f_old
if ((count >= max_steps) and (f_new > f_old)) or (np.sum(model_new) < smallStepTol):
# Fall back on a multiplicative update step (scaled steepest descent).
# Experiments indicate it works better than a unit step in the direction
# of steepest descent, which would be the following:
# m_new = m_old - (step_len * grad); # steepest descent
# A simple update formula follows, but suffers from round-off error
# when phi_row is tiny:
# m_new = m_old - (m_old .* grad);
# Use this for best accuracy:
model_new = model_old * phi_row # multiplicative update
# Project to the constraints and reevaluate the subproblem objective
model_new *= model_new > 0
f_new = -tt_loglikelihood_row(isSparse, data_row, model_new, Pi)
num_evals += 1
# Let the caller know the search direction made no progress.
f_1 = f_old
if display_warning:
warnings.warn(
"CP_APR: Line search failed, using multiplicative update step"
)
return model_new, f_old, f_1, f_new, num_evals
def get_hessian(
upsilon: np.ndarray, Pi: np.ndarray, free_indices: np.ndarray
) -> np.ndarray:
"""Return the Hessian for one PDNR row subproblem of Model[n].
Only for just the rows and columns corresponding to the free variables.
Parameters
----------
upsilon:
intermediate quantity (upsilon) used for second derivatives
Pi:
free_indices:
Returns
-------
Hessian: :class:`numpy.ndarray`
Sub-block of full Hessian identified by free-indices
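
    Examples
    --------
    Continuing the rank-two sketch from calc_partials (values illustrative):

    >>> import numpy as np
    >>> Pi = np.ones((3, 2)) / 2
    >>> get_hessian(np.ones(3), Pi, np.array([0, 1]))
    array([[0.75, 0.75],
           [0.75, 0.75]])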
"""
num_free = len(free_indices)
H = np.zeros((num_free, num_free))
for i in range(num_free):
for j in range(num_free):
c = free_indices[i]
d = free_indices[j]
val = np.sum(upsilon.transpose() * Pi[:, c] * Pi[:, d])
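            # Fancy indexing fills both symmetric entries, H[i, j] and H[j, i]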
H[(i, j), (j, i)] = val
return H
def tt_loglikelihood_row(
isSparse: bool,
data_row: np.ndarray,
model_row: np.ndarray,
Pi: np.ndarray,
) -> float:
"""Compute log-likelihood of one row subproblem.
Parameters
----------
isSparse:
Sparsity flag
data_row:
vector of data values
model_row:
vector of model values
Pi:
Notes
-----
The row subproblem for a given mode includes one row of matricized tensor
data (x) and one row of the model (m) in the same matricized mode.
Then
(dense case)
m: R-length vector
x: J-length vector
Pi: R x J matrix
(sparse case)
m: R-length vector
x: p-length vector, where p = nnz in row of matricized data tensor
Pi: R x p matrix
    F = - sum_r m_r + sum_j x_j * log(m * Pi_j)
where Pi_j denotes the j^th column of Pi
NOTE: Rows of Pi' must sum to one
Returns
-------
loglikelihood: float
See notes for description
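
    Examples
    --------
    A dense sketch (values illustrative); with unit data and model, the log
    term vanishes, leaving -sum(model_row):

    >>> import numpy as np
    >>> Pi = np.ones((3, 2)) / 2
    >>> float(tt_loglikelihood_row(False, np.ones(3), np.ones(2), Pi))
    -2.0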
"""
term1 = -np.sum(model_row)
if isSparse:
term2 = np.sum(data_row.transpose() * np.log(model_row.dot(Pi.transpose())))
else:
b_pi = model_row.dot(Pi.transpose())
skip_zeros = data_row != 0
term2 = np.dot(data_row[skip_zeros], np.log(b_pi[skip_zeros])).item()
loglikelihood = term1 + term2
return loglikelihood
# PQNR helper functions
def get_search_dir_pqnr( # noqa: PLR0913
model_row: np.ndarray,
gradModel: np.ndarray,
epsActSet: float,
delta_model: np.ndarray,
delta_grad: np.ndarray,
rho: np.ndarray,
lbfgs_pos: int,
iters: int,
disp_warn: bool,
) -> np.ndarray:
"""
Compute the search direction by projecting with L-BFGS.
Parameters
----------
model_row:
current variable values
gradModel:
gradient at model_row
epsActSet:
Bertsekas tolerance for active set determination
delta_model:
L-BFGS array of variable deltas
delta_grad:
L-BFGS array of gradient deltas
rho:
lbfgs_pos:
pointer into L-BFGS arrays
iters:
disp_warn:
Returns
-------
direction: :class:`numpy.ndarray`
Search direction based on current L-BFGS and grad
Notes
-----
Adapted from MATLAB code of Dongmin Kim and Suvrit Sra written in 2008.
Modified extensively to solve row subproblems and use a better linesearch;
for details see REFERENCE: Samantha Hansen, Todd Plantenga, Tamara G. Kolda.
Newton-Based Optimization for Nonnegative Tensor Factorizations,
arXiv:1304.4964 [math.NA], April 2013,
URL: http://arxiv.org/abs/1304.4964. Submitted for publication.
"""
lbfgsSize = delta_model.shape[1]
# Determine active and free variables.
# TODO: is the below relevant?
# If epsActSet is zero, then the following works:
# fixedVars = find((m_row == 0) & (grad' > 0));
# For the general case this works but is less clear and assumes m_row > 0:
# fixedVars = find((grad' > 0) & (m_row <= min(epsActSet,grad')));
    # Projected gradient step onto the nonnegative orthant:
    # P[m - g] = (m - g) * ((m - g) > 0)
    projGradStep = (model_row - gradModel.transpose()) * (
        (model_row - gradModel.transpose()) > 0
    ).astype(float)
wk = np.linalg.norm(model_row - projGradStep)
fixedVars = np.logical_and(gradModel > 0, (model_row <= np.minimum(epsActSet, wk)))
direction = -gradModel
direction[fixedVars] = 0
if delta_model[:, lbfgs_pos].transpose().dot(delta_grad[:, lbfgs_pos]) == 0.0:
# Cannot proceed with this L-BFGS data; most likely the iteration has
# converged, so this is rarely seen.
if disp_warn:
warnings.warn("WARNING: L-BFGS update is orthogonal, using gradient")
return direction
alpha = np.ones((lbfgsSize,))
k = lbfgs_pos
# Perform an L-BFGS two-loop recursion to compute the search direction.
for _ in range(np.minimum(iters, lbfgsSize)):
alpha[k] = rho[k] * (delta_model[:, k].transpose().dot(direction))
direction -= alpha[k] * (delta_grad[:, k])
# TODO check mod
k = (
lbfgsSize - np.mod(1 - k, lbfgsSize) - 1
) # -1 accounts for numpy indexing starting at 0 not 1
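    # Scale by the standard initial Hessian approximation for L-BFGS,
    # gamma = (s'y) / (y'y), written here as 1 / (rho * y'y)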
coef = (
1
/ rho[lbfgs_pos]
/ delta_grad[:, lbfgs_pos].transpose().dot(delta_grad[:, lbfgs_pos])
)
direction *= coef
for _ in range(np.minimum(iters, lbfgsSize)):
k = np.mod(k, lbfgsSize) # + 1
b = rho[k] * (delta_grad[:, k].transpose().dot(direction))
direction += (alpha[k] - b) * (delta_model[:, k])
direction[fixedVars] = 0
return direction
def calc_grad(
isSparse: bool,
Pi: np.ndarray,
eps_div_zero: float,
data_row: np.ndarray,
model_row: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
"""Compute the gradient for a PQNR row subproblem.
Parameters
----------
isSparse:
Pi:
eps_div_zero:
data_row:
model_row:
Returns
-------
phi_row:
grad_row:
"""
# TODO: note this is duplicated exactly from calc_partials, should
# combine to one function
if isSparse:
data_row = data_row.transpose()[0]
v = model_row.dot(Pi.transpose())
w = data_row / np.maximum(v, eps_div_zero)
phi_row = w.dot(Pi)
# print("V: {}, W :{}".format(v,w))
# u = v**2
# ups_row = data_row.transpose() / np.maximum(u, epsilon)
grad_row = (np.ones(phi_row.shape) - phi_row).transpose()
return grad_row, phi_row
# TODO verify what pi is
# Mu helper functions
def calculate_pi(
Data: Union[ttb.sptensor, ttb.tensor],
Model: ttb.ktensor,
rank: int,
factorIndex: int,
ndims: int,
) -> np.ndarray:
"""Calculate Pi matrix.
Parameters
----------
Data:
Model:
rank:
factorIndex:
ndims:
Returns
-------
Pi:
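
    Examples
    --------
    A dense sketch (shapes illustrative); Pi has one row per column of the
    mode-0 unfolding:

    >>> import numpy as np
    >>> M = ttb.ktensor([np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))])
    >>> calculate_pi(M.full(), M, 2, 0, 3).shape
    (20, 2)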
"""
if isinstance(Data, ttb.sptensor):
Pi = np.ones((Data.nnz, rank))
for i in np.setdiff1d(np.arange(ndims), factorIndex).astype(int):
Pi *= Model.factor_matrices[i][Data.subs[:, i], :]
else:
Pi = ttb.khatrirao(
*(
Model.factor_matrices[:factorIndex]
+ Model.factor_matrices[factorIndex + 1 :]
),
reverse=True,
)
return Pi
def calculate_phi( # noqa: PLR0913
Data: Union[ttb.sptensor, ttb.tensor],
Model: ttb.ktensor,
rank: int,
factorIndex: int,
Pi: np.ndarray,
epsilon: float,
) -> np.ndarray:
"""Calculate Phi.
Parameters
----------
Data:
Model:
rank:
factorIndex:
Pi:
epsilon:
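    Returns
    -------
    Phi: :class:`numpy.ndarray`

    Examples
    --------
    A dense sketch continuing the calculate_pi example (shapes illustrative);
    Phi matches the shape of the mode-0 factor matrix:

    >>> import numpy as np
    >>> M = ttb.ktensor([np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))])
    >>> X = M.full()
    >>> Pi = calculate_pi(X, M, 2, 0, 3)
    >>> calculate_phi(X, M, 2, 0, Pi, 1e-10).shape
    (3, 2)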
"""
if isinstance(Data, ttb.sptensor):
Phi = -np.ones((Data.shape[factorIndex], rank))
xsubs = Data.subs[:, factorIndex]
v = np.sum(Model.factor_matrices[factorIndex][xsubs, :] * Pi, axis=1)
wvals = Data.vals / np.maximum(v, epsilon)[:, None]
for r in range(rank):
Yr = accumarray(
xsubs,
np.squeeze(wvals * Pi[:, r][:, None]),
size=Data.shape[factorIndex],
)
Phi[:, r] = Yr
else:
Xn = Data.to_tenmat(np.array([factorIndex], order=Data.order), copy=False).data
V = Model.factor_matrices[factorIndex].dot(Pi.transpose())
W = Xn / np.maximum(V, epsilon)
Y = W.dot(Pi)
Phi = Y
return Phi
def tt_loglikelihood(
Data: Union[ttb.tensor, ttb.sptensor], Model: ttb.ktensor
) -> float:
"""
Compute log-likelihood of data with model.
Parameters
----------
Data:
Model:
Returns
-------
loglikelihood:
        - sum_i (m_i - x_i * log(m_i)), with i a multi-index across all
        tensor dimensions
Notes
-----
    We define 0*log(x) = 0 for any x, so entries where the data is zero
    contribute only their -m_i term to the log-likelihood.
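
    Examples
    --------
    A dense sketch (values illustrative); an all-ones rank-1 model of its own
    full tensor has log(m_i) = 0 everywhere after normalization, so the result
    is minus the sum of the weight-absorbed first factor:

    >>> import numpy as np
    >>> M = ttb.ktensor([np.ones((2, 1)), np.ones((2, 1))])
    >>> tt_loglikelihood(M.full(), M)
    -4.0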
"""
N = Data.ndims
assert isinstance(Model, ttb.ktensor), "Model must be a ktensor"
Model.normalize(weight_factor=0, normtype=1)
if isinstance(Data, ttb.sptensor):
xsubs = Data.subs
A = Model.factor_matrices[0][xsubs[:, 0], :]
for n in range(1, N):
A *= Model.factor_matrices[n][xsubs[:, n], :]
return float(
np.sum(Data.vals * np.log(np.sum(A, axis=1))[:, None])
- np.sum(Model.factor_matrices[0])
)
dX = Data.to_tenmat(np.array([1], order=Data.order), copy=False).data
dM = Model.to_tenmat(np.array([1], order=Model.order), copy=False).data
f = 0
    for i in range(dX.shape[0]):
        for j in range(dX.shape[1]):
            if dX[i, j] != 0:
                f += dX[i, j] * np.log(dM[i, j])
f -= np.sum(Model.factor_matrices[0])
return float(f)
def vectorize_for_mu(matrix: np.ndarray) -> np.ndarray:
"""Unravel matrix into vector.
Parameters
----------
matrix:
Returns
-------
matrix:
        Unraveled matrix with len(matrix.shape) == 1
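
    Examples
    --------
    >>> import numpy as np
    >>> vectorize_for_mu(np.array([[1, 2], [3, 4]]))
    array([1, 2, 3, 4])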
"""
return matrix.ravel()