Source code for groupedpaneldatamodels.models.ando_bai

# TODO
# - Try to remove np.squeeze arguments
# - Getting initial clusters is quite bad, may need to be improved
# - Cache np.arange in _get_clusters
# - Lambda returns all possible factors and not only the ones that are used, which may be confusing
# - Upgrade to superfast_lstsq, which is faster than lstsq

from numpy.linalg import lstsq
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from skglm import GeneralizedLinearEstimator
from skglm.penalties import SCAD

import numpy as np

# NOTE suppress FutureWarnings from the SCAD penalty in skglm
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)


###### Homogeneous Case ######
def _get_factors_initial(y, GF, T):
    """Extracts sum(GF) initial factors and loadings from y via PCA, ignoring x."""
    y_squeezed = np.squeeze(y).T
    pca = PCA(n_components=GF.sum())
    U = pca.fit_transform(y_squeezed) / pca.singular_values_
    F = np.sqrt(T) * U
    Lambda = F.T @ y_squeezed / T

    return F, Lambda
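

# Illustrative check (added for exposition, not part of the original module): dividing
# the principal-component scores by the singular values recovers orthonormal left
# singular vectors, so the factors built above satisfy the usual normalization
# F'F / T = I. A minimal sketch on random data, with hypothetical shapes:
def _demo_factor_normalization(T=30, N=12, r=2, seed=0):
    rng = np.random.default_rng(seed)
    Y = rng.standard_normal((T, N))  # stands in for the squeezed (T, N) panel
    pca = PCA(n_components=r)
    U = pca.fit_transform(Y) / pca.singular_values_  # orthonormal columns
    F = np.sqrt(T) * U
    assert np.allclose(F.T @ F / T, np.eye(r))
    return F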


def _get_factors(y, x, beta, g, G, GF, T):
    """Extracts GF[i] factors per group from the residuals y - x @ beta via group-wise PCA."""
    y_squeezed = np.squeeze(y - x @ beta).T

    F = np.zeros((T, GF.sum()))
    Lambda = np.zeros((GF.sum(), y_squeezed.shape[1]))

    for i in range(G):
        y_squeezed_partial = np.atleast_2d(np.squeeze(y[g == i] - x[g == i] @ beta).T)

        pca = PCA(n_components=GF[i])
        U = pca.fit_transform(y_squeezed_partial) / pca.singular_values_

        F_partial = np.sqrt(T) * U
        F[:, GF[:i].sum() : GF[: i + 1].sum()] = F_partial

        Lambda_partial = F_partial.T @ y_squeezed / T
        Lambda[GF[:i].sum() : GF[: i + 1].sum(), :] = Lambda_partial

    return F, Lambda


def _get_clusters_initial(Lambda, G, GF):
    """Assigns initial groups via KMeans on the loadings, retrying until every group has at least GF[i] + 1 members."""
    # FIXME rejection sampling over KMeans runs is crude, but it works
    while True:
        km = KMeans(n_clusters=G)
        g = km.fit_predict(Lambda.T)

        counts = np.bincount(g, minlength=G)
        if np.all(counts >= (GF + 1)):
            break

    return g


def _get_clusters(y, x, beta, Lambda, g, G, F, GF, N, T):
    """Reassigns each unit to the group whose factor structure fits it best, enforcing minimum group sizes."""
    y_star = y - x @ beta
    res = np.zeros((N, T, G))
    for i in range(G):
        # TODO check if this is correct
        # But I think so
        res[:, :, i] = (
            y_star.reshape(N, -1)
            - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )
    res_per_grouping = (res**2).sum(axis=1)
    g = res_per_grouping.argmin(axis=1)

    # Count size of each group
    counts = np.bincount(g, minlength=G)

    # Each group needs at least GF[i] + 1 members so that the group-wise PCA
    # with GF[i] components stays well-posed
    min_sizes = GF + 1

    # Greedily reassign units until every group meets its minimum size
    while np.any(counts < min_sizes):
        # Find the most deficient group; the while condition guarantees it is
        # short at least one unit
        target_group = np.argmin(counts - min_sizes)
        needed = min_sizes[target_group] - counts[target_group]

        # Find units not in this group
        non_target_indices = np.where(g != target_group)[0]

        # Sort by distance to the target group
        distances = res_per_grouping[non_target_indices, target_group]
        closest_indices = non_target_indices[np.argsort(distances)]

        # Try to reassign the closest units
        reassigned = 0
        for idx in closest_indices:
            source_group = g[idx]

            # Only reassign if the source group has enough units to spare
            if counts[source_group] > min_sizes[source_group]:
                g[idx] = target_group
                counts[source_group] -= 1
                counts[target_group] += 1
                reassigned += 1

                if reassigned >= needed:
                    break

        # If no unit could be reassigned, the constraints cannot be satisfied
        if reassigned == 0:
            raise RuntimeError("Cannot satisfy minimum group size constraints.")

    objective_value = res_per_grouping[np.arange(N), g].sum()

    return g, objective_value


def _estimate_beta_initial(y, x, K):
    """Estimates initial slopes by pooled OLS, ignoring the factor structure."""
    beta = lstsq(x.reshape(-1, K), y.reshape(-1, 1), rcond=None)[0]
    return beta


def _estimate_beta(y, x, g, GF, F, Lambda, K, N, T, G, kappa, gamma):
    """Estimates the homogeneous slopes from factor-adjusted outcomes, with an optional SCAD penalty."""
    res = np.zeros((N, T, G))
    for i in range(G):
        res[:, :, i] = (
            y.reshape(N, -1) - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )

    # FIXME the np.arange could be cached or something
    y_star = res[np.arange(N), :, g]
    if kappa == 0:
        beta = lstsq(x.reshape(-1, K), y_star.reshape(-1, 1), rcond=None)[0]
        return beta

    beta = np.atleast_2d(
        GeneralizedLinearEstimator(penalty=SCAD(alpha=kappa, gamma=gamma))
        .fit(x.reshape(-1, K), np.squeeze(y_star.reshape(-1, 1)))
        .coef_
    ).T
    return beta
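

# Illustrative sketch (added for exposition, not part of the original module): with
# kappa > 0 the slope update above is a SCAD-penalized linear fit on the stacked
# (N * T, K) data. The same skglm call on simulated data, where the true coefficient
# on the second regressor is zero and should be shrunk towards zero:
def _demo_scad_fit(kappa=0.1, gamma=3.7, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal((200, 3))
    y = X @ np.array([1.0, 0.0, -0.5]) + 0.1 * rng.standard_normal(200)
    est = GeneralizedLinearEstimator(penalty=SCAD(alpha=kappa, gamma=gamma)).fit(X, y)
    return est.coef_  # shape (3,)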


def _grouped_interactive_effects_iteration(y, x, G, GF, N, T, K, kappa, gamma, tol, max_iterations):
    """Runs one randomly initialized GIFE estimation until the objective stabilizes."""
    last_objective_value = np.inf
    F, Lambda = _get_factors_initial(y, GF, T)
    g = _get_clusters_initial(Lambda, G, GF)
    beta = _estimate_beta_initial(y, x, K)
    F, Lambda = _get_factors(y, x, beta, g, G, GF, T)

    # Convergence is declared once the last `obj_val_store` objective values
    # differ by less than `tol`
    obj_val_store = 5
    objective_values = np.zeros(obj_val_store)

    for i in range(max_iterations):
        g, objective_value = _get_clusters(y, x, beta, Lambda, g, G, F, GF, N, T)
        F, Lambda = _get_factors(y, x, beta, g, G, GF, T)
        beta = _estimate_beta(y, x, g, GF, F, Lambda, K, N, T, G, kappa, gamma)

        objective_values[i % obj_val_store] = objective_value
        last_objective_value = objective_value
        if objective_values.max() - objective_values.min() < tol:
            break

    return beta, g, F, Lambda, last_objective_value


def _compute_resid(y, x, beta, g, F, Lambda, G, GF, N, T):
    """Computes the residuals for the GIFE model"""
    res = np.zeros((N, T, G))
    for i in range(G):
        # Reshape after subtracting x @ beta, since (N, T) minus (N, T, 1)
        # does not broadcast as intended
        res[:, :, i] = (
            (y - x @ beta).reshape(N, -1)
            - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )

    return res[np.arange(N), :, g].reshape(N, T)


def grouped_interactive_effects(
    y, x, G, GF=None, kappa=0.0, gamma=3.7, tol=1e-6, gife_iterations=100, max_iterations=1000
):
    """
    Estimates the Grouped Interactive Fixed Effects (GIFE) model with homogeneous slopes.

    This function performs the GIFE estimation process across multiple random initializations
    and returns the solution with the lowest objective value. The model assumes that slope
    coefficients are shared across groups (homogeneous).

    Parameters:
        y (np.ndarray): A 3D array of shape (N, T, 1) containing the dependent variable.
        x (np.ndarray): A 3D array of shape (N, T, K) containing the independent variables.
        G (int): The number of groups.
        GF (array-like, optional): The number of factors per group. If None, defaults to one factor per group.
        kappa (float, optional): Regularization strength for the SCAD penalty. Default is 0.0 (no penalty).
        gamma (float, optional): SCAD shape parameter. Default is 3.7.
        tol (float, optional): Convergence tolerance. Default is 1e-6.
        gife_iterations (int, optional): Number of random initializations of the GIFE estimator. Default is 100.
        max_iterations (int, optional): Maximum number of iterations within each GIFE run. Default is 1000.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float, np.ndarray]:
            - best_beta: Estimated homogeneous coefficients, shape (K, 1)
            - best_g: Group assignments for each unit, shape (N,)
            - best_F: Estimated common factors, shape (T, sum(GF))
            - best_Lambda: Estimated factor loadings, shape (sum(GF), N)
            - best_objective_value: Final value of the objective function
            - resid: Residuals of the model, shape (N, T)
    """
    N, T, K = x.shape
    if GF is None:
        GF = np.array([1] * G)
    else:
        GF = np.array(GF)  # Ensure GF is an np.ndarray

    best_objective_value = np.inf
    best_g = None
    best_beta = None
    best_F = None
    best_Lambda = None

    # FIXME Lambda contains all sum(GF) factors, not only the ones used by each group
    for _ in range(gife_iterations):
        beta, g, F, Lambda, objective_value = _grouped_interactive_effects_iteration(
            y, x, G, GF, N, T, K, kappa, gamma, tol, max_iterations
        )
        if objective_value < best_objective_value:
            best_objective_value = objective_value
            best_g = g
            best_beta = beta
            best_F = F
            best_Lambda = Lambda

    resid = _compute_resid(y, x, best_beta, best_g, best_F, best_Lambda, G, GF, N, T)

    return best_beta, best_g, best_F, best_Lambda, best_objective_value, resid
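

# Example usage (illustrative sketch, not part of the original module): estimate a
# two-group homogeneous GIFE model on simulated data with one factor per group.
# Shapes follow the docstring above: y is (N, T, 1) and x is (N, T, K); the data
# below has no true factor structure and is only meant to exercise the API.
def _demo_grouped_interactive_effects(seed=0):
    rng = np.random.default_rng(seed)
    N, T, K = 60, 30, 2
    x_demo = rng.standard_normal((N, T, K))
    beta_true = np.array([[1.0], [-0.5]])
    y_demo = x_demo @ beta_true + rng.standard_normal((N, T, 1))
    beta_hat, g_hat, F_hat, Lambda_hat, obj, resid = grouped_interactive_effects(
        y_demo, x_demo, G=2, gife_iterations=5
    )
    return beta_hat, g_hat, obj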


###### Heterogeneous Case ######
# NOTE this is a copy of the homogeneous case, but with some modifications


def _get_factors_hetrogeneous(y, x, beta, g, G, GF, T, N):
    """Extracts GF[i] factors per group using the group-specific slopes beta[:, i]."""
    # NOTE the N argument is not needed here, I believe
    F = np.zeros((T, GF.sum()))
    Lambda = np.zeros((GF.sum(), y.shape[0]))

    for i in range(G):
        y_squeezed_partial = np.atleast_2d(np.squeeze(y[g == i] - x[g == i] @ beta[:, i : i + 1]).T)

        pca = PCA(n_components=GF[i])
        U = pca.fit_transform(y_squeezed_partial) / pca.singular_values_

        F_partial = np.sqrt(T) * U
        F[:, GF[:i].sum() : GF[: i + 1].sum()] = F_partial

        # NOTE that here the correct (group-specific) beta is used
        y_squeezed = np.squeeze(y - x @ beta[:, i : i + 1]).T
        Lambda_partial = F_partial.T @ y_squeezed / T
        Lambda[GF[:i].sum() : GF[: i + 1].sum(), :] = Lambda_partial

    return F, Lambda


def _get_clusters_hetrogeneous(y, x, beta, Lambda, g, G, F, GF, N, T):
    """Reassigns units to groups using group-specific slopes, enforcing minimum group sizes."""
    res = np.zeros((N, T, G))
    for i in range(G):
        # TODO check if this is correct
        # But I think so
        res[:, :, i] = (
            y.reshape(N, -1)
            - x @ beta[:, i]
            - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )
    res_per_grouping = (res**2).sum(axis=1)
    g = res_per_grouping.argmin(axis=1)

    # Count size of each group
    counts = np.bincount(g, minlength=G)

    # Each group needs at least GF[i] + 1 members so that the group-wise PCA
    # with GF[i] components stays well-posed
    min_sizes = GF + 1

    # Greedily reassign units until every group meets its minimum size
    while np.any(counts < min_sizes):
        # Find the most deficient group; the while condition guarantees it is
        # short at least one unit
        target_group = np.argmin(counts - min_sizes)
        needed = min_sizes[target_group] - counts[target_group]

        # Find units not in this group
        non_target_indices = np.where(g != target_group)[0]

        # Sort by distance to the target group
        distances = res_per_grouping[non_target_indices, target_group]
        closest_indices = non_target_indices[np.argsort(distances)]

        # Try to reassign the closest units
        reassigned = 0
        for idx in closest_indices:
            source_group = g[idx]

            # Only reassign if the source group has enough units to spare
            if counts[source_group] > min_sizes[source_group]:
                g[idx] = target_group
                counts[source_group] -= 1
                counts[target_group] += 1
                reassigned += 1

                if reassigned >= needed:
                    break

        # If no unit could be reassigned, the constraints cannot be satisfied
        if reassigned == 0:
            raise RuntimeError("Cannot satisfy minimum group size constraints.")

    objective_value = res_per_grouping[np.arange(N), g].sum()

    return g, objective_value


def _estimate_beta_initial_hetrogeneous(y, x, g, K, G):
    """Estimates initial group-specific slopes by group-wise OLS, ignoring the factor structure."""
    beta = np.zeros((K, G))
    for i in range(G):
        y_partial = y[g == i]
        x_partial = x[g == i]
        beta[:, i] = np.squeeze(lstsq(x_partial.reshape(-1, K), y_partial.reshape(-1, 1), rcond=None)[0])
    return beta


def _estimate_beta_hetrogeneous(y, x, g, GF, F, Lambda, K, N, T, G, kappa, gamma):
    """Estimates group-specific slopes from factor-adjusted outcomes, with an optional SCAD penalty."""
    beta = np.zeros((K, G))
    res = np.zeros((N, T, G))
    for i in range(G):
        res[:, :, i] = (
            y.reshape(N, -1) - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )

    y_star = res[np.arange(N), :, g]
    if kappa == 0:
        for i in range(G):
            beta[:, i] = np.squeeze(lstsq(x[g == i].reshape(-1, K), y_star[g == i].reshape(-1, 1), rcond=None)[0])
        return beta

    for i in range(G):
        beta[:, i] = (
            GeneralizedLinearEstimator(penalty=SCAD(alpha=kappa, gamma=gamma))
            .fit(x[g == i].reshape(-1, K), np.squeeze(y_star[g == i].reshape(-1, 1)))
            .coef_
        )
    return beta


def _grouped_interactive_effects_iteration_hetrogeneous(y, x, G, GF, N, T, K, kappa, gamma, tol, max_iterations):
    """Runs one randomly initialized heterogeneous GIFE estimation until the objective stabilizes."""
    last_objective_value = np.inf
    F, Lambda = _get_factors_initial(y, GF, T)
    g = _get_clusters_initial(Lambda, G, GF)
    beta = _estimate_beta_initial_hetrogeneous(y, x, g, K, G)
    F, Lambda = _get_factors_hetrogeneous(y, x, beta, g, G, GF, T, N)

    # Convergence is declared once the last `obj_val_store` objective values
    # differ by less than `tol`
    obj_val_store = 5
    objective_values = np.zeros(obj_val_store)

    for i in range(max_iterations):
        g, objective_value = _get_clusters_hetrogeneous(y, x, beta, Lambda, g, G, F, GF, N, T)
        F, Lambda = _get_factors_hetrogeneous(y, x, beta, g, G, GF, T, N)
        beta = _estimate_beta_hetrogeneous(y, x, g, GF, F, Lambda, K, N, T, G, kappa, gamma)

        objective_values[i % obj_val_store] = objective_value
        last_objective_value = objective_value
        if objective_values.max() - objective_values.min() < tol:
            break

    return beta, g, F, Lambda, last_objective_value


def _reorder_groups(g, beta, F):
    """Reorders the groups based on the first slope coefficient (beta[0, :])"""
    # FIXME this is not the best way to do this, but it works for now
    # NOTE F[:, mapping] is only a valid reordering when each group has exactly
    # one factor (GF[i] == 1); with more factors per group the column blocks
    # would have to be permuted jointly, and Lambda reordered to match
    mapping = np.argsort(beta[0, :])
    ordered_g = np.argsort(mapping)[g]
    ordered_beta = beta[:, mapping]
    ordered_F = F[:, mapping]
    return ordered_g, ordered_beta, ordered_F


def _compute_resid_hetrogeneous(y, x, beta, g, F, Lambda, G, GF, N, T):
    """Computes the residuals for the heterogeneous GIFE model"""
    res = np.zeros((N, T, G))
    for i in range(G):
        res[:, :, i] = (
            y.reshape(N, -1)
            - x @ beta[:, i]
            - (F[:, GF[:i].sum() : GF[: i + 1].sum()] @ Lambda[GF[:i].sum() : GF[: i + 1].sum(), :]).T
        )

    return res[np.arange(N), :, g].reshape(N, T)


def grouped_interactive_effects_hetrogeneous(
    y: np.ndarray,
    x: np.ndarray,
    G: int,
    GF=None,
    kappa: float = 0.0,
    gamma: float = 3.7,
    tol: float = 1e-6,
    gife_iterations: int = 100,
    max_iter: int = 1000,
):
    """
    Estimates the Grouped Interactive Fixed Effects (GIFE) model with heterogeneous slopes.

    This function runs the GIFE estimation procedure multiple times and selects the best
    solution based on the objective value. The model allows for heterogeneous slope
    coefficients across groups.

    Parameters:
        y (np.ndarray): A 3D array of shape (N, T, 1) containing the dependent variable.
        x (np.ndarray): A 3D array of shape (N, T, K) containing the independent variables.
        G (int): The number of groups.
        GF (ArrayLike, optional): The number of factors per group. If None, defaults to one factor per group.
        kappa (float, optional): Regularization strength for the SCAD penalty. Default is 0.0 (no penalty).
        gamma (float, optional): SCAD shape parameter. Default is 3.7.
        tol (float, optional): Convergence tolerance. Default is 1e-6.
        gife_iterations (int, optional): Number of random initializations of the GIFE estimator. Default is 100.
        max_iter (int, optional): Maximum number of iterations within each GIFE run. Default is 1000.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float, np.ndarray]:
            - ordered_beta: Estimated heterogeneous coefficients, shape (K, G)
            - ordered_g: Group assignments for each unit, shape (N,)
            - ordered_F: Estimated common factors, shape (T, sum(GF))
            - best_Lambda: Estimated factor loadings, shape (sum(GF), N)
            - best_objective_value: Final value of the objective function
            - resid: Residuals of the model, shape (N, T)
    """
    N, T, K = x.shape
    if GF is None:
        GF = np.array([1] * G)
    else:
        GF = np.array(GF)  # Ensure GF is an np.ndarray

    best_objective_value = np.inf
    best_g = None
    best_beta = None
    best_F = None
    best_Lambda = None

    # FIXME Lambda contains all sum(GF) factors, not only the ones used by each group
    for _ in range(gife_iterations):
        beta, g, F, Lambda, objective_value = _grouped_interactive_effects_iteration_hetrogeneous(
            y, x, G, GF, N, T, K, kappa, gamma, tol, max_iter
        )
        if objective_value < best_objective_value:
            best_objective_value = objective_value
            best_g = g
            best_beta = beta
            best_F = F
            best_Lambda = Lambda

    resid = _compute_resid_hetrogeneous(y, x, best_beta, best_g, best_F, best_Lambda, G, GF, N, T)
    ordered_g, ordered_beta, ordered_F = _reorder_groups(best_g, best_beta, best_F)

    return ordered_beta, ordered_g, ordered_F, best_Lambda, best_objective_value, resid
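

# Example usage (illustrative sketch, not part of the original module): estimate a
# two-group heterogeneous GIFE model on simulated data where the two groups have
# different slopes. As above, the simulated data has no true factor structure and
# only exercises the API.
def _demo_grouped_interactive_effects_hetrogeneous(seed=0):
    rng = np.random.default_rng(seed)
    N, T, K = 60, 30, 2
    g_true = np.repeat([0, 1], N // 2)
    beta_true = np.array([[1.0, -1.0], [0.5, -0.5]])  # (K, G)
    x_demo = rng.standard_normal((N, T, K))
    # y[i] = x[i] @ beta of unit i's group, plus noise; final shape (N, T, 1)
    y_demo = np.einsum("ntk,kn->nt", x_demo, beta_true[:, g_true])[:, :, None]
    y_demo = y_demo + rng.standard_normal((N, T, 1))
    beta_hat, g_hat, F_hat, Lambda_hat, obj, resid = grouped_interactive_effects_hetrogeneous(
        y_demo, x_demo, G=2, gife_iterations=5
    )
    return beta_hat, g_hat, obj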