# Source code for statsmodels.stats.knockoff_regeffects

import numpy as np
import statsmodels.api as sm


class RegressionEffects(object):
    """
    Abstract interface for the effect-size calculators of RegressionFDR.

    A concrete subclass has to supply a method named 'stats' which
    receives a RegressionFDR object and hands back one effect size per
    model coefficient.  The larger a statistic is, the stronger the
    evidence that the corresponding effect is real.

    Knockoff effect sizes come from fitting the regression model to an
    extended design matrix [X X'], in which X' has the same shape as
    the actual design matrix X.  X' is constructed so that none of its
    columns has a true association with the dependent variable of the
    regression.  When X has p columns, the effect size of covariate j
    compares the strength of the estimated association for coefficient
    j against that for coefficient p+j.
    """

    def stats(self, parent):
        """
        Return effect sizes for the coefficients of `parent`.

        The base class provides no implementation: calling this method
        on it always raises NotImplementedError.

        Parameters
        ----------
        parent : RegressionFDR
            The RegressionFDR instance the effect sizes are computed
            for.

        Raises
        ------
        NotImplementedError
            Always, unless the method is overridden by a subclass.
        """
        raise NotImplementedError()


class CorrelationEffects(RegressionEffects):
    """
    Marginal correlation effect sizes for FDR control.

    Notes
    -----
    This class implements the marginal correlation approach to
    constructing test statistics for a knockoff analysis, as
    described under (1) in section 2.2 of the Barber and Candes
    paper.
    """

    def stats(self, parent):
        """
        Compare each covariate's inner product with the response to
        that of its knockoff counterpart.

        Parameters
        ----------
        parent : RegressionFDR
            The RegressionFDR instance to which this effect size is
            applied.  Its `exog1`, `exog2` and `endog` attributes are
            read.

        Returns
        -------
        array_like
            ``|exog1' endog| - |exog2' endog|``, evaluated elementwise.
        """
        # Inner product of every column of each design block with endog.
        s1 = np.dot(parent.exog1.T, parent.endog)
        s2 = np.dot(parent.exog2.T, parent.endog)
        # Positive when the exog1 column is more strongly associated
        # with endog than the matching exog2 column.
        return np.abs(s1) - np.abs(s2)


class ForwardEffects(RegressionEffects):
    """
    Forward selection effect sizes for FDR control.

    Parameters
    ----------
    pursuit : bool
        If True, 'basis pursuit' is used, which amounts to performing
        a full regression at each selection step to adjust the working
        residual vector.  If False (the default), the residual is
        adjusted by regressing out each selected variable marginally.
        Setting pursuit=True will be considerably slower, but may give
        better results when exog is not orthogonal.

    Notes
    -----
    This class implements the forward selection approach to
    constructing test statistics for a knockoff analysis, as
    described under (5) in section 2.2 of the Barber and Candes
    paper.
    """

    def __init__(self, pursuit=False):
        # Default matches the documented behavior (marginal adjustment).
        self.pursuit = pursuit

    def stats(self, parent):
        """
        Rank the columns of the design by forward selection order and
        compare the two halves of the design column by column.

        Parameters
        ----------
        parent : RegressionFDR
            The RegressionFDR instance to which this effect size is
            applied.  Its `exog` and `endog` attributes are read;
            neither is modified.

        Returns
        -------
        ndarray
            For each pair (column j, column j + nvar // 2) of
            `parent.exog`, the larger of the two selection scores,
            signed positive when column j scored higher and negative
            when column j + nvar // 2 did.  A column's score is
            ``nvar - 1`` if it was selected first, decreasing by one
            for each later selection step.
        """
        exog = parent.exog
        nvar = exog.shape[1]

        # Working residual; copied so the in-place updates below never
        # touch parent.endog.
        rv = parent.endog.copy()

        # Columns not yet selected, tagged with their original index.
        remaining = [(i, exog[:, i]) for i in range(nvar)]
        z = np.empty(nvar)
        past = []

        for step in range(nvar):
            # Pick the remaining column with the largest absolute inner
            # product with the current residual.
            dp = np.abs([np.dot(rv, col) for _, col in remaining])
            j = np.argmax(dp)
            idx, x = remaining.pop(j)

            # Earlier selections receive larger scores.
            z[idx] = nvar - step - 1

            if self.pursuit:
                # x is a view into parent.exog; copy before the in-place
                # orthogonalization so the caller's design matrix is
                # left untouched.
                x = x.copy()
                for v in past:
                    x -= np.dot(x, v) * v
                past.append(x)

            # NOTE(review): subtracting dot(a, b) * b is an exact
            # projection only when b has unit norm.  After the
            # orthogonalization above x is generally no longer unit
            # norm; confirm whether renormalizing is intended.
            rv -= np.dot(rv, x) * x

        half = nvar // 2
        z1 = z[0:half]
        z2 = z[half:]
        return np.maximum(z1, z2) * np.sign(z1 - z2)


class OLSEffects(RegressionEffects):
    """
    OLS regression for knockoff analysis.

    Notes
    -----
    This class implements the ordinary least squares regression
    approach to constructing test statistics for a knockoff analysis,
    as described under (2) in section 2.2 of the Barber and Candes
    paper.
    """

    def stats(self, parent):
        """
        Fit OLS to the full design and compare coefficient magnitudes
        between the first and second half of the parameter vector.

        Parameters
        ----------
        parent : RegressionFDR
            The RegressionFDR instance to which this effect size is
            applied.  Its `endog` and `exog` attributes are passed to
            ``sm.OLS``.

        Returns
        -------
        ndarray
            ``|params[:q]| - |params[q:]|`` with
            ``q = len(params) // 2``.
        """
        model = sm.OLS(parent.endog, parent.exog)
        result = model.fit()
        # Plain ndarray so the subtraction below is positional; a
        # label-indexed params object would be aligned by label instead.
        params = np.asarray(result.params)
        q = len(params) // 2
        return np.abs(params[0:q]) - np.abs(params[q:])


class RegModelEffects(RegressionEffects):
    """
    Use any regression model for Regression FDR analysis.

    Parameters
    ----------
    model_cls : class
        Any model with appropriate fit or fit_regularized
        functions
    regularized : bool
        If True, use fit_regularized to fit the model
    model_kws : dict
        Keywords passed to model initializer
    fit_kws : dict
        Dictionary of keyword arguments for fit or fit_regularized
    """

    def __init__(self, model_cls, regularized=False, model_kws=None,
                 fit_kws=None):
        self.model_cls = model_cls
        self.regularized = regularized
        # None stands for "no extra keywords".
        self.model_kws = model_kws if model_kws is not None else {}
        self.fit_kws = fit_kws if fit_kws is not None else {}

    def stats(self, parent):
        """
        Fit the configured model to the full design and compare
        coefficient magnitudes between the first and second half of
        the parameter vector.

        Parameters
        ----------
        parent : RegressionFDR
            The RegressionFDR instance to which this effect size is
            applied.  Its `endog` and `exog` attributes are passed to
            `model_cls`.

        Returns
        -------
        ndarray
            ``|params[:q]| - |params[q:]|`` with
            ``q = len(params) // 2``.
        """
        model = self.model_cls(parent.endog, parent.exog, **self.model_kws)
        fit = model.fit_regularized if self.regularized else model.fit
        # Plain ndarray so the subtraction below is positional; a
        # label-indexed params object would be aligned by label instead.
        params = np.asarray(fit(**self.fit_kws).params)
        q = len(params) // 2
        return np.abs(params[0:q]) - np.abs(params[q:])