        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0][::-1]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == "lasso":
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs = np.resize(coefs, (n_iter + add_features, n_features))
                coefs[-add_features:] = 0
                alphas = np.resize(alphas, n_iter + add_features)
                alphas[-add_features:] = 0
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == "lasso":
            # handle the case when idx is not of length 1: downdate the
            # Cholesky factor once per dropped variable
            for ii in idx:
                arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)

            n_active -= 1
            # remove the dropped variables from the active set
            drop_idx = [active.pop(ii) for ii in idx]

            if Gram is None:
                # propagate dropped variable
                for ii in idx:
                    for i in range(ii, n_active):
                        X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                        # keep `indices` in sync with the swapped columns of X
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]

                # TODO: this could be updated incrementally
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for ii in idx:
                    for i in range(ii, n_active):
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]
                        Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                        Gram[:, i], Gram[:, i + 1] = swap(
                            Gram[:, i], Gram[:, i + 1]
                        )

                # Cov_n = Cov_j + x_j * X + increment(betas)
                # TODO: will this still work with multiple drops?

                # recompute covariance; this could probably be done better.
                # It would otherwise be wrong, as Xy is not swapped along
                # with the rest of the variables.
                # TODO: this could be updated incrementally
                temp = Cov_copy[drop_idx] - np.dot(Gram_copy[drop_idx], coef)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.0)  # just to maintain size
            if verbose > 1:
                print(
                    "%s\t\t%s\t\t%s\t\t%s\t\t%s"
                    % (n_iter, "", drop_idx, n_active, abs(temp))
                )

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[: n_iter + 1]
        coefs = coefs[: n_iter + 1]

        if return_n_iter:
            return alphas, active, coefs.T, n_iter
        else:
            return alphas, active, coefs.T
    else:
        if return_n_iter:
            return alpha, active, coef, n_iter
        else:
            return alpha, active, coef
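
# A minimal usage sketch for the path function above, assuming the public
# `lars_path` signature exported by scikit-learn (the data below is
# illustrative only):
#
#   import numpy as np
#   from sklearn.linear_model import lars_path
#
#   X = np.array([[-1.0, 1.0], [0.0, 0.0], [1.0, 1.0]])
#   y = np.array([-1.1111, 0.0, -1.1111])
#   alphas, active, coefs = lars_path(X, y, method="lasso")
#   # `coefs` has one column per knot of the path; `active` holds the
#   # indices of the active set in the order the features entered the model.
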
###############################################################################
# Estimator classes


class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
    """Least Angle Regression model a.k.a. LAR.

    Read more in the :ref:`User Guide <least_angle_regression>`.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    verbose : bool or int, default=False
        Sets the verbosity amount.

    precompute : bool, 'auto' or array-like, default='auto'
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.

    n_nonzero_coefs : int, default=500
        Target number of non-zero coefficients. Use ``np.inf`` for no limit.

    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the ``tol`` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.

    copy_X : bool, default=True
        If ``True``, X will be copied; else, it may be overwritten.

    fit_path : bool, default=True
        If True the full path is stored in the ``coef_path_`` attribute.
        If you compute the solution for a large problem or many targets,
        setting ``fit_path`` to ``False`` will lead to a speedup, especially
        with a small alpha.

    jitter : float, default=None
        Upper bound on a uniform noise parameter to be added to the
        `y` values, to satisfy the model's assumption of
        one-at-a-time computations. Might help with stability.

        .. versionadded:: 0.23

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for jittering. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.

        .. versionadded:: 0.23

    Attributes
    ----------
    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the
        number of nodes in the path with ``alpha >= alpha_min``, whichever
        is smaller. If this is a list of array-like, the length of the outer
        list is `n_targets`.

    active_ : list of shape (n_alphas,) or list of such lists
        Indices of active variables at the end of the path.
        If this is a list of list, the length of the outer list is
        `n_targets`.

    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \
            of such arrays
        The varying values of the coefficients along the path. It is not
        present if the ``fit_path`` parameter is ``False``. If this is a list
        of array-like, the length of the outer list is `n_targets`.

    coef_ : array-like of shape (n_features,) or (n_targets, n_features)
        Parameter vector (w in the formulation formula).

    intercept_ : float or array-like of shape (n_targets,)
        Independent term in decision function.

    n_iter_ : array-like or int
        The number of iterations taken by lars_path to find the
        grid of alphas for each target.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    lars_path : Compute Least Angle Regression or Lasso path using
        LARS algorithm.
    LarsCV : Cross-validated Least Angle Regression model.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Examples
    --------
    >>> from sklearn import linear_model
    >>> reg = linear_model.Lars(n_nonzero_coefs=1)
    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
    Lars(n_nonzero_coefs=1)
    >>> print(reg.coef_)
    [ 0. -1.11...]
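
    With the default ``fit_path=True`` the full coefficient path and the
    number of path steps are also available (outputs shown for the toy
    problem above):

    >>> reg.coef_path_.shape
    (2, 2)
    >>> reg.n_iter_
    1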
""" _parameter_constraints: dict = { "fit_intercept": ["boolean"], "verbose": ["verbose"], "precompute": ["boolean", StrOptions({"auto"}), np.ndarray, Hidden(None)], "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left")], "eps": [Interval(Real, 0, None, closed="left")], "copy_X": ["boolean"], "fit_path": ["boolean"], "jitter": [Interval(Real, 0, None, closed="left"), None], "random_state": ["random_state"], } method = "lar" positive = False def __init__( self, *, fit_intercept=True, verbose=False, precompute="auto", n_nonzero_coefs=500, eps=np.finfo(float).eps, copy_X=True, fit_path=True, jitter=None, random_state=None, ): self.fit_intercept = fit_intercept self.verbose = verbose self.precompute = precompute self.n_nonzero_coefs = n_nonzero_coefs self.eps = eps self.copy_X = copy_X self.fit_path = fit_path self.jitter = jitter self.random_state = random_state @staticmethod def _get_gram(precompute, X, y): if (not hasattr(precompute, "__array__")) and ( (precompute is True) or (precompute == "auto" and X.shape[0] > X.shape[1]) or (precompute == "auto" and y.shape[1] > 1) ): precompute = np.dot(X.T, X) return precompute def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): """Auxiliary method to fit the model using X, y as training data""" n_features = X.shape[1] X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=self.fit_intercept, copy=self.copy_X ) if y.ndim == 1: y = y[:, np.newaxis] n_targets = y.shape[1] Gram = self._get_gram(self.precompute, X, y) self.alphas_ = [] self.n_iter_ = [] self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype) if fit_path: self.active_ = [] self.coef_path_ = [] for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, active, coef_path, n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, copy_Gram=True, alpha_min=alpha, method=self.method, verbose=max(0, self.verbose - 1), max_iter=max_iter, eps=self.eps, return_path=True, return_n_iter=True, positive=self.positive, ) self.alphas_.append(alphas) self.active_.append(active) self.n_iter_.append(n_iter_) self.coef_path_.append(coef_path) self.coef_[k] = coef_path[:, -1] if n_targets == 1: self.alphas_, self.active_, self.coef_path_, self.coef_ = [ a[0] for a in (self.alphas_, self.active_, self.coef_path_, self.coef_) ] self.n_iter_ = self.n_iter_[0] else: for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, _, self.coef_[k], n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, copy_Gram=True, alpha_min=alpha, method=self.method, verbose=max(0, self.verbose - 1), max_iter=max_iter, eps=self.eps, return_path=False, return_n_iter=True, positive=self.positive, ) self.alphas_.append(alphas) self.n_iter_.append(n_iter_) if n_targets == 1: self.alphas_ = self.alphas_[0] self.n_iter_ = self.n_iter_[0] self._set_intercept(X_offset, y_offset, X_scale) return self @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, Xy=None): """Fit the model using X, y as training data. Parameters ---------- X : array-like of shape (n_samples, n_features) Training data. y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values. Xy : array-like of shape (n_features,) or (n_features, n_targets), \ default=None Xy = np.dot(X.T, y) that can be precomputed. It is useful only when the Gram matrix is precomputed. Returns ------- self : object Returns an instance of self. 
""" X, y = validate_data( self, X, y, force_writeable=True, y_numeric=True, multi_output=True ) alpha = getattr(self, "alpha", 0.0) if hasattr(self, "n_nonzero_coefs"): alpha = 0.0 # n_nonzero_coefs parametrization takes priority max_iter = self.n_nonzero_coefs else: max_iter = self.max_iter if self.jitter is not None: rng = check_random_state(self.random_state) noise = rng.uniform(high=self.jitter, size=len(y)) y = y + noise self._fit( X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path, Xy=Xy, ) return self class LassoLars(Lars): """Lasso model fit with Least Angle Regression a.k.a. Lars. It is a Linear Model trained with an L1 prior as regularizer. The optimization objective for Lasso is:: (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1 Read more in the :ref:`User Guide `. Parameters ---------- alpha : float, default=1.0 Constant that multiplies the penalty term. Defaults to 1.0. ``alpha = 0`` is equivalent to an ordinary least square, solved by :class:`LinearRegression`. For numerical reasons, using ``alpha = 0`` with the LassoLars object is not advised and you should prefer the LinearRegression object. fit_intercept : bool, default=True Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). verbose : bool or int, default=False Sets the verbosity amount. precompute : bool, 'auto' or array-like, default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. max_iter : int, default=500 Maximum number of iterations to perform. eps : float, default=np.finfo(float).eps The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. fit_path : bool, default=True If ``True`` the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha. positive : bool, default=False Restrict coefficients to be >= 0. Be aware that you might want to remove fit_intercept which is set True by default. Under the positive restriction the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha. Only coefficients up to the smallest alpha value (``alphas_[alphas_ >