Source code for tropea_clustering._internal.onion_smooth.first_classes
"""
Contains the classes used for storing parameters and system states.
"""
# Author: Becchi Matteo <bechmath@gmail.com>
from dataclasses import asdict, dataclass
import numpy as np
[docs]
@dataclass
class StateUni:
"""
Represents a unidimensional state as a Gaussian.
All the parameters and information on the Gaussian states corresponding
to the different clusters are stored within this class. The attributes can
be acessed using the get_attributes() method.
Parameters
----------
mean : float
Mean of the Gaussian.
sigma : float
Rescaled standard deviation of the Gaussian.
area : float
Area below the Gaussian.
r_2 : float
Coefficient of determination of the Gaussian fit.
Attributes
----------
peak : float
Maximum value of the Gaussian.
perc : float
Fraction of data points classified in the state.
th_inf : ndarray of shape (2,)
_th_inf[0] stores the lower threshold of the state. Considering the
Gaussian states oreder with increasing values of the mean, this is the
intercsection point (if exists) with the Gaussian before. If there is
no intersection, it is the weighted average between the two means.
The two cases are distinguished by the value of _th_inf[1], which is
"0" in the first case, "1" in the second.
th_sup : ndarray of shape (2,)
_th_sup[0] stores the upper threshold of the state. Considering the
Gaussian states oreder with increasing values of the mean, this is the
intercsection point (if exists) with the Gaussian after. If there is
no intersection, it is the weighted average between the two means.
The two cases are distinguished by the value of _th_sup[1], which is
"0" in the first case, "1" in the second.
"""
mean: float
sigma: float
area: float
r_2: float
perc: float = 0.0
peak: float | None = None
th_inf: list[float] | None = None
th_sup: list[float] | None = None
def __post_init__(self):
if self.peak is None:
self.peak = self.area / self.sigma / np.sqrt(np.pi)
if self.th_inf is None:
self.th_inf = [self.mean - 2.0 * self.sigma, -1]
if self.th_sup is None:
self.th_sup = [self.mean + 2.0 * self.sigma, -1]
def _build_boundaries(self, number_of_sigmas: float):
"""
Sets the thresholds to classify the data windows inside the state.
Parameters
----------
number of sigmas : float
How many sigmas the thresholds are far from the mean.
"""
self.th_inf = [self.mean - number_of_sigmas * self.sigma, -1]
self.th_sup = [self.mean + number_of_sigmas * self.sigma, -1]
[docs]
def get_attributes(self):
"""
Returns a dictionary containing the attributes of the state.
The attributes "th_inf" and "th_sup" are returned as a single ndarray
with the label "th".
Returns
-------
attr_list : dict
"""
attr_list = asdict(self)
return attr_list
[docs]
@dataclass
class StateMulti:
    """
    Represents a multidimensional state as a factorized Gaussian.

    All the parameters and information on the factorized Gaussian states
    corresponding to the different clusters are stored within this class.
    The attributes can be accessed using the get_attributes() method.

    Parameters
    ----------
    mean : np.ndarray of shape (dim,)
        Mean of the Gaussians.
    covariance : np.ndarray of shape (dim, dim)
        Covariance matrix of the Gaussians.
    log_likelihood : float
        log_likelihood of the data under the fitted Gaussian.
    number_of_sigmas : float
        Multiplier used by get_boundaries() to scale the size of the
        state's boundary.

    Attributes
    ----------
    perc : float
        Fraction of data points classified in this state. Defaults to 0.0.
    """

    mean: np.ndarray
    covariance: np.ndarray
    log_likelihood: float
    number_of_sigmas: float
    perc: float = 0.0

    def get_boundaries(self) -> tuple[float, float, float]:
        """
        Compute the ellipse that bounds a two-dimensional state.

        The covariance matrix is diagonalized and each eigenvalue is scaled
        by ``number_of_sigmas**2 * dim`` before taking the square root.

        Returns
        -------
        width : float
            Full length of the ellipse axis associated with the first
            (smallest) eigenvalue of the covariance matrix.
        height : float
            Full length of the ellipse axis associated with the second
            (largest) eigenvalue of the covariance matrix.
        angle : float
            Angle, in degrees, between the first coordinate axis and the
            eigenvector associated with the first eigenvalue.

        Raises
        ------
        ValueError
            If the covariance matrix is not 2x2.
        """
        n_dim = self.covariance.shape[0]
        # Exactly two eigenvalues are unpacked below, so anything other
        # than 2D must be rejected here with an explicit message.
        if n_dim != 2:
            msg = "get_boundaries() only works with 2D data."
            raise ValueError(msg)

        chi2_val = self.number_of_sigmas**2 * n_dim
        # eigh returns eigenvalues in ascending order, with the matching
        # eigenvectors stored as columns.
        eigvals, eigvecs = np.linalg.eigh(self.covariance)
        width, height = 2 * np.sqrt(eigvals * chi2_val)
        # arctan2(y, x) of the first eigenvector: reversing [x, y] yields
        # the (y, x) argument order.
        angle = np.degrees(np.arctan2(*eigvecs[:, 0][::-1]))
        return width, height, angle

    def get_attributes(self):
        """
        Returns a dictionary containing the attributes of the state.

        Returns
        -------
        attr_list : dict
            Mapping from field name to value, as produced by
            ``dataclasses.asdict``.
        """
        attr_list = asdict(self)
        return attr_list