-
-
Save anon-double-blind/855583b55f4d4c33b3ca201d6fc31063 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import silhouette_samples | |
import numpy as np | |
from sklearn.mixture import GaussianMixture | |
from sklearn.cluster import KMeans | |
from scipy.special import logsumexp | |
def softmax(x, axis=None):
    """
    Numerically stable softmax of ``x``.

    The normaliser is computed in the log domain with ``logsumexp`` and
    subtracted before exponentiating, so large inputs do not overflow.

    Args:
        x (np.ndarray): Input scores.
        axis (int, optional): Axis along which the outputs sum to one. With
            the default of None the normalisation runs over every element.

    Returns:
        np.ndarray: Array broadcast-compatible with ``x`` holding the softmax
            values.
    """
    log_normalizer = logsumexp(x, axis=axis, keepdims=True)
    log_probabilities = x - log_normalizer
    return np.exp(log_probabilities)
def _get_loud_bins_mask(threshold, stft): | |
representation = np.abs(stft) | |
threshold = np.percentile(representation, threshold) | |
mask = representation > threshold | |
return mask, representation | |
def posterior_confidence(stft, features, num_sources, threshold=95,
                         **kwargs):
    """
    Measure how cleanly an embedding space clusters, based on how strongly
    each point is assigned to a single cluster. The more points sit "in
    between" clusters (no strong assignment), the lower the score.

    The loud bins of the STFT select which embeddings take part. Those
    embeddings are clustered with KMeans, the negated point-to-centroid
    distances are passed through a softmax to get soft assignments, and the
    largest assignment of each point is rescaled by
    ``(num_sources * p - 1) / (num_sources - 1)`` before averaging.

    Args:
        stft (np.ndarray): STFT array used to compute the loudness mask over
            which the confidence measure is evaluated.
        features (np.ndarray): Features to be clustered. The last axis is the
            embedding dimension; the mask is used to index the leading axes.
        num_sources (int): Number of clusters to fit. The rescaling divides
            by ``num_sources - 1``, so a value of 1 divides by zero.
        threshold (int, optional): Loudness percentile. Bins at or below it
            are excluded from the measure. Defaults to 95.
        kwargs: Keyword arguments forwarded to `_get_loud_bins_mask`.

    Returns:
        float: Mean rescaled posterior confidence over the selected points.
    """
    loud_bins, _ = _get_loud_bins_mask(threshold, stft, **kwargs)
    points = features[loud_bins].reshape(-1, features.shape[-1])

    # One column per cluster; negating makes closer centroids score higher.
    center_distances = KMeans(num_sources).fit_transform(points)
    posteriors = softmax(-center_distances, axis=-1)

    # A uniform assignment (1 / num_sources) maps to 0, a one-hot one to 1.
    strongest = np.max(posteriors, axis=-1)
    rescaled = (num_sources * strongest - 1) / (num_sources - 1)
    return rescaled.mean()
def silhouette_confidence(stft, features, num_sources, threshold=95,
                          max_points=1000, **kwargs):
    """
    Measure how cleanly the feature space clusters using the silhouette score.

    For each sample the Silhouette Coefficient is (b - a) / max(a, b), where
    a is the mean intra-cluster distance and b is the mean distance to the
    nearest cluster the sample does not belong to. The coefficient is only
    defined when 2 <= n_labels <= n_samples - 1.

    References:
        Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
        Interpretation and Validation of Cluster Analysis". Computational and
        Applied Mathematics 20: 53-65.

    Args:
        stft (np.ndarray): STFT array used to compute the loudness mask over
            which the confidence measure is evaluated.
        features (np.ndarray): Features to be clustered. The last axis is the
            embedding dimension; the mask is used to index the leading axes.
        num_sources (int): Number of clusters to fit.
        threshold (int, optional): Loudness percentile. Bins at or below it
            are excluded from the measure. Defaults to 95.
        max_points (int, optional): Upper bound on how many points are scored.
            When more points survive the mask, a random subset of this size
            is drawn without replacement, because the silhouette score is
            costly to compute. Defaults to 1000.
        kwargs: Keyword arguments forwarded to `_get_loud_bins_mask`.

    Returns:
        float: Mean silhouette coefficient over the scored points.
    """
    loud_bins, _ = _get_loud_bins_mask(threshold, stft, **kwargs)
    points = features[loud_bins].reshape(-1, features.shape[-1])

    num_points = points.shape[0]
    if num_points > max_points:
        # Subsample to keep the pairwise-distance computation tractable.
        keep = np.random.choice(
            np.arange(num_points), max_points, replace=False)
        points = points[keep]

    assignments = KMeans(num_sources).fit_predict(points)
    return silhouette_samples(points, assignments).mean()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment