|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
OPNMTF_alpha |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
import itertools |
|
from math import * |
|
from scipy.io import loadmat, savemat |
|
import sys |
|
import numpy as np |
|
import scipy.sparse as sp |
|
from sklearn.utils import check_random_state |
|
from sklearn.preprocessing import normalize |
|
from sklearn.utils import check_random_state, check_array |
|
|
|
|
|
|
|
from ..initialization import random_init |
|
from ..io.input_checking import check_positive |
|
|
|
from numpy.random import rand |
|
from numpy import nan_to_num |
|
from numpy import linalg |
|
from datetime import datetime |
|
import timeit |
|
|
|
|
|
|
|
|
|
|
|
|
|
class OPNMTF: |
|
|
|
def __init__(self, |
|
n_row_clusters = 2 , n_col_clusters = 2 , landa = 0, |
|
mu = 0, alpha = 1+1e-1, |
|
F_init = None, S_init = None, G_init = None, |
|
max_iter = 100, n_init = 1, tol = 1e-9, |
|
random_state = None): |
|
self.n_row_clusters = n_row_clusters |
|
self.n_col_clusters = n_col_clusters |
|
self.landa = landa |
|
self.mu = mu |
|
self.F_init = F_init |
|
self.S_init = S_init |
|
self.G_init = G_init |
|
self.max_iter = max_iter |
|
self.n_init = n_init |
|
self.tol = tol |
|
self.alpha = alpha+1e-1 |
|
self.random_state = check_random_state(random_state) |
|
self.F = None |
|
self.G = None |
|
self.S = None |
|
self.row_labels_ = None |
|
self.column_labels_= None |
|
self.rowcluster_matrix = None |
|
self.columncluster_matrix = None |
|
self.reorganized_matrix = None |
|
self.soft_matrix = None |
|
self.hard_matrix = None |
|
self.orthogonality_F = None |
|
self.orthogonality_G = None |
|
self.orthogonality_D_alpha_F = None |
|
self.orthogonality_D_alpha_G = None |
|
self.MSE_1 = None |
|
self.MSE_2 = None |
|
self.criterions = [] |
|
self.criterion = -np.inf |
|
self.runtime = None |
|
|
|
|
|
def fit(self, X, y=None): |
|
|
|
check_array(X, accept_sparse=True, dtype="numeric", order=None, |
|
copy=False, force_all_finite=True, ensure_2d=True, |
|
allow_nd=False, ensure_min_samples=self.n_row_clusters, |
|
ensure_min_features=self.n_col_clusters, estimator=None) |
|
criterion = self.criterion |
|
criterions = self.criterions |
|
row_labels_ = self.row_labels_ |
|
column_labels_ = self.column_labels_ |
|
|
|
X = X.astype(float) |
|
|
|
random_state = check_random_state(self.random_state) |
|
seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init) |
|
|
|
for seed in seeds: |
|
self._fit_single(X, seed, y) |
|
if np.isnan(self.criterion): |
|
raise ValueError("matrix may contain negative or unexpected NaN values") |
|
|
|
if (self.criterion > criterion): |
|
criterion = self.criterion |
|
criterions = self.criterions |
|
row_labels_ = self.row_labels_ |
|
column_labels_ = self.column_labels_ |
|
|
|
self.random_state = random_state |
|
|
|
|
|
self.criterion = criterion |
|
self.criterions = criterions |
|
self.row_labels_ = row_labels_ |
|
self.column_labels_ = column_labels_ |
|
|
|
|
|
|
|
def _fit_single(self, X, random_state = None, y=None): |
|
|
|
|
|
n, m = X.shape |
|
N = n*m |
|
g = self.n_row_clusters |
|
s = self.n_col_clusters |
|
F = rand(n, g) if isinstance(self.F_init, type(None)) else self.F_init |
|
S = rand(g , s) if isinstance(self.S_init, type(None)) else self.S_init |
|
G = rand(m, s) if isinstance(self.G_init, type(None)) else self.G_init |
|
I_g = np.identity(g, dtype = None) |
|
I_s = np.identity(s, dtype = None) |
|
E_gg = np.ones((self.n_row_clusters, self.n_row_clusters)) |
|
E_ss = np.ones((self.n_col_clusters, self.n_col_clusters)) |
|
E_nm = np.ones((n, m)) |
|
|
|
|
|
change = True |
|
c_init = float(-np.inf) |
|
c_list = [] |
|
runtime = [] |
|
D_alpha_F_list = [] |
|
D_alpha_G_list = [] |
|
Orthogonal_F_list = [] |
|
Orthogonal_G_list = [] |
|
iteration = 0 |
|
start = timeit.default_timer() |
|
|
|
|
|
|
|
while change : |
|
change = False |
|
for itr in range(self.max_iter): |
|
if isinstance(self.F_init, type(None)): |
|
enum = np.power(X/(F@S@G.T), self.alpha)@G@S.T + 2*self.landa*F@np.power(I_g/(F.T@F), self.alpha) |
|
denom = E_nm@G@S.T + F@I_g*2*self.landa |
|
DDF = np.power(enum/denom, 1/self.alpha) |
|
F = np.nan_to_num(np.multiply(F, DDF)) |
|
if isinstance(self.G_init, type(None)): |
|
enum = np.power((X/(F@S@G.T)).T, self.alpha)@F@S + 2*self.mu*G@np.power(I_s/(G.T@G), self.alpha) |
|
denom = E_nm.T@F@S+G@I_s*2*self.mu |
|
DDG = np.power(enum / denom, 1/self.alpha) |
|
G = np.nan_to_num(np.multiply(G, DDG)) |
|
if isinstance(self.S_init, type(None)): |
|
enum = F.T@np.power(X/(F@S@G.T), self.alpha)@G |
|
denom = F.T@E_nm@G |
|
DDS = np.power(enum/denom, 1/self.alpha) |
|
S = np.nan_to_num(np.multiply(S, DDS)) |
|
|
|
DF = np.diagflat(F.sum(axis = 0)) |
|
DG = np.diagflat(G.sum(axis = 0)) |
|
|
|
|
|
|
|
F = F@np.diagflat(np.power(F.sum(axis = 0), -1)) |
|
S = DF@S@DG |
|
G = (np.diagflat(np.power(G.sum(axis = 0), -1))@G.T).T |
|
F_cluster = np.zeros_like(F) |
|
F_cluster[np.arange(len(F)),np.sort(np.argmax(F,axis=1))] = 1 |
|
G_cluster = np.zeros_like(G) |
|
G_cluster[np.arange(len(G)),np.sort(np.argmax(G,axis=1))] = 1 |
|
|
|
|
|
c_0 = self.alpha*(1-self.alpha) |
|
z = np.nan_to_num(F@S@G.T /X , posinf=0) |
|
z_F = np.nan_to_num(F.T@F /I_g , posinf=0) |
|
z_G = np.nan_to_num(G.T@G /I_s , posinf=0) |
|
|
|
f_z = np.nan_to_num(self.alpha+(1-self.alpha)*z - np.power(z,1-self.alpha), posinf=0) |
|
f_z_F = np.nan_to_num(self.alpha+(1-self.alpha)*z_F - np.power(z_F,1-self.alpha), posinf=0) |
|
f_z_G = np.nan_to_num(self.alpha+(1-self.alpha)*z_G - np.power(z_G,1-self.alpha), posinf=0) |
|
|
|
D_alpha = np.sum(np.multiply(X,f_z)) |
|
D_alpha_F = np.sum(np.multiply(I_g,f_z_F)) |
|
D_alpha_G = np.sum(np.multiply(I_s,f_z_G)) |
|
|
|
Orthogonal_F = linalg.norm(F.T@F - I_g, 'fro') |
|
Orthogonal_G = linalg.norm(G.T@G - I_s, 'fro') |
|
|
|
Orthogonal_F_list.append(Orthogonal_F) |
|
Orthogonal_G_list.append(Orthogonal_G) |
|
|
|
D_alpha_F_list.append(D_alpha_F) |
|
D_alpha_G_list.append(D_alpha_G) |
|
|
|
c = c_0*D_alpha + c_0*self.landa * D_alpha_F + c_0*self.mu * D_alpha_G |
|
print(c) |
|
|
|
iteration += 1 |
|
if (np.abs(c - c_init) > self.tol and iteration < self.max_iter): |
|
c_init = c |
|
change = True |
|
c_list.append(c) |
|
|
|
stop = timeit.default_timer() |
|
runtime.append(stop - start) |
|
|
|
|
|
self.max_iter = iteration |
|
self.runtime = runtime |
|
self.criterion = c |
|
self.criterions = c_list |
|
self.F = F_cluster |
|
self.S = S |
|
self.G = G_cluster |
|
self.soft_matrix = F@S@G.T |
|
self.hard_matrix = F_cluster.T@X@G_cluster |
|
self.rowcluster_matrix = F_cluster@F_cluster.T@X |
|
self.columncluster_matrix = X@G_cluster@G_cluster.T |
|
self.reorganized_matrix = F_cluster@F_cluster.T@X@G_cluster@G_cluster.T |
|
self.row_labels_ = [x+1 for x in np.argmax(F, axis =1)] |
|
self.column_labels_ = [x+1 for x in np.argmax(G, axis =1)] |
|
self.orthogonality_D_alpha_F = D_alpha_F_list |
|
self.orthogonality_D_alpha_G = D_alpha_G_list |
|
self.orthogonality_F = Orthogonal_F_list |
|
self.orthogonality_G = Orthogonal_G_list |
|
self.MSE_1 = linalg.norm( X - (F_cluster@F_cluster.T@X@G_cluster@G_cluster.T), 'fro')**2/N |
|
self.MSE_2 = linalg.norm( X - (F_cluster@S@G_cluster.T), 'fro')**2/N |
|
|