Ma30ha
/

NMTFcoclust

Model card Files Files and versions Community

NMTFcoclust / NMTFcoclust_OPNMTF_alpha.py

Ma30ha

Upload NMTFcoclust_OPNMTF_alpha.py

232d550 over 1 year ago

raw

history blame contribute delete

7.48 kB

	#!/usr/bin/env python
	# coding: utf-8

	# In[1]:


	"""
	OPNMTF_alpha
	"""

	# Author: Hoseinipour Saeid <saeidhoseinipour@aut.ac.ir>

	# License: ??????????

	import itertools
	from math import *
	from scipy.io import loadmat, savemat
	import sys
	import numpy as np
	import scipy.sparse as sp
	from sklearn.utils import check_random_state
	from sklearn.preprocessing import normalize
	from sklearn.utils import check_random_state, check_array
	#from coclust.utils.initialization import (random_init, check_numbers,check_array)
	# use sklearn instead FR 08-05-19
	#from initialization import random_init
	from ..initialization import random_init
	from ..io.input_checking import check_positive
	#from input_checking import check_positive
	from numpy.random import rand
	from numpy import nan_to_num
	from numpy import linalg
	from datetime import datetime
	import timeit


	# from pylab import *



	class OPNMTF:

	def __init__(self,
	n_row_clusters = 2 , n_col_clusters = 2 , landa = 0,
	mu = 0, alpha = 1+1e-1,
	F_init = None, S_init = None, G_init = None,
	max_iter = 100, n_init = 1, tol = 1e-9,
	random_state = None):
	self.n_row_clusters = n_row_clusters
	self.n_col_clusters = n_col_clusters
	self.landa = landa
	self.mu = mu
	self.F_init = F_init
	self.S_init = S_init
	self.G_init = G_init
	self.max_iter = max_iter
	self.n_init = n_init
	self.tol = tol
	self.alpha = alpha+1e-1
	self.random_state = check_random_state(random_state)
	self.F = None
	self.G = None
	self.S = None
	self.row_labels_ = None
	self.column_labels_= None
	self.rowcluster_matrix = None
	self.columncluster_matrix = None
	self.reorganized_matrix = None
	self.soft_matrix = None
	self.hard_matrix = None
	self.orthogonality_F = None
	self.orthogonality_G = None
	self.orthogonality_D_alpha_F = None
	self.orthogonality_D_alpha_G = None
	self.MSE_1 = None
	self.MSE_2 = None
	self.criterions = []
	self.criterion = -np.inf
	self.runtime = None


	def fit(self, X, y=None):

	check_array(X, accept_sparse=True, dtype="numeric", order=None,
	copy=False, force_all_finite=True, ensure_2d=True,
	allow_nd=False, ensure_min_samples=self.n_row_clusters,
	ensure_min_features=self.n_col_clusters, estimator=None)
	criterion = self.criterion
	criterions = self.criterions
	row_labels_ = self.row_labels_
	column_labels_ = self.column_labels_

	X = X.astype(float)

	random_state = check_random_state(self.random_state)
	seeds = random_state.randint(np.iinfo(np.int32).max, size=self.n_init)

	for seed in seeds:
	self._fit_single(X, seed, y)
	if np.isnan(self.criterion): # c --> self.criterion
	raise ValueError("matrix may contain negative or unexpected NaN values")
	# remember attributes corresponding to the best criterion
	if (self.criterion > criterion):
	criterion = self.criterion
	criterions = self.criterions
	row_labels_ = self.row_labels_
	column_labels_ = self.column_labels_

	self.random_state = random_state

	# update attributes
	self.criterion = criterion
	self.criterions = criterions
	self.row_labels_ = row_labels_
	self.column_labels_ = column_labels_



	def _fit_single(self, X, random_state = None, y=None):


	n, m = X.shape
	N = n*m
	g = self.n_row_clusters
	s = self.n_col_clusters
	F = rand(n, g) if isinstance(self.F_init, type(None)) else self.F_init
	S = rand(g , s) if isinstance(self.S_init, type(None)) else self.S_init
	G = rand(m, s) if isinstance(self.G_init, type(None)) else self.G_init
	I_g = np.identity(g, dtype = None)
	I_s = np.identity(s, dtype = None)
	E_gg = np.ones((self.n_row_clusters, self.n_row_clusters))
	E_ss = np.ones((self.n_col_clusters, self.n_col_clusters))
	E_nm = np.ones((n, m))

	################ OPNMTF_alpha ###################loop: MUR------> Multiplactive Update Rules
	change = True
	c_init = float(-np.inf)
	c_list = []
	runtime = []
	D_alpha_F_list = []
	D_alpha_G_list = []
	Orthogonal_F_list = []
	Orthogonal_G_list = []
	iteration = 0
	start = timeit.default_timer()



	while change :
	change = False
	for itr in range(self.max_iter):
	if isinstance(self.F_init, type(None)):
	enum = np.power(X/(F@S@G.T), self.alpha)@G@S.T + 2self.landaF@np.power(I_g/(F.T@F), self.alpha)
	denom = E_nm@G@S.T + F@I_g2self.landa
	DDF = np.power(enum/denom, 1/self.alpha)
	F = np.nan_to_num(np.multiply(F, DDF))
	if isinstance(self.G_init, type(None)):
	enum = np.power((X/(F@S@G.T)).T, self.alpha)@F@S + 2self.muG@np.power(I_s/(G.T@G), self.alpha)
	denom = E_nm.T@F@S+G@I_s2self.mu
	DDG = np.power(enum / denom, 1/self.alpha)
	G = np.nan_to_num(np.multiply(G, DDG))
	if isinstance(self.S_init, type(None)):
	enum = F.T@np.power(X/(F@S@G.T), self.alpha)@G
	denom = F.T@E_nm@G
	DDS = np.power(enum/denom, 1/self.alpha)
	S = np.nan_to_num(np.multiply(S, DDS))

	DF = np.diagflat(F.sum(axis = 0))
	DG = np.diagflat(G.sum(axis = 0))

	#Normalization

	F = F@np.diagflat(np.power(F.sum(axis = 0), -1))
	S = DF@S@DG
	G = (np.diagflat(np.power(G.sum(axis = 0), -1))@G.T).T #rank2*n
	F_cluster = np.zeros_like(F)
	F_cluster[np.arange(len(F)),np.sort(np.argmax(F,axis=1))] = 1
	G_cluster = np.zeros_like(G)
	G_cluster[np.arange(len(G)),np.sort(np.argmax(G,axis=1))] = 1

	#criterion alpha-divargance with convex function f(z) = 1/alpha(1-alpha) alpha+ (1-alpha)z - z^{1-alpha}
	c_0 = self.alpha*(1-self.alpha)
	z = np.nan_to_num(F@S@G.T /X , posinf=0)
	z_F = np.nan_to_num(F.T@F /I_g , posinf=0)
	z_G = np.nan_to_num(G.T@G /I_s , posinf=0)

	f_z = np.nan_to_num(self.alpha+(1-self.alpha)*z - np.power(z,1-self.alpha), posinf=0)
	f_z_F = np.nan_to_num(self.alpha+(1-self.alpha)*z_F - np.power(z_F,1-self.alpha), posinf=0)
	f_z_G = np.nan_to_num(self.alpha+(1-self.alpha)*z_G - np.power(z_G,1-self.alpha), posinf=0)

	D_alpha = np.sum(np.multiply(X,f_z))
	D_alpha_F = np.sum(np.multiply(I_g,f_z_F))
	D_alpha_G = np.sum(np.multiply(I_s,f_z_G))

	Orthogonal_F = linalg.norm(F.T@F - I_g, 'fro') # \|\|sum(F^TF - I)^2\|\|^0.5
	Orthogonal_G = linalg.norm(G.T@G - I_s, 'fro')

	Orthogonal_F_list.append(Orthogonal_F)
	Orthogonal_G_list.append(Orthogonal_G)

	D_alpha_F_list.append(D_alpha_F)
	D_alpha_G_list.append(D_alpha_G)

	c = c_0D_alpha + c_0self.landa * D_alpha_F + c_0self.mu D_alpha_G
	print(c)

	iteration += 1
	if (np.abs(c - c_init) > self.tol and iteration < self.max_iter):
	c_init = c
	change = True
	c_list.append(c)

	stop = timeit.default_timer()
	runtime.append(stop - start)


	self.max_iter = iteration
	self.runtime = runtime
	self.criterion = c
	self.criterions = c_list
	self.F = F_cluster
	self.S = S
	self.G = G_cluster
	self.soft_matrix = F@S@G.T
	self.hard_matrix = F_cluster.T@X@G_cluster
	self.rowcluster_matrix = F_cluster@F_cluster.T@X
	self.columncluster_matrix = X@G_cluster@G_cluster.T
	self.reorganized_matrix = F_cluster@F_cluster.T@X@G_cluster@G_cluster.T
	self.row_labels_ = [x+1 for x in np.argmax(F, axis =1)]
	self.column_labels_ = [x+1 for x in np.argmax(G, axis =1)]
	self.orthogonality_D_alpha_F = D_alpha_F_list
	self.orthogonality_D_alpha_G = D_alpha_G_list
	self.orthogonality_F = Orthogonal_F_list
	self.orthogonality_G = Orthogonal_G_list
	self.MSE_1 = linalg.norm( X - (F_cluster@F_cluster.T@X@G_cluster@G_cluster.T), 'fro')**2/N
	self.MSE_2 = linalg.norm( X - (F_cluster@S@G_cluster.T), 'fro')**2/N