Source code for sconce.DirLinSCMS

# -*- coding: utf-8 -*-

# Author: Yikun Zhang
# Last Editing: June 26, 2022

# Description: This script contains code for the directional-linear kernel density 
# estimator with the von Mises and Gaussian kernels, directional-linear mean shift 
# (DirLinMS), and directional-linear subspace constrained mean shift (DirLinSCMS) 
# algorithms.

import numpy as np
from numpy import linalg as LA
import scipy.special as sp

#==========================================================================================#

[docs]def DirLinKDE(x, data, h=None, b=None, q=2, D=1):
    '''
    Directional-linear KDE with the von Mises and Gaussian kernels
    
    Parameters
    ----------
        x: (m, q+1+D)-array
            Eulidean coordinates of m directional-linear query points, where 
            (q+1) is the Euclidean dimension of the directional component (first 
            (q+1) columns) and D is the dimension of the linear component (last 
            D columns).
    
        data: (n, q+1+D)-array
            Euclidean coordinates of n directional-linear random sample points, 
            where (q+1) is the Euclidean dimension of the directional component 
            (first (q+1) columns) and D is the dimension of the linear component
            (last D columns).
       
        h: float
            Bandwidth parameter for the directional component. (Default: h=None. 
            Then a rule of thumb for directional KDEs with the von Mises kernel 
            in Garcia-Portugues (2013) is applied.)
            
        b: float
            Bandwidth parameter for the linear component. (Default: h=None. 
            Then the Silverman's rule of thumb is applied. See Chen et al.(2016) 
            for details.)
            
        q: int
            Intrinsic data dimension of directional components.
            
        D: int
            Data dimension of linear components.
    
    Return
    ----------
        f_hat: (m,)-array
            The corresponding directinal-linear density estimates at m query 
            points.
    '''
    n = data.shape[0]  ## Number of data points
    D_t = data.shape[1]  ## Total dimension of the data
    
    assert q+1+D == D_t, "The dimension of the input data, "+str(D_t)+", should "\
    "equal to the sum of directional dimension ("+str(q+1)+") in its ambient "\
    "space and linear dimension ("+str(D)+")."
    
    data_Dir = data[:,:(q+1)]
    x_Dir = x[:,:(q+1)]
    data_Lin = data[:,(q+1):(q+1+D)]
    x_Lin = x[:,(q+1):(q+1+D)]

    # Rule of thumb for directional component
    if h is None:
        R_bar = np.sqrt(sum(np.mean(data_Dir, axis=0) ** 2))
        ## An approximation to kappa (Banerjee 2005 & Sra, 2011)
        kap_hat = R_bar * (q + 1 - R_bar ** 2) / (1 - R_bar ** 2)
        if q == 2:
            h = (8*np.sinh(kap_hat)**2/(n*kap_hat * \
                 ((1+4*kap_hat**2)*np.sinh(2*kap_hat) - 2*kap_hat*np.cosh(2*kap_hat))))**(1/6)
        else:
            h = ((4 * np.sqrt(np.pi) * sp.iv((q-1) / 2 , kap_hat)**2) / \
                 (n * kap_hat ** ((q+1) / 2) * (2 * q * sp.iv((q+1)/2, 2*kap_hat) + \
                                  (q+2) * kap_hat * sp.iv((q+3)/2, 2*kap_hat)))) ** (1/(q + 4))
        print("The current bandwidth for directional component is " + str(h) + ".\n")
    
    # Rule of thumb for linear component
    if b is None:
        # Apply Silverman's rule of thumb to select the bandwidth parameter 
        b = (4/(D+2))**(1/(D+4))*(n**(-1/(D+4)))*np.mean(np.std(data_Lin, axis=0))
        print("The current bandwidth for linear component is "+ str(b) + ".\n")
    
    # Compute the kernel weights contributed by directional components
    if q == 2:
        f_hat_Dir = np.exp((np.dot(x_Dir, data_Dir.T)-1)/(h**2))/(2*np.pi\
                        *(1-np.exp(-2/h**2))*h**2)
    else:
        f_hat_Dir = np.exp(np.dot(x_Dir, data_Dir.T)/(h**2))/((2*np.pi)**((q+1)/2)*\
                           sp.iv((q-1)/2, 1/(h**2))*h**(q-1))
    
    # Compute the kernel weights contributed by linear components
    f_hat_Lin = np.zeros((x.shape[0], n))
    for i in range(x.shape[0]):
        f_hat_Lin[i,:] = np.exp(np.sum(-((x_Lin[i,:] - data_Lin)/b)**2, axis=1)/2)/ \
                        ((2*np.pi)**(D/2)*np.prod(b))
    
    f_hat = np.mean(f_hat_Dir * f_hat_Lin, axis=1)
    return f_hat


[docs]def DirLinMS(mesh_0, data, h=None, b=None, q=2, D=1, eps=1e-7, max_iter=1000):
    '''
    Directional-linear Mean Shift Algorithm with the von Mises and Gaussian kernels
    (Simultaneous version)
    
    Parameters
    ----------
        mesh_0: (m, q+1+D)-array
            Eulidean coordinates of m directional-linear query points, where 
            (q+1) is the Euclidean dimension of the directional component (first 
            (q+1) columns) and D is the dimension of the linear component (last 
            D columns).
    
        data: (n, q+1+D)-array
            Euclidean coordinates of n directional-linear random sample points, 
            where (q+1) is the Euclidean dimension of the directional component 
            (first (q+1) columns) and D is the dimension of the linear component
            (last D columns).
       
        h: float
            Bandwidth parameter for the directional component. (Default: h=None. 
            Then a rule of thumb for directional KDEs with the von Mises kernel 
            in Garcia-Portugues (2013) is applied.)
            
        b: float
            Bandwidth parameter for the linear component. (Default: h=None. 
            Then the Silverman's rule of thumb is applied. See Chen et al.(2016) 
            for details.)
            
        q: int
            Intrinsic data dimension of directional components.
            
        D: int
            Data dimension of linear components.
       
        eps: float
            The precision parameter.
       
        max_iter: int
            The maximum number of iterations for the MS algorithm on each 
            initial point.
    
    Return
    ----------
        MS_path: (m,q+1+D,T)-array
            The entire iterative MS sequence for each initial point.
    '''
    
    n = data.shape[0]  ## Number of data points
    D_t = data.shape[1]  ## Total dimension of the data
    
    assert q+1+D == D_t, "The dimension of the input data, "+str(D_t)+", should "\
    "equal to the sum of directional dimension ("+str(q+1)+") in its ambient "\
    "space and linear dimension ("+str(D)+")."
    
    data_Dir = data[:,:(q+1)]
    data_Lin = data[:,(q+1):(q+1+D)]

    # Rule of thumb for directional component
    if h is None:
        R_bar = np.sqrt(sum(np.mean(data_Dir, axis=0) ** 2))
        # An approximation to kappa (Banerjee 2005 & Sra, 2011)
        kap_hat = R_bar * (q + 1 - R_bar ** 2) / (1 - R_bar ** 2)
        if q == 2:
            h = (8*np.sinh(kap_hat)**2/(n*kap_hat * \
                 ((1+4*kap_hat**2)*np.sinh(2*kap_hat) - 2*kap_hat*np.cosh(2*kap_hat))))**(1/6)
        else:
            h = ((4 * np.sqrt(np.pi) * sp.iv((q-1) / 2 , kap_hat)**2) / \
                 (n * kap_hat ** ((q+1) / 2) * (2 * q * sp.iv((q+1)/2, 2*kap_hat) + \
                                  (q+2) * kap_hat * sp.iv((q+3)/2, 2*kap_hat)))) ** (1/(q + 4))
        print("The current bandwidth for directional component is " + str(h) + ".\n")
    
    # Rule of thumb for linear component
    if b is None:
        # Apply Silverman's rule of thumb to select the bandwidth parameter 
        b = (4/(D+2))**(1/(D+4))*(n**(-1/(D+4)))*np.mean(np.std(data_Lin, axis=0))
        print("The current bandwidth for linear component is "+ str(b) + ".\n")
    
    MS_path = np.zeros((mesh_0.shape[0], D_t, max_iter))
    ## Create a vector indicating the convergent status of every mesh point
    conv_sign = np.zeros((mesh_0.shape[0], )) 
    MS_path[:,:,0] = mesh_0
    for t in range(1, max_iter):
        # print(t)
        if all(conv_sign == 1):
            print('The directional-linear MS algorithm converges in '\
                  + str(t-1) + 'steps!')
            break
        for i in range(mesh_0.shape[0]):
            if conv_sign[i] == 0:
                x_Dir = MS_path[i,:(q+1),t-1]
                x_Lin = MS_path[i,(q+1):(q+1+D),t-1]
                # Kernel weights
                ker_w_Dir = np.exp((np.dot(data_Dir, x_Dir) - 1)/ h**2)
                ker_w_Lin = np.exp(-np.sum(((x_Lin - data_Lin)/b)**2, axis=1)/2)
                # Mean shift updates for directional components
                x_Dir_new = np.sum(data_Dir * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1), 
                                   axis=0)
                x_Dir_new = x_Dir_new / LA.norm(x_Dir_new)
                MS_path[i,:(q+1),t] = x_Dir_new
                # Mean shift updates for linear components
                x_Lin_new = np.sum(data_Lin * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1),
                                   axis=0) / np.sum(ker_w_Dir * ker_w_Lin)
                MS_path[i,(q+1):(q+1+D),t] = x_Lin_new
                if LA.norm(MS_path[i,:,t] - MS_path[i,:,t-1]) <= eps:
                    conv_sign[i] = 1
            else:
                MS_path[i,:,t] = MS_path[i,:,t-1]
                
    if t >= max_iter-1:
        print('The MS algorithm reaches the maximum number of iterations,'\
               +str(max_iter)+', and has not yet converged.')
    return MS_path[:,:,:t]

                
                
[docs]def DirLinMS_CA(mesh_0, data, h=None, b=None, q=2, D=1, eps=1e-7, max_iter=1000):
    '''
    Directional-Linear Mean Shift Algorithm with the von Mises and Gaussian kernels
    (Componentwise Ascending version)
    
    Parameters
    ----------
        mesh_0: (m, q+1+D)-array
            Eulidean coordinates of m directional-linear query points, where 
            (q+1) is the Euclidean dimension of the directional component (first 
            (q+1) columns) and D is the dimension of the linear component (last 
            D columns).
    
        data: (n, q+1+D)-array
            Euclidean coordinates of n directional-linear random sample points, 
            where (q+1) is the Euclidean dimension of the directional component 
            (first (q+1) columns) and D is the dimension of the linear component
            (last D columns).
       
        h: float
            Bandwidth parameter for the directional component. (Default: h=None. 
            Then a rule of thumb for directional KDEs with the von Mises kernel 
            in Garcia-Portugues (2013) is applied.)
            
        b: float
            Bandwidth parameter for the linear component. (Default: h=None. 
            Then the Silverman's rule of thumb is applied. See Chen et al.(2016) 
            for details.)
            
        q: int
            Intrinsic data dimension of directional components.
            
        D: int
            Data dimension of linear components.
       
        eps: float
            The precision parameter.
       
        max_iter: int
            The maximum number of iterations for the MS algorithm on each 
            initial point.
    
    Return
    ----------
        MS_path: (m,q+1+D,T)-array
            The entire iterative MS sequence for each initial point.
    '''
    
    n = data.shape[0]  ## Number of data points
    D_t = data.shape[1]  ## Total dimension of the data
    
    assert q+1+D == D_t, "The dimension of the input data, "+str(D_t)+", should "\
    "equal to the sum of directional dimension ("+str(q+1)+") in its ambient "\
    "space and linear dimension ("+str(D)+")."
    
    data_Dir = data[:,:(q+1)]
    data_Lin = data[:,(q+1):(q+1+D)]

    # Rule of thumb for directional component
    if h is None:
        R_bar = np.sqrt(sum(np.mean(data_Dir, axis=0) ** 2))
        ## An approximation to kappa (Banerjee 2005 & Sra, 2011)
        kap_hat = R_bar * (q + 1 - R_bar ** 2) / (1 - R_bar ** 2)
        if q == 2:
            h = (8*np.sinh(kap_hat)**2/(n*kap_hat * \
                 ((1+4*kap_hat**2)*np.sinh(2*kap_hat) - 2*kap_hat*np.cosh(2*kap_hat))))**(1/6)
        else:
            h = ((4 * np.sqrt(np.pi) * sp.iv((q-1) / 2 , kap_hat)**2) / \
                 (n * kap_hat ** ((q+1) / 2) * (2 * q * sp.iv((q+1)/2, 2*kap_hat) + \
                                  (q+2) * kap_hat * sp.iv((q+3)/2, 2*kap_hat)))) ** (1/(q + 4))
        print("The current bandwidth for directional component is " + str(h) + ".\n")
    
    # Rule of thumb for linear component
    if b is None:
        # Apply Silverman's rule of thumb to select the bandwidth parameter 
        b = (4/(D+2))**(1/(D+4))*(n**(-1/(D+4)))*np.mean(np.std(data_Lin, axis=0))
        print("The current bandwidth for linear component is "+ str(b) + ".\n")
    
    MS_path = np.zeros((mesh_0.shape[0], D_t, max_iter))
    ## Create a vector indicating the convergent status of every mesh point
    conv_sign = np.zeros((mesh_0.shape[0], )) 
    MS_path[:,:,0] = mesh_0
    for t in range(1, max_iter):
        # print(t)
        if all(conv_sign == 1):
            print('The directional-linear MS algorithm converges in '\
                  + str(t-1) + ' steps!')
            break
        for i in range(mesh_0.shape[0]):
            if conv_sign[i] == 0:
                x_Dir = MS_path[i,:(q+1),t-1]
                x_Lin = MS_path[i,(q+1):(q+1+D),t-1]
                # Kernel weights
                ker_w_Dir = np.exp((np.dot(data_Dir, x_Dir) - 1)/ h**2)
                ker_w_Lin = np.exp(-np.sum(((x_Lin - data_Lin)/b)**2, axis=1)/2)
                # Mean shift updates for directional components
                x_Dir_new = np.sum(data_Dir * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1), 
                                   axis=0)
                x_Dir_new = x_Dir_new / LA.norm(x_Dir_new)
                MS_path[i,:(q+1),t] = x_Dir_new
                # Recompute the directional kernel weights
                ker_w_Dir = np.exp((np.dot(x_Dir_new, data_Dir.T) - 1)/ h**2)
                # Mean shift updates for linear components
                x_Lin_new = np.sum(data_Lin * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1),
                                   axis=0) / np.sum(ker_w_Dir * ker_w_Lin)
                MS_path[i,(q+1):(q+1+D),t] = x_Lin_new
                if LA.norm(MS_path[i,:,t] - MS_path[i,:,t-1]) <= eps:
                    conv_sign[i] = 1
            else:
                MS_path[i,:,t] = MS_path[i,:,t-1]
                
    if t >= max_iter-1:
        print('The MS algorithm reaches the maximum number of iterations,'\
               +str(max_iter)+', and has not yet converged.')
    return MS_path[:,:,:t]


[docs]def DirLinSCMS(mesh_0, data, d=1, h=None, b=None, q=2, D=1, eps=1e-7, max_iter=1000):
    '''
    Directional-linear Subspace Constrained Mean Shift Algorithm with the 
    von Mises and Gaussian kernels (Our proposed version, converging to DirLin 
    ridges under the correct (Riemannian) gradient of DirLin KDE).
    
    Parameters
    ----------
        mesh_0: (m, q+1+D)-array
            Eulidean coordinates of m directional-linear query points, where 
            (q+1) is the Euclidean dimension of the directional component (first 
            (q+1) columns) and D is the dimension of the linear component (last 
            D columns).
    
        data: (n, q+1+D)-array
            Euclidean coordinates of n directional-linear random sample points, 
            where (q+1) is the Euclidean dimension of the directional component 
            (first (q+1) columns) and D is the dimension of the linear component
            (last D columns).
            
        d: int
            The order of the density ridge.
       
        h: float
            Bandwidth parameter for the directional component. (Default: h=None. 
            Then a rule of thumb for directional KDEs with the von Mises kernel 
            in Garcia-Portugues (2013) is applied.)
            
        b: float
            Bandwidth parameter for the linear component. (Default: h=None. 
            Then the Silverman's rule of thumb is applied. See Chen et al.(2016) 
            for details.)
            
        q: int
            Intrinsic data dimension of directional components.
            
        D: int
            Data dimension of linear components.
       
        eps: float
            The precision parameter.
       
        max_iter: int
            The maximum number of iterations for the MS algorithm on each 
            initial point.
            
    Returns
    ----------
        SCMS_path: (m,q+1+D,T)-array
            The entire iterative DirLinSCMS sequence for each initial point.
            
        conv_sign: (m, )-array
            A array with 0 or 1 values indicating the convergence of each initial
            point.
    
    '''
    
    n = data.shape[0]  ## Number of data points
    D_t = data.shape[1]  ## Total dimension of the data
    
    assert q+1+D == D_t, "The dimension of the input data, "+str(D_t)+", should "\
    "equal to the sum of directional dimension ("+str(q+1)+") in its ambient "\
    "space and linear dimension ("+str(D)+")."
    
    data_Dir = data[:,:(q+1)]
    data_Lin = data[:,(q+1):(q+1+D)]

    # Rule of thumb for directional component
    if h is None:
        R_bar = np.sqrt(sum(np.mean(data_Dir, axis=0) ** 2))
        ## An approximation to kappa (Banerjee 2005 & Sra, 2011)
        kap_hat = R_bar * (q + 1 - R_bar ** 2) / (1 - R_bar ** 2)
        if q == 2:
            h = (8*np.sinh(kap_hat)**2/(n*kap_hat * \
                 ((1+4*kap_hat**2)*np.sinh(2*kap_hat) - 2*kap_hat*np.cosh(2*kap_hat))))**(1/6)
        else:
            h = ((4 * np.sqrt(np.pi) * sp.iv((q-1) / 2 , kap_hat)**2) / \
                 (n * kap_hat ** ((q+1) / 2) * (2 * q * sp.iv((q+1)/2, 2*kap_hat) + \
                                  (q+2) * kap_hat * sp.iv((q+3)/2, 2*kap_hat)))) ** (1/(q + 4))
    print("The current bandwidth for directional component is " + str(h) + ".\n")
    
    # Rule of thumb for linear component
    if b is None:
        # Apply Silverman's rule of thumb to select the bandwidth parameter 
        b = (4/(D+2))**(1/(D+4))*(n**(-1/(D+4)))*np.mean(np.std(data_Lin, axis=0))
    print("The current bandwidth for linear component is "+ str(b) + ".\n")
    
    SCMS_path = np.zeros((mesh_0.shape[0], D_t, max_iter))
    SCMS_path[:,:,0] = mesh_0
    ## Create a vector indicating the convergent status of every mesh point
    conv_sign = np.zeros((mesh_0.shape[0], ))
    
    for t in range(1, max_iter):
        if all(conv_sign > 0):
            print('The directional-linear SCMS algorithm converges in '\
                  + str(t-1) + ' steps!')
            break
        for i in range(mesh_0.shape[0]):
            if conv_sign[i] == 0:
                x_Dir = SCMS_path[i,:(q+1),t-1]
                x_Lin = SCMS_path[i,(q+1):(q+1+D),t-1]
                x_pts = SCMS_path[i,:,t-1]
                # Kernel weights
                ker_w_Dir = np.exp((np.dot(data_Dir, x_Dir) - 1)/ h**2)
                ker_w_Lin = np.exp(-np.sum(((x_Lin - data_Lin)/b)**2, axis=1)/2)
                # Compute the Hessian matrix
                ## Hessian in the directional component: (q+1)*(q+1)
                Hess_Dir = np.dot(data_Dir.T, data_Dir * ker_w_Dir.reshape(n,1) \
                                  * ker_w_Lin.reshape(n,1))/(h**4) \
                          - np.eye(q+1) * np.sum(np.dot(data_Dir * ker_w_Dir.reshape(n,1) \
                                                        * ker_w_Lin.reshape(n,1), x_Dir))/(h**2)
                x_Dir = x_Dir.reshape(q+1, 1)
                proj_mat = np.eye(q+1) - np.dot(x_Dir, x_Dir.T)
                Hess_Dir = np.dot(np.dot(proj_mat, Hess_Dir), proj_mat)
                ## Hessian in the linear component: D*D
                Hess_Lin = np.dot((x_Lin - data_Lin).T, (x_Lin - data_Lin) \
                                  * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1))/(b**4)\
                          - np.eye(D) * np.sum(ker_w_Dir.reshape(n,1) \
                                               * ker_w_Lin.reshape(n,1))/(b**2)
                ## Hessian in the off-diagonal part: (q+1)*D
                x_Dir = x_Dir.reshape(q+1, )
                Hess_Off = np.dot(proj_mat, 
                                  np.dot(data_Dir.T, 
                                         (data_Lin - x_Lin) * ker_w_Dir.reshape(n,1) \
                                             * ker_w_Lin.reshape(n,1))/((b**2)*(h**2)))
                ## Concatenate to obtain the final Hessian
                Hess = np.zeros((q+1+D, q+1+D))
                Hess[:(q+1), :(q+1)] = Hess_Dir
                Hess[:(q+1), (q+1):(q+1+D)] = Hess_Off
                Hess[(q+1):(q+1+D), :(q+1)] = Hess_Off.T
                Hess[(q+1):(q+1+D), (q+1):(q+1+D)] = Hess_Lin
                # Spectral decomposition
                w, v = LA.eig(Hess)
                x_eig = np.concatenate([x_Dir.reshape(q+1, 1), 
                                        np.zeros((D,1))], axis=0)
                # Obtain the eigenpairs within the tangent space
                tang_eig_v = v[:, (abs(np.dot(x_eig.T, v)) < 1e-8)[0,:]]
                tang_eig_w = w[(abs(np.dot(x_eig.T, v)) < 1e-8)[0,:]]
                V_d = tang_eig_v[:, np.argsort(tang_eig_w)[:(q+D-d)]]
                # Compute the total gradient
                tot_grad_Dir = np.sum(data_Dir * ker_w_Dir.reshape(n,1) \
                                      * ker_w_Lin.reshape(n,1), axis=0)/(h**2)
                tot_grad_Lin = np.sum((data_Lin - x_Lin) * ker_w_Dir.reshape(n,1) \
                                      * ker_w_Lin.reshape(n,1), axis=0)/(b**2)
                tot_grad = np.concatenate([tot_grad_Dir.reshape(q+1, 1), 
                                           tot_grad_Lin.reshape(D, 1)], axis=0)
                tot_grad_skew = np.concatenate([tot_grad_Dir.reshape(q+1, 1)*(h**2), 
                                                tot_grad_Lin.reshape(D, 1)*(b**2)], axis=0)
                # Mean shift vector in directional and linear components
                # ms_Dir = tot_grad_Dir / LA.norm(tot_grad_Dir)
                ms_Dir = (np.sum(data_Dir * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1), 
                                 axis=0) / np.sum(ker_w_Dir * ker_w_Lin)) * np.min([b/h, 1/(h**2)])
                ms_Lin = (np.sum(data_Lin * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1),
                                axis=0) / np.sum(ker_w_Dir * ker_w_Lin) - x_Lin) * np.min([h/b, 1/(b**2)])
                ms_v = np.concatenate([ms_Dir.reshape(q+1, 1),
                                       ms_Lin.reshape(D, 1)], axis=0)
                # Subspace constrained gradient and mean shift vector
                SCMS_grad = np.dot(V_d, np.dot(V_d.T, tot_grad))
                SCMS_grad_skew = np.dot(V_d, np.dot(V_d.T, tot_grad_skew))
                SCMS_v = np.dot(V_d, np.dot(V_d.T, ms_v))
                ## SCMS update
                x_new = SCMS_v + x_pts.reshape(q+1+D, 1)
                # x_new = eta*SCMS_grad + x_pts.reshape(q+1+D, 1)
                x_new = x_new.reshape(q+1+D, )
                x_new[:(q+1)] = x_new[:(q+1)] / LA.norm(x_new[:(q+1)])
                if LA.norm(SCMS_grad) < eps:
                # if LA.norm(x_new - x_pts) < eps:
                    conv_sign[i] = 1
                SCMS_path[i,:,t] = x_new
            else:
                SCMS_path[i,:,t] = SCMS_path[i,:,t-1]
        # print(t)
    if t >= max_iter-1:
        print('The directional-linear SCMS algorithm reaches the maximum number of '\
               'iterations,'+str(max_iter)+', and has not yet converged.')
    return SCMS_path[:,:,:t], conv_sign


[docs]def DirLinSCMSLog(mesh_0, data, d=1, h=None, b=None, q=2, D=1, eps=1e-7, max_iter=1000):
    '''
    Directional-linear Subspace Constrained Mean Shift Algorithm under the 
    log-density with the von Mises and Gaussian kernels (Our proposed version, 
    converging to DirLin ridges under the correct (Riemannian) gradient of 
    DirLin KDE).
    
    Parameters
    ----------
        mesh_0: (m, q+1+D)-array
            Eulidean coordinates of m directional-linear query points, where 
            (q+1) is the Euclidean dimension of the directional component (first 
            (q+1) columns) and D is the dimension of the linear component (last 
            D columns).
    
        data: (n, q+1+D)-array
            Euclidean coordinates of n directional-linear random sample points, 
            where (q+1) is the Euclidean dimension of the directional component 
            (first (q+1) columns) and D is the dimension of the linear component
            (last D columns).
            
        d: int
            The order of the density ridge.
       
        h: float
            Bandwidth parameter for the directional component. (Default: h=None. 
            Then a rule of thumb for directional KDEs with the von Mises kernel 
            in Garcia-Portugues (2013) is applied.)
            
        b: float
            Bandwidth parameter for the linear component. (Default: h=None. 
            Then the Silverman's rule of thumb is applied. See Chen et al.(2016) 
            for details.)
            
        q: int
            Intrinsic data dimension of directional components.
            
        D: int
            Data dimension of linear components.
       
        eps: float
            The precision parameter.
       
        max_iter: int
            The maximum number of iterations for the MS algorithm on each 
            initial point.
            
    Returns
    ----------
        SCMS_path: (m,q+1+D,T)-array
            The entire iterative DirLinSCMS sequence for each initial point.
            
        conv_sign: (m, )-array
            A array with 0 or 1 values indicating the convergence of each initial
            point.
    
    '''
    
    n = data.shape[0]  ## Number of data points
    D_t = data.shape[1]  ## Total dimension of the data
    
    assert q+1+D == D_t, "The dimension of the input data, "+str(D_t)+", should "\
    "equal to the sum of directional dimension ("+str(q+1)+") in its ambient "\
    "space and linear dimension ("+str(D)+")."
    
    data_Dir = data[:,:(q+1)]
    data_Lin = data[:,(q+1):(q+1+D)]

    # Rule of thumb for directional component
    if h is None:
        R_bar = np.sqrt(sum(np.mean(data_Dir, axis=0) ** 2))
        ## An approximation to kappa (Banerjee 2005 & Sra, 2011)
        kap_hat = R_bar * (q + 1 - R_bar ** 2) / (1 - R_bar ** 2)
        if q == 2:
            h = (8*np.sinh(kap_hat)**2/(n*kap_hat * \
                 ((1+4*kap_hat**2)*np.sinh(2*kap_hat) - 2*kap_hat*np.cosh(2*kap_hat))))**(1/6)
        else:
            h = ((4 * np.sqrt(np.pi) * sp.iv((q-1) / 2 , kap_hat)**2) / \
                 (n * kap_hat ** ((q+1) / 2) * (2 * q * sp.iv((q+1)/2, 2*kap_hat) + \
                                  (q+2) * kap_hat * sp.iv((q+3)/2, 2*kap_hat)))) ** (1/(q + 4))
        print("The current bandwidth for directional component is " + str(h) + ".\n")
    
    # Rule of thumb for linear component
    if b is None:
        # Apply Silverman's rule of thumb to select the bandwidth parameter 
        # (Only works for Gaussian kernel)
        b = (4/(D+2))**(1/(D+4))*(n**(-1/(D+4)))*np.mean(np.std(data_Lin, axis=0))
        print("The current bandwidth for linear component is "+ str(b) + ".\n")
    
    SCMS_path = np.zeros((mesh_0.shape[0], D_t, max_iter))
    SCMS_path[:,:,0] = mesh_0
    ## Create a vector indicating the convergent status of every mesh point
    conv_sign = np.zeros((mesh_0.shape[0], ))
    
    for t in range(1, max_iter):
        if all(conv_sign > 0):
            print('The directional-linear SCMS algorithm converges in '\
                  + str(t-1) + ' steps!')
            break
        for i in range(mesh_0.shape[0]):
            if conv_sign[i] == 0:
                x_Dir = SCMS_path[i,:(q+1),t-1]
                x_Lin = SCMS_path[i,(q+1):(q+1+D),t-1]
                x_pts = SCMS_path[i,:,t-1]
                # Kernel weights
                ker_w_Dir = np.exp((np.dot(data_Dir, x_Dir) - 1)/ h**2)
                ker_w_Lin = np.exp(-np.sum(((x_Lin - data_Lin)/b)**2, axis=1)/2)
                # Compute the directional-linear KDE up to a constant
                den_prop = np.sum(ker_w_Dir * ker_w_Lin)
                if den_prop == 0:
                    # Set those points with zero density values to NaN
                    nan_arr = np.zeros_like(x_pts)
                    nan_arr[:] = np.nan
                    conv_sign[i] = 3
                    x_new = nan_arr
                else:
                    # Compute the total gradient of the log density
                    tot_grad_Log_Dir = np.sum(data_Dir * ker_w_Dir.reshape(n,1) \
                                         * ker_w_Lin.reshape(n,1), axis=0)/((h**2)*den_prop)
                    tot_grad_Log_Lin = np.sum((data_Lin - x_Lin) * ker_w_Dir.reshape(n,1) \
                                         * ker_w_Lin.reshape(n,1), axis=0)/((b**2)*den_prop)
                    tot_grad_Log = np.concatenate([tot_grad_Log_Dir.reshape(q+1, 1), 
                                               tot_grad_Log_Lin.reshape(D, 1)], axis=0)
                    tot_grad_Log_skew = np.concatenate([tot_grad_Log_Dir.reshape(q+1, 1)*(h**2), 
                                                    tot_grad_Log_Lin.reshape(D, 1)*(b**2)], axis=0)
                    # Compute the Hessian matrix of the log density
                    ## Hessian in the directional component: (q+1)*(q+1)
                    Log_Hess_Dir = np.dot(data_Dir.T, data_Dir * ker_w_Dir.reshape(n,1) \
                                     * ker_w_Lin.reshape(n,1))/((h**4)*den_prop) \
                        - np.dot(tot_grad_Log_Dir.reshape(q+1, 1), tot_grad_Log_Dir.reshape(1, q+1)) \
                        - np.eye(q+1) * np.sum(np.dot(data_Dir * ker_w_Dir.reshape(n,1) \
                                                  * ker_w_Lin.reshape(n,1), x_Dir))/((h**2)*den_prop)
                    x_Dir = x_Dir.reshape(q+1, 1)
                    proj_mat = np.eye(q+1) - np.dot(x_Dir, x_Dir.T)
                    Log_Hess_Dir = np.dot(np.dot(proj_mat, Log_Hess_Dir), proj_mat)
                    ## Hessian in the linear component: D*D
                    Log_Hess_Lin = np.dot((x_Lin - data_Lin).T, (x_Lin - data_Lin) \
                              * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1))/((b**4)*den_prop) \
                              - np.eye(D) * np.sum(ker_w_Dir.reshape(n,1) \
                                               * ker_w_Lin.reshape(n,1))/((b**2)*den_prop) \
                              - np.dot(tot_grad_Log_Lin.reshape(D,1), tot_grad_Log_Lin.reshape(1,D))
                    ## Hessian in the off-diagonal part: (q+1)*D
                    x_Dir = x_Dir.reshape(q+1, )
                    Log_Hess_Off = np.dot(data_Dir.T, (data_Lin - x_Lin) * ker_w_Dir.reshape(n,1) \
                                    * ker_w_Lin.reshape(n,1))/((b**2)*(h**2)*den_prop) \
                             - np.dot(tot_grad_Log_Dir.reshape(q+1, 1), tot_grad_Log_Lin.reshape(1,D))
                    Log_Hess_Off = np.dot(proj_mat, Log_Hess_Off)
                    ## Concatenate to obtain the final Hessian
                    Log_Hess = np.zeros((q+1+D, q+1+D))
                    Log_Hess[:(q+1), :(q+1)] = Log_Hess_Dir
                    Log_Hess[:(q+1), (q+1):(q+1+D)] = Log_Hess_Off
                    Log_Hess[(q+1):(q+1+D), :(q+1)] = Log_Hess_Off.T
                    Log_Hess[(q+1):(q+1+D), (q+1):(q+1+D)] = Log_Hess_Lin
                    # Spectral decomposition
                    w, v = LA.eig(Log_Hess)
                    x_eig = np.concatenate([x_Dir.reshape(q+1, 1), 
                                            np.zeros((D,1))], axis=0)
                    # Obtain the eigenpairs within the tangent space
                    tang_eig_v = v[:, (abs(np.dot(x_eig.T, v)) < 1e-8)[0,:]]
                    tang_eig_w = w[(abs(np.dot(x_eig.T, v)) < 1e-8)[0,:]]
                    V_d = tang_eig_v[:, np.argsort(tang_eig_w)[:(q+D-d)]]
                    # Mean shift vector in directional and linear components
                    # ms_Dir = tot_grad_Log_Dir / LA.norm(tot_grad_Log_Dir)
                    ms_Dir = (np.sum(data_Dir * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1), 
                                axis=0) / np.sum(ker_w_Dir * ker_w_Lin)) * np.min([b/h, 1/(h**2)])
                    ms_Lin = (np.sum(data_Lin * ker_w_Dir.reshape(n,1) * ker_w_Lin.reshape(n,1),
                                axis=0) / np.sum(ker_w_Dir * ker_w_Lin) - x_Lin) * np.min([h/b, 1/(b**2)])
                    ms_v = np.concatenate([ms_Dir.reshape(q+1, 1),
                                           ms_Lin.reshape(D, 1)], axis=0)
                    # Subspace constrained gradient and mean shift vector
                    SCMS_grad = np.dot(V_d, np.dot(V_d.T, tot_grad_Log))
                    SCMS_grad_skew = np.dot(V_d, np.dot(V_d.T, tot_grad_Log_skew))
                    SCMS_v = np.dot(V_d, np.dot(V_d.T, ms_v))
                    ## SCMS update
                    x_new = SCMS_v + x_pts.reshape(q+1+D, 1)
                    # x_new = eta*SCMS_grad + x_pts.reshape(q+1+D, 1)
                    x_new = x_new.reshape(q+1+D, )
                    x_new[:(q+1)] = x_new[:(q+1)] / LA.norm(x_new[:(q+1)])
                    if LA.norm(SCMS_grad) < eps:
                    # if LA.norm(x_new - x_pts) < eps:
                        conv_sign[i] = 1
                SCMS_path[i,:,t] = x_new
            else:
                SCMS_path[i,:,t] = SCMS_path[i,:,t-1]
        # print(t)
    if t >= max_iter-1:
        print('The directional-linear SCMS algorithm reaches the maximum number of '\
               'iterations,'+str(max_iter)+', and has not yet converged.')
    # return SCMS_path[conv_sign != 0,:,:t]
    return SCMS_path[:,:,:t], conv_sign