I have a list of sparse symmetric matrices sigmasuch that
len(sigma) = N
and for all i,j,k,
sigma[i].shape[0] == sigma[i].shape[1] = m # Square
sigma[i][j,k] == sigma[i][k,j] # Symmetric
I have an index array Psuch that
P.shape[0] = N
P.shape[1] = k
My goal is to extract tight sub-matrices k x k sigma[i]using indexing given P[i,:]. This can be done as follows:
sub_matrices = np.empty([N,k,k])
for i in range(N):
sub_matrices[i,:,:] = sigma[i][np.ix_(P[i,:], P[i,:])].todense()
Please note that although ksmall, N(and m) are very large. If sparse symmetric matrices are stored in CSR format, this takes a very long time. I believe that there should be a better solution. For example, is there a sparse format that lends itself well to symmetric matrices that need to be cut into both dimensions?
Python, C, Cython.
EXTRA
, Cython :
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False) # turn off bounds-checking for entire function
cpdef sparse_slice_fast_cy(sigma,
long[:,:] P,
double[:,:,:] sub_matrices):
"""
Inputs:
sigma: A list (N,) of sparse sp.csr_matrix (m x m)
P: A 2D array of integers (N, k)
sub_matrices: A 3D array of doubles (N, k, k) containing the slicing
"""
cdef long N = P.shape[0]
cdef long k = P.shape[1]
cdef long i
cdef long j
cdef long index_pointer
cdef long sparse_row_pointer
cdef double[:] data
cdef long[:] indices
cdef long[:] indptr
cdef long[:] perm
sub_matrices[:] = 0
for i in range(N):
perm = np.argsort(P[i,:])
data = sigma[i].data
indices = sigma[i].indices.astype(long)
indptr = sigma[i].indptr.astype(long)
for j in range(k):
index_pointer = 0
sparse_row_pointer = indptr[P[i, perm[j]]]
while ((index_pointer < k) and (sparse_row_pointer < indptr[P[i, perm[j]] + 1])):
if indices[sparse_row_pointer] == P[i, perm[index_pointer]]:
sub_matrices[i, perm[j], perm[index_pointer]] = \
data[sparse_row_pointer]
index_pointer += 1
elif indices[sparse_row_pointer] > P[i, perm[index_pointer]]:
index_pointer += 1
else:
sparse_row_pointer += 1
, np.argsort , C. , N. , Python, , prange.
, , , Cython, -, , , .
ead Cython
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False) # turn off bounds-checking for entire function
cpdef sparse_slice_fast_cy(sigma,
np.ndarray[np.int32_t, ndim=2] P,
np.float64_t[:,:,:] sub_matrices,
int symmetric):
"""
Inputs:
sigma: A list (N,) of sparse sp.csr_matrix (m x m)
P: A 2D array of integers (N, k)
sub_matrices: A 3D array of doubles (N, k, k) containing the slicing
symmetric: 1 if the sigma matrices are symmetric
"""
cdef np.int32_t N = P.shape[0]
cdef np.int32_t k = P.shape[1]
cdef np.int32_t i
cdef np.int32_t j
cdef np.int32_t index_pointer
cdef np.int32_t sparse_row_pointer
cdef np.float64_t[:] data
cdef np.int32_t[:] indices
cdef np.int32_t[:] indptr
cdef np.int32_t[:,:] perm = np.argsort(P, axis=1).astype(np.int32)
sub_matrices[:] = 0
for i in range(N):
data = sigma[i].data
indices = sigma[i].indices
indptr = sigma[i].indptr
for j in range(k):
if symmetric:
index_pointer = j # Only search upper triangular
else:
index_pointer = 0
sparse_row_pointer = indptr[P[i, perm[i, j]]]
while ((index_pointer < k) and (sparse_row_pointer < indptr[P[i, perm[i, j]] + 1])):
if indices[sparse_row_pointer] == P[i, perm[i, index_pointer]]:
sub_matrices[i, perm[i, j], perm[i, index_pointer]] = \
data[sparse_row_pointer]
if symmetric:
sub_matrices[i, perm[i, index_pointer], perm[i, j]] = \
data[sparse_row_pointer]
index_pointer += 1
elif indices[sparse_row_pointer] > P[i, perm[i, index_pointer]]:
index_pointer += 1
else:
sparse_row_pointer += 1
, , , , :
cimport cython
import numpy as np
cimport numpy as np
from libc.stdlib cimport malloc, free
from cython.parallel import prange
@cython.boundscheck(False) # turn off bounds-checking for entire function
cpdef sparse_slice_fast_cy(sigma,
np.ndarray[np.int32_t, ndim=2] P,
np.float64_t[:,:,:] sub_matrices,
int symmetric):
"""
Inputs:
sigma: A list (N,) of sparse sp.csr_matrix (m x m)
P: A 2D array of integers (N, k)
sub_matrices: A 3D array of doubles (N, k, k) containing the slicing
symmetric: 1 if the sigma matrices are symmetric
"""
cdef np.int32_t N = P.shape[0]
cdef np.int32_t k = P.shape[1]
cdef np.int32_t i
cdef np.int32_t j
cdef np.int32_t index_pointer
cdef np.int32_t sparse_row_pointer
cdef np.float64_t[:] data_mem_view
cdef np.int32_t[:] indices_mem_view
cdef np.int32_t[:] indptr_mem_view
cdef np.float64_t **data = <np.float64_t **> malloc(N * sizeof(np.float64_t *))
cdef np.int32_t **indices = <np.int32_t **> malloc(N * sizeof(np.int32_t *))
cdef np.int32_t **indptr = <np.int32_t **> malloc(N * sizeof(np.int32_t *))
for i in range(N):
data_mem_view = sigma[i].data
data[i] = &(data_mem_view[0])
indices_mem_view = sigma[i].indices
indices[i] = &(indices_mem_view[0])
indptr_mem_view = sigma[i].indptr
indptr[i] = &(indptr_mem_view[0])
cdef np.int32_t[:,:] perm = np.argsort(P, axis=1).astype(np.int32)
sub_matrices[:] = 0
for i in prange(N, nogil=True):
for j in range(k):
if symmetric:
index_pointer = j # Only search upper triangular
else:
index_pointer = 0
sparse_row_pointer = indptr[i][P[i, perm[i, j]]]
while ((index_pointer < k) and
(sparse_row_pointer < indptr[i][P[i, perm[i, j]] + 1])):
if indices[i][sparse_row_pointer] == P[i, perm[i, index_pointer]]:
sub_matrices[i, perm[i, j], perm[i, index_pointer]] = \
data[i][sparse_row_pointer]
if symmetric:
sub_matrices[i, perm[i, index_pointer], perm[i, j]] = \
data[i][sparse_row_pointer]
index_pointer = index_pointer + 1
elif indices[i][sparse_row_pointer] > P[i, perm[i, index_pointer]]:
index_pointer = index_pointer + 1
else:
sparse_row_pointer = sparse_row_pointer + 1
free(data)
free(indices)
free(indptr)
Test
cythonize -i sparse_slice.pyx
sparse_slice.pyx - . script:
import time
import numpy as np
import scipy as sp
import scipy.sparse
from sparse_slice import sparse_slice_fast_cy
k = 100
N = 20000
m = 10000
samples = 20
now = time.time()
sigma_samples = []
for i in range(samples):
sigma_samples.append(sp.sparse.rand(m, m, density=0.001, format='csr'))
sigma_samples[-1] = sigma_samples[-1] + sigma_samples[-1].T
sigma = []
for i in range(N):
j = np.random.randint(samples)
sigma.append(sigma_samples[j])
print('Time to make sigma: {}'.format(time.time() - now))
now = time.time()
P = np.empty([N, k]).astype(int)
for i in range(N):
P[i, :] = np.random.choice(np.arange(m), k, replace=True)
print('Time to make P: {}'.format(time.time() - now))
sub_matrices_slow = np.empty([N, k, k])
sub_matrices_fast = np.empty([N, k, k])
now = time.time()
for i in range(N):
sub_matrices_slow[i,:,:] = sigma[i][np.ix_(P[i,:], P[i,:])].todense()
print('Time to make sub_matrices_slow: {}'.format(time.time() - now))
symmetric = 1
now = time.time()
sparse_slice_fast_cy(sigma, P.astype(np.int32), sub_matrices_fast, symmetric)
print('Time to make sub_matrices_fast: {}'.format(time.time() - now))
assert(np.all((sub_matrices_slow - sub_matrices_fast)**2 < 1e-6))