Determine which rows (or columns) have values ​​in a sparse matrix

I need to identify the rows (/ columns) that defined the values ​​in a large sparse boolean matrix. I want to use this for 1. slice (actually view) Matrices over these rows / columns; and 2. slices (/ view) of vectors and matrices that have the same dimensions as the matrix fields. That is, the result should probably be an index vector / Bools or (preferably) an iterator.

I tried the obvious:

a = sprand(10000, 10000, 0.01)
cols = unique(a.colptr)
rows = unique(a.rowvals)

but each of them takes 20 ms on my machine, probably because they allocate about 1 MB (at least they allocate colsand rows). This is in a performance-critical function, so I would like the code to be optimized. The base code seems to have an iterator nzrangefor sparse matrices, but it's not easy for me to figure out how to apply this to my case.

Is there any way to do this?

Second question: I will also need to perform this operation on the representations of my sparse matrix - will it be something like x = view(a,:,:); cols = unique(x.parent.colptr[x.indices[:,2]])or are there special functions for this? The representations of rare matrices seem complicated (cf https://discourse.julialang.org/t/slow-arithmetic-on-views-of-sparse-matrices/3644 - not cross-mail)

Thank you so much!

+4
2

, :

nzcols(a::SparseMatrixCSC) = collect(i 
  for i in 1:a.n if a.colptr[i]<a.colptr[i+1])

function nzrows(a::SparseMatrixCSC)
    active = falses(a.m)
    for r in a.rowval
        active[r] = true
    end
    return find(active)
end

10_000x10_000 0,1 0,2 2,9 . , ( ).

, (, b = sparse(view(a,100:199,100:199))) . :

nzcols(b::SubArray{T,2,P}) where {T,P<:AbstractSparseArray} = nzcols(sparse(b))
nzrows(b::SubArray{T,2,P}) where {T,P<:AbstractSparseArray} = nzrows(sparse(b))

. , UnitRanges :

# utility predicate returning true if element of sorted v in range r
inrange(v,r) = searchsortedlast(v,last(r))>=searchsortedfirst(v,first(r))

function nzcols(b::SubArray{T,2,P,Tuple{UnitRange{Int64},UnitRange{Int64}}}
  ) where {T,P<:SparseMatrixCSC}
    return collect(i+1-start(b.indexes[2]) 
      for i in b.indexes[2]
      if b.parent.colptr[i]<b.parent.colptr[i+1] && 
        inrange(b.parent.rowval[nzrange(b.parent,i)],b.indexes[1]))
end

function nzrows(b::SubArray{T,2,P,Tuple{UnitRange{Int64},UnitRange{Int64}}}
  ) where {T,P<:SparseMatrixCSC}
    active = falses(length(b.indexes[1]))
    for c in b.indexes[2]
        for r in nzrange(b.parent,c)
            if b.parent.rowval[r] in b.indexes[1]
                active[b.parent.rowval[r]+1-start(b.indexes[1])] = true
            end
        end
    end
    return find(active)
end

, ( 100x100 10 000x10 000 16 12 , , , ).

(, , ). , .

+5

, , , , . , . , , , , .

function sortedintersecting(v1, v2)
    i,j = start(v1), start(v2)
    while i <= length(v1) && j <= length(v2)
        if v1[i] == v2[j] return true
        elseif v1[i] > v2[j] j += 1
        else i += 1
        end
    end
    return false
end

function nzcols(b::SubArray{T,2,P,Tuple{Vector{Int64},Vector{Int64}}}
  ) where {T,P<:SparseMatrixCSC}
    brows = sort(unique(b.indexes[1]))
    return [k 
      for (k,i) in enumerate(b.indexes[2])
      if b.parent.colptr[i]<b.parent.colptr[i+1] && 
        sortedintersecting(brows,b.parent.rowval[nzrange(b.parent,i)])]
end

function nzrows(b::SubArray{T,2,P,Tuple{Vector{Int64},Vector{Int64}}}
  ) where {T,P<:SparseMatrixCSC}
    active = falses(length(b.indexes[1]))
    for c in b.indexes[2]
      active[findin(b.indexes[1],b.parent.rowval[nzrange(b.parent,c)])] = true
    end
    return find(active)
end

- -

nzrows Vector {Int}, , , findin , :

function findin2(inds,v,w)
    i,j = start(v),start(w)
    res = Vector{Int}()
    while i<=length(v) && j<=length(w)
        if v[i]==w[j]
            push!(res,inds[i])
            i += 1
        elseif (v[i]<w[j]) i += 1
        else j += 1
        end
    end
    return res
end

function nzrows(b::SubArray{T,2,P,Tuple{Vector{Int64},Vector{Int64}}}
  ) where {T,P<:SparseMatrixCSC}
    active = falses(length(b.indexes[1]))
    inds = sortperm(b.indexes[1])
    brows = (b.indexes[1])[inds] 
    for c in b.indexes[2]
      active[findin2(inds,brows,b.parent.rowval[nzrange(b.parent,c)])] = true
    end
    return find(active)
end
+2

Source: https://habr.com/ru/post/1677124/


All Articles