Parallel multiplication of many small matrices by a fixed vector

The situation is as follows: I have a number (about 1000) of elements, each defined by a small matrix (4x2, 9x3, ... you get the idea). All matrices have the same dimensions.

I want to multiply each of these matrices by a fixed vector of previously calculated values. In short:

for(i = 1...n)
    X[i] = M[i] . N;

What is the best approach to doing this in parallel with Thrust? How should I lay out my data in memory?

NB: I am aware that there may be more specialized, more appropriate libraries for GPUs. I'm interested in Thrust because it allows me to deploy to different backends, not just CUDA.

+4
source share
2

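Answer (the original wording was lost in extraction; this summary is reconstructed from the code that follows):

Flatten all of the matrices into a single contiguous vector, one matrix after another, so that element i of the flattened data sits at position i % (H_MAT*W_MAT) within its matrix.

Keep the fixed values in a second, small vector with one entry per matrix position. A permutation iterator, indexed by a counting iterator passed through a modulo functor, repeats that small vector once per matrix, so a single thrust::transform call multiplies every data element by its matching fixed value without the repeated vector ever being stored. Note that the example multiplies element by element; it does not sum over rows as a full matrix-vector product would.

Here is a fully worked example: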

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>

// Number of matrices stored back to back in the flattened data vector.
#define N_MAT 1000
// Per-matrix dimensions (presumably height and width); H_MAT*W_MAT is the
// element count of one flattened matrix and the length of the scale vector.
#define H_MAT 4
#define W_MAT 3
// Synthetic inputs are generated as rand()%RANGE, i.e. values in [0, RANGE).
#define RANGE 1024

// Maps a linear index into the flattened batch of matrices to the index of
// the corresponding element inside a single matrix, i.e. idx modulo the
// number of elements per matrix (H_MAT*W_MAT).
//
// Used with a counting iterator so that the per-matrix scale vector can be
// addressed repeatedly, once per matrix, without materializing the repeats.
//
// NOTE(review): thrust::unary_function is kept for its result_type /
// argument_type typedefs; it may be deprecated in newer Thrust releases --
// confirm against the Thrust version in use.
struct my_modulo_functor : public thrust::unary_function<int, int>
{
  // Stateless, so the call operator is const: the functor can be invoked
  // through const objects and const references as well as mutable ones.
  __host__ __device__
  int operator() (int idx) const {
    return idx%(H_MAT*W_MAT);}
};

int main(){

  thrust::host_vector<int> data(N_MAT*H_MAT*W_MAT);
  thrust::host_vector<int> scale(H_MAT*W_MAT);
  // synthetic; instead flatten/copy matrices into data vector
  for (int i = 0; i < N_MAT*H_MAT*W_MAT; i++) data[i] = rand()%RANGE;
  for (int i = 0; i < H_MAT*W_MAT; i++) scale[i] = rand()%RANGE;

  thrust::device_vector<int> d_data = data;
  thrust::device_vector<int> d_scale = scale;
  thrust::device_vector<int> d_result(N_MAT*H_MAT*W_MAT);

  thrust::transform(d_data.begin(), d_data.end(), thrust::make_permutation_iterator(d_scale.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), my_modulo_functor())) ,d_result.begin(), thrust::multiplies<int>());

  thrust::host_vector<int> result = d_result;

  for (int i = 0; i < N_MAT*H_MAT*W_MAT; i++)
    if (result[i] != data[i] * scale[i%(H_MAT*W_MAT)]) {std::cout << "Mismatch at: " << i << " cpu result: " << (data[i] * scale[i%(H_MAT*W_MAT)]) << " gpu result: " << result[i] << std::endl; return 1;}
  std::cout << "Success!" << std::endl;
  return 0;
}

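Edit, in response to a question in the comments (wording reconstructed; the original text was lost in extraction):

The benefit of fancy iterators (i.e. transform(numbers, iterator)) is that they often eliminate extra data copies and data movement, compared with first assembling other numbers (which requires extra steps and extra memory traffic) and then calling transform(numbers, other numbers). If other numbers is only going to be used once, the fancy-iterator approach is usually better; if other numbers will be reused many times, it may be worth building it explicitly. This technique is commonly described as "Fusion".

Because other numbers is produced "on the fly" by the counting iterator, the functor and the permutation iterator, it never has to be written to memory; it is consumed directly by transform.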

+2

Source: https://habr.com/ru/post/1525336/


All Articles