CUDA thrust zip_iterator tuple transform_reduce

I want to calculate $\left| \vec{a} - \vec{b} \right|$ for vectors $\vec{a}$ and $\vec{b}$, where $\left| \vec{x} \right|$ denotes the magnitude of the vector $\vec{x}$. Since this involves taking the square root of the sum of the squared differences between the corresponding components of the two vectors, it should be a highly parallelizable task. I use CUDA and Thrust through Cygwin on Windows 10. Both CUDA and Thrust work in general.

The code below compiles and runs (with nvcc), but only because I commented out the three lines at the bottom of main, each of which I think should work but does not. func::operator()(tup t) thinks that the arguments I pass are not really of type tup.

I also commented out the actual body of the operator, to make it more likely that the code would at least compile. The operator is supposed to compute the squared difference between the two elements of the input tup. It is passed to transform_reduce as the unary_op (here func()), and the reduction (here thrust::plus<float>()) then adds the results, giving me the square of the norm of the difference of the vectors.

 #include <iostream> #include <stdlib.h> #include <thrust/device_vector.h> #include <thrust/transform.h> #include <thrust/tuple.h> #include <thrust/transform_reduce.h> #include <thrust/iterator/zip_iterator.h> typedef thrust::device_vector<float> dvec; typedef dvec::iterator iter; typedef thrust::tuple<iter, iter> tup; struct func: public thrust::unary_function<tup, float> { __device__ float operator()(tup t) //difsq { // I've commented out these two lines for testing purposes: // float f = thrust::get<0>(t) - thrust::get<1>(t); // return f*f; return 3.14; } }; int main() { dvec a(40, 4.f); dvec b(40, 3.f); auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin())); auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end())); //these two lines work thrust::get<0>(begin[0]); std::cout << thrust::get<0>(begin[0]) - thrust::get<1>(begin[0]); //these three lines do not //thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>()); //func()(begin[0]); //thrust::transform(begin, end, begin, func()); std::cout << "done" << std::endl; return 0; } 

I get this error (my program is called sandbox.cu):

 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(310): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list argument types are: (const thrust::device_reference<float>) detected during: instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" sandbox.cu(37): here C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(411): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list argument types are: (const thrust::device_reference<float>) detected during: instantiation of "thrust::detail::cons<HT, thrust::null_type>::cons(const thrust::detail::cons<HT2, thrust::null_type> &) [with HT=iter, HT2=thrust::device_reference<float>]" (310): here instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" C:\Program Files\NVIDIA GPU 
Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]" sandbox.cu(37): here 2 errors detected in the compilation of "C:/cygwin64/tmp/tmpxft_00001a90_00000000-10_sandbox.cpp1.ii". 
+5
source share
1 answer

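Solved! tup should be thrust::tuple<float, float>, not thrust::tuple<iter, iter>. Dereferencing the zip iterator yields a tuple of the elements (here thrust::device_reference<float>, as the error message shows), not a tuple of iterators, so the functor must accept a tuple of values. Complete solution: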

 #include <iostream> #include <stdlib.h> #include <thrust/device_vector.h> #include <thrust/transform.h> #include <thrust/tuple.h> #include <thrust/transform_reduce.h> #include <thrust/iterator/zip_iterator.h> typedef thrust::device_vector<float> dvec; typedef thrust::tuple<float, float> tup; struct func { __device__ float operator()(tup t) //difsq { float f = thrust::get<0>(t) - thrust::get<1>(t); return f*f; } }; int main() { dvec a(4, 3.f); dvec b(4, 2.f); auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin())); auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end())); std::cout << thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>()) << std::endl; std::cout << "done" << std::endl; return 0; } 
+3
source

Source: https://habr.com/ru/post/1246502/


All Articles