Can I take advantage of parallelization to speed up this part of the code?

Question

Can I take advantage of parallelization to speed up this part of the code?

OK, the continuation of this and this question. The code I want to change is of course:

function fdtd1d_local(steps, ie = 200)
    ez = zeros(ie + 1);
    hy = zeros(ie);
    for n in 1:steps
        for i in 2:ie
            ez[i]+= (hy[i] - hy[i-1])
        end
        ez[1]= sin(n/10)
        for i in 1:ie
            hy[i]+= (ez[i+1]- ez[i])
        end
    end
    (ez, hy)
end

fdtd1d_local(1);
@time sol1=fdtd1d_local(10);

elapsed time: 3.4292e-5 seconds (4148 bytes allocated)

And I naively tried:

function fdtd1d_local_parallel(steps, ie = 200)
    ez = dzeros(ie + 1);
    hy = dzeros(ie);
    for n in 1:steps
        for i in 2:ie
            localpart(ez)[i]+= (hy[i] - hy[i-1])
        end
        localpart(ez)[1]= sin(n/10)
        for i in 1:ie
            localpart(hy)[i]+= (ez[i+1]- ez[i])
        end
    end
    (ez, hy)
end

fdtd1d_local_parallel(1);
@time sol2=fdtd1d_local_parallel(10);

elapsed time: 0.0418593 seconds (3457828 bytes allocated)

sol2==sol1

true

The result is correct, but performance is much worse. So why? Because parallelization is not for the dual core old lap-top, or am I mistaken again?

Well, I admit that the only thing I know about parallelization is code acceleration, but not every piece of code can be parallel, is there any basic knowledge you need to know before trying parallel programming?

Any help would be appreciated.

+4

parallel-processing julia-lang

xzczd Apr 25 '14 at 10:48

source share

2 answers

tholy · Answer 1 · 2014-04-25T16:06:25+0000

. -, . , - . , ( zeros dzeros) . , ; , - . , localpart , raw DArray . IPC. , . ProfileView.

-, , . , . , , . SharedArray.

, ; . ( ), .

rickhg12hs · Answer 2 · 2014-05-04T07:17:28+0000

N.B.: , FDTD, noob.

@tholy , .

, Wikipedia Finite-difference time-domain method , .

, FDTD , . , .

1D , . , , , , . , Julia .

, :

addprocs(2)

@everywhere function ez_front(n::Int, ez::DArray, hy::DArray)
  ez_local=localpart(ez)
  hy_local=localpart(hy)
  ez_local[1]=sin(n/10)
  @simd for i=2:length(ez_local)
    @inbounds ez_local[i] += (hy_local[i] - hy_local[i-1])
  end
end

@everywhere function ez_back(ez::DArray, hy::DArray)
  ez_local=localpart(ez)
  hy_local=localpart(hy)
  index_boundary::Int = first(localindexes(hy)[1])-1
  ez_local[1] += (hy_local[1]-hy[index_boundary])
  @simd for i=2:length(ez_local)
    @inbounds ez_local[i] += (hy_local[i] - hy_local[i-1])
  end
end

@everywhere function hy_front(ez::DArray, hy::DArray)
  ez_local=localpart(ez)
  hy_local=localpart(hy)
  index_boundary = last(localindexes(ez)[1])+1
  @simd for i=1:(length(hy_local)-1)
    @inbounds hy_local[i] += (ez_local[i+1] - ez_local[i])
  end
  hy_local[end] += (ez[index_boundary] - ez_local[end])
end

@everywhere function hy_back(ez::DArray, hy::DArray)
  ez_local=localpart(ez)
  hy_local=localpart(hy)
  @simd for i=2:(length(hy_local)-1)
    @inbounds hy_local[i] += (ez_local[i+1] - ez_local[i])
  end
  hy_local[end] -= ez_local[end]
end


function fdtd1d_parallel(steps::Int, ie::Int = 200)
  ez = dzeros((ie,),workers()[1:2],2)
  hy = dzeros((ie,),workers()[1:2],2)
  for n = 1:steps
    @sync begin
      @async begin
        remotecall(workers()[1],ez_front,n,ez,hy)
        remotecall(workers()[2],ez_back,ez,hy)
      end
    end
    @sync begin
      @async begin
        remotecall(workers()[1],hy_front,ez,hy)
        remotecall(workers()[2],hy_back,ez,hy)
      end
    end
  end
  (convert(Array{Float64},ez), convert(Array{Float64},hy))
end

fdtd1d_parallel(1);
@time sol2=fdtd1d_parallel(10);

( 32- 2- ) , ie - 5000000.

Julia, FDTD, FDTD. , Julia .

Can I take advantage of parallelization to speed up this part of the code?

More articles: