My next code got an error when it starts, some of the graph weights are overwritten, but this should not happen with the Xa array (which saves the ones that have already been visited) and the __syncthreads () function ... Can someone help?
struct Node { int begin; // begining of the substring int num; // size of the sub-string }; __global__ void BFS (Node *Va, int *Ea, bool *Fa, bool *Xa, int *Ca, bool *parada) { int tid = threadIdx.x; if (Fa[tid] == true && Xa[tid] == false) { Fa[tid] = false; __syncthreads(); // Va begin is where it edges' subarray begins, Va is it's // number of elements for (int i = Va[tid].begin; i < (Va[tid].begin + Va[tid].num); i++) { int nid = Ea[i]; if (Xa[nid] == false) { Ca[nid] = Ca[tid] + 1; Fa[nid] = true; *parada = true; } } Xa[tid] = true; } } // The BFS frontier corresponds to all the nodes being processed // at the current level. int main() { //descriΓ§Γ£o do grafo struct Node node[4]; node[0].begin=0; node[0].num=2; node[1].begin=1; node[1].num=0; node[2].begin=2; node[2].num=2; node[3].begin=1; node[3].num=0; int edges[]={1,2,3,1}; bool frontier[4]={false}; bool visited[4]={false}; int custo[4]={0}; int source=0; frontier[source]=true; Node* Va; cudaMalloc((void**)&Va,sizeof(Node)*4); cudaMemcpy(Va,node,sizeof(Node)*4,cudaMemcpyHostToDevice); int* Ea; cudaMalloc((void**)&Ea,sizeof(Node)*4); cudaMemcpy(Ea,edges,sizeof(Node)*4,cudaMemcpyHostToDevice); bool* Fa; cudaMalloc((void**)&Fa,sizeof(bool)*4); cudaMemcpy(Fa,frontier,sizeof(bool)*4,cudaMemcpyHostToDevice); bool* Xa; cudaMalloc((void**)&Xa,sizeof(bool)*4); cudaMemcpy(Xa,visited,sizeof(bool)*4,cudaMemcpyHostToDevice); int* Ca; cudaMalloc((void**)&Ca,sizeof(int)*4); cudaMemcpy(Ca,custo,sizeof(int)*4,cudaMemcpyHostToDevice); dim3 threads(4,1,1); bool para; bool* parada; cudaMalloc((void**)¶da,sizeof(bool)); printf("\n"); int n=1; do{ para=false; cudaMemcpy(parada,¶,sizeof(bool),cudaMemcpyHostToDevice); BFS <<<1,threads>>>(Va,Ea,Fa,Xa,Ca,parada); CUT_CHECK_ERROR("kernel1 execution failed"); cudaMemcpy(¶,parada,sizeof(bool),cudaMemcpyDeviceToHost); printf("Run number: %d >> ",n); cudaMemcpy(custo,Ca,sizeof(int)*4,cudaMemcpyDeviceToHost); for(int i=0;i<4;i++) printf("%d ",custo[i]); printf("\n"); n++; }while(para); printf("\nFinal:\n"); cudaMemcpy(custo,Ca,sizeof(int)*4,cudaMemcpyDeviceToHost); for(int i=0;i<4;i++) printf("%d ",custo[i]); printf("\n"); }