C++ - CUDA: "an illegal memory access was encountered" in cudaMemcpy


I have this CUDA file:

#include "cuda.h"
#include "../../handleerror.h"
#include "sphere.hpp"
#include <stdlib.h>
#include <CImg.h>

// NOTE(review): project-defined identifiers (sphere, handle_error, inf, and the
// lower-case macros below) are kept exactly as they appear in this listing;
// their real spelling lives in headers that are not shown here.
#define width 1280
#define height 720
#define rnd(x) (x*rand()/RAND_MAX)
#define spheres_count 5

using namespace cimg_library;

// Renders one pixel per thread.
//
// Launch layout: 2D grid of 2D blocks; the image size is taken to be
// (blockDim.x * gridDim.x) x (blockDim.y * gridDim.y), so the launch must
// cover the image exactly (there is no bounds check).
//
// bitmap: output buffer, 3 interleaved bytes per pixel, written at
//         offset*3 .. offset*3 + 2.
// s:      array of spheres_count spheres; must be a pointer that is valid
//         on the device.
__global__ void kernel(unsigned char* bitmap, sphere* s) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // blockDim/gridDim are unsigned: convert to int first, otherwise
    // `x - blockDim.x * gridDim.x / 2` is evaluated in unsigned arithmetic
    // and wraps around for every pixel left of / above the image centre.
    const int img_w = blockDim.x * gridDim.x;
    const int img_h = blockDim.y * gridDim.y;

    int offset = x + y * img_w;
    float ox = x - img_w / 2;
    float oy = y - img_h / 2;

    // background colour, used when no sphere is hit
    float r = 0.2f, g = 0.2f, b = 0.5f;
    float maxz = -inf;
    for (int i = 0; i < spheres_count; i++) {
        // hit() is declared in sphere.hpp (not shown); presumably it returns
        // the depth of the hit and stores a shading factor in n -- TODO confirm
        float n, t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    bitmap[offset*3] = (int)(r * 255);
    bitmap[offset*3 + 1] = (int)(g * 255);
    bitmap[offset*3 + 2] = (int)(b * 255);
}

__constant__ sphere s[spheres_count];

int main () {
    // capture the start time
    cudaEvent_t start, stop;
    handle_error(cudaEventCreate(&start));
    handle_error(cudaEventCreate(&stop));
    handle_error(cudaEventRecord(start, 0));

    // create the host bitmap
    CImg<unsigned char> image(width, height, 1, 3);
    image.permute_axes("cxyz");

    // allocate the device bitmap data
    unsigned char* dev_bitmap;
    handle_error(cudaMalloc((void**)&dev_bitmap, image.size()*sizeof(unsigned char)));

    // generate the spheres on the host, then copy them to the GPU
    sphere* temp_s = (sphere*)malloc(spheres_count*sizeof(sphere));
    for (int i = 0; i < spheres_count; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    handle_error(cudaMemcpyToSymbol(s, temp_s, sizeof(sphere)*spheres_count));
    free(temp_s);

    // generate the bitmap from the sphere data
    dim3 grids(width/16, height/16);
    dim3 threads(16, 16);
    // NOTE: `s` is the __constant__ symbol, passed here as a kernel argument.
    // This launch is the subject of the question; see the answer below.
    kernel<<<grids, threads>>>(dev_bitmap, s);

    // copy the bitmap back from the GPU for display
    handle_error(cudaMemcpy(image.data(), dev_bitmap,
                            image.size()*sizeof(unsigned char),
                            cudaMemcpyDeviceToHost));

    handle_error(cudaFree(dev_bitmap));

    // release the events created at the top of main
    handle_error(cudaEventDestroy(start));
    handle_error(cudaEventDestroy(stop));

    image.permute_axes("yzcx");
    image.save("render.bmp");
}

It compiles fine, but when it is executed I get this error:

an illegal memory access was encountered in main.cu at line 82 

That is, here:

    // copy the bitmap back from the GPU for display; this blocking copy is
    // the call at which the error is reported
    handle_error(cudaMemcpy(image.data(), dev_bitmap,
                            image.size()*sizeof(unsigned char),
                            cudaMemcpyDeviceToHost));

I cannot understand why... I know that if I remove this:

  // the three per-pixel channel stores at the end of the kernel
  bitmap[offset*3] = (int)(r * 255);   bitmap[offset*3 + 1] = (int)(g * 255);   bitmap[offset*3 + 2] = (int)(b * 255); 

the error is not reported, so I thought it might be an out-of-bounds index error that is only reported later. But I have an identical version of this program that makes no use of constant memory, and it works fine with the same version of the kernel function...

There are two things at issue here. The first is this:

// excerpt of the listing above: the __constant__ symbol `s` is passed directly as the second kernel argument
__constant__ sphere s[spheres_count];  int main () {     ......      kernel<<<grids, threads>>>(dev_bitmap, s);      ...... 

In host code, `s` is a host memory variable which provides a handle for the CUDA runtime to hook up with the device constant memory symbol. It doesn't contain a valid device pointer and can't be passed to kernel calls. The result is an invalid memory access error.

You could do this:

__constant__ sphere s[spheres_count];

int main () {
    // ... (setup elided)

    // Look up the device address of the __constant__ symbol; the host-side
    // name `s` is only a handle and is not itself a device pointer.
    sphere *d_s;
    handle_error(cudaGetSymbolAddress((void **)&d_s, s));
    kernel<<<grids, threads>>>(dev_bitmap, d_s);

    // ... (remainder of main elided)
}

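which would cause a symbol lookup to get the device address of `s`, and it would then be valid to pass that to the kernel. However (and this is the second issue), the GPU relies on the compiler emitting specific instructions to access memory through the constant cache. The device compiler will only emit these instructions when it can detect that a `__constant__` variable is being accessed within a kernel, which is not possible when using a pointer. To get the benefit of constant memory, the kernel should therefore reference `s` directly rather than receive it as an argument. You can see more about how the compiler will generate code for constant variable access in this Stack Overflow question and answer.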


Comments

Popular posts from this blog

android - MPAndroidChart - How to add Annotations or images to the chart -

javascript - Add class to another page attribute using URL id - Jquery -

firefox - Where is 'webgl.osmesalib' parameter? -