c++ - CUDA, "illegal memory access was encountered" in Memcpy -
I have this CUDA file:
#include "cuda.h"
#include "../../handleerror.h"
#include "sphere.hpp"
#include <stdlib.h>
#include <CImg.h>

// NOTE(review): this listing was recovered from a copy in which every
// identifier had been lower-cased. CUDA runtime, C library and CImg names are
// restored to their real spelling; the project-local names Sphere, INF and
// HANDLE_ERROR (and the two project header paths above) are assumed to follow
// the usual convention -- confirm against sphere.hpp / handleerror.h.

#define WIDTH 1280
#define HEIGHT 720
#define rnd(x) (x*rand()/RAND_MAX)
#define SPHERES_COUNT 5

using namespace cimg_library;

// Computes the colour of one pixel per thread and stores it as three
// consecutive bytes in `bitmap`.
//
//   bitmap  output buffer; the thread at pixel (x, y) writes bytes
//           offset*3 .. offset*3 + 2, where offset = x + y * (blockDim.x * gridDim.x).
//           There is no bounds check, so the buffer must hold three bytes for
//           every thread in the launch.
//   s       array of SPHERES_COUNT spheres that is read through this pointer.
//
// Expected launch: a 2D grid of 2D blocks covering the image exactly
// (main() uses WIDTH/16 x HEIGHT/16 blocks of 16 x 16 threads).
__global__ void kernel(unsigned char* bitmap, Sphere* s)
{
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // shift the origin to the centre of the launched grid
    float ox = x - blockDim.x * gridDim.x / 2;
    float oy = y - blockDim.y * gridDim.y / 2;

    // colour used when no sphere beats the initial maxz
    float r = 0.2, g = 0.2, b = 0.5;
    float maxz = -INF;

    // keep the colour of the sphere whose hit() result is the largest
    for (int i = 0; i < SPHERES_COUNT; i++)
    {
        float n, t = s[i].hit(ox, oy, &n);
        if (t > maxz)
        {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    bitmap[offset*3] = (int)(r * 255);
    bitmap[offset*3 + 1] = (int)(g * 255);
    bitmap[offset*3 + 2] = (int)(b * 255);
}

// Sphere data in constant memory; filled from the host with cudaMemcpyToSymbol.
__constant__ Sphere s[SPHERES_COUNT];

// Builds SPHERES_COUNT random spheres, uploads them to the __constant__ array,
// launches the kernel over a WIDTH x HEIGHT image and saves the result to
// "render.bmp".
int main ()
{
    //capture the start time
    cudaEvent_t start, stop;
    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));
    HANDLE_ERROR(cudaEventRecord(start, 0));

    //create host bitmap
    CImg<unsigned char> image(WIDTH, HEIGHT, 1, 3);
    image.permute_axes("cxyz");

    //allocate device bitmap data
    unsigned char* dev_bitmap;
    HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, image.size()*sizeof(unsigned char)));

    //generate spheres on the host and copy them to the GPU
    Sphere* temp_s = (Sphere*)malloc(SPHERES_COUNT*sizeof(Sphere));
    for (int i=0; i<SPHERES_COUNT; i++)
    {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }
    HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere)*SPHERES_COUNT));
    free(temp_s);

    //generate a bitmap from the sphere data
    dim3 grids(WIDTH/16, HEIGHT/16);
    dim3 threads(16, 16);
    // NOTE: `s` is the __constant__ symbol declared above. In host code that
    // name is only a handle to the symbol, not a device pointer, so passing it
    // as a kernel argument is the cause of the "illegal memory access" this
    // listing is being asked about. Left exactly as posted on purpose.
    kernel<<<grids, threads>>>(dev_bitmap, s);

    //copy the bitmap back from the GPU for display
    // (the launch is asynchronous, so a fault inside the kernel is only
    // reported by this call, the next synchronizing one)
    HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap, image.size()*sizeof(unsigned char), cudaMemcpyDeviceToHost));
    cudaFree(dev_bitmap);

    image.permute_axes("yzcx");
    image.save("render.bmp");
}
It compiles fine, but when it is executed I get this error:
an illegal memory access was encountered in main.cu at line 82
That is, here:
//copy the bitmap back from the GPU for display
HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap, image.size()*sizeof(unsigned char), cudaMemcpyDeviceToHost));
I cannot understand why... I know that if I remove this:
bitmap[offset*3] = (int)(r * 255); bitmap[offset*3 + 1] = (int)(g * 255); bitmap[offset*3 + 2] = (int)(b * 255);
the error is not reported, so I thought it might be an out-of-bounds index error that is only reported later. But I have an identical version of this program that makes no use of constant memory, and it works fine with the very same version of the kernel function...
There are two things at issue here. The first is this:
__constant__ Sphere s[SPHERES_COUNT];

int main ()
{
    ......
    kernel<<<grids, threads>>>(dev_bitmap, s);
    ......
In host code, `s` is a host memory variable that provides a handle for the CUDA runtime to hook up with the device constant memory symbol. It doesn't contain a valid device pointer and can't be passed to kernel calls. The result is an invalid memory access error.
You could do this:
__constant__ Sphere s[SPHERES_COUNT];

int main ()
{
    ......
    Sphere *d_s;
    cudaGetSymbolAddress((void **)&d_s, s);
    kernel<<<grids, threads>>>(dev_bitmap, d_s);
    ......
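which would cause a symbol lookup to get the device address of `s`, and it would then be valid to pass that to the kernel. However (and this is the second issue), the GPU relies on the compiler emitting specific instructions to access memory through the constant cache. The device compiler will only emit these instructions when it can detect that a `__constant__` variable is being accessed within a kernel, which is not possible when using a pointer. To actually benefit from constant memory, the kernel should therefore reference the `__constant__` array `s` directly instead of receiving it as a pointer argument. You can see more about how the compiler generates code for constant variable access in this Stack Overflow question and answer.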
Comments
Post a Comment