// Forge
// cuda/fractal.cu
/*******************************************************
* Copyright (c) 2015-2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <forge.h>
#include <cuda_runtime.h>
#include <cuComplex.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <ComputeCopy.h>
#include <cstdio>
// Width and height, in pixels, of both the window and the image created in
// main(); the julia kernel uses the same values as its bounds.
const unsigned DIMX = 512;
const unsigned DIMY = 512;
// Byte size of the device output buffer allocated in main(): 4 bytes per
// pixel, matching the four bytes the julia kernel writes for every pixel.
const size_t TOT_SIZE = DIMX*DIMY*4;
// Host-side launcher for the julia kernel; defined later in this file.
void kernel(unsigned char* dev_out);
/**
 * Entry point: renders one fractal frame on the GPU and displays it in a
 * Forge window until the window reports that it should close.
 *
 * Returns 0 on normal exit.
 */
int main(void)
{
    /*
     * The window must be the very first Forge object that gets created:
     * constructing it sets up the OpenGL context every other forge::*
     * object needs in order to be created successfully.
     */
    forge::Window window(DIMX, DIMY, "Fractal Demo");
    window.makeCurrent();

    /*
     * The image object creates the textures and pixel buffer objects
     * that hold the picture being displayed.
     */
    forge::Image image(DIMX, DIMY, FG_RGBA, forge::u8);

    // Interop handle wrapping the image's pixel buffer.
    GfxHandle* glBuffer = 0;
    createGLBuffer(&glBuffer, image.pixels(), FORGE_IMAGE_BUFFER);

    /*
     * Data reaches the screen by being copied into the pixel buffer object
     * exposed by forge::Image. Forge ships copy helper headers to move data
     * from compute memory to display memory; they are used below.
     */
    unsigned char* devPixels;
    FORGE_CUDA_CHECK(cudaMalloc((void**)&devPixels, TOT_SIZE));

    // Fill the compute buffer with the fractal.
    kernel(devPixels);

    // Move the result from the compute buffer into the graphics buffer.
    copyToGLBuffer(glBuffer, (ComputeResourceHandle)devPixels, image.size());

    // Draw at least once, then keep drawing until the window wants to close.
    for (;;) {
        window.draw(image);
        if (window.close())
            break;
    }

    // Tear down the interop handle before freeing the compute buffer.
    releaseGLBuffer(glBuffer);
    FORGE_CUDA_CHECK(cudaFree(devPixels));

    return 0;
}
/**
 * Per-pixel escape test for the iteration a <- a*a + c with
 * c = -0.8 + 0.156i.
 *
 * The pixel coordinate (x, y) is mapped to a complex starting value whose
 * real and imaginary parts are 1.5 * (half - coord) / half, where half is
 * DIMX/2 for x and DIMY/2 for y.
 *
 * @param x  pixel column
 * @param y  pixel row
 * @return   0 as soon as the magnitude of the iterate exceeds 1000 within
 *           200 iterations, 1 if it never does
 */
__device__
int julia(int x, int y)
{
    const float zoom  = 1.5f;
    const float halfW = DIMX/2.0f;
    const float halfH = DIMY/2.0f;

    // Same evaluation order as (zoom * delta) / half for both axes.
    const float re = zoom * (halfW - x) / halfW;
    const float im = zoom * (halfH - y) / halfH;

    const cuFloatComplex seed = make_cuFloatComplex(-0.8f, 0.156f);
    cuFloatComplex z = make_cuFloatComplex(re, im);

    int remaining = 200;
    while (remaining > 0) {
        z = cuCaddf(cuCmulf(z, z), seed);
        if (cuCabsf(z) > 1000.0f) {
            // The iterate escaped.
            return 0;
        }
        --remaining;
    }

    // Stayed bounded for every iteration.
    return 1;
}
/**
 * Kernel that colours one pixel per thread.
 *
 * Each thread derives its pixel column/row from a 2D block and thread index,
 * and threads falling outside DIMX x DIMY do nothing. Four bytes are written
 * per pixel: bytes 0 and 1 are 0, byte 2 is 255 times the result of the
 * per-pixel julia(x, y) test, and byte 3 is 255.
 *
 * @param out  output buffer, written at 4 bytes per pixel in row-major
 *             order with a row length of DIMX pixels
 */
__global__
void julia(unsigned char* out)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads past the image edge have nothing to write.
    if (col >= DIMX || row >= DIMY)
        return;

    const int pixelIdx = col + row * DIMX;
    unsigned char* pixel = out + pixelIdx*4;

    // Evaluate the escape test for this position (yields 0 or 1).
    const int member = julia(col, row);

    pixel[0] = 0;
    pixel[1] = 0;
    pixel[2] = 255 * member;
    pixel[3] = 255;
}
/**
 * Integer division of a by b, rounded up for the non-negative a and
 * positive b this file passes in.
 *
 * @param a  dividend
 * @param b  divisor
 * @return   (a + b - 1) / b
 */
inline int divup(int a, int b)
{
    // Bias the dividend so truncating division rounds up.
    const int biased = a + b - 1;
    return biased / b;
}
/**
 * Host-side launcher for the julia kernel.
 *
 * Launches 8x8-thread blocks on a 2D grid sized with divup() so the grid
 * covers DIMX x DIMY pixels.
 *
 * @param dev_out  device buffer the kernel writes into, 4 bytes per pixel
 */
void kernel(unsigned char* dev_out)
{
    static const dim3 threads(8, 8);

    dim3 blocks(divup(DIMX, threads.x),
                divup(DIMY, threads.y));

    julia<<< blocks, threads >>>(dev_out);

    // A kernel launch returns no status of its own; an invalid launch
    // configuration is only reported through cudaGetLastError(). Check it
    // here so a failed launch is not silently ignored.
    FORGE_CUDA_CHECK(cudaGetLastError());
}