/
mandelbrot.cu
148 lines (116 loc) · 4.05 KB
/
mandelbrot.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Mandelbrot with CUDA
//
// Author: Axel Huebl (Serial Code by Matze)
// Date: 10th Jan 2012
//
#include <stdio.h>
#include <math.h>
#include <complex.h>
#include "cuda.h"
// simulation parameters
const int max_iterations = 255;
const int num_cols = 400;
const int num_rows = 300;
// cuda parameters
size_t blocksize = 256; // threads per block
const int maxRam = 250; // ION has approx. 256 MB global RAM
// Complex Numbers
struct cuComplex {
float r;
float i;
__device__ cuComplex( float a, float b ) : r(a), i(b) {}
__device__ float magnitude2( void ) {
return r * r + i * i;
}
__device__ cuComplex operator*(const cuComplex& a) {
return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
}
__device__ cuComplex operator*(const float& a) {
return cuComplex(r*a, i*a);
}
__device__ cuComplex operator+(const cuComplex& a) {
return cuComplex(r+a.r, i+a.i);
}
__device__ cuComplex operator+(const float& a) {
return cuComplex(r+a, i);
}
};
__device__ int iterate(cuComplex c ) {
cuComplex z(0., 0.);
int iterations = 0;
for( int i=0; i<max_iterations; i++ ) {
z = z*z + c;
if( sqrtf(z.magnitude2() ) > 2.0f )
break;
else
++iterations;
}
return iterations;
}
__global__ void calcMandelbrot( int* color_d, const int num_rows, const int num_cols ) {
const int globalX = ( blockIdx.x * blockDim.x ) + threadIdx.x;
const int globalY = blockIdx.y;
const int offset = globalY * num_cols + globalX;
// parameters
const float c_rmin = -2.0;
const float c_rmax = +1.0;
const float c_imin = -1.0;
const float c_imax = +1.0;
const float dx = (c_rmax - c_rmin) / float(num_cols);
const float dy = (c_imax - c_imin) / float(num_rows);
cuComplex imaginary( 0., 1.);
if( globalY < num_rows && globalX < num_cols ) {
cuComplex c = ( imaginary*( c_imin+(float(globalY)*dy) ) ) + (c_rmin+(float(globalX)*dx));
color_d[offset] = iterate(c);
}
}
int main() {
FILE *output = fopen("mandelbrot.ppm", "w+b");
int *color_h, *color_d;
const int nBytes = num_rows*num_cols*sizeof(int);
const int globalMem = nBytes / 1024 / 1024; // in MiB
printf( "Will use %d MiB of global Memory...\n", globalMem );
if( globalMem > maxRam ) {
printf( "Maximum RAM is %d ... exit now...\n", maxRam);
return 1;
}
// allocate host memory
color_h = (int*)malloc(nBytes);
// allocate device memory
cudaMalloc( (void**)&color_d, nBytes );
// init host
for( int i=0; i<num_cols*num_rows; i++ ) color_h[i] = 0;
// copy to device
cudaMemcpy(color_d, color_h, nBytes, cudaMemcpyHostToDevice);
printf( "Copied Memory to Device...\n" );
// call kernel
// dimension and size of grid *in blocks* (2D)
dim3 grid( ceil( double(num_cols)/double(blocksize) ), num_rows );
printf( "Grid size in blocks: %d %d\n", grid.x, grid.y );
// dimension and size of blocks *in threads* (3D)
dim3 threads( blocksize );
// asynchroner (!!) funktionsaufruf!
calcMandelbrot<<<grid, threads>>>( color_d, num_rows, num_cols );
printf( "%s\n", cudaGetErrorString( cudaGetLastError() ) );
// copy to host
cudaMemcpy(color_h, color_d, nBytes, cudaMemcpyDeviceToHost);
printf( "Copied Memory back to Host...\n" );
fprintf(output, "P3\n");
fprintf(output, "%d %d\n%d\n\n", num_cols, num_rows, max_iterations);
for (int x=0; x<num_cols; x++) {
for (int y=0; y<num_rows; y++) {
// float complex imaginary = 0+1.0i;
// c = (c_rmin+(x*dx)) + ((c_imin+(y*dy))*imaginary);
// color = iterate(c);
fprintf(output, "%d\n", color_h[x*num_rows + y]);
fprintf(output, "%d\n", color_h[x*num_rows + y]);
fprintf(output, "%d\n\n", color_h[x*num_rows + y]);
}
}
fclose(output);
// free host memory
free(color_h);
// free devide memory
cudaFree(color_d);
return 0;
}