Improve parallelism in the fragment shader, yielding a more than 20x higher frame rate

This commit is contained in:
2019-12-26 23:07:03 +02:00
parent 2e6ddd2ff2
commit 6cef4df543
6 changed files with 48 additions and 27 deletions
+28 -11
View File
@@ -27,7 +27,7 @@ const char* fragment_shader_kernel_source =
"} \n"
" \n"
"__kernel void fragment_shader ( \n"
" __global int* faces, \n"
" __global int3* faces, \n"
" __global float* vertices, \n"
" __global int* pixels, \n"
" __global int* screen_width, \n"
@@ -39,8 +39,9 @@ const char* fragment_shader_kernel_source =
" __global float* norms_buff, \n"
" __global uchar* diffuse_map \n"
") { \n"
" int index = get_global_id(0); \n"
" \n"
" int GROUP_ID = get_group_id(0); \n"
" int GROUP_SIZE = get_local_size(0); \n"
" int LOCAL_ID = get_local_id(0); \n"
" \n"
" bool out = true; \n"
" float3 vertices3[3]; \n"
@@ -48,18 +49,18 @@ const char* fragment_shader_kernel_source =
" float3 norms[3]; \n"
" \n"
" for(int i = 0; i < 3; i++) { \n"
" float4 vertex = vertices [faces[(index * 9) + (3 * i) + (0)]]; \n"
" float4 vertex;// = vertices[faces[GROUP_ID * 3 + i ][0]]; \n"
" for(int j = 0; j < 4; j ++) { \n"
" vertex[j] = vertices[4 * faces[(index*9) + (3 * i) + (0)] + j]; \n"
" vertex[j] = vertices[4 * faces[GROUP_ID*3 + i][0] + j]; \n"
" } \n"
" \n"
" for(int j = 0; j < 2; j++) { \n"
" uv_coords[i][j] = uv_buffer [2 * faces[(index * 9) + (3 * i) + (1)] + j]; \n"
" uv_coords[i][j] = uv_buffer [2 * faces[GROUP_ID * 3 + i][1] + j]; \n"
" } \n"
" \n"
" for( int j = 0; j < 3; j++ ) { \n"
" vertices3[j][i] = (vertex[j]/vertex[3]); \n"
" norms[j][i] = norms_buff[3 * (faces[(index * 9) + (3 * i) + (2)]) + j]; \n"
" norms[j][i] = norms_buff[3 * (faces[GROUP_ID * 3 + i][2]) + j]; \n"
" } \n"
" \n"
" if ( vertices3[0][i] > 0 && vertices3[0][i] < *screen_width \n"
@@ -82,10 +83,21 @@ const char* fragment_shader_kernel_source =
" } \n"
" } \n"
" \n"
" float3 point; \n"
" for(point[0] = bounding_box_min[0]; point[0] <= bounding_box_max[0]; point[0]++) { \n"
" for(point[1] = bounding_box_min[1]; point[1] <= bounding_box_max[1]; point[1]++) { \n"
" if(bounding_box_min[0] > *screen_width || bounding_box_max[0] < 0 || bounding_box_min[1] > *screen_width || bounding_box_max[1] < 0) return; \n"
" \n"
" \n"
" int X_PER_ITEM = (int)(ceil((float)(bounding_box_max[0] - bounding_box_min[0]) / 16.f)); \n"
" int Y_PER_ITEM = (int)(ceil((float)(bounding_box_max[1] - bounding_box_min[1]) / 16.f)); \n"
" float STARTING_X = bounding_box_min[0] + X_PER_ITEM * (LOCAL_ID % 16); \n"
" float ENDING_X = STARTING_X + X_PER_ITEM; \n"
" float STARTING_Y = bounding_box_min[1] + Y_PER_ITEM * (LOCAL_ID / 16); \n"
" float ENDING_Y = STARTING_Y + Y_PER_ITEM; \n"
" \n"
" \n"
" float3 point; \n"
" for(point[0] = STARTING_X; point[0] <= ENDING_X; point[0]++) { \n"
" for(point[1] = STARTING_Y; point[1] <= ENDING_Y; point[1]++) { \n"
" if(point[1] >= *screen_width || point[1] >= *screen_width) break; \n"
" float3 bc_coord = barycentric(vertices3, point); \n"
" if (bc_coord[0] < 0 || bc_coord[1] < 0 || bc_coord[2] < 0) continue; \n"
" \n"
@@ -114,13 +126,18 @@ const char* fragment_shader_kernel_source =
" float3 normalized_norm = normalize(normal); \n"
" \n"
" \n"
" float intensity = clamp((dot(normalized_norm , (float3){light_dir[0], light_dir[1], light_dir[2]})), 0.f, 1.f); \n"
" float intensity = clamp((dot(normalized_norm , (float3){light_dir[0], light_dir[1], light_dir[2]})), 0.f, 1.f) + 0.2; \n"
" \n"
" int color = 0; \n"
" color |= ((int)fmin((float)(diffuse_map[col_index + 0]) * intensity, (float) 0xff)) << 16; \n"
" color |= ((int)fmin((float)(diffuse_map[col_index + 1]) * intensity, (float) 0xff)) << 8; \n"
" color |= ((int)fmin((float)(diffuse_map[col_index + 2]) * intensity, (float) 0xff)) << 0; \n"
" \n"
" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 16; \n"
" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 8; \n"
" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 0; \n"
" \n"
" \n"
" z_buffer[(int) (point[0] + point[1] * *screen_width)] = point[2]; \n"
" pixels [(int) (point[0] + point[1] * *screen_width)] = color;// & 0x00ffffff; \n"
" } \n"