egral_img[(y + p) * width + x + p]; } float4 patch_diff = convert_float4(d + a - c - b); float4 w = native_exp(-patch_diff / (h * h)); float w_sum = w.x + w.y + w.z + w.w; weight[y * width + x] += w_sum; sum[y * width + x] += dot(w, convert_float4(src_pix)); }

/*
 * Produce the final output pixel from the accumulated weighted sum and
 * total weight of each pixel.
 *
 * dst    - output image; its dimensions define the valid pixel range.
 * src    - input image; its .x channel is blended in with a weight of 1.
 * sum    - per-pixel accumulated weighted values, indexed y * dim.x + x.
 * weight - per-pixel accumulated weights, indexed y * dim.x + x.
 *
 * Each work-item handles the pixel at (get_global_id(0), get_global_id(1)).
 * Work-items outside the image do nothing.
 */
kernel void average(__write_only image2d_t dst,
                    __read_only image2d_t src,
                    global float *sum, global float *weight)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    int2 dim = get_image_dim(dst);

    /*
     * Bail out before touching any memory: if the global work size is
     * larger than the image, indexing sum/weight with an out-of-range
     * (x, y) would read outside the buffers.
     */
    if (x >= dim.x || y >= dim.y)
        return;

    int idx = y * dim.x + x;
    float w = weight[idx];
    float s = sum[idx];
    float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;

    /*
     * The source pixel is scaled by 255 before being added to the sum and
     * the result is scaled back by 1/255 -- presumably because `sum` is
     * accumulated in the 0..255 range; confirm against the accumulating
     * kernel.  The 1.0f in the denominator is the source pixel's own weight.
     */
    float r = (s + src_pix * 255.0f) / (1.0f + w) / 255.0f;

    write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));
}