/*
 * read_linear - sum a run of consecutive input elements per work-item.
 *
 * Work-item `gid` (global id in dimension 0) reads input[gid] and then
 * input[gid + 1] .. input[gid + NUM_READS - 1], adds them together in
 * that order, and stores the total in output[gid]. When NUM_READS is 0
 * or 1 the loop body never runs and output[gid] is simply input[gid].
 *
 * input   buffer that is only read; because every work-item reads up to
 *         NUM_READS - 1 elements past its own index, the buffer must hold
 *         at least (largest gid + NUM_READS) elements when NUM_READS >= 1.
 * output  buffer receiving one float per work-item, at index gid.
 *
 * NUM_READS is not defined in this file as seen here; presumably it is
 * supplied as a compile-time constant via the program build options
 * (e.g. -DNUM_READS=...) - confirm against the host code.
 */
kernel void read_linear(global const float *input, global float *output)
{
    const uint gid = get_global_id(0);
    float sum = input[gid];

    /* The directive has to start its own line to be recognised as a
     * preprocessing directive; it asks the compiler to unroll the loop
     * that follows. */
#pragma unroll NUM_READS
    for (uint offset = 1; offset < NUM_READS; offset++) {
        sum += input[gid + offset];
    }

    output[gid] = sum;
}