slm: OpenCL code base  0.1
integration.cl
Go to the documentation of this file.
1 
/*
 * Reverse the byte order of a 32-bit value in place.
 *
 * The argument must be a modifiable lvalue; it is evaluated several times,
 * so it must not have side effects (e.g. do not pass `*p++`).
 * Byte 0 <-> byte 3 and byte 1 <-> byte 2 are swapped, so that
 * 0x12345678 becomes 0x78563412.
 *
 * The expansion is a complete assignment statement and keeps its own
 * terminating semicolon, so existing call sites (with or without a trailing
 * `;`) continue to compile. Do not use it as the unbraced body of an
 * `if ... else`.
 */
#define BYTE_REVERSAL(initial_rng_state) \
    (initial_rng_state) = (((initial_rng_state) >> 24) & 0xff) \
                        | (((initial_rng_state) >>  8) & 0xff00) \
                        | (((initial_rng_state) <<  8) & 0xff0000) \
                        | (((initial_rng_state) << 24) & 0xff000000);
30 
31 
32 
/*
 * Initialize the Lehmer random number generator.
 *
 * Sets the state to i+(j+seed_idx), byte-reverses it, passes its address to
 * lehmer_rand_uint() once (presumably advancing the state in place — confirm
 * against its definition), and byte-reverses the result again.
 *
 * NOTE: this macro is not hygienic. Besides its argument it reads `i`, `j`
 * and `seed_idx` from the scope in which it is expanded; all three must hold
 * defined values at the point of expansion.
 *
 * An earlier seeding formula, kept here for reference only (disabled):
 *   i+(j+seed_idx*SUBPIXEL_SEED_POINT_DENSITY)*SUBPIXEL_SEED_POINT_DENSITY
 *
 * The expansion ends with its own semicolon, matching BYTE_REVERSAL, so
 * existing call sites are unaffected.
 */
#define INITIALIZE_RNG(initial_rng_state) \
    (initial_rng_state) = i+(j+seed_idx); \
    BYTE_REVERSAL(initial_rng_state); \
    lehmer_rand_uint(&(initial_rng_state)); \
    BYTE_REVERSAL(initial_rng_state);
48 
49 #ifdef KERNEL_INTEGRATE_TRAJECTORY
// integrate_trajectory: per-work-item driver for streamline tracing.
//
// Each work item flattens its 2D global id into global_id, offsets it by
// SEEDS_CHUNK_OFFSET to obtain seed_idx, and then:
//   1. calls trajectory_record() once from seed_point_array[seed_idx];
//   2. seeds the RNG state with INITIALIZE_RNG();
//   3. calls trajectory_jittered() once per (i,j) pair of a
//      SUBPIXEL_SEED_POINT_DENSITY x SUBPIXEL_SEED_POINT_DENSITY loop nest.
//
// Parameters (as used in this body):
//   seed_point_array   - read at index seed_idx
//   mask_array         - forwarded to trajectory_record/trajectory_jittered
//   uv_array           - forwarded to trajectory_record/trajectory_jittered
//   trajectories_array - the slice starting at global_id*MAX_N_STEPS is
//                        forwarded to trajectory_record
//   traj_nsteps_array  - forwarded to trajectory_record
//   traj_length_array  - forwarded to trajectory_record
//   slc_array          - forwarded to trajectory_jittered
//   slt_array          - forwarded to trajectory_jittered
50 __kernel void integrate_trajectory( __global const float2 *seed_point_array,
89  __global const bool *mask_array,
90  __global const float2 *uv_array,
91  __global char2 *trajectories_array,
92  __global ushort *traj_nsteps_array,
93  __global float *traj_length_array,
94  __global uint *slc_array,
95  __global uint *slt_array )
96 {
97  // global_id plus the chunk SEEDS_CHUNK_OFFSET is a seed point index
98  const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u),
99  seed_idx = (SEEDS_CHUNK_OFFSET)+global_id,
100  trajectory_index = global_id*(MAX_N_STEPS);
101  const float2 current_seed_point_vec = seed_point_array[seed_idx];
102  __global char2 *trajectory_vec = &trajectories_array[trajectory_index];
// NOTE(review): i and j are declared here without initializers and are first
// assigned by the for-loops further down.
103  __private uint i,j, initial_rng_state;
104 
105  // Report how kernel instances are distributed
// Only the work item whose seed_idx is 0 prints; the values are for dimension 0.
// NOTE(review): get_local_size()/get_num_groups() return size_t in OpenCL C
// while %d formats an int — confirm the conversion specifier.
106  if (seed_idx==0) {
107  printf("On GPU/OpenCL device: #workitems=%d #workgroups=%d\n",
108  get_local_size(0u), get_num_groups(0u));
109  }
110 
111  // Trace a "smooth" streamline from the seed point coordinate
112  trajectory_record( uv_array, mask_array, traj_nsteps_array, traj_length_array,
113  trajectory_vec, global_id, seed_idx,
114  seed_point_array[seed_idx]);
115 
116  // Trace a set of streamlines from a grid of sub-pixel positions centered
117  // on the seed point
118  // Generate an initial RNG state (aka 'seed the RNG')
119  // [was: using the sum of the current pixel index and the sub-pixel index]
120  // using the current pixel index ("seed_idx")
121  // byte-reversed per GJS suggestion
// NOTE(review): INITIALIZE_RNG expands to `initial_rng_state = i+(j+seed_idx)`,
// so it reads i and j here, before either has been assigned. This contradicts
// the comment above (seed from seed_idx only) — confirm the intended seeding.
122  INITIALIZE_RNG(initial_rng_state);
123  for (j=0u;j<SUBPIXEL_SEED_POINT_DENSITY;j++) {
124  for (i=0u;i<SUBPIXEL_SEED_POINT_DENSITY;i++){
125  // Trace a jittered streamline from a sub-pixel-offset first point
126  trajectory_jittered(uv_array, mask_array, slc_array, slt_array,
127  global_id, seed_idx,
128  current_seed_point_vec + (float2)(
// NOTE(review): this listing skips source lines 129-130, so the two components
// of the float2 sub-pixel offset are not shown here.
131  initial_rng_state);
132  }
133  }
134 }
135 #endif
__kernel void integrate_trajectory(__global const float2 *seed_point_array, __global const bool *mask_array, __global const float2 *uv_array, __global char2 *trajectories_array, __global ushort *traj_nsteps_array, __global float *traj_length_array, __global uint *slc_array, __global uint *slt_array)
GPU kernel that drives streamline integration from seed positions given in seed_point_array, controlled by the 'flow' vector field given in uv_array. Each streamline is terminated either when it reaches a pixel masked in mask_array or when it exceeds a threshold distance (length or number of integration points) given by parameters stored in info.
Definition: integration.cl:88
#define SUBPIXEL_SEED_STEP
Definition: info.h:84
#define SUBPIXEL_SEED_POINT_DENSITY
Definition: info.h:82
#define SUBPIXEL_SEED_HALFSPAN
Definition: info.h:83
static void trajectory_jittered(__global const float2 *uv_array, __global const bool *mask_array, __global uint *slc_array, __global uint *slt_array, const uint global_id, const uint seed_idx, const float2 current_seed_point_vec, const uint initial_rng_state)
Integrate a jittered flow path downstream or upstream.
Definition: trajectory.cl:107
#define SEEDS_CHUNK_OFFSET
Definition: info.h:17
#define INITIALIZE_RNG(initial_rng_state)
Initialize the Lehmer random number generator.
Definition: integration.cl:41
#define MAX_N_STEPS
Definition: info.h:49
static void trajectory_record(__global const float2 *uv_array, __global const bool *mask_array, __global ushort *traj_nsteps_array, __global float *traj_length_array, __global char2 *trajectory_vec, const uint global_id, const uint seed_idx, const float2 current_seed_point_vec)
Integrate a streamline downstream or upstream; record the trajectory.
Definition: trajectory.cl:40