00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00025 #pragma once
00026
00027 #include "device_settings.hpp"
00028 #include "utilities.hpp"
00029
00030
/**
 * Compile-time loop unroller for device code.
 *
 * Unroller<Begin, End, Step>::step(action) invokes action(Begin),
 * action(Begin+Step), ... for every index that lies strictly before End in
 * the direction of Step. The recursion is terminated by the
 * Unroller<End, End, Step> partial specialization.
 *
 * The next index is clamped to End, so the terminal specialization is always
 * reached even when (End - Begin) is not an exact multiple of Step, when the
 * range is empty in the direction of Step, or when Step is zero. Without the
 * clamp those cases recurse until the compiler's template instantiation
 * limit is hit. Ranges that are an exact multiple of Step behave exactly as
 * before.
 *
 * @tparam Begin  first index passed to the action
 * @tparam End    exclusive end of the index range
 * @tparam Step   signed increment between indices (defaults to 1)
 */
template<int Begin, int End, int Step = 1>
struct Unroller {
    /**
     * Invoke \c action for Begin and, recursively, for the remaining indices.
     *
     * @param action  callable accepting a single int index
     */
    template<typename Action>
    __device__ static void step(const Action& action) {
        // True while Begin still lies before End in the direction of travel.
        const bool in_range = (Step > 0 && Begin < End) || (Step < 0 && Begin > End);

        // Never step past End: landing exactly on End selects the terminal
        // specialization and ends the recursion.
        const int next = !in_range ? End
            : (Step > 0 ? (Begin + Step < End ? Begin + Step : End)
                        : (Begin + Step > End ? Begin + Step : End));

        // in_range is a compile-time constant, so this branch costs nothing
        // at run time.
        if(in_range) {
            action(Begin);
            Unroller<next, End, Step>::step(action);
        }
    }
};
00063
/**
 * Terminal case of Unroller: selected when the current index equals End.
 * step() does nothing, which ends the compile-time recursion.
 */
template<int End, int Step>
struct Unroller<End, End, Step> {
    template<typename Action>
    __device__ static void step(const Action&) {
        // Intentionally empty: no index is left to visit.
    }
};
00070
00071
/**
 * Maps a run-time integer onto a compile-time template argument.
 *
 * Calling choose<T,N,MAXN,B,P>()(n, x) returns T<n>::choose(x) when n equals
 * N or lies in (N, MAXN]; the candidates N, N+1, ..., MAXN are tried one
 * after another through recursive instantiation. For any other n a
 * value-initialised B is returned (nothing when B is void).
 *
 * @tparam T     class template taking an int, providing a static choose()
 * @tparam N     candidate value tested by this instantiation
 * @tparam MAXN  largest candidate value that is tried
 * @tparam B     return type of the dispatch
 * @tparam P     type of the argument forwarded to T<n>::choose
 */
template<template<int> class T,int N,int MAXN,class B,class P>
struct choose {
    B operator ()(const int& n, const P& x){
        // Exact hit: hand over to the implementation compiled for N.
        if(n == N)
            return T<N>::choose(x);

        // Below the current candidate or above the upper bound: no match.
        if(n < N || n > MAXN)
            return B();

        // Otherwise try the next candidate; the index is capped at MAXN so
        // the chain of instantiations stays finite.
        return choose<T,(N<MAXN ? N+1 : MAXN),MAXN,B,P>()(n,x);
    }
};
00108
00109 namespace swarm {
00110
00111
/**
 * Tag type that carries an integer as a compile-time constant.
 *
 * The value is available as compile_time_params_t<i>::n. Within this file
 * it is compared against the ensemble's body count and handed to the
 * integrator's kernel.
 */
template<int i>
struct compile_time_params_t {
    /// The integer this instantiation was created with.
    static const int n = i;
};
00119
00120
/**
 * Kernel entry point shared by all integrators: it only forwards to the
 * integrator's own kernel() member.
 *
 * @param integ               integrator object; it is dereferenced on the
 *                            device (the launchers in this file pass a
 *                            cudaMalloc'ed copy)
 * @param compile_time_param  tag object passed through to kernel()
 */
template<class implementation,class T>
__global__ void generic_kernel(implementation* integ,T compile_time_param) {
    implementation& self = *integ;
    self.kernel(compile_time_param);
}
00128
/**
 * Launch generic_kernel for one specific compile-time body count.
 *
 * The kernel is launched only when the ensemble's number of bodies equals
 * T::n; otherwise an error is reported through ERROR. Grid size, block size
 * and dynamic shared memory size are taken from the host-side integrator.
 *
 * The launch is followed by a cudaGetLastError() check so that an invalid
 * launch configuration is reported instead of being silently ignored.
 *
 * @param integ               host-side integrator, queried for the launch
 *                            configuration
 * @param gpu_integ           pointer handed to the kernel
 * @param compile_time_param  tag object whose type provides T::n
 */
template< class implementation, class T>
void launch_template(implementation* integ, implementation* gpu_integ, T compile_time_param)
{
    // Read once; used both for the check and for the error message.
    const int nbod = integ->get_ensemble().nbod();

    if(nbod == T::n) {
        generic_kernel<<<integ->gridDim(), integ->threadDim(), integ->shmemSize() >>>(gpu_integ,compile_time_param);
        // A kernel launch does not return a status; fetch it explicitly.
        cudaErrCheck ( cudaGetLastError() );
    } else {
        ERROR("Error launching kernel. Active ensemble has " + inttostr(nbod) + " bodies per system.\n");
    }
}
00141
00142
/**
 * Launcher for a fixed compile-time body count N; meant to be selected
 * through the choose<> dispatcher.
 *
 * choose() works out the launch geometry and starts generic_kernel:
 *  - systems per block: the integrator's override if non-zero, otherwise
 *    the result of optimized_system_per_block();
 *  - block dimensions: x = systems per block, y = threads per system;
 *  - number of blocks: ceil(nsys / systems per block), distributed over
 *    gridDim.x / gridDim.y by find_best_factorization();
 *  - dynamic shared memory: systems per block * shared memory per system.
 */
template<int N>
struct launch_template_choose {
    /**
     * @param p  pair of integrator pointers: p.first is used on the host to
     *           query the configuration, p.second is passed to the kernel
     * @throws runtime_error if the number of systems per block is not
     *         positive or if check_cuda_limits() rejects the block size /
     *         shared memory size
     */
    template<class integ_pair>
    static void choose(integ_pair p){
        compile_time_params_t<N> ctp;
        typename integ_pair::first_type integ = p.first;

        int sys_p_block = integ->override_system_per_block();
        const int nsys = integ->get_ensemble().nsys();
        const int tps = integ->thread_per_system(ctp);
        const int shm = integ->shmem_per_system(ctp);

        // An override of 0 means "not set": fall back to the computed value.
        if(sys_p_block == 0){
            sys_p_block = optimized_system_per_block(SHMEM_CHUNK_SIZE, tps, shm);
        }

        // sys_p_block is used as a divisor below. A zero value (e.g. if the
        // optimizer cannot fit a single system -- NOTE(review): confirm
        // whether it can return 0) would otherwise divide by zero.
        if(sys_p_block <= 0){
            throw runtime_error("The number of systems per block must be positive");
        }

        // Round up so that a partially filled last block is still launched.
        const int nblocks = ( nsys + sys_p_block - 1 ) / sys_p_block;
        const int shmemSize = sys_p_block * shm;

        dim3 gridDim;
        find_best_factorization(gridDim.x,gridDim.y,nblocks);

        dim3 threadDim;
        threadDim.x = sys_p_block;
        threadDim.y = tps;

        int blocksize = threadDim.x * threadDim.y;
        if(!check_cuda_limits(blocksize, shmemSize )){
            throw runtime_error("The block size settings exceed CUDA requirements");
        }

        // No systems means no blocks; a launch would have nothing to do.
        if(nblocks <= 0){
            return;
        }

        generic_kernel<<<gridDim, threadDim, shmemSize>>>(p.second, ctp);
        // A kernel launch does not return a status; fetch it explicitly so
        // an invalid configuration is reported rather than ignored.
        cudaErrCheck ( cudaGetLastError() );
    }
};
00201
00202
/**
 * Copy an integrator object to the device and launch the kernel compiled
 * for the ensemble's number of bodies.
 *
 * The integrator is copied byte-wise into freshly allocated device memory,
 * the matching launch_template_choose<N> is selected at run time through
 * choose<>, and the device copy is released afterwards. The device copy is
 * released on the exception path as well, and the status of the final
 * cudaFree is checked instead of being discarded.
 *
 * If the ensemble has more than MAX_NBODIES bodies per system an error is
 * reported through ERROR.
 *
 * NOTE(review): the dispatch starts at 3 bodies. For fewer bodies choose<>
 * returns without calling any launcher, so no kernel is started and no
 * error is reported -- confirm whether that is intended.
 *
 * @param integ  host-side integrator to run
 */
template<class implementation>
void launch_templatized_integrator(implementation* integ){

    if(integ->get_ensemble().nbod() <= MAX_NBODIES){
        implementation* gpu_integ = 0;
        cudaErrCheck ( cudaMalloc(&gpu_integ,sizeof(implementation)) );

        try {
            cudaErrCheck ( cudaMemcpy(gpu_integ,integ,sizeof(implementation),cudaMemcpyHostToDevice) );

            typedef std::pair<implementation*,implementation*> integ_pair ;
            integ_pair p ( integ, gpu_integ );
            int nbod = integ->get_ensemble().nbod();

            choose< launch_template_choose, 3, MAX_NBODIES, void, integ_pair > c;
            c( nbod, p );
        } catch(...) {
            // The launcher can throw (e.g. block size limits exceeded);
            // release the device copy before passing the exception on. The
            // status is deliberately not checked here so that the original
            // exception is the one the caller sees.
            cudaFree(gpu_integ);
            throw;
        }

        cudaErrCheck ( cudaFree(gpu_integ) );
    } else {
        char b[100];
        snprintf(b,100,"Invalid number of bodies. (Swarm-NG was compiled with MAX_NBODIES = %d bodies per system.)",MAX_NBODIES);
        ERROR(b);
    }

}
00239
00240
00241
00242 }