helpers.hpp

Go to the documentation of this file.
00001 /*************************************************************************
00002  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team  *
00003  *                                                                       *
00004  * This program is free software; you can redistribute it and/or modify  *
00005  * it under the terms of the GNU General Public License as published by  *
00006  * the Free Software Foundation; either version 3 of the License.        *
00007  *                                                                       *
00008  * This program is distributed in the hope that it will be useful,       *
00009  * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00010  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00011  * GNU General Public License for more details.                          *
00012  *                                                                       *
00013  * You should have received a copy of the GNU General Public License     *
00014  * along with this program; if not, write to the                         *
00015  * Free Software Foundation, Inc.,                                       *
00016  * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00017  ************************************************************************/
00018 
00025 #pragma once
00026 
00027 #include "device_settings.hpp"
00028 #include "utilities.hpp"
00029 
00030 
/**
 * Compile-time loop unroller built on recursive template instantiation.
 *
 * step() invokes the action with Begin and then hands over to
 * Unroller<Begin+Step, End, Step>, so the action is called with
 * Begin, Begin+Step, Begin+2*Step, ... in that order.
 *
 * This primary template has no stopping condition of its own: the chain
 * ends only when an instantiation with Begin == End is reached (handled by
 * the partial specialization Unroller<End, End, Step>). Consequently
 * (End - Begin) must be a non-negative multiple of Step, otherwise the
 * instantiation never reaches the terminating case.
 *
 * @tparam Begin  Index handed to the action by this stage.
 * @tparam End    Index at which the chain stops.
 * @tparam Step   Increment between consecutive indices (defaults to 1).
 */
template<int Begin, int End, int Step = 1>
struct Unroller {
    /**
     * Run this stage and all following stages.
     *
     * @param fn  Callable invoked as fn(int); it is passed by const
     *            reference, so its call operator must be const-callable.
     */
    template<typename Functor>
    __device__ static void step(const Functor& fn) {
        // The remaining stages start one Step further on.
        typedef Unroller<Begin + Step, End, Step> Rest;

        fn(Begin);
        Rest::step(fn);
    }
};
00063 
/**
 * Terminating stage of Unroller, selected when Begin equals End.
 *
 * step() performs no work and instantiates no further stage, which ends the
 * recursive chain started by the primary template.
 */
template<int End, int Step>
struct Unroller<End, End, Step> {
    /// Accepts the action for interface compatibility but never calls it.
    template<typename Functor>
    __device__ static void step(const Functor&) {
        // Nothing left to unroll.
    }
};
00070 
00071 
/**
 * Maps a run-time integer onto a compile-time template argument.
 *
 * Calling the functor with n looks for the value n among the compile-time
 * candidates N, N+1, ..., MAXN and, when found, returns T<n>::choose(x).
 * Each instantiation tests a single candidate and forwards to the
 * instantiation for the next one; the candidate index is clamped at MAXN so
 * the chain of instantiations is finite.
 *
 * @tparam T     Class template taking an int; T<k> must provide a static
 *               choose(x) whose result is convertible to B.
 * @tparam N     Candidate tested by this instantiation.
 * @tparam MAXN  Largest candidate that is tried.
 * @tparam B     Return type; B() is returned when n matches no candidate
 *               (for B = void nothing is returned).
 * @tparam P     Type of the argument forwarded to T<k>::choose.
 */
template<template<int> class T,int N,int MAXN,class B,class P>
struct choose {
    /**
     * @param n  Run-time value to match against the candidates.
     * @param x  Argument forwarded unchanged to the selected T<k>::choose.
     * @return   Result of T<n>::choose(x), or B() if n is outside [N, MAXN].
     */
    B operator ()(const int& n, const P& x){
        if(n != N){
            // Below this candidate or beyond the last one: nothing can match.
            if(n < N || n > MAXN)
                return B();

            // Try the next candidate (clamped so instantiation stops at MAXN).
            typedef choose<T,(N<MAXN ? N+1 : MAXN),MAXN,B,P> next_candidate;
            return next_candidate()(n,x);
        }
        return T<N>::choose(x);
    }
};
00108 
00109 namespace swarm {
00110 
00111 
/**
 * Tag type that carries an integer as a compile-time constant.
 *
 * An (empty) object of this type can be passed as a function or kernel
 * argument so that the callee can recover the value through its type,
 * as T::n.
 *
 * @tparam Value  The integer exposed as the static member n.
 */
template<int Value>
struct compile_time_params_t {
    /// The template argument, readable as a constant expression.
    static const int n = Value;
};
00119 
00120 
/**
 * Generic kernel entry point: forwards execution to the integrator's own
 * kernel() member function.
 *
 * @tparam implementation  Integrator type providing kernel(T).
 * @tparam T               Type of the compile-time parameter object.
 *
 * @param integ               Integrator object. It is dereferenced in device
 *                            code, so the pointer must be valid on the device.
 * @param compile_time_param  Passed by value to implementation::kernel.
 */
template<class implementation, class T>
__global__ void generic_kernel(implementation* integ, T compile_time_param) {
    implementation& self = *integ;
    self.kernel(compile_time_param);
}
00128 
/**
 * Launch generic_kernel for an integrator whose ensemble has exactly T::n
 * bodies per system.
 *
 * The launch configuration (grid, block, dynamic shared memory size) is
 * taken from integ->gridDim(), integ->threadDim() and integ->shmemSize().
 * If the ensemble's body count differs from T::n, no kernel is launched and
 * the problem is reported through ERROR.
 *
 * A kernel launch does not return a status by itself, so the launch is
 * followed by a cudaGetLastError() check; otherwise an invalid launch
 * configuration would go unnoticed.
 *
 * @param integ               Host-side integrator, queried for the ensemble
 *                            and the launch configuration.
 * @param gpu_integ           Pointer handed to the kernel; it is dereferenced
 *                            on the device (presumably a device copy of
 *                            *integ -- confirm with the caller).
 * @param compile_time_param  Compile-time parameter object forwarded to the
 *                            kernel.
 */
template< class implementation, class T>
void launch_template(implementation* integ, implementation* gpu_integ, T compile_time_param)
{
    const int nbod = integ->get_ensemble().nbod();

    if(nbod == T::n) {
        generic_kernel<<<integ->gridDim(), integ->threadDim(), integ->shmemSize() >>>(gpu_integ,compile_time_param);
        // Surface launch-configuration errors right away.
        cudaErrCheck( cudaGetLastError() );
    } else {
        ERROR("Error launching kernel.  Active ensemble has " + inttostr(nbod) + " bodies per system.\n");
    }
}
00141 
00142 
/**
 * Launcher for an integrator kernel specialized for N bodies per system.
 *
 * Written as a class template with a static choose() so that it can be used
 * as the T argument of the run-time dispatcher ::choose.
 *
 * @tparam N  Number of bodies per system the kernel is instantiated for.
 */
template<int N>
struct launch_template_choose {
    /**
     * Work out the launch configuration and launch generic_kernel.
     *
     * The block is laid out as (systems per block) x (threads per system);
     * the number of blocks needed to cover all systems is split over
     * grid.x and grid.y by find_best_factorization.
     *
     * @param p  Pair of integrator pointers: p.first is used on the host to
     *           query the configuration, p.second is passed to the kernel.
     *
     * @throws runtime_error  if the number of systems per block is not
     *                        positive, or if check_cuda_limits rejects the
     *                        block size / shared memory size.
     */
    template<class integ_pair>
    static void choose(integ_pair p){
        compile_time_params_t<N> ctp;
        typename integ_pair::first_type integ = p.first;

        int sys_p_block = integ->override_system_per_block();
        const int nsys = integ->get_ensemble().nsys();
        const int tps = integ->thread_per_system(ctp);
        const int shm = integ->shmem_per_system(ctp);

        // An override of 0 means "not set": compute a value instead.
        if(sys_p_block == 0){
            sys_p_block = optimized_system_per_block(SHMEM_CHUNK_SIZE, tps, shm);
        }

        // sys_p_block is a divisor and a block dimension below; a value
        // <= 0 would divide by zero or yield a nonsensical configuration.
        if(sys_p_block <= 0){
            throw runtime_error("The number of systems per block must be positive");
        }

        // Ceiling division: enough blocks to cover every system.
        const int nblocks = ( nsys + sys_p_block - 1 ) / sys_p_block;
        const int shmemSize = sys_p_block * shm;

        dim3 grid;
        find_best_factorization(grid.x,grid.y,nblocks);

        dim3 block;
        block.x = sys_p_block;
        block.y = tps;

        const int blocksize = block.x * block.y;
        if(!check_cuda_limits(blocksize, shmemSize )){
            throw runtime_error("The block size settings exceed CUDA requirements");
        }

        generic_kernel<<<grid, block, shmemSize>>>(p.second, ctp);
        // A launch reports no status by itself; surface bad configurations now.
        cudaErrCheck( cudaGetLastError() );
    }
};
00201 
00202 
/**
 * Copy an integrator to the device and launch the kernel instantiation that
 * matches the ensemble's number of bodies per system.
 *
 * The run-time body count is mapped onto a compile-time instantiation of
 * launch_template_choose through ::choose, which tries the values
 * 3 .. MAX_NBODIES. Body counts outside that range are reported through
 * ERROR; previously a count below 3 passed the range check but matched no
 * instantiation, so nothing was launched and no error was raised.
 *
 * The temporary device copy is released on every path, including when the
 * copy or the launch throws, and the result of the final cudaFree is
 * checked (cudaFree may also report errors from earlier asynchronous work).
 *
 * @param integ  Host-side integrator; it is copied bytewise to the device
 *               with cudaMemcpy(sizeof(implementation)).
 */
template<class implementation>
void launch_templatized_integrator(implementation* integ){

    // Smallest body count for which a kernel is instantiated; shared by the
    // range check and the dispatcher so the two cannot drift apart.
    const int min_nbod = 3;
    const int nbod = integ->get_ensemble().nbod();

    if(nbod >= min_nbod && nbod <= MAX_NBODIES){
        implementation* gpu_integ;
        cudaErrCheck ( cudaMalloc(&gpu_integ,sizeof(implementation)) );

        try {
            cudaErrCheck ( cudaMemcpy(gpu_integ,integ,sizeof(implementation),cudaMemcpyHostToDevice) );

            typedef std::pair<implementation*,implementation*> integ_pair ;
            integ_pair p ( integ, gpu_integ );

            choose< launch_template_choose, min_nbod, MAX_NBODIES, void, integ_pair > c;
            c( nbod, p );
        } catch(...) {
            // Already failing: release the device copy, keep the original error.
            cudaFree(gpu_integ);
            throw;
        }

        cudaErrCheck ( cudaFree(gpu_integ) );
    } else {
        char b[160];
        snprintf(b,sizeof(b),"Invalid number of bodies: %d. (Swarm-NG was compiled for %d to MAX_NBODIES = %d bodies per system.)",nbod,min_nbod,MAX_NBODIES);
        ERROR(b);
    }

}
00239 
00240 
00241         
00242 }

doxygen