Commit 8ea2ce68 authored by dino's avatar dino
Browse files

New interface to the GPU implementation of the Grid Search to simplify the...

New interface to the GPU implementation of the Grid Search to simplify the parallel processing of multiple events

git-svn-id: svn://gal-serv.lnl.infn.it/agata/trunk/narval_emulator@912 170316e4-aea8-4b27-aad4-0380ec0519c9
parent 332757a6
......@@ -16,7 +16,7 @@
#define WCT_BUFSIZE_ANC ( 256*1024) // for the Ancillary producer (must be < WCT_BUFSIZE)
#define WCT_BUFSIZE_AFP ( 1*1024*1024) // for the basic producer (must be < WCT_BUFSIZE)
#define WCT_THREADED // to run the various chains in parallel on a many-core system
//#define WCT_THREADED // to run the various chains in parallel on a many-core system
#include "commonDefs.h"
......
......@@ -120,7 +120,7 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\PRISMA\src\lib_prisma\include;&quot;C:\Program Files (x86)\boost\boost_1_40&quot;;..\myADF0.2;..\myADF0.2\standalone;..\common;..\producers\Crystal;..\producers\Crystal\includeATCA;..\producers\AncillaryTCP;..\filters\Preprocessing;..\filters\Preprocessing\includePrePSA;..\filters\Ancillary;..\filters\Ancillary\includeVME;..\filters\PSA;..\filters\PSA\includePSA;..\filters\Tracking;..\filters\Tracking\includeOFT;..\builders;C:\root\include"
AdditionalIncludeDirectories="..\PRISMA\src\lib_prisma\include;&quot;C:\Program Files (x86)\boost\boost_1_40&quot;;..\myADF0.2;..\myADF0.2\standalone;..\common;..\producers\Crystal;..\producers\Crystal\includeATCA;..\producers\AncillaryTCP;..\filters\Preprocessing;..\filters\Preprocessing\includePrePSA;..\filters\Ancillary;..\filters\Ancillary\includeVME;..\filters\PSA;..\filters\PSA\includePSA;..\filters\Tracking;..\filters\Tracking\includeOFT;..\filters\Tracking\includeMGT;..\builders;C:\root\include"
PreprocessorDefinitions="WIN64;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;NRV_OFFLINE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
......@@ -143,7 +143,7 @@
Name="VCLinkerTool"
UseLibraryDependencyInputs="true"
LinkIncremental="2"
AdditionalLibraryDirectories="C:\Boost\lib64"
AdditionalLibraryDirectories="C:\Boost\lib64-VC90"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
......@@ -197,7 +197,7 @@
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="..\PRISMA\src\lib_prisma\include;&quot;C:\Program Files (x86)\boost\boost_1_40&quot;;..\myADF0.2;..\myADF0.2\standalone;..\common;..\producers\Crystal;..\producers\Crystal\includeATCA;..\producers\AncillaryTCP;..\filters\Preprocessing;..\filters\Preprocessing\includePrePSA;..\filters\Ancillary;..\filters\Ancillary\includeVME;..\filters\PSA;..\filters\PSA\includePSA;..\filters\Tracking;..\filters\Tracking\includeOFT;..\builders;C:\root\include"
AdditionalIncludeDirectories="..\PRISMA\src\lib_prisma\include;&quot;C:\Program Files (x86)\boost\boost_1_40&quot;;..\myADF0.2;..\myADF0.2\standalone;..\common;..\producers\Crystal;..\producers\Crystal\includeATCA;..\producers\AncillaryTCP;..\filters\Preprocessing;..\filters\Preprocessing\includePrePSA;..\filters\Ancillary;..\filters\Ancillary\includeVME;..\filters\PSA;..\filters\PSA\includePSA;..\filters\Tracking;..\filters\Tracking\includeOFT;..\filters\Tracking\includeMGT;..\builders;C:\root\include"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;NRV_OFFLINE"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
......
......@@ -424,7 +424,7 @@ void EventBuilder::process_block( int ichain,
opos = 5; // where to start writing next event, relative to obuf
}
*used_osize = osiz*4 ;
*used_osize = osiz*4;
*error_code = 0;
LOCK_COUT;
......
......@@ -122,7 +122,7 @@ const int defTriggerSample = 10; // 60-10 useful samples passed to the PS
//////// PSAFilter ////////
///////////////////////////
//#define TCOUNT 5 // number of threads to use to decompose signals
#define TCOUNT 2 // number of threads to use to decompose signals (commented out or <1 ==> no threads)
#define TMODULO 100 // how many events are distributed to each thread
#define USEADAPTIVE // to use the coarse-fine grid search
#define PSA_LOCALSPECTRA // enable nDhist spectra
......
......@@ -58,7 +58,7 @@ PSAFilter::PSAFilter() :
PSAFilter::~PSAFilter()
{
for(int slot = 0; slot < TCOUNT*TMODULO; slot++) {
for(int slot = 0; slot < TSLOTS; slot++) {
DD[slot].Destroy();
}
}
......@@ -153,7 +153,7 @@ void PSAFilter::process_initialise (UInt_t *error_code)
// properly initialised, it should set the state to kIdle so that the data could be treated
fFrameIO.SetStatus(BaseFrameIO::kIdle);
for(int slot = 0; slot < TCOUNT*TMODULO; slot++) {
for(int slot = 0; slot < TSLOTS; slot++) {
DD[slot].InitInput(fTraceLengthPSA);
}
......@@ -167,7 +167,7 @@ void PSAFilter::process_initialise (UInt_t *error_code)
cServer.Start(gMotherClass);
#if TCOUNT > 1
#if TCOUNT > 0
// launch the threads of the local chains
cout << crystal_id << "-Grid Search called using " << TCOUNT << " THREADS with blocks of " << TMODULO << " EVENTS" << endl;
for(int slot = 0; slot < TCOUNT ; slot++) {
......@@ -343,7 +343,7 @@ Int_t PSAFilter::SetOutput(int slot)
return nwritten ? 0 : 1;
}
Int_t PSAFilter::Process(int slot)
Int_t PSAFilter::Process(int slot, int nslots)
{
Int_t result = 0;
//cout << "WARNING!! Empty Process()" << endl << flush;
......@@ -362,7 +362,7 @@ void PSAFilter::process_block( void *input_buffer, UInt_t size_of_input_buffe
*used_size_of_output_buffer = fOublock.GetSize();
}
#if TCOUNT > 1
#if TCOUNT > 0
// this is the threaded version
......@@ -571,9 +571,9 @@ UInt_t PSAFilter::ProcessBlock (ADF::FrameBlock &in, ADF::FrameBlock &out)
return error_code;
}
#else // TCOUNT > 1
#else // TCOUNT > 0
// this is the non-threaded version
// this is the non-threaded version i.e. TCOUNT == 0
UInt_t PSAFilter::ProcessBlock (ADF::FrameBlock &in, ADF::FrameBlock &out)
{
......@@ -640,7 +640,7 @@ UInt_t PSAFilter::ProcessBlock (ADF::FrameBlock &in, ADF::FrameBlock &out)
return error_code;
}
#endif // TCOUNT == 1
#endif // TCOUNT == 0
void PSAFilter::process_reset (UInt_t *error_code)
{
......@@ -701,7 +701,7 @@ cout << "\n" << crystal_id << "-PSAFilter::process_unload called with GetPID()"
*error_code = 0;
} */
#if TCOUNT > 1
#if TCOUNT > 0
void tProcess::operator()()
{
......@@ -716,14 +716,20 @@ void tProcess::operator()()
}
}
// do the job
for(int nnn = 0; nnn < pslot->Count; nnn++) {
int sslot = slot*TMODULO + nnn;
// do the job; both ways of calling PSAFilterGridSearch::Process() are valid
int retval = ppsa->Process(sslot);
// calling the PSAFilterGridSearch::Process() one event at a time
//for(int nnn = 0; nnn < pslot->Count; nnn++) {
// int sslot = slot*TMODULO + nnn;
ppsa->DD[sslot].retValue = retval;
}
// int retval = ppsa->Process(sslot, 1);
// ppsa->DD[sslot].retValue = retval;
//}
// calling the PSAFilterGridSearch::Process() only once for the whole block of events
int retval = ppsa->Process(slot*TMODULO, pslot->Count);
{
// notify job done
......@@ -735,4 +741,4 @@ void tProcess::operator()()
}
#endif // TCOUNT > 1
#endif // TCOUNT > 0
......@@ -26,20 +26,25 @@
#include <ctime>
#ifndef TCOUNT // this must be defined
# define TCOUNT 1
# define TCOUNT 0
#endif
#ifndef TMODULO // this must be defined too
# define TMODULO 50
# define TMODULO 1
#endif
#if TCOUNT <= 1
#define TSLOTS (TCOUNT*TMODULO)
#if TCOUNT < 1
# undef TCOUNT
# define TCOUNT 1
# define TCOUNT 0
# undef TMODULO
# define TMODULO 1
# undef TSLOTS
# define TSLOTS 1
#endif
#if TCOUNT > 1
#if TCOUNT > 0
# include <boost/thread/thread.hpp>
# include <boost/thread/mutex.hpp>
# include <boost/thread/condition.hpp>
......@@ -58,7 +63,7 @@ struct CSlot
int State; // 0(free), 1(activated), 2(finished)
int Count; // number of events to decompose
};
#endif // TCOUNT > 1
#endif // TCOUNT > 0
// the result of PSA for each hit segment
struct PsaOut_t
......@@ -217,7 +222,7 @@ protected:
public:
// The data-interface to the GridSearch algorithm
PsaData DD[TCOUNT*TMODULO];
PsaData DD[TSLOTS];
protected:
ADF::GeSegment *seg;
......@@ -268,7 +273,7 @@ public:
//! to copy the result of the algorithm into the frame through ADFObjects
virtual Int_t SetOutput(int slot = 0);
//! Overload with your own PSA algo.
virtual Int_t Process(int slot = 0);
virtual Int_t Process(int slot = 0, int nslots = 1);
//! Overload in PSA algo to produce spectra and diagnostics in a thread-safe way
virtual Int_t PostProcess(int slot = 0) {return 0;}
//! to init globals (static) from a directory
......@@ -291,16 +296,16 @@ public:
virtual void process_resume (UInt_t *error_code);
//virtual void process_unload (UInt_t *error_code);
#if TCOUNT > 1
#if TCOUNT > 0
CSlot Slot[TCOUNT];
#endif // TCOUNT > 1
#endif // TCOUNT > 0
private:
void GetParameters(UInt_t *error_code);
};
#if TCOUNT > 1
#if TCOUNT > 0
class tProcess
{
......@@ -318,6 +323,6 @@ private:
CSlot *pslot;
};
#endif // TCOUNT > 1
#endif // TCOUNT > 0
#endif // PSAFILTER_H_INCLUDED
This diff is collapsed.
......@@ -36,6 +36,23 @@
const int WCHAN = 42; // number of channels to write in the output waves
const int WSAMP = 60; // number of samples per channel in the output waves
// Per-event record holding the input data and the results of the Grid Search.
// The results used to be reported directly into the data structure of the
// mother class; they are now collected here to simplify the GPU implementation
// in view of processing several events in parallel.
struct pointFull : public pointExp
{
  // Clears the flags and the scalar bookkeeping fields.
  // localMask and PsaOut are not explicitly initialised here.
  pointFull()
    : isValid(false),
      selectIt(false),
      samp_first(0),
      samp_last(0),
      indexDD(0),
      usamp(0),
      numHits(0)
  {
  }

  // true when this event is to be processed
  bool isValid;
  // used to select events
  bool selectIt;
  // presumably the first sample of the window used by the search -- TODO confirm
  int samp_first;
  // presumably the last sample of the window used by the search -- TODO confirm
  int samp_last;
  // presumably the index of the matching PsaData slot in DD[] -- TODO confirm
  int indexDD;
  // NOTE(review): meaning not visible here (sample count/offset?) -- confirm
  int usamp;
  // presumably the number of hits stored in PsaOut -- TODO confirm
  UInt_t numHits;
  // NCHAN+1 entries, size rounded up to a multiple of 4 bytes
  // (the original author was unsure the rounding is needed)
  char localMask[((NCHAN+1+3)/4)*4];
  // one result record per crystal segment
  PsaOut_t PsaOut[ADF::CrystalInterface::kNbSegments];
};
class PSAFilterGridSearch : public PSAFilter
{
private:
......@@ -72,16 +89,16 @@ private:
#endif // PSA_FromGRU_
void MakeSegmentMap (int neighbours);
void PrepareEvent (PsaData *pD, pointExp *pS);
int ProcessEvent (PsaData *pD, pointExp &pS, int sMult, int samp_first, int samp_last);
void SetToSegCenter (PsaData *pD, pointExp *pS);
void MakeLocalMask (char *localMask, pointExp &pS, int netChSeg);
int PreSearchCoarse(pointExp &pS, int sMult);
int SearchFullGrid (pointExp *pS, int netChSeg, char *lMask, int addr_first, int addr_last);
int SearchAdaptive (pointExp *pS, int netChSeg, char *lMask, int addr_first, int addr_last, bool bCoarseOnly = false);
void PrepareEvent (PsaData *pD, pointFull *pS);
void SetToSegCenter (PsaData *pD, pointFull *pS);
int ProcessEvent (pointFull *n_pS, int num);
int PreSearchCoarse(pointFull &pS, int sMult);
int SearchFullGrid (pointFull *pS, int netChSeg);
int SearchAdaptive (pointFull *pS, int netChSeg, bool bCoarseOnly = false);
void MakeLocalMask (pointFull &pS, int netChSeg);
protected:
// this is written in a thread-safe mode and can be called in parallel, using different data slots
Int_t Process(int slot = 0);
Int_t Process(int slot = 0, int nslots = 1);
// this is not thread-safe and must be called sequentially
Int_t PostProcess(int slot = 0);
......@@ -96,10 +113,10 @@ public:
Float_t GetHitSegThreshold() {return fHitSegThreshold;}
int WriteTraces(PsaData *pD);
int WritePsaHits(PsaOut_t *pOut);
void StorePartialTrace(pointExp &pS, pointPsa *bestPoint, float scaleFact, int samp_first, int usamp);
int WritePartialTrace(pointExp &pS, pointPsa *bestPoint, float scaleFact, int samp_first, int usamp);
void SaveTotalTrace (pointExp &pS, int slot);
Float_t FitT0FromCore (pointExp &pS, int tsamp);
void StorePartialTrace(pointFull &pS, pointPsa *bestPoint, float scaleFact);
int WritePartialTrace(pointFull &pS, pointPsa *bestPoint, float scaleFact);
void SaveTotalTrace (pointFull &pS, PsaData *pD);
Float_t FitT0FromCore (pointFull &pS, int tsamp);
};
......
......@@ -20,14 +20,14 @@ typedef short gs_type;
const int DIFFLAG = 0; // use the net-charge signals as they are
#else
//const int DIFFLAG = 0; // don't use the net-charge segments
const int DIFFLAG = 5; // use them with a delayed-differentiation (in units of signal samples)
const int DIFFLAG = 4; // use them with a delayed-differentiation (in units of signal samples)
//# define SHOWDIFFERENTIATED // in the saved traces show the differentiated version of the net-charge segments
#endif
#define ORDEREDSEARCH // energy-ordered decomposition with removal of previous signals
#ifdef ORDEREDSEARCH
# define PRECOARSESEARCH // a coarse-only preliminary search to remove the other net-charges
//# define WRITEPARTIALS // if using the energy-ordered decomposition write the partial traces
//# define WRITEPARTIALS // if using the energy-ordered decomposition write the partial traces (don't use with threads)
#endif
const double METRIC = 0.3; // norm for the figure of merit
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment