PSAFilterGridSearch.h 16.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
#ifndef PSAFILTERGRIDSEARCH_H_INCLUDED
#define PSAFILTERGRIDSEARCH_H_INCLUDED

#include "PSAFilter.h"
#include "GridSearchParams.h"
#include "SignalBasis.h"

#ifdef USE_SSE_VERSION    // defined in GridSearchParams.h
#include <xmmintrin.h>
#include <tmmintrin.h>
#endif

//#define SPECMETRICS        // to produce specMetrics in SearchFullGrid and SearchAdaptive ==> will be very slow and is not thread-safe

//! Implementation of a simple grid search method. 
/**
   Coded by Roberto Venturelli
   Ported first to Narval by Joa Ljungvall
   Modified and maintained by Dino Bazzacco
*/

const int WCHAN =  42;  // number of channels to write in the output waves
const int WSAMP =  60;  // number of samples per channel in the output waves

// Input data and results of the Grid Search algorithm
// Originally the results were reported directly to the data structure of the mother class 
// Now they are written here to simplify the GPU implementation in view of parallel processing
struct pointFull : public pointExp
{
  pointFull() : samp_first(0), samp_last(0),
                isValid(false), isSelected(false), isInitialized(false), isPreSearched(false), baseScale(1.f) {}
  int      samp_first;  // must be a multiple of 4 to use sse intrinsics
  int      samp_last;
  char     localMask[NSEGS+4];  // rounded to 4 bytes ??
  bool     isValid;             // to be processed
  bool     isSelected;          // used to select events
  bool     isInitialized;
  bool     isPreSearched;
  float    enerSeg;       // the energy of the segment for GridSearch2
  float    baseScale;     // temporarily used to manage the chi2 mapping
  PsaOut_t PsaOut[ADF::CrystalInterface::kNbSegments];
  void Reset() {
    pointExp::Reset();
    samp_first = samp_last = 0;
    isValid = isSelected = isInitialized = isPreSearched = false;
    baseScale = 1;
  }
  void FullCopy(pointFull &PF) {
    pointExp::FullCopy(PF);
    samp_first = PF.samp_first;
    samp_last  = PF.samp_last;
    memcpy(localMask, PF.localMask, sizeof(localMask));
    isValid = PF.isValid;
    isSelected = PF.isSelected;
    isInitialized = PF.isInitialized;
    isPreSearched = PF.isPreSearched;
    enerSeg = PF.enerSeg;
    baseScale = PF.baseScale;
    for(int nn=0; nn < ADF::CrystalInterface::kNbSegments; nn++) {
      PsaOut[nn].FullCopy(PF.PsaOut[nn]);
    }
  }
};

class PSAFilterGridSearch : public PSAFilter
{
private:
  Float_t   fHitSegThreshold;

  SignalBasis fBasis;
  float       mappedMetric[NMETRIC];
  char        hmask[NSEGS][NSEGS+4];  // Some padding to have a "good" length
  float       chi2NetSegs[NSEGS];     // the chi2 of the net-charge segments 
  float       chi2AllSegs[NCHAN];     // the chi2 of all segments and CC 

  bool        gUseAdaptive;
  bool        gTryTwoHits;
  bool        gCoarseOnly;
  bool        gPsaSegCenter;
  bool        gFullSearch;
81
  bool        gFitTZero;
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  float       gDistMetric;            // initialized to PMETRIC but user settable; may create inconsistencies if USE_SSE_VERSION
  float       fDistRescale;           // used to consider RESCALE when calculating chi2 for non rescaled residues  

  bool        firstEventPSA;

  float       tauSGCC[37];            // the 36 segments and the CC

#ifdef PSA_MULTIHIST
  MultiHist<unsigned int  > *PsaSpecEner;
  MultiHist<unsigned int  > *PsaSpecStat;
  MultiHist<unsigned int  > *PsaSpecTZero;
  MultiHist<unsigned int  > *PsaSpecSigma;
  MultiHist<unsigned int  > *PsaSpecMetric;
  MultiHist<unsigned short> *PsaMatrXYZ;
  MultiHist<unsigned short> *PsaMatrXYZR;
  MultiHist<unsigned short> *PsaMatrSeg;
#endif  //PSA_MULTIHIST

  Int_t  gWriteNumTraces;
  Bool_t gWritePartialTraces;

  std::string fnPsaTraces;
  FILE       *fpPsaTraces;

  std::string fnPsaHits;
  FILE       *fpPsaHits;
  Int_t       fPsaIndex;  // 0--15 to distinguish different events in the file

  void  SegmentMapMake (int neighbours);   // based on Manhattan distance
  void  SegmentMapTest ();                 // exclude elements related to broken segments
  void  PrepareEvent   (PsaSlot *pSlot, pointFull &PF, bool reset, bool traces);
  void  SetToSegCenter (PsaSlot *pSlot, pointFull &PF);
  void  ShiftMoveTrace (float *data, int nsamp, float delta, float *accu);
115
  int   ProcessTheEvent(pointFull &PF, bool doFull, bool twoHits);
116 117 118 119 120 121
  int   ProcessTwoHits (pointFull &PF);
  void  InitialSolution(pointFull &PF, bool keep = false);
  void  PreSearchCoarse(pointFull &PF);
  int   SearchFullGrid (pointFull &PF, int netChSeg);
  int   SearchAdaptive1(pointFull &PF, int netChSeg, bool bCoarseOnly = false, bool bFirstOnly = false); // 1 hit
  int   SearchAdaptive2(pointFull &PF, int netChSeg, bool bCoarseOnly = false, bool bFirstOnly = false); // 2 hits
122
  void  MakeSearchWave (pointFull &PF, float fact, float maxVal);
123 124
  void  MakeLocalMask  (pointFull &PF);
  float CalcChi2Residue(pointFull &PF);
125
  float CalcChi2GridPts(int segNum, int pdNum, pointFull &PF);
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
  bool  WriteBaseAverPt(float scale, std::string ext);
  bool  WriteBasePoint (float scale, pointPsa &pt, FILE *FP);

  bool IsItWorthwhile(pointFull &PF) {return true;}  // just to start
  int  TryToFit(pointFull &PF);
  int  RandomSplit2(pointFull &PF, int ntry);

protected:
  // this is written in a thread-safe mode and can be called in parallel, using, of course, different data slots
  Int_t Process(int slot = 0, int count = 1);

  Int_t AlgoSpecificInitialise();
  Int_t AlgoSpecificReset() { return 0; }
  Int_t AlgoSpecificPostProcess(int slot = 0);

public:
  PSAFilterGridSearch();
  virtual ~PSAFilterGridSearch();

  void    SetHitSegThreshold(Float_t thres) {fHitSegThreshold = thres;}
  Float_t GetHitSegThreshold() {return fHitSegThreshold;}
  bool    WriteWorkingWave(float *pW, bool append);
  bool    WritePartialWave(float *pW, float fscale, bool append);
  int     WriteTraces   (PsaSlot  *pSlot);
  bool    WriteTracesAll(PsaSlot  *pSlot);
  int     WritePsaHits  (PsaOut_t *pOut, int index = 0);
  void    AddToSolution (pointFull &PF, pointPsa &bestPoint, int numsamp, float scaleFact);
  void    AddToSolution (pointFull &PF, pointPsa &bestPoint1, pointPsa &bestPoint2, float bestFac, int numsamp, float scaleFact);
  void    SaveTotalTrace(pointFull &PF, PsaSlot *pSlot);
  Float_t FitT0AfterPSA(pointFull &PF); 

private:
  void Extend(float *dd, int n1, int n2) {
    float ddl = dd[n1-1];
    for(int nn = n1; nn < n2; nn++)
      dd[nn] = ddl;
  }
  void MatChi2(float chi, pointPsa* pPsa);

  // intergrid final interpolation (still just a uniform random distribution)
  // be aware that this depends on the basis and its grid step (assumed 2 mm)
  float xInterp() {return 0; } 
  float yInterp() {return 0; } 
  float zInterp() {return 0; }
  //float xInterp() {return (rand()%200 - 100)*0.01f; } 
  //float yInterp() {return (rand()%200 - 100)*0.01f; } 
  //float zInterp() {return (rand()%200 - 125)*0.01f; }
  //float xInterp() {return (gCoarseOnly) ? 0 : (rand()%200 - 100)*0.01f; } 
  //float yInterp() {return (gCoarseOnly) ? 0 : (rand()%200 - 100)*0.01f; } 
  //float zInterp() {return (gCoarseOnly) ? 0 : (rand()%200 - 125)*0.01f; }

  void incrPosMats(int netChSeg, int nhi, float fx, float fy, float fz, float rr);

  float Chi2InnerLoop(const float *pReal);
  float Chi2InnerLoop(const float *pReal, const float *pBase,  float bScale);
  float Chi2InnerLoop(const float *pReal, const float *pBase1, float bScale1, const float *pBase2, float bScale2);

// For the Chi2OuterLoop methods, look to versions before r1039
};

#if defined(__GNUC__)
# define INLINE_ALWAYS  inline
#elif defined(_MSC_VER)
# define INLINE_ALWAYS  __forceinline
#endif

#ifdef USE_SSE_VERSION    // defined in GridSearchParams.h

// The vectorized version of the FOM loop does not use the mapped metrics, but a fixed power of 2, 1, 0.5 or 0.25

// Select the distance metric 
// If enabled, the SSE versions must use the same metric as requested in PSAFilter.setup (or the default value given in GridSearchParams.h)!! 
#if   STANDARD_METRIC == STANDARD_METRIC_ABSVAL
# define SSE_PMETRIC 1.0f
#elif STANDARD_METRIC ==  STANDARD_METRIC_SQUARE
# define SSE_PMETRIC 2.0f
#elif STANDARD_METRIC ==  STANDARD_METRIC_1SQRT
# define SSE_PMETRIC 0.5f
#elif STANDARD_METRIC ==  STANDARD_METRIC_2SQRT
# define SSE_PMETRIC 0.25f
#else
# error Inconsistency in the definition of the distance metric when using the SSE versions 
#endif

// 0 interactions (the number of used samples is fixed to LOOP_SAMP)
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal)
{
  const __m128 masks = _mm_set1_ps(-0.0f);
  const __m128 zeros = _mm_setzero_ps();

  __m128* realTrace = (__m128*)pReal;

  __m128  chis = _mm_setzero_ps();

  for(int nn = 0; nn < SSE_NUM; nn++) {
#if   STANDARD_METRIC == STANDARD_METRIC_ABSVAL                               // pow(|d|,1  )
    chis = _mm_add_ps(chis, _mm_andnot_ps(masks, realTrace[nn]) );
#elif STANDARD_METRIC == STANDARD_METRIC_SQUARE                               // pow( d ,2  )
    chis = _mm_add_ps(chis, _mm_mul_ps(realTrace[nn], realTrace[nn]) );
#elif STANDARD_METRIC == STANDARD_METRIC_1SQRT                                // pow(|d|,1/2)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_andnot_ps(masks, realTrace[nn])) );  
#elif STANDARD_METRIC == STANDARD_METRIC_2SQRT                                // pow(|d|,1/4)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_sqrt_ps(_mm_andnot_ps(masks, realTrace[nn])) ) );
#else
# error Inconsistency in the definition of the distance metric when using the SSE versions
#endif
  }

  float chi2 = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(chis, zeros), zeros)); // sum the 4 values and save in chi2
  return chi2;
}

// 1 interaction (the number of used samples is fixed to LOOP_SAMP)
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal, const float *pBase,  float bScale)
{
  const __m128 masks = _mm_set1_ps(-0.0f);
  const __m128 zeros = _mm_setzero_ps();

  __m128 loopFac    = _mm_set_ps(bScale, bScale, bScale, bScale);

  __m128* realTrace = (__m128*)pReal;
  __m128* baseTrace = (__m128*)pBase;

  __m128  wave = _mm_setzero_ps();
  __m128  diff = _mm_setzero_ps();
  __m128  chis = _mm_setzero_ps();

  for(int nn = 0; nn < SSE_NUM; nn++) {
    wave = _mm_mul_ps(loopFac, baseTrace[nn]);
    diff = _mm_sub_ps(realTrace[nn], wave);
#if   STANDARD_METRIC == STANDARD_METRIC_ABSVAL                               // pow(|d|,1  )
    chis = _mm_add_ps(chis, _mm_andnot_ps(masks, diff) );
#elif STANDARD_METRIC == STANDARD_METRIC_SQUARE                               // pow( d ,2  )
    chis = _mm_add_ps(chis, _mm_mul_ps(diff, diff) );
#elif STANDARD_METRIC == STANDARD_METRIC_1SQRT                                // pow(|d|,1/2)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_andnot_ps(masks, diff)) );  
#elif STANDARD_METRIC == STANDARD_METRIC_2SQRT                                // pow(|d|,1/4)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_sqrt_ps(_mm_andnot_ps(masks, diff )) ) );
#else
# error Inconsistency in the definition of the distance metric when using the SSE versions
#endif
  }

  float chi2 = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(chis, zeros), zeros)); // sum the 4 values and save in chi2
  return chi2;
}

// 2 interactions (the number of used samples is fixed to LOOP_SAMP)
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal, const float *pBase1, float bScale1, const float *pBase2, float bScale2)
{
  const __m128 masks = _mm_set1_ps(-0.0f);
  const __m128 zeros = _mm_setzero_ps();

  __m128 loopFac1 = _mm_set_ps(bScale1, bScale1, bScale1, bScale1);
  __m128 loopFac2 = _mm_set_ps(bScale2, bScale2, bScale2, bScale2);

  __m128* realTrace   = (__m128*)pReal;
  __m128* baseTrace1  = (__m128*)pBase1;
  __m128* baseTrace2  = (__m128*)pBase2;

  __m128  wave = _mm_setzero_ps();
  __m128  diff = _mm_setzero_ps();
  __m128  chis = _mm_setzero_ps();

  for(int nn = 0; nn < SSE_NUM; nn++) {
    wave = _mm_add_ps(_mm_mul_ps(loopFac1, baseTrace1[nn]), _mm_mul_ps(loopFac2, baseTrace2[nn]));
    diff = _mm_sub_ps(realTrace[nn], wave);
#if   STANDARD_METRIC == STANDARD_METRIC_ABSVAL                               // pow(|d|,1  )
    chis = _mm_add_ps(chis, _mm_andnot_ps(masks, diff) );
#elif STANDARD_METRIC == STANDARD_METRIC_SQUARE                               // pow( d ,2  )
    chis = _mm_add_ps(chis, _mm_mul_ps(diff, diff) );
#elif STANDARD_METRIC == STANDARD_METRIC_1SQRT                                // pow(|d|,1/2)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_andnot_ps(masks, diff)) );  
#elif STANDARD_METRIC == STANDARD_METRIC_2SQRT                                // pow(|d|,1/4)
    chis = _mm_add_ps(chis, _mm_sqrt_ps(_mm_sqrt_ps(_mm_andnot_ps(masks, diff )) ) );
#else
# error Inconsistency in the definition of the distance metric when using the SSE versions
#endif
  }

  float chi2 = _mm_cvtss_f32(_mm_hadd_ps(_mm_hadd_ps(chis, zeros), zeros)); // sum the 4 values and save in chi2
  return chi2;
}

#else // #ifdef USE_SSE_VERSION 

// the standard, non-vectorized versions

// The multiplication * baseScale costs ~ 5% in terms of speed. It could be avoided by pre-scaling the experimental trace
// in such a way that bScale == 1. In this case the mapped-metrics vector is applied with a varying scale and loses its
// statistical meaning (if it ever had one, like e.g. with the biweight).
317 318
//
// avoid mapped metric if STANDARD_METRIC == STANDARD_METRIC_SQUARE
319 320 321 322

// loop with 0 hits
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal)
{
323
#if STANDARD_METRIC != STANDARD_METRIC_SQUARE
324
  const float *metric = mappedMetric + RMETRIC;
325
#endif
326 327
  float chi2 = 0;
  for(int nn = 0; nn < LOOP_SAMP; nn++) {
328 329 330 331
#if STANDARD_METRIC == STANDARD_METRIC_SQUARE
    float fdiff = pReal[nn];
    chi2 += fdiff*fdiff;
#else
332 333
    int idiff = int(pReal[nn]);
    chi2 += metric[idiff];
334
#endif
335 336 337 338 339 340 341
  }
  return chi2;
}

// loop with 1 hit
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal, const float *pBase, float bScale)
{
342
#if STANDARD_METRIC != STANDARD_METRIC_SQUARE
343
  const float *metric = mappedMetric + RMETRIC;
344
#endif
345 346 347
  float chi2 = 0;
  for(int nn = 0; nn < LOOP_SAMP; nn++) {
    float fdiff = pReal[nn] -  bScale * pBase[nn];
348 349 350
#if STANDARD_METRIC == STANDARD_METRIC_SQUARE
    chi2 += fdiff*fdiff;
#else
351
    int   idiff = int(fdiff);
352 353 354 355 356 357 358 359
    chi2 += metric[idiff];
    //////chi2 += fabs(diff);                       // direct calculation of pow(|d|, 1  )
    //////chi2 += diff*diff;                        // direct calculation of pow( d , 2  )
    //////chi2 += sqrt(fabs(diff));                 // direct calculation of pow(|d|, 1/2)
    //////chi2 += sqrt(sqrt(fabs(diff)));           // direct calculation of pow(|d|, 1/4)
    //////chi2 += pow(fabs(diff), float(PMETRIC));  // direct use of the pow function --> very slow
    //////const float wwww = -1.f/(2*30.f*30.f);
    //////chi2 += 1.f - exp(diff*diff*wwww);        // direct use of the 1-exp(-d^2) function --> very slow
360
#ifdef SPECMETRIC
361 362
    // This block is to check if the size of "metrics" is big enough to address all
    // possible values of "idiff". If enabled, it slows very much down the execution.
363 364 365 366 367
    // As MultiHist is not thread safe, be sure that you are NOT USING THREADS.
    if(specMetrics) specMetrics->incr(idiff+RMETRIC);
    if(idiff<=-RMETRIC+2 || idiff> RMETRIC-2) {
      int nerrr = 1;    // to catch the outliers with the debugger
    }
368 369
#endif    // SPECMETRIC
#endif    // STANDARD_METRIC == STANDARD_METRIC_SQUARE
370 371 372 373 374 375 376
  }
  return chi2;
}

// loop with 2 hits
INLINE_ALWAYS float PSAFilterGridSearch::Chi2InnerLoop(const float *pReal, const float *pBase1, float bScale1, const float *pBase2, float bScale2)
{
377
#if STANDARD_METRIC != STANDARD_METRIC_SQUARE
378
  const float *metric = mappedMetric + RMETRIC;
379
#endif
380 381 382
  float chi2 = 0;
  for(int nn = 0; nn < LOOP_SAMP; nn++) {
    float fdiff = pReal[nn] - (bScale1*pBase1[nn] + bScale2*pBase2[nn]);
383 384 385 386
#if STANDARD_METRIC == STANDARD_METRIC_SQUARE
    chi2 += fdiff*fdiff;
#else
    int   xdiff = int(fdiff);
387
    chi2 += metric[idiff];
388
#endif
389 390 391 392
  }
  return chi2;
}

393 394 395 396 397 398 399 400 401
////// Performance obtained qite some time ago with various types of chi2
//////chi2 += metric[int(diff)];                    // 160 evts/s   map0
//////chi2 += metric[int(fabs(diff));               // 115 evts/s   map1
//////chi2 += metric[abs(int(diff));                // 145 evts/s   map2
//////chi2 += abs(diff);                            // 145 evts/s   abs
//////chi2 += diff*diff;                            // 290 evts/s   sqr
//////chi2 += sqrt(fabs(diff));                     //  60 evts/s   1sqrt
//////chi2 += sqrt(sqrt(fabs(diff)));               //  30 evts/s   2sqrt
//////chi2 += pow(fabs(diff), float(gDistMetric));  //   4 evts/s   pow
402

403
#endif // #else // #ifdef USE_SSE_VERSION 
404 405

#endif // PSAFILTERGRIDSEARCH_H_INCLUDED