Commit b7f57237 authored by Jean-Eric Campagne's avatar Jean-Eric Campagne
Browse files

(JEC) 7/4/15 optimizaton du code MultiSyntheis/MultiAnalysis de LaguerreTransform

parent bd7c2742
......@@ -7,6 +7,9 @@ git clone https://gitlab.in2p3.fr/campagne/LagSHT.git
Download en Personne enregistree sur gitlab
git clone git@gitlab.in2p3.fr:campagne/LagSHT.git
# Version du 07/04/15
* Essentiellement optimization du code de LaguerreTransform (MultiSyntheis & MultiAnalysis) grace a la collaboration de Martin Reinecke (auteur de `libsharp` )
# Version du 03/04/15
* refonte des algorithmes de Synthese/Analyse de la transformation de Laguerre en utilisant des functions generalisees de Laguerre et non les polynomes generalises. Ainsi, le facteur exponentiel permet de manipuler au mieux des nombres 64-bits sauf a quelques endroits ou on a besoin de 128-bits. L'ineterface avec `libsharp` se fait donc mieux.
* L'algorithme d'Analyse la transformation Harmonqiue Spherique + Laguerre a ete revu pour tirer profit des calculs iteratifs des polynomes de Laguerre comme dans le cza de la Synthese. Cela induit une acceleration significative.
......
......@@ -61,7 +61,6 @@ void LaguerreSphericalTransform<T>::Synthesis(const NDataBlock< complex<T> >& fl
lagTrans_.MultiSynthesis(flmn, xflmk, Nalm_);
#if DEBUG >= 1
cout << "End Inverse Laguerre...." << endl;
cout << "Start Inverse H.Spherical Transform " << endl;
......@@ -238,6 +237,7 @@ void LaguerreSphericalTransform<r_8>::Synthesis(const NDataBlock< complex<r_8> >
lagTrans_.MultiSynthesis(flmn, flmk, Nalm_);
#if DEBUG >= 1
cout << "End Inverse Laguerre...." << endl;
cout << "Start Inverse H.Spherical Transform " << endl;
......@@ -249,8 +249,8 @@ void LaguerreSphericalTransform<r_8>::Synthesis(const NDataBlock< complex<r_8> >
off7n = n*Nalm_;
off7k = n*Npix_;
#if DEBUG >= 1
cout << "Synthesis... at shell num: " << n << " scale = " << scale << endl;
#if DEBUG >= 2
cout << "Synthesis... at shell num: " << n << endl;
#endif
sharpjob_.alm2map(&(flmk.Data()[off7n].real()),&fijk.Data()[off7k],false);
......@@ -262,11 +262,9 @@ void LaguerreSphericalTransform<r_8>::Synthesis(const NDataBlock< complex<r_8> >
}//Synthesis
template <>
void LaguerreSphericalTransform<r_8>::Analysis(const NDataBlock<r_8>& fijk,
NDataBlock< complex<r_8> >& flmn) {
NDataBlock< complex<r_8> >& flmn) {
int Ntot3DPix = Npix_*N_;
int NtotCoeff = Nalm_*N_;
if(fijk.Size() != Ntot3DPix) throw SzMismatchError("SphLagTransform::Analysis size error");
......@@ -289,8 +287,6 @@ void LaguerreSphericalTransform<r_8>::Analysis(const NDataBlock<r_8>& fijk,
cout << "Start Harmonic Spheric... at n = "<< n << endl;
#endif
////////
// sharpjob_.map2alm(&(fijk.Data()[0]),&(flmk.Data()[0].real()),false); //the false is to forbidd accumulation
sharpjob_.map2alm(&(fijk.Data()[off7k]),&(flmk.Data()[off7n].real()),false); //the false is to forbidd accumulation
}//loop on shell
......@@ -322,6 +318,4 @@ void LaguerreSphericalTransform<r_8>::Analysis(const NDataBlock<r_8>& fijk,
}// Fin de namespace
......@@ -88,14 +88,8 @@ protected:
template<class T>
class LaguerreSphericalTransform : public BaseLagSphTransform {
public:
//! Creator
// LaguerreSphericalTransform(int Lmax, int Mmax, int Nmax, r_8 R, int alpha=2):
// L_(Lmax), M_(Mmax), N_(Nmax), R_(R), alpha_(alpha), lagTrans_(Nmax,(T)R,alpha),
// Nrings_(0), Nphi_(0), Npix_(0), Nalm_(0),geomDone_(false) {
// if (Mmax<0) M_=Lmax;
// }//Ctor
//! Constructor
LaguerreSphericalTransform(int Lmax, int Mmax, int Nmax, r_8 R, int alpha=2):
BaseLagSphTransform(Lmax,Mmax,Nmax,alpha), R_(R), lagTrans_(Nmax,(T)R,alpha) {
}//Ctor
......@@ -104,19 +98,6 @@ class LaguerreSphericalTransform : public BaseLagSphTransform {
//! Destructor
virtual ~LaguerreSphericalTransform() {}
// //! Number of colatitude rings of the 2D-sphere representation
// int Nrings() { return Nrings_; }
// //! Number of colatitude rings of the 2D-sphere representation
// int NTheta() { return Nrings(); }
// //! Number of phi values (pixels) per colatitude rings
// int NPhi() { return Nphi_; }
// //! Total number of 2D pixels
// int Npix() { return Npix_; }
// //! Total number of Alm coefficients
// int Nalm() { return Nalm_; }
// //! Choose the 2D map representation & initialization of libsharp
// void SetThetaPhiMap(string choice ="");
//! Synthesis
/*! \brief Coeffs -> Pixels with Input/Output using T floating representation
......@@ -133,42 +114,9 @@ class LaguerreSphericalTransform : public BaseLagSphTransform {
void Analysis(const NDataBlock<T>& fijk, NDataBlock< complex<T> >& flmn);
// protected:
// //from sharp test suite
// static ptrdiff_t get_npix(const sharp_geom_info *ginfo) {
// ptrdiff_t res=0;
// for (int i=0; i<ginfo->npairs; ++i){
// res += ginfo->pair[i].r1.nph;
// if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
// }
// return res;
// }
// //from sharp test suite
// static ptrdiff_t get_nalms(const sharp_alm_info *ainfo) {
// ptrdiff_t res=0;
// for (int i=0; i<ainfo->nm; ++i)
// res += ainfo->lmax-ainfo->mval[i]+1;
// return res;
// }
protected:
// int L_; //!< Spherical harmonic limit such that l:0,...,L-1 m:0,...,l
// int M_; //!< maximum value of m
// int N_; //!< Radial band limit such that n:0,...,N-1
r_8 R_; //!< Radial maximal value (not yet used)
// int alpha_; //!< parameter of the generalized Laguerre Polynomials
LaguerreTransform<T> lagTrans_; //!< the Laguerre Transform
// int Nrings_; //!< Number of colatitude rings of the 2D-sphere representation
// int Nphi_; //!< Number of phi values (pixels) per colatitude rings
// int Npix_; //!< Total number of 2D pixels
// int Nalm_; //!< Total number of Alm coefficients
// bool geomDone_; //!< Set to true once the geometry is properly set (See SetThetaPhiMap(string) )
// sharp_cxxjob<r_8> sharpjob_; //!< libsharp job // voir si r_8 -> r_16 marcherait !!!
}; //class
......
......@@ -3,12 +3,21 @@
//Sophya
#include "machdefs.h"
#include "pexceptions.h"
#include <vector> // test
#include "resusage.h" //"
#include "ctimer.h" //"
#include "timing.h" //"
#include "sopnamsp.h"
#define LAGUERRETRANSFORM_CC_BFILE // avoid extern template declarations
#include "laguerreTransform.h"
#define DEBUG 1
#define DEBUG 0
namespace SOPHYA {
......@@ -245,6 +254,52 @@ LaguerreTransform<r_8>::LaguerreTransform(int N, r_8 R, int alpha) :
}//Ctor
//OLD VERSION before optimisation
// template <>
// void LaguerreTransform<r_8>::MultiAnalysis(const NDataBlock< complex<r_8> >& fi, NDataBlock< complex<r_8> >& fn, int stride) {
// if(fi.Size() != N_*stride) throw SzMismatchError("LagTransform::Analysis size error");
// fn.ReSize(N_*stride);
// #if DEBUG >= 1
// cout << "Analysis with Function...." << endl;
// fi.Print(cout);
// #endif
// r_8 invalphaFact = 1.0/alphaFact_; //1/alpha!
// NDataBlock<r_8> facts(N_); //sqrt(n!/(n+alpha)!) en iteratif
// facts(0) = invalphaFact;
// for(int n=1;n<N_;n++) facts(n) = facts(n-1)*sqrt(((r_8)n)/((r_8)(n+alpha_)) );
// LaguerreFuncQuad lag(N_-1);
// NDataBlock<r_16> LnAll(N_); //all the values Lag_n(r) n:0...N-1
// Timer tm("lagTrans multi analysis part");
// int off7i, off7n;
// //Try i-loop before n-loop (the reverse is the natural order)
// for (int i = 0; i<N_; i++ ){
// off7i = i*stride;
// r_8 r = nodes_(i);
// lag.Values(r,LnAll);
// for (int n=0; n<N_;n++){
// // LaguerreFuncQuad lag(n);
// off7n = n*stride;
// //Lni = sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
// r_8 wLni = weights_(i)*LnAll(n)*facts(n); ///TEST r_16 -> r_8
// //#pragma omp parallel for
// for (int l=0; l<stride; l++) {
// fn(l+off7n).real() += wLni * fi(l+off7i).real();
// fn(l+off7n).imag() += wLni * fi(l+off7i).imag();
// }
// }//loop on i
// }//loop on n
// }//MultiAnalysis
template <>
void LaguerreTransform<r_8>::MultiAnalysis(const NDataBlock< complex<r_8> >& fi, NDataBlock< complex<r_8> >& fn, int stride) {
......@@ -264,28 +319,33 @@ void LaguerreTransform<r_8>::MultiAnalysis(const NDataBlock< complex<r_8> >& fi,
LaguerreFuncQuad lag(N_-1);
NDataBlock<r_16> LnAll(N_); //all the values Lag_n(r) n:0...N-1
Timer tm("lagTrans analysis Test");
int off7i, off7n;
vector<r_8> LnkMtx(N_*N_); //all the values Lag_n(r) n:0...N-1 TRY r_16->r_8
for (int k=0; k<N_; k++){
r_8 rk = nodes_(k);
lag.Values(rk,LnAll);
for (int n = 0; n<N_; n++ ){
LnkMtx[n*N_+k] = LnAll(n)*facts(n);
}
}
//Try i-loop before n-loop (the reverse is the natural order)
for (int i = 0; i<N_; i++ ){
off7i = i*stride;
r_8 r = nodes_(i);
lag.Values(r,LnAll);
for (int n=0; n<N_;n++){
// LaguerreFuncQuad lag(n);
off7n = n*stride;
//Lni = sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
r_8 wLni = weights_(i)*LnAll(n)*facts(n); ///TEST r_16 -> r_8
//#pragma omp parallel for
for (int l=0; l<stride; l++) {
fn(l+off7n).real() += wLni * fi(l+off7i).real();
fn(l+off7n).imag() += wLni * fi(l+off7i).imag();
}
}//loop on i
}//loop on n
}//MultiAnalysis
tm.Split("lagTrans Test/End LnkMtx initialization");
vector<complex<double> > vtmp(N_);
for (int l=0; l<stride; l++) {
vtmp.assign(N_,0.);
for (int i = 0; i<N_; i++ ){
complex<double> fli = fi(l+ i*stride);
for (int n=0; n<N_; n++){
vtmp[n] += fli * weights_(i) * LnkMtx[n*N_+i];
}//loop on k
}//loop on n
for (int n=0; n<N_; n++) fn(l+n*stride) += vtmp[n];
}//loop on l
}//MultiAnalysisTest
......@@ -297,42 +357,92 @@ void LaguerreTransform<r_8>::MultiSynthesis(const NDataBlock< complex<r_8> >& fn
fi.ReSize(N_*stride);
#if DEBUG >= 1
cout << "Multi Synthesis with Function....:" << N_ << endl;
cout << "Multi Synthesis Test with Function....:" << N_ << endl;
// fn.Print(cout);
#endif
int off7i, off7n;
int off7k, off7n;
int threadId;
r_8 invalphaFact = 1.0/alphaFact_; //1/alpha!
r_8 invalphaFact = 1.0/alphaFact_; //1/alpha!
NDataBlock<r_8> facts(N_); //sqrt(n!/(n+alpha)!) en iteratif
facts(0) = invalphaFact;
for(int n=1;n<N_;n++) facts(n) = facts(n-1)*sqrt(((r_8)n)/((r_8)(n+alpha_)) );
Timer tm("lagTrans synthesis Test");
LaguerreFuncQuad lag(N_-1);
NDataBlock<r_16> LnAll(N_); //all the values Lag_n(r) n:0...N-1
for (int i=0; i<N_;i++){
off7i = i*stride;
r_8 r = nodes_(i);
lag.Values(r,LnAll);
vector<r_8> LnkMtx(N_*N_); //all the values Lag_n(r) n:0...N-1 TRY r_16->r_8
for (int k=0; k<N_; k++){
r_8 rk = nodes_(k);
lag.Values(rk,LnAll);
for (int n = 0; n<N_; n++ ){
//Lni = sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
r_8 Lni = LnAll(n)*facts(n); // TEST r_16 -> r_8
//flm(ri) = Sum_{n=0}^{N-1} flmn * sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
//(lm) is represented by a single index l
off7n = n*stride;
//#pragma omp parallel for
for (int l=0; l<stride; l++) {
fi(l+off7i).real() += Lni * fn(l+off7n).real();
fi(l+off7i).imag() += Lni * fn(l+off7n).imag();
}
LnkMtx[n*N_+k] = LnAll(n)*facts(n);
}
}
tm.Split("lagTrans Test/End LnkMtx initialization");
vector<complex<double> > vtmp(N_);
for (int l=0; l<stride; l++) {
vtmp.assign(N_,0.);
for (int n = 0; n<N_; n++ ){
complex<double> fln = fn(l+ n*stride);
for (int k=0; k<N_; k++){
vtmp[k] += fln * LnkMtx[n*N_+k];
}//loop on k
}//loop on n
}//loop on i
for (int k=0; k<N_; k++) fi(l+k*stride) += vtmp[k];
}//loop on l
}//MultiSynthesis
//Old version NON OPTIMIZED
// template <>
// void LaguerreTransform<r_8>::MultiSynthesis(const NDataBlock< complex<r_8> >& fn, NDataBlock< complex<r_8> >& fi, int stride) {
// if(fn.Size() != N_*stride) throw SzMismatchError("LagTransform::Synthesis size error");
// fi.ReSize(N_*stride);
// #if DEBUG >= 1
// cout << "Multi Synthesis with Function....:" << N_ << endl;
// // fn.Print(cout);
// #endif
// int off7i, off7n;
// int threadId;
// r_8 invalphaFact = 1.0/alphaFact_; //1/alpha!
// NDataBlock<r_8> facts(N_); //sqrt(n!/(n+alpha)!) en iteratif
// facts(0) = invalphaFact;
// for(int n=1;n<N_;n++) facts(n) = facts(n-1)*sqrt(((r_8)n)/((r_8)(n+alpha_)) );
// LaguerreFuncQuad lag(N_-1);
// NDataBlock<r_16> LnAll(N_); //all the values Lag_n(r) n:0...N-1
// Timer tm("lagTrans multi synthesis part");
// for (int i=0; i<N_;i++){
// off7i = i*stride;
// r_8 r = nodes_(i);
// lag.Values(r,LnAll);
// for (int n = 0; n<N_; n++ ){
// //Lni = sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
// r_8 Lni = LnAll(n)*facts(n); // TEST r_16 -> r_8
// //flm(ri) = Sum_{n=0}^{N-1} flmn * sqrt(n!/(n+alpha)!) L_n^(alpha)(r_i)
// //(lm) is represented by a single index l
// off7n = n*stride;
// //#pragma omp parallel for
// for (int l=0; l<stride; l++) {
// fi(l+off7i).real() += Lni * fn(l+off7n).real();
// fi(l+off7i).imag() += Lni * fn(l+off7n).imag();
// }
// }//loop on n
// }//loop on i
// }//MultiSynthesis
}// Fin de namespace
......@@ -5,6 +5,8 @@
#include "machdefs.h"
#include "ndatablock.h"
#include <limits> //sizeof
#ifdef SO_NTLXDBLE
#include "NTL/xdouble.h" /* double : mantisse, long: exponent (NTL-library) */
......@@ -68,6 +70,7 @@ class LaguerreTransform : public BaseLaguerreTransform {
*/
void MultiAnalysis(const NDataBlock< complex<T> >& fi, NDataBlock< complex<T> >& fn, int stride);
//! Single Inverse Laguerre Transform (Coefficientss -> Values)
void Synthesis(const NDataBlock< complex<T> >& fn, NDataBlock< complex<T> >& fi) {
MultiSynthesis(fn,fi,1);
......@@ -82,6 +85,8 @@ class LaguerreTransform : public BaseLaguerreTransform {
*/
void MultiSynthesis(const NDataBlock< complex<T> >& fn, NDataBlock< complex<T> >& fi, int stride);
protected:
// int N_; //!< Order of the Generalized Lagurerre polynomial
// int alpha_; //!< second parameter of the generalized Laguerre polynomial
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment