Commit fb5907a8 authored by Jean-Eric Campagne's avatar Jean-Eric Campagne
Browse files

(JEC) 22/03/16 use Discrete Cosine Transform via FFTW to compute Jnlp. Use...

(JEC) 22/03/16 use Discrete Cosine Transform via FFTW to compute Jnlp. Use OpenMP to speed the sampling of Spherical Bessel function
parent 4b2ef3c7
...@@ -39,6 +39,12 @@ SHARPLIB = $(SHARPDIR)/lib ...@@ -39,6 +39,12 @@ SHARPLIB = $(SHARPDIR)/lib
SHARPINC = -I$(SHARPDIR)/include SHARPINC = -I$(SHARPDIR)/include
SHARPLIBN = -L$(SHARPLIB) -lsharp -lc_utils -lfftpack SHARPLIBN = -L$(SHARPLIB) -lsharp -lc_utils -lfftpack
# ===== FFTW3 ======
FFTWDIR = /opt/local
FFTWLIB = $(FFTWDIR)/lib
FFTWINC = -I$(FFTWDIR)/include
FFTWLIBN = -L$(FFTWLIB) -lfftw3 -lfftw3_threads
SRC = ./src/ SRC = ./src/
LIB = ./lib/ LIB = ./lib/
OBJ = ./objs/ OBJ = ./objs/
...@@ -111,6 +117,7 @@ CXXOBJ = $(OBJ)laguerreBuilder.o \ ...@@ -111,6 +117,7 @@ CXXOBJ = $(OBJ)laguerreBuilder.o \
$(OBJ)lagsht_bessel.o \ $(OBJ)lagsht_bessel.o \
$(OBJ)laguerre2bessel.o $(OBJ)laguerre2bessel.o
CXXSHOBJ = laguerreBuilder.o \ CXXSHOBJ = laguerreBuilder.o \
laguerreTransform.o \ laguerreTransform.o \
lagsht_spheregeom.o \ lagsht_spheregeom.o \
...@@ -135,8 +142,8 @@ CXXHDR = $(SRC)lagsht_exceptions.h \ ...@@ -135,8 +142,8 @@ CXXHDR = $(SRC)lagsht_exceptions.h \
$(SRC)laguerre2bessel.h \ $(SRC)laguerre2bessel.h \
$(SRC)quadinteg.h $(SRC)quadinteg.h
CPPFLAGS += $(BLASYES) $(SHARPINC) $(BLASINC) $(BOOSTINC) CPPFLAGS += $(BLASYES) $(SHARPINC) $(BLASINC) $(BOOSTINC) $(FFTWINC)
LDFLAGS += $(SHARPLIBN) $(BLASLIBN) -lm LDFLAGS += $(SHARPLIBN) $(BLASLIBN) $(FFTWLIBN) -lm
#C++ rule for compiling #C++ rule for compiling
...@@ -144,6 +151,7 @@ $(OBJ)%.o: $(SRC)%.cc $(CXXHDR) ...@@ -144,6 +151,7 @@ $(OBJ)%.o: $(SRC)%.cc $(CXXHDR)
echo "compile... $<" echo "compile... $<"
$(CXXCOMPILE) $< -o $@ $(CXXCOMPILE) $< -o $@
###################### ######################
.PHONY: sharelib .PHONY: sharelib
sharelib : $(CXXOBJ) sharelib : $(CXXOBJ)
......
...@@ -12,15 +12,26 @@ using namespace std; ...@@ -12,15 +12,26 @@ using namespace std;
#include "lagsht_exceptions.h" #include "lagsht_exceptions.h"
#include "lagsht_utils.h" //getMemorySize #include "lagsht_utils.h" //getMemorySize
#include "walltimer.h" //timing #include "walltimer.h" //timing
#include <omp.h> //OpenMP for sampling
#include "lagSphericTransform.h" #include "lagSphericTransform.h"
#include "laguerre2bessel.h" #include "laguerre2bessel.h"
//Test FFT
#include <vector>
#include <algorithm>
#include <functional>
#include <math.h>
#include <fftw3.h>
using namespace LagSHT; using namespace LagSHT;
#define DEBUG 10 #define DEBUG 10
#define DEBVEC 0
#define NUM_OMP_THREADS 4
#define CHECK_BY_CCQUAD 0
//JEC 12/1/16: alpha becomes double instead of int //JEC 12/1/16: alpha becomes double instead of int
...@@ -30,15 +41,15 @@ struct PARAM { ...@@ -30,15 +41,15 @@ struct PARAM {
int N; int N;
r_8 R; r_8 R;
int Pmax; int Pmax;
int spin; //JEC 18/1/16 int spin;
// int alpha; // int alpha;
double alpha; double alpha;
string geometry; string geometry;
int ntheta; int ntheta;
int nphi; int nphi;
string clenshawDir; //JEC 23/11/15 string clenshawDir;
string jlnpDir; // " string jlnpDir;
bool recomputeJlnp; // " bool recomputeJlnp;
} param ; } param ;
//simple random generator (taken from sharp_testsuite //simple random generator (taken from sharp_testsuite
...@@ -494,7 +505,901 @@ void TestJlnpComputation() { ...@@ -494,7 +505,901 @@ void TestJlnpComputation() {
cout << " ______________ TestJlnpComputation End ___________ " << endl; cout << " ______________ TestJlnpComputation End ___________ " << endl;
} }
//---------------------------------------------------------------------------------
class JBess : public ClassFunc1D {
public:
JBess(int l, r_8 kt):l_(l),kt_(kt) {norm_ = sqrt(2./M_PI);}
virtual double operator()(double x) const {
return norm_*Bessel::jn(l_,x*kt_);
}
virtual ~JBess() {}
private:
int l_;
r_8 kt_;
r_8 norm_;
};
//------------------
class KLag : public ClassFunc1D {
public:
KLag(LaguerreFuncQuad* Ln, r_8 norm):Ln_(Ln),norm_(norm){}
virtual double operator()(double x) const {
return norm_*x*x*Ln_->Value(x);
}
virtual ~KLag() {}
private:
LaguerreFuncQuad* Ln_; //no ownership
r_8 norm_;
};
//------------------
class IntFunc1D : public ClassFunc1D {
public:
IntFunc1D(int l, r_8 klp,LaguerreFuncQuad* Ln, r_8 norm=1.0):
l_(l),klp_(klp),Ln_(Ln),norm_(norm) {}
virtual double operator()(double x) const {
return norm_*(x*x)*(Bessel::jn(l_,x*klp_))*Ln_->Value(x);
}
virtual ~IntFunc1D() {}
private:
int l_;
r_8 klp_;
LaguerreFuncQuad* Ln_; //no ownership
r_8 norm_;
};//class IntFunc1D
//------------------
void ChebyshevSampling(int n, vector<r_8>& val, const ClassFunc1D& f, r_8 a, r_8 b){
r_8 bma = 0.5*(b-a); r_8 bpa = 0.5*(b+a);
r_8 cte = M_PI/((r_8)n);
int k;
r_8 x;
#pragma omp parallel for private(x) num_threads(NUM_OMP_THREADS)
for(k=0;k<=n;k++){
x = cos(k*cte)*bma+bpa;
val[k] = f(x);
}
}
//------------------
class FFTPlanning {
public:
FFTPlanning(int n, vector<r_8>& vec) {
createPlan(n,vec);
}
~FFTPlanning() { DestroyPlan(); }
void DestroyPlan() {
if(plan_) {
cout << "Destroy Plan ["<< this << "]" << endl;
fftw_destroy_plan(plan_);
plan_=NULL;
}
}
void Execute() const { fftw_execute(plan_); }
private:
void createPlan(int n, vector<r_8>& vec) {
cout << "Create Plan ["<< this << "]" << endl;
plan_ = fftw_plan_r2r_1d(n + 1,&vec.data()[0], &vec.data()[0], FFTW_REDFT00, FFTW_ESTIMATE);
}
fftw_plan plan_;
};
//------------------
//size of val = n+1
//void ChebyshevCoeffFFT(int n, const fftw_plan plan, vector<r_8>& val){
void ChebyshevCoeffFFT(int n, const FFTPlanning& plan, vector<r_8>& val){
tstack_push("FFTW plan exec...");
// fftw_execute ( plan );
plan.Execute();
tstack_pop("FFTW plan exec...");
/*
Chebyshev Coeff. the noramization of FFTW is propto 1/(val.size()-1) = 1/n
*/
tstack_push("FFTW norm...");
r_8 norm = 1./((r_8)n);
transform(val.begin(), val.end(), val.begin(),
std::bind1st(multiplies<r_8>(),norm));
val[0] *= 0.5;
val[n] *= 0.5;
tstack_pop("FFTW norm...");
}
//------------------
void InverseChebyshevCoeffFFT(int n, const FFTPlanning& plan, vector<r_8>& val){
val[0] *= 2.0;
val[n] *= 2.0;
// fftw_execute ( plan );
plan.Execute();
transform(val.begin(), val.end(), val.begin(),
std::bind1st(multiplies<r_8>(),0.5));
}
//------------------
void VectorDebug(const string& msg, const vector<r_8>&vec, int beg, int end){
cout << "(JEC) debug " << msg << endl;
for(int i=beg;i<end;i++) {
cout << vec[i] << ", ";
}
cout << endl;
}
//------------------
//extrait de quadinteg.h dans un premier temps.
//rappel l'original calcule les poids (et nodes) pour une quadrature sur [-1, 1]
void ClenshawCurtisWeights(int order, vector<r_8>& w) throw(string) {
double b;
int i;
int j;
double pi = M_PI; //JEC use cmath definition of PI
double theta;
//Todo this kind of error should be tracked before
if ( order < 1 ) {
cerr << "\n";
cerr << "CLENSHAW_CURTIS_COMPUTE - Fatal error!\n";
cerr << " Illegal value of ORDER = " << order << "\n";
exit (EXIT_FAILURE);
}
if ( order == 1 ) {
w[0] = 2.0;
} else {
for ( i = 0; i < order; i++ ) {
theta = ( double ) ( i ) * pi / ( double ) ( order - 1 );
w[i] = 1.0;
for ( j = 1; j <= ( order - 1 ) / 2; j++ ) {
if ( 2 * j == ( order - 1 ) ) {
b = 1.0;
} else {
b = 2.0;
}
w[i] = w[i] - b * cos ( 2.0 * ( double ) ( j ) * theta )
/ ( double ) ( 4 * j * j - 1 );
}
}
w[0] = w[0] / ( double ) ( order - 1 );
for ( i = 1; i < order - 1; i++ ) {
w[i] = 2.0 * w[i] / ( double ) ( order - 1 );
}
w[order-1] = w[order-1] / ( double ) ( order - 1 );
}
}
//weights defined for [-1 1]
//FFTW Y_k = 2 Sum''_j=0^(N-1) X_j Cos[Pi k j/(N-1)]
//CC weights
//W_k = 4/(N-1) a_k Sum''_{j=0, j even}^(N-1) 1/(1-j^2) Cos[Pi k j/(N-1)]
void ClenshawCurtisWeightsFast(int n, const FFTPlanning& plan, vector<r_8>& w) {
//dim of w is n
fill(w.begin(), w.end(), (r_8)0.0);
for(int k=0;k<n; k +=2){
w[k] = 1./(1.-(r_8)(k*k));
}
// fftw_execute(plan);
plan.Execute();
r_8 norm = 2.0/(r_8)(n-1); //2 * FFTW DCT-1
transform(w.begin(), w.end(), w.begin(),
std::bind1st(multiplies<r_8>(),norm));
w[0] /= (r_8)2; w[n-1] /= (r_8)2;
}
/*
estimates the order of the Chebyshev expansion approx using the number
of roots and min/max of the cosine function asymptotic apprx of Spherical Bessel
in the range [lowBnd, uppBnd]
*/
int EstimChebyshevOrdSpherBessel(int el, r_8 kt, r_8 lowBnd, r_8 uppBnd){
int n=0;
int qq = - ( el/2 +1 );
r_8 val = M_PI/(2.0* kt) * (2*qq + el + 2 );
while(val<lowBnd) {
qq++;
val = M_PI/(2.0* kt) * (2*qq + el + 2);
}
while(val<=uppBnd){
qq++;
n++;
val = M_PI/(2.0* kt) * (2*qq + el + 2);
}
int nNodesAndMinMax = 2*n; //factor 2 to take into account min/max
return floor(log2((r_8)nNodesAndMinMax))+1;
}
/*
estimates the order of the Chebyshev expansion approx using the number
of roots of Laguerre polynomial in the range [lowBnd, uppBnd]
*/
int EstimChebyshevOrdLagFunc(const vector<r_8>& nodes, r_8 lowBnd, r_8 uppBnd) {
//If C++11 not available find an alternative with Class fucntion
int n= count_if(nodes.begin(), nodes.end(), [&](r_8 x){ return (x>=lowBnd)&&(x<=uppBnd);} );
if(n==0)n=1; //no root for Laguerre[0,x]
int nNodesAndMinMax = 2*n; //factor 2 to take into account min/max
return floor(log2((r_8)nNodesAndMinMax))+1;
}
//---------------------------------------------------------------------------------
void TestJnlpOrder() {
int Nmax = param.N;
int Lmax = param.Lmax;
int Pmax = param.Pmax;
r_8 Rmax = param.R;
string geometry = param.geometry;
int ntheta = param.ntheta;
int nphi = param.nphi;
double alpha = param.alpha;
cout << "_______________ TestJnlpOrder Start _____________ " << endl;
LaguerreSphericalTransform sphlagtrans(geometry,
Lmax,
-1,
Nmax,
Rmax,
ntheta,
nphi
);
LaguerreTransform* lagTrans = sphlagtrans.GetLagTransform();
Bessel jnu(Lmax,std::max(Pmax,Nmax)); //the max is here just in case Nmax =/= Pmax
r_8 tau = lagTrans->GetTau();
r_8 rNm1 = Rmax / tau; // R = r[N-1] with N the original transform value. Todo (lagTrans_->GetNodes()).back()
vector<r_8> integUpperBound(Nmax);
integUpperBound[0] = min(100.,rNm1); //this is the case of L_0^{\alpha}(x) =1 with no root (L
integUpperBound[1] = min(120.,rNm1); //this is the case of L_1^{\alpha}(x) =1+alpha-x with single root = 1/(1+alpha)
for (int n=2;n<Nmax;n++){
LaguerreFuncQuad lag(n,alpha);
vector<r_8> nodes;
vector<r_8> weights;
lag.QuadWeightNodes(nodes,weights);
integUpperBound[n] = nodes[n-1] + 5*(nodes[n-1]-nodes[n-2]); //upper bound from Laguerre function end point, may be modified by Bessel part later
}
stringstream ss; ss << "FFT-order"<<Lmax<<"-N"<<Nmax<<"-P"<<Pmax<<".txt";
std::ofstream ofs;
string fname("./"+ss.str());
ofs.open (fname.c_str(), std::ofstream::out);
for(int p=0;p<Pmax;p++){
for(int l=0; l<Lmax; l++){
r_8 qlp = jnu(l,p);
r_8 klptau = qlp/rNm1;
JBess jFunc(l,klptau);
r_8 intLowBound = (l<=10)? 0.0: 0.5*l/klptau;
r_8 ql2scaled = jnu(l,1)/klptau; // the 2nd BesselRoot rescaled
for (int n=0; n<Nmax; n++) {
r_8 intUppBound = std::max(integUpperBound[n], ql2scaled);
int nJ = EstimChebyshevOrdSpherBessel(l,klptau,intLowBound,intUppBound);
LaguerreFuncQuad lag(n,alpha);
vector<r_8> nodes;
vector<r_8> weights;
lag.QuadWeightNodes(nodes,weights);
int nK = EstimChebyshevOrdLagFunc(nodes,intLowBound,intUppBound);
ofs << l << " "
<< n << " "
<< p << " "
<< nJ << " "
<< nK << endl;
}//n
}//l
}//p
ofs.close();
cout << "_______________ TestJnlpOrder End _____________ " << endl;
}//TestJnlpOrder
//---------------------------------------------------------------------------------
void TestJlnpComputationByFFT() {
int Nmax = param.N;
int Lmax = param.Lmax;
int Pmax = param.Pmax;
r_8 Rmax = param.R;
string geometry = param.geometry;
int ntheta = param.ntheta;
int nphi = param.nphi;
double alpha = param.alpha;
string jlnpDir = param.jlnpDir;
string clenshawDir = param.clenshawDir;
cout << " ______________ TestJlnpComputation FFT START ___________ " << endl;
LaguerreSphericalTransform sphlagtrans(geometry,
Lmax,
-1,
Nmax,
Rmax,
ntheta,
nphi
);
tstack_push("Ctors....");
// BaseGeometry* sphere = sphlagtrans.GetSphericalGeometry();
LaguerreTransform* lagTrans = sphlagtrans.GetLagTransform();
Bessel jnu(Lmax,std::max(Pmax,Nmax)); //the max is here just in case Nmax =/= Pmax
r_8 tau = lagTrans->GetTau();
// r_8 tau3sqrt = pow(tau,(r_8)(3./2.));
r_8 rNm1 = Rmax / tau; // R = r[N-1] with N the original transform value. Todo (lagTrans_->GetNodes()).back()
cout << "(JEC) ComputeJInteg Facts START " << endl;
vector<r_8> facts(Nmax); //sqrt(n!/(n+alpha)!) the normalization
facts[0] = 1.0/sqrt(boost::math::tgamma(alpha+1.0)); //1/sqrt(alpha)!
for(int n=1;n<Nmax;n++) facts[n] = facts[n-1]*sqrt(((r_8)n)/((r_8)(n+alpha)) );
cout << "(JEC) ComputeJInteg Facts END " << endl;
cout << "(JEC) ComputeJInteg integUpperBound START " << endl;
tstack_pop("Ctors....");
tstack_push("Bounds init...");
vector<r_8> integUpperBound(Nmax);
integUpperBound[0] = min(100.,rNm1); //this is the case of L_0^{\alpha}(x) =1 with no root
integUpperBound[1] = min(120.,rNm1); //this is the case of L_1^{\alpha}(x) =1+alpha-x with single root = 1/(1+alpha)
for (int n=2;n<Nmax;n++){
LaguerreFuncQuad lag(n,alpha);
vector<r_8> nodes;
vector<r_8> weights;
lag.QuadWeightNodes(nodes,weights);
integUpperBound[n] = nodes[n-1] + 5*(nodes[n-1]-nodes[n-2]); //upper bound from Laguerre function end point, may be modified by Bessel part later
}
tstack_pop("Bounds init...");
cout << "(JEC) ComputeJInteg integUpperBound END " << endl;
tstack_push("Common FFTW Plan init...");
cout << "(JEC) Plan Init START" << endl;
//Try a common order for K & J function this make the FFTW plan for once
int iOrdGen = 10;
int nOrdGen=pow(2.,(r_8)iOrdGen)-1; //the -1 is just to use FFTW with 2^iOrdGen
vector<r_8> VecDCT1(nOrdGen+1,0.); //but for FFTW_REDFT00 it is requiered n+1... see FFTW doc
FFTPlanning planJK(nOrdGen,VecDCT1);
cout << "(JEC) Plan Init END" << endl;
cout << "(JEC) Plan Inverse Init START" << endl;
//Unique Product plan
int nOrdProd= 2*nOrdGen+1; //order max of product is 2*nOrdGen but to get a power of 2 for the FFT I add 1.
vector<r_8> VecDCT1Inv(nOrdProd+1,0.);
FFTPlanning planInv(nOrdProd,VecDCT1Inv);
cout << "(JEC) Plan Inverse Init END" << endl;
tstack_pop("Common FFTW Plan init...");
tstack_push("Common CC weights init...");
cout << "(JEC) CC weights START" << endl;
//Clenshow-Curtis single quadratuer weight
vector<r_8> wCC(nOrdProd+1,0);
FFTPlanning planCC(nOrdProd, wCC);
ClenshawCurtisWeightsFast(nOrdProd+1,planCC,wCC);
#if DEBVEC>0
VectorDebug("DCT CCurtis W [0,10]: ",wCC,0,10);
VectorDebug("DCT CCurtis W [last-10,last]: ",wCC,nOrdProd-11,nOrdProd+1);
#endif
cout << "(JEC) CC weights END" << endl;
tstack_pop("Common CC weights init...");
stringstream ss; ss << "OMP-FFTClass-jlnp-L"<<Lmax<<"-N"<<Nmax<<"-P"<<Pmax<<".txt";
std::ofstream ofs;
string fname(jlnpDir+"/"+ss.str());
ofs.open (fname.c_str(), std::ofstream::out);
for(int p=0;p<Pmax;p++){
//for(int p=0;p<10;p++){
for(int l=0; l<Lmax; l++){
//for(int l=0; l<10; l++){
r_8 qlp = jnu(l,p);
r_8 klptau = qlp/rNm1;
JBess jFunc(l,klptau);
//---------> r_8 klptau =1.; //JUST FOR TEST wrt M10
r_8 intLowBound = (l<=10)? 0.0: 0.5*l/klptau;
r_8 ql2scaled = jnu(l,1)/klptau; // the 2nd BesselRoot rescaled
for (int n=0; n<Nmax; n++) {
//for (int n=0; n<10; n++) {
r_8 intUppBound = std::max(integUpperBound[n], ql2scaled);
#if DEBUG>10
cout << "lowBnd, uppBnd: " << setprecision(30)
<< intLowBound << ", " << intUppBound
<< setprecision(6)
<< endl;
cout << "l,n,p,klptau: "
<< l << ", "
<< n << ", "
<< p << ", "
<< klptau << endl;
#endif
tstack_push("Integral Computation...");
tstack_push("Chebyshev J...");
//Get Chebyshev coeff of Bessel
// int iOrdJ = 8; int nOrdJ=pow(2.,(r_8)iOrdJ);
int nOrdJ = nOrdGen; //USE COMMON dimension
vector<r_8> coefJ(nOrdJ+1,0.);
{
tstack_push("Chebyshev J SAMPLING...");
fill(VecDCT1.begin(), VecDCT1.end(), 0.0); //may be unusefull here
ChebyshevSampling(nOrdJ,VecDCT1,jFunc,intLowBound,intUppBound);
#if DEBVEC>0
VectorDebug("Sampling J-func",VecDCT1,0,10);
#endif
tstack_pop("Chebyshev J SAMPLING...");
tstack_push("Chebyshev J FFT...");
ChebyshevCoeffFFT(nOrdJ,planJK, VecDCT1);
copy(VecDCT1.begin(),VecDCT1.end(),coefJ.begin());
tstack_pop("Chebyshev J FFT...");
#if DEBVEC>0
VectorDebug("Che J-func",coefJ,0,10);
#endif
}
tstack_pop("Chebyshev J...");
tstack_push("Chebyshev K...");