Commit f380c858 authored by Pierre Aubert's avatar Pierre Aubert
Browse files

First image is OK

parent 9a8544e3
......@@ -49,7 +49,7 @@ void evaluateGrayScott(size_t nbElement){
size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol());
micro_benchmarkAutoNsPrint("evaluate GrayScott reaction, intrinsics", nbElement, grayscott_propagation,
tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol, 0l,
tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol,
ptrVecMatStencil, nbStencilRow, nbStencilCol,
diffudionRateU, diffusionRateV, feedRate, killRate, dt);
}
......
......@@ -32,7 +32,7 @@ void allocate_temporary(float *& tmpU1, float *& tmpU2, float *& tmpV1, float *&
tmpInV.fill(0.0f);
tmpOutV.fill(0.0f);
size_t frac(9lu), numBegin(4lu), numEnd(5lu), rowShift(0lu);
size_t frac(9lu), numBegin(4lu), numEnd(5lu), rowShift(-25lu);
for(size_t i(rowShift + (numBegin*nbRow)/frac); i < rowShift + (numEnd*nbRow)/frac; ++i){
for(size_t j((numBegin*nbCol)/frac); j < (numEnd*nbCol)/frac; ++j){
tmpInU.setValue(i, j, 0.0f);
......
......@@ -17,7 +17,6 @@
* @param matVecV : input of matrix V (with vectorial neighbours)
* @param nbRow : number of rows of the matrices
* @param nbCol : number of columns of the matrices
* @param padding : padding of the columns of all matrices
* @param matBroadcastDeltaSquare : matrix of the delta square values (with broadcast neighbours)
* @param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare
* @param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare
......@@ -27,7 +26,7 @@
* @param killRate : rate of the process which converts V into P
* @param dt : time interval between two steps
*/
void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol, long padding,
void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol,
const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffusionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{
......@@ -42,7 +41,7 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
PRegVecf vecDiffudionRateV(plib_broadcast_ss(diffusionRateV));
PRegVecf vecDt(plib_broadcast_ss(dt));
for(long i(0l); i < nbRow; ++i){
for(long i(1l); i < nbRow - 1l; ++i){
long firstRowStencil(std::max(i - offsetStencilRow, 0l));
long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
for(long j(0l); j < nbVecCol; ++j){
......@@ -51,6 +50,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
long stencilIndexRow(0l);
// float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]);
// float fullU(0.0f), fullV(0.0f);
PRegVecf vecU(plib_load_ps(matVecVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecV(plib_load_ps(matVecVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
......@@ -59,8 +61,10 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
for(long k(firstRowStencil); k < lastRowStencil; ++k){
long stencilIndexCol(0l);
for(long l(firstColStencil); l < lastColStencil; ++l){
PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare +
(stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT));
// float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
PRegVecf vecDeltaSquare(vecOne);
// PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare +
// (stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecKLU(plib_load_ps(matVecVecU + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecKLV(plib_load_ps(matVecVecV + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
......@@ -73,10 +77,14 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare);
vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare);
// fullU += (matU[k*nbCol + l] - u)*deltaSquare;
// fullV += (matV[k*nbCol + l] - v)*deltaSquare;
++stencilIndexCol;
}
++stencilIndexRow;
}
// float uvSquare(u*v*v);
PRegVecf vecUVSquare(plib_mul_ps(vecU, plib_mul_ps(vecV, vecV)));
PRegVecf vecOneMinusU(plib_sub_ps(vecOne, vecU));
......@@ -94,6 +102,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
PRegVecf vecDu(plib_add_ps(vecDiffFullUMinusUVSquare, vecFeedRateMultOneMinusU));
PRegVecf vecDv(plib_sub_ps(vecDiffFullVPlusUVSquare, vecFeedPlusKillMultV));
// float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
// float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
PRegVecf vecDuDt(plib_mul_ps(vecDu, vecDt));
PRegVecf vecDvDt(plib_mul_ps(vecDv, vecDt));
......@@ -103,10 +114,6 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
plib_store_ps(outMatVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecUPlusDuDt);
plib_store_ps(outMatVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecVPlusDvDt);
// float uvSquare(u*v*v);
// float du(diffusionRateU*fullU - uvSquare + feedRate*(1.0f - u));
// float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
// outMatVecU[i*nbCol + j] = u + du*dt;
// outMatVecV[i*nbCol + j] = v + dv*dt;
}
......
......@@ -10,7 +10,7 @@
#include <iostream>
void grayscott_propagation(float * outMatU, float * outMatV, const float * matU, const float * matV, long nbRow, long nbCol, long padding,
void grayscott_propagation(float * outMatU, float * outMatV, const float * matU, const float * matV, long nbRow, long nbCol,
const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt);
......
......@@ -87,7 +87,11 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
swapValue(tmpU1, tmpU2);
swapValue(tmpV1, tmpV2);
}
if(tmpInV.getData() == tmpV2){
fullMat.setRow(i, tmpV2);
}else{
fullMat.setRow(i, tmpV1);
}
}
progress.finish();
std::cerr << "Done" << std::endl;
......
......@@ -53,7 +53,7 @@ OptionParser createOptionParser(){
* @return true on succsess, false otherwise
*/
bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraStep, float killRate, float feedRate, float dt, const std::string & outputFile){
std::cout << "simulateImage : nbRow = " << nbRow << ", nbCol = " << nbCol << std::endl;
std::cout << "simulateImage : nbImage = "<<nbImage<<", nbRow = " << nbRow << ", nbCol = " << nbCol << std::endl;
MatrixHdf5 fullMat;
fullMat.setAllDim(nbCol, nbRow);
......@@ -76,16 +76,16 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
//Let's convert these temporaries into intrinsics temporaries
PTensor<float> tmpVecInU(AllocMode::ALIGNED), tmpVecInV(AllocMode::ALIGNED), tmpVecOutU(AllocMode::ALIGNED), tmpVecOutV(AllocMode::ALIGNED);
tmpVecInU.fromScalToVecNeigbhour(tmpInU, PLIB_VECTOR_SIZE_FLOAT);
tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
tmpVecOutU.fromScalToVecNeigbhour(tmpOutU, PLIB_VECTOR_SIZE_FLOAT);
tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
tmpVecOutV.fromScalToVecNeigbhour(tmpOutV, PLIB_VECTOR_SIZE_FLOAT);
PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT);
reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT);
tmpU1 = tmpVecInU.getData();
tmpU2 = tmpVecInV.getData();
tmpV1 = tmpVecOutU.getData();
tmpU2 = tmpVecOutU.getData();
tmpV1 = tmpVecInV.getData();
tmpV2 = tmpVecOutV.getData();
float * ptrVecMatStencil = vecMatDeltaSquare.getData();
......@@ -99,7 +99,7 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
for(size_t i(0lu); i < nbImage; ++i){
progress.print();
for(size_t j(0lu); j < nbExtraStep; ++j){
grayscott_propagation(tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol, 0lu,
grayscott_propagation(tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol,
ptrVecMatStencil, nbStencilRow, nbStencilCol,
diffudionRateU, diffusionRateV, feedRate, killRate, dt);
//Let's update the dupplicated values
......@@ -110,11 +110,12 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
swapValue(tmpU1, tmpU2);
swapValue(tmpV1, tmpV2);
}
if(tmpV2 == tmpVecOutV.getData()){
if(tmpV1 != tmpVecOutV.getData()){
tmpScalOutV.fromVecToScalNeigbhour(tmpVecOutV);
}else{
tmpScalOutV.fromVecToScalNeigbhour(tmpVecInV); //The pointers were swaped
}
fullMat.setRow(i, tmpScalOutV.getData());
// fullMat.setRow(i, tmpV1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment