Commit f380c858 authored by Pierre Aubert's avatar Pierre Aubert
Browse files

First image is OK

parent 9a8544e3
...@@ -49,7 +49,7 @@ void evaluateGrayScott(size_t nbElement){ ...@@ -49,7 +49,7 @@ void evaluateGrayScott(size_t nbElement){
size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol()); size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol());
micro_benchmarkAutoNsPrint("evaluate GrayScott reaction, intrinsics", nbElement, grayscott_propagation, micro_benchmarkAutoNsPrint("evaluate GrayScott reaction, intrinsics", nbElement, grayscott_propagation,
tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol, 0l, tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol,
ptrVecMatStencil, nbStencilRow, nbStencilCol, ptrVecMatStencil, nbStencilRow, nbStencilCol,
diffudionRateU, diffusionRateV, feedRate, killRate, dt); diffudionRateU, diffusionRateV, feedRate, killRate, dt);
} }
......
...@@ -32,7 +32,7 @@ void allocate_temporary(float *& tmpU1, float *& tmpU2, float *& tmpV1, float *& ...@@ -32,7 +32,7 @@ void allocate_temporary(float *& tmpU1, float *& tmpU2, float *& tmpV1, float *&
tmpInV.fill(0.0f); tmpInV.fill(0.0f);
tmpOutV.fill(0.0f); tmpOutV.fill(0.0f);
size_t frac(9lu), numBegin(4lu), numEnd(5lu), rowShift(0lu); size_t frac(9lu), numBegin(4lu), numEnd(5lu), rowShift(-25lu);
for(size_t i(rowShift + (numBegin*nbRow)/frac); i < rowShift + (numEnd*nbRow)/frac; ++i){ for(size_t i(rowShift + (numBegin*nbRow)/frac); i < rowShift + (numEnd*nbRow)/frac; ++i){
for(size_t j((numBegin*nbCol)/frac); j < (numEnd*nbCol)/frac; ++j){ for(size_t j((numBegin*nbCol)/frac); j < (numEnd*nbCol)/frac; ++j){
tmpInU.setValue(i, j, 0.0f); tmpInU.setValue(i, j, 0.0f);
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
* @param matVecV : input of matrix V (with vectorial neighbours) * @param matVecV : input of matrix V (with vectorial neighbours)
* @param nbRow : number of rows of the matrices * @param nbRow : number of rows of the matrices
* @param nbCol : number of columns of the matrices * @param nbCol : number of columns of the matrices
* @param padding : padding of the columns of all matrices
* @param matBroadcastDeltaSquare : matrix of the delta square values (with broadcast neighbours) * @param matBroadcastDeltaSquare : matrix of the delta square values (with broadcast neighbours)
* @param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare * @param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare
* @param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare * @param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare
...@@ -27,7 +26,7 @@ ...@@ -27,7 +26,7 @@
* @param killRate : rate of the process which converts V into P * @param killRate : rate of the process which converts V into P
* @param dt : time interval between two steps * @param dt : time interval between two steps
*/ */
void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol, long padding, void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol,
const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol, const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffusionRateU, float diffusionRateV, float feedRate, float killRate, float dt) float diffusionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{ {
...@@ -42,7 +41,7 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -42,7 +41,7 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
PRegVecf vecDiffudionRateV(plib_broadcast_ss(diffusionRateV)); PRegVecf vecDiffudionRateV(plib_broadcast_ss(diffusionRateV));
PRegVecf vecDt(plib_broadcast_ss(dt)); PRegVecf vecDt(plib_broadcast_ss(dt));
for(long i(0l); i < nbRow; ++i){ for(long i(1l); i < nbRow - 1l; ++i){
long firstRowStencil(std::max(i - offsetStencilRow, 0l)); long firstRowStencil(std::max(i - offsetStencilRow, 0l));
long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow)); long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
for(long j(0l); j < nbVecCol; ++j){ for(long j(0l); j < nbVecCol; ++j){
...@@ -51,6 +50,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -51,6 +50,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
long stencilIndexRow(0l); long stencilIndexRow(0l);
// float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]);
// float fullU(0.0f), fullV(0.0f);
PRegVecf vecU(plib_load_ps(matVecVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecU(plib_load_ps(matVecVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecV(plib_load_ps(matVecVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecV(plib_load_ps(matVecVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT));
...@@ -59,8 +61,10 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -59,8 +61,10 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
for(long k(firstRowStencil); k < lastRowStencil; ++k){ for(long k(firstRowStencil); k < lastRowStencil; ++k){
long stencilIndexCol(0l); long stencilIndexCol(0l);
for(long l(firstColStencil); l < lastColStencil; ++l){ for(long l(firstColStencil); l < lastColStencil; ++l){
PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare + // float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
(stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecDeltaSquare(vecOne);
// PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare +
// (stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecKLU(plib_load_ps(matVecVecU + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecKLU(plib_load_ps(matVecVecU + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
PRegVecf vecKLV(plib_load_ps(matVecVecV + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecKLV(plib_load_ps(matVecVecV + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT));
...@@ -73,10 +77,14 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -73,10 +77,14 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare); vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare);
vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare); vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare);
// fullU += (matU[k*nbCol + l] - u)*deltaSquare;
// fullV += (matV[k*nbCol + l] - v)*deltaSquare;
++stencilIndexCol; ++stencilIndexCol;
} }
++stencilIndexRow; ++stencilIndexRow;
} }
// float uvSquare(u*v*v);
PRegVecf vecUVSquare(plib_mul_ps(vecU, plib_mul_ps(vecV, vecV))); PRegVecf vecUVSquare(plib_mul_ps(vecU, plib_mul_ps(vecV, vecV)));
PRegVecf vecOneMinusU(plib_sub_ps(vecOne, vecU)); PRegVecf vecOneMinusU(plib_sub_ps(vecOne, vecU));
...@@ -94,6 +102,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -94,6 +102,9 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
PRegVecf vecDu(plib_add_ps(vecDiffFullUMinusUVSquare, vecFeedRateMultOneMinusU)); PRegVecf vecDu(plib_add_ps(vecDiffFullUMinusUVSquare, vecFeedRateMultOneMinusU));
PRegVecf vecDv(plib_sub_ps(vecDiffFullVPlusUVSquare, vecFeedPlusKillMultV)); PRegVecf vecDv(plib_sub_ps(vecDiffFullVPlusUVSquare, vecFeedPlusKillMultV));
// float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
// float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
PRegVecf vecDuDt(plib_mul_ps(vecDu, vecDt)); PRegVecf vecDuDt(plib_mul_ps(vecDu, vecDt));
PRegVecf vecDvDt(plib_mul_ps(vecDv, vecDt)); PRegVecf vecDvDt(plib_mul_ps(vecDv, vecDt));
...@@ -103,10 +114,6 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -103,10 +114,6 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
plib_store_ps(outMatVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecUPlusDuDt); plib_store_ps(outMatVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecUPlusDuDt);
plib_store_ps(outMatVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecVPlusDvDt); plib_store_ps(outMatVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecVPlusDvDt);
// float uvSquare(u*v*v);
// float du(diffusionRateU*fullU - uvSquare + feedRate*(1.0f - u));
// float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
// outMatVecU[i*nbCol + j] = u + du*dt; // outMatVecU[i*nbCol + j] = u + du*dt;
// outMatVecV[i*nbCol + j] = v + dv*dt; // outMatVecV[i*nbCol + j] = v + dv*dt;
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include <iostream> #include <iostream>
void grayscott_propagation(float * outMatU, float * outMatV, const float * matU, const float * matV, long nbRow, long nbCol, long padding, void grayscott_propagation(float * outMatU, float * outMatV, const float * matU, const float * matV, long nbRow, long nbCol,
const float * matDeltaSquare, long nbStencilRow, long nbStencilCol, const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt); float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt);
......
...@@ -87,7 +87,11 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -87,7 +87,11 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
swapValue(tmpU1, tmpU2); swapValue(tmpU1, tmpU2);
swapValue(tmpV1, tmpV2); swapValue(tmpV1, tmpV2);
} }
fullMat.setRow(i, tmpV2); if(tmpInV.getData() == tmpV2){
fullMat.setRow(i, tmpV2);
}else{
fullMat.setRow(i, tmpV1);
}
} }
progress.finish(); progress.finish();
std::cerr << "Done" << std::endl; std::cerr << "Done" << std::endl;
......
...@@ -53,7 +53,7 @@ OptionParser createOptionParser(){ ...@@ -53,7 +53,7 @@ OptionParser createOptionParser(){
* @return true on succsess, false otherwise * @return true on succsess, false otherwise
*/ */
bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraStep, float killRate, float feedRate, float dt, const std::string & outputFile){ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraStep, float killRate, float feedRate, float dt, const std::string & outputFile){
std::cout << "simulateImage : nbRow = " << nbRow << ", nbCol = " << nbCol << std::endl; std::cout << "simulateImage : nbImage = "<<nbImage<<", nbRow = " << nbRow << ", nbCol = " << nbCol << std::endl;
MatrixHdf5 fullMat; MatrixHdf5 fullMat;
fullMat.setAllDim(nbCol, nbRow); fullMat.setAllDim(nbCol, nbRow);
...@@ -76,16 +76,16 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -76,16 +76,16 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
//Let's convert these temporaries into intrinsics temporaries //Let's convert these temporaries into intrinsics temporaries
PTensor<float> tmpVecInU(AllocMode::ALIGNED), tmpVecInV(AllocMode::ALIGNED), tmpVecOutU(AllocMode::ALIGNED), tmpVecOutV(AllocMode::ALIGNED); PTensor<float> tmpVecInU(AllocMode::ALIGNED), tmpVecInV(AllocMode::ALIGNED), tmpVecOutU(AllocMode::ALIGNED), tmpVecOutV(AllocMode::ALIGNED);
tmpVecInU.fromScalToVecNeigbhour(tmpInU, PLIB_VECTOR_SIZE_FLOAT); tmpVecInU.fromScalToVecNeigbhour(tmpInU, PLIB_VECTOR_SIZE_FLOAT);
tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
tmpVecOutU.fromScalToVecNeigbhour(tmpOutU, PLIB_VECTOR_SIZE_FLOAT); tmpVecOutU.fromScalToVecNeigbhour(tmpOutU, PLIB_VECTOR_SIZE_FLOAT);
tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
tmpVecOutV.fromScalToVecNeigbhour(tmpOutV, PLIB_VECTOR_SIZE_FLOAT); tmpVecOutV.fromScalToVecNeigbhour(tmpOutV, PLIB_VECTOR_SIZE_FLOAT);
PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT); PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT);
reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT); reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT);
tmpU1 = tmpVecInU.getData(); tmpU1 = tmpVecInU.getData();
tmpU2 = tmpVecInV.getData(); tmpU2 = tmpVecOutU.getData();
tmpV1 = tmpVecOutU.getData(); tmpV1 = tmpVecInV.getData();
tmpV2 = tmpVecOutV.getData(); tmpV2 = tmpVecOutV.getData();
float * ptrVecMatStencil = vecMatDeltaSquare.getData(); float * ptrVecMatStencil = vecMatDeltaSquare.getData();
...@@ -99,7 +99,7 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -99,7 +99,7 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
for(size_t i(0lu); i < nbImage; ++i){ for(size_t i(0lu); i < nbImage; ++i){
progress.print(); progress.print();
for(size_t j(0lu); j < nbExtraStep; ++j){ for(size_t j(0lu); j < nbExtraStep; ++j){
grayscott_propagation(tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol, 0lu, grayscott_propagation(tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol,
ptrVecMatStencil, nbStencilRow, nbStencilCol, ptrVecMatStencil, nbStencilRow, nbStencilCol,
diffudionRateU, diffusionRateV, feedRate, killRate, dt); diffudionRateU, diffusionRateV, feedRate, killRate, dt);
//Let's update the dupplicated values //Let's update the dupplicated values
...@@ -110,11 +110,12 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -110,11 +110,12 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
swapValue(tmpU1, tmpU2); swapValue(tmpU1, tmpU2);
swapValue(tmpV1, tmpV2); swapValue(tmpV1, tmpV2);
} }
if(tmpV2 == tmpVecOutV.getData()){ if(tmpV1 != tmpVecOutV.getData()){
tmpScalOutV.fromVecToScalNeigbhour(tmpVecOutV); tmpScalOutV.fromVecToScalNeigbhour(tmpVecOutV);
}else{ }else{
tmpScalOutV.fromVecToScalNeigbhour(tmpVecInV); //The pointers were swaped tmpScalOutV.fromVecToScalNeigbhour(tmpVecInV); //The pointers were swaped
} }
fullMat.setRow(i, tmpScalOutV.getData()); fullMat.setRow(i, tmpScalOutV.getData());
// fullMat.setRow(i, tmpV1); // fullMat.setRow(i, tmpV1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment