Commit 7f9259bd authored by Pierre Aubert
Browse files

Fix intrinsics version of Gray Scott

parent 46c80770
...@@ -32,7 +32,7 @@ void allocate_temporary(float *& tmpU1, float *& tmpU2, float *& tmpV1, float *& ...@@ -32,7 +32,7 @@ void allocate_temporary(float *& tmpU1, float *& tmpU2, float *& tmpV1, float *&
tmpInV.fill(0.0f); tmpInV.fill(0.0f);
tmpOutV.fill(0.0f); tmpOutV.fill(0.0f);
size_t frac(9lu), numBegin(4lu), numEnd(5lu), rowShift(-25lu); size_t frac(16lu), numBegin(7lu), numEnd(8lu), rowShift(-4lu);
for(size_t i(rowShift + (numBegin*nbRow)/frac); i < rowShift + (numEnd*nbRow)/frac; ++i){ for(size_t i(rowShift + (numBegin*nbRow)/frac); i < rowShift + (numEnd*nbRow)/frac; ++i){
for(size_t j((numBegin*nbCol)/frac); j < (numEnd*nbCol)/frac; ++j){ for(size_t j((numBegin*nbCol)/frac); j < (numEnd*nbCol)/frac; ++j){
tmpInU.setValue(i, j, 0.0f); tmpInU.setValue(i, j, 0.0f);
......
...@@ -72,8 +72,8 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * ...@@ -72,8 +72,8 @@ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float *
PRegVecf vecKLUminU(plib_sub_ps(vecKLU, vecU)); PRegVecf vecKLUminU(plib_sub_ps(vecKLU, vecU));
PRegVecf vecKLVminV(plib_sub_ps(vecKLV, vecV)); PRegVecf vecKLVminV(plib_sub_ps(vecKLV, vecV));
PRegVecf vecKLUminUdMultDeltaSquare(plib_sub_ps(vecKLUminU, vecDeltaSquare)); PRegVecf vecKLUminUdMultDeltaSquare(plib_mul_ps(vecKLUminU, vecDeltaSquare));
PRegVecf vecKLVminVdMultDeltaSquare(plib_sub_ps(vecKLVminV, vecDeltaSquare)); PRegVecf vecKLVminVdMultDeltaSquare(plib_mul_ps(vecKLVminV, vecDeltaSquare));
vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare); vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare);
vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare); vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare);
......
...@@ -61,6 +61,7 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -61,6 +61,7 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
//TODO : create an other MatrixHdf5 to store the vectorial matrices and see what's going on //TODO : create an other MatrixHdf5 to store the vectorial matrices and see what's going on
PTensor<float> tmpInU, tmpInV, tmpOutU, tmpOutV; PTensor<float> tmpInU, tmpInV, tmpOutU, tmpOutV;
float *tmpU1 = NULL, *tmpU2 = NULL, *tmpV1 = NULL, *tmpV2 = NULL; float *tmpU1 = NULL, *tmpU2 = NULL, *tmpV1 = NULL, *tmpV2 = NULL;
allocate_temporary(tmpU1, tmpU2, tmpV1, tmpV2, tmpInU, tmpInV, tmpOutU, tmpOutV, nbRow, nbCol); allocate_temporary(tmpU1, tmpU2, tmpV1, tmpV2, tmpInU, tmpInV, tmpOutU, tmpOutV, nbRow, nbCol);
...@@ -85,14 +86,20 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -85,14 +86,20 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT); PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT);
reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT); reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT);
tmpU1 = tmpVecInU.getData(); float * tmpVecU1 = tmpVecInU.getData();
tmpU2 = tmpVecOutU.getData(); float * tmpVecU2 = tmpVecOutU.getData();
tmpV1 = tmpVecInV.getData(); float * tmpVecV1 = tmpVecInV.getData();
tmpV2 = tmpVecOutV.getData(); float * tmpVecV2 = tmpVecOutV.getData();
float * ptrVecMatStencil = vecMatDeltaSquare.getData(); float * ptrVecMatStencil = vecMatDeltaSquare.getData();
size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol()); size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol());
MatrixHdf5 fullVecMat;
fullVecMat.setAllDim(nbVecCol, nbVecRow);
fullVecMat.resize(nbImage);
//TODO : create an other MatrixHdf5 to store the vectorial matrices and see what's going on
PTensor<float> tmpScalOutV(AllocMode::ALIGNED); PTensor<float> tmpScalOutV(AllocMode::ALIGNED);
ProgressTime progress(nbImage); ProgressTime progress(nbImage);
...@@ -100,32 +107,35 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte ...@@ -100,32 +107,35 @@ bool simulateImage(size_t nbRow, size_t nbCol, size_t nbImage, size_t nbExtraSte
for(size_t i(0lu); i < nbImage; ++i){ for(size_t i(0lu); i < nbImage; ++i){
progress.print(); progress.print();
for(size_t j(0lu); j < nbExtraStep; ++j){ for(size_t j(0lu); j < nbExtraStep; ++j){
grayscott_propagation(tmpU2, tmpV2, tmpU1, tmpV1, nbVecRow, nbVecCol, grayscott_propagation(tmpVecU2, tmpVecV2, tmpVecU1, tmpVecV1, nbVecRow, nbVecCol,
ptrVecMatStencil, nbStencilRow, nbStencilCol, ptrVecMatStencil, nbStencilRow, nbStencilCol,
diffudionRateU, diffusionRateV, feedRate, killRate, dt); diffudionRateU, diffusionRateV, feedRate, killRate, dt);
//Let's update the dupplicated values //Let's update the dupplicated values
reshuffle_updateDupplicateVecNeighbour(tmpU2, nbVecRow, nbVecCol, PLIB_VECTOR_SIZE_FLOAT); reshuffle_updateDupplicateVecNeighbour(tmpVecU2, nbVecRow, nbVecCol, PLIB_VECTOR_SIZE_FLOAT);
reshuffle_updateDupplicateVecNeighbour(tmpV2, nbVecRow, nbVecCol, PLIB_VECTOR_SIZE_FLOAT); reshuffle_updateDupplicateVecNeighbour(tmpVecV2, nbVecRow, nbVecCol, PLIB_VECTOR_SIZE_FLOAT);
///Let's swap the pointer ///Let's swap the pointer
swapValue(tmpU1, tmpU2); swapValue(tmpVecU1, tmpVecU2);
swapValue(tmpV1, tmpV2); swapValue(tmpVecV1, tmpVecV2);
} }
if(tmpV1 != tmpVecOutV.getData()){ if(tmpVecV1 != tmpVecOutV.getData()){
tmpScalOutV.fromVecToScalNeigbhour(tmpVecOutV); tmpScalOutV.fromVecToScalNeigbhour(tmpVecOutV);
fullVecMat.setRow(i, tmpVecOutV.getData());
}else{ }else{
tmpScalOutV.fromVecToScalNeigbhour(tmpVecInV); //The pointers were swaped tmpScalOutV.fromVecToScalNeigbhour(tmpVecInV); //The pointers were swaped
fullVecMat.setRow(i, tmpVecInV.getData());
} }
fullMat.setRow(i, tmpScalOutV.getData()); fullMat.setRow(i, tmpScalOutV.getData());
// fullMat.setRow(i, tmpV1); // fullMat.setRow(i, tmpVecV1);
// fullMat.setRow(i, tmpV2); // fullMat.setRow(i, tmpV2);
} }
progress.finish(); progress.finish();
std::cerr << "Done" << std::endl; std::cerr << "Done" << std::endl;
//Let's save the output file //Let's save the output file
fullMat.write(outputFile); fullMat.write(outputFile);
fullVecMat.write("./output_vec.h5");
return true; return true;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment