Commit 8b087a22 authored by CHAMONT David's avatar CHAMONT David
Browse files

After PRACE

parent b3a679ea
......@@ -536,11 +536,11 @@
"\n",
"using Liter = StrongTypedef<double,struct LiterTag> ;\n",
"\n",
"// A COMPLETER\n",
"// TO BE COMPLETED\n",
"\n",
"using Meter = StrongTypedef<double,struct MeterTag> ;\n",
"\n",
"// A COMPLETER\n",
"// TO BE COMPLETED\n",
"\n",
"int main()\n",
" {\n",
......
......@@ -21,7 +21,7 @@
"source": [
"## Downgraded mathematic functions\n",
"\n",
"Let's take this example : we want to compute the total energy `e` of an electron whose linear momentum `p` is known. We can deduce it from the inequality `e^2 = m^2*c^4 + p^2*c^2`, with `c` the speed of light and `m` the mass of the electron."
"Let's take this example : we want to compute the total energy `e` of an electron whose linear momentum `p` is known. We can deduce it from the equality `e^2 = m^2*c^4 + p^2*c^2`, with `c` the speed of light and `m` the mass of the electron."
]
},
{
......@@ -377,7 +377,7 @@
}
},
"source": [
"Actually, Eigen does not accept to multiply a scalar with avector if all the numbers are not expressed in the same types : **strong typing is not compatible with popular linera algebra libraries**."
"Actually, Eigen does not accept to multiply a scalar with avector if all the numbers are not expressed in the same types : **strong typing is not compatible with popular linear algebra libraries**."
]
},
{
......
This diff is collapsed.
......@@ -144,7 +144,7 @@
"\n",
"For each portion of your code that can be configured separately, you can create a template, taking as a parameter the floating-point type to be used.\n",
"\n",
"This flexibility comes with a price: the function bodies must be completely moved into header, which causes the usual lengthening of compilation and bloating of executables... untile C++20 provide more efficient solutions using *Modules*."
"This flexibility comes with a price: the function bodies must be completely moved into header, which causes the usual lengthening of compilation and bloating of executables... until C++20 provide more efficient solutions using *Modules*."
]
},
{
......@@ -171,6 +171,8 @@
},
"outputs": [],
"source": [
"%%file tmp.precision.cpp\n",
"\n",
"#include <iostream>\n",
"#include <vector>\n",
"\n",
......@@ -185,7 +187,7 @@
"int main()\n",
" {\n",
" print(std::vector<double>({ 1., 2., 3., 4., 5. })) ;\n",
" }\n"
" }"
]
},
{
......@@ -300,13 +302,14 @@
" assert(argc==3) ;\n",
" int size = atoi(argv[1]) ;\n",
" int repeat = atoi(argv[2]) ;\n",
" std::cout.precision(18) ;\n",
"\n",
" std::vector<XY> collection(size) ;\n",
" randomize_x(collection) ;\n",
" while (repeat--)\n",
" saxpy(collection,0.1) ;\n",
" double res = accumulate_y(collection)/size ;\n",
"\n",
" std::cout.precision(18) ;\n",
" std::cout<<res<<std::endl ;\n",
" }"
]
......@@ -339,30 +342,22 @@
"%%file tmp.precision.sh\n",
"echo\n",
"\n",
"rm -f tmp.precision.exe tmp.precision.py\n",
"rm -f tmp.precision.exe\n",
"g++ -std=c++17 tmp.precision.cpp -o tmp.precision.exe\n",
"./tmp.precision.exe $*\n",
"\n",
"rm -f tmp.precision.py\n",
"echo \"s = 0\" >> tmp.precision.py\n",
"g++ -std=c++17 tmp.precision.cpp -o tmp.precision.exe\n",
"for i in 0 1 2 3 4 5 6 7 8 9 ; do \\time -f \"s += %U\" -a -o ./tmp.precision.py ./tmp.precision.exe $* ; done\n",
"for i in 0 1 2 3 4 5 6 7 8 9 ; do\n",
" \\time -f \"s += %U\" -a -o ./tmp.precision.py ./tmp.precision.exe $* >> /dev/null\n",
"done\n",
"echo \"print(s/10.)\" >> tmp.precision.py\n",
"python3 tmp.precision.py\n",
"\n",
"echo"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true,
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"! bash -l tmp.precision.sh 4 4"
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -373,7 +368,7 @@
},
"outputs": [],
"source": [
"! bash -l tmp.precision.sh 1000 100000"
"! bash -l tmp.precision.sh 1024 100000"
]
},
{
......@@ -397,7 +392,7 @@
}
},
"source": [
"Fill the table below, which summarize results and calculation times for different precision with the arguments `1000 100000`. Calculate the ratio between the computation time and the `double` reference time. Establish the number of correct digits in the result by comparing with the benchmark result for long double.\n",
"Fill the table below, which summarize results and calculation times for different precision with the arguments `1024 100000`. Calculate the ratio between the computation time and the `double` reference time. Establish the number of correct digits in the result by comparing with the benchmark result for long double.\n",
"\n",
"| Type | temps (s) | /double | result | significant digits |\n",
"| :------| --------: | ------: | -----------------: | -----------------: |\n",
......
......@@ -59,7 +59,7 @@
}
},
"source": [
"The `srand ()` function allows one to set the starting point of the process, the **seed**. We can give it a fixed value if we want to reproduce the same sequence each time and to always obtain the same final result."
"The `srand()` function allows one to set the starting point of the process, the **seed**. We can give it a fixed value if we want to reproduce the same sequence each time and to always obtain the same final result."
]
},
{
......@@ -86,21 +86,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing tmp.random-numbers.cpp\n"
]
}
],
"outputs": [],
"source": [
"%%file tmp.random-numbers.cpp\n",
"\n",
......@@ -133,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "subslide"
......@@ -146,23 +138,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"outputs": [],
"source": [
"!./tmp.random-numbers.exe non-deterministic"
"!./tmp.random-numbers.exe deterministic"
]
},
{
......@@ -216,25 +200,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1466933256\n",
"1634966032\n",
"1820836459\n",
"1156396663\n",
"831709691\n"
]
}
],
"outputs": [],
"source": [
"#include <iostream>\n",
"#include <random>\n",
......@@ -286,22 +258,13 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mean : -0.0134657\n",
"stddev : 0.986879\n"
]
}
],
"outputs": [],
"source": [
"#include <iostream>\n",
"#include <array>\n",
......@@ -357,21 +320,13 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting tmp.random-numbers.cpp\n"
]
}
],
"outputs": [],
"source": [
"%%file tmp.random-numbers.cpp\n",
"\n",
......@@ -404,7 +359,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "skip"
......@@ -417,17 +372,13 @@
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.45464\n"
]
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
],
},
"outputs": [],
"source": [
"!./tmp.random-numbers.exe non-deterministic"
]
......
# Performance optimization
1. [Execution time measurement](en.1-chrono.md)
1. [Choice of data structure](en.2-arrays.md)
1. [The cost of the different operations](en.3-operations.md)
---
© *CNRS 2021*
*Assembled and written in French by David Chamont, translated by Karim Hasnaoui, this work is made available according to the terms of the [Creative Commons License - Attribution - NonCommercial - ShareAlike 4.0 International](http://creativecommons.org/licenses/by-nc-sa/4.0/)*
......@@ -26,13 +26,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing tmp.chrono.1.h\n"
]
}
],
"source": [
"%%file tmp.chrono.1.h\n",
"\n",
......@@ -53,7 +61,7 @@
" double res = 0 ;\n",
" for ( double data : datas )\n",
" {\n",
" double val = 1 ;\n",
" double val = 1 ; \n",
" for ( int j=0 ; j<power ; ++j )\n",
" val *= data ;\n",
" res += val ;\n",
......@@ -64,13 +72,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing tmp.chrono.cpp\n"
]
}
],
"source": [
"%%file tmp.chrono.cpp\n",
"\n",
......@@ -86,19 +102,27 @@
"\n",
" auto datas = generate(size) ;\n",
" auto res = analyse(datas,power) ;\n",
" std::cout<<res<<std::endl ;\n",
" std::cout<<res<<std::endl ; \n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing tmp.chrono.sh\n"
]
}
],
"source": [
"%%file tmp.chrono.sh\n",
"\n",
......@@ -109,15 +133,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.56846e-36\n",
"\n",
"real\t0m8.198s\n",
"user\t0m7.819s\n",
"sys\t0m0.246s\n"
]
}
],
"source": [
"!bash -l ./tmp.chrono.sh 1000 100000"
"!bash -l ./tmp.chrono.sh 1024 100000"
]
},
{
......@@ -132,7 +168,7 @@
"\n",
"When monitoring the execution time, especially a small code, and especially when running on a non-reserved dedicated machine :\n",
"* run your program many times and compute the mean execution time,\n",
"* ensure each single run is long enough so that the proprecessor pipelines get filled and you go well beyond the initial computing latency.\n",
"* ensure each single run is long enough so that the processor pipelines get filled and you go well beyond the initial computing latency.\n",
"\n",
"Also, be aware that if your data size is larger than the CPU cache, this may reduce I/O throughput, thus make your program I/ bound. In such a case, optimizing the computation instructions will not improve your global execution time."
]
......@@ -145,18 +181,26 @@
}
},
"source": [
"Below, we run the program once, so to get the ouput. Then we run it 10 times, measuring the execution time with a GNU flavor of `time`, and redirect the results into a python script, which will finally compute the mean time. We ask a power of `100000`, to be sure the arithmetic intensity is high, and we limit the size of the array to `1000`, below the usual cache size."
"Below, we run the program once, so to get the ouput. Then we run it 10 times, measuring the execution time with a GNU flavor of `time`, and redirect the results into a python script, which will finally compute the mean time. We ask a power of `100000`, to be sure the arithmetic intensity is high, and we limit the size of the array to `1024`, below the usual cache size."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting tmp.chrono.sh\n"
]
}
],
"source": [
"%%file tmp.chrono.sh\n",
"\n",
......@@ -175,15 +219,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.56846e-36\n",
"(4.580 s)\n"
]
}
],
"source": [
"!bash -l tmp.chrono.sh 1000 100000"
"!bash -l tmp.chrono.sh 1024 100000"
]
},
{
......@@ -212,13 +265,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing tmp.chrono.2.h\n"
]
}
],
"source": [
"%%file tmp.chrono.2.h\n",
"\n",
......@@ -238,18 +299,26 @@
" std::cout<<\"(\"<<dt<<\" us)\"<<std::endl ;\n",
" \n",
" return res ;\n",
" }\n"
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting tmp.chrono.cpp\n"
]
}
],
"source": [
"%%file tmp.chrono.cpp\n",
"\n",
......@@ -272,7 +341,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {
"slideshow": {
"slide_type": "-"
......@@ -285,15 +354,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(64 us)\n",
"(5507671 us)\n",
"3.56846e-36\n"
]
}
],
"source": [
"!./tmp.chrono.exe 1000 100000"
"!./tmp.chrono.exe 1024 100000"
]
},
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment