Commit 1e6269a4 authored by Thomas Dubos's avatar Thomas Dubos
Browse files

CI job running on 4 GPUs on jean-zay

parent f73ae22d
Pipeline #136179 passed with stages
in 8 minutes and 52 seconds
......@@ -91,18 +91,28 @@ build-python:
#============================== CHECK ============================#
# NOTE(review): the two job headers below are diff residue from the
# commit view — the old disabled stub (.check-jean-zay) and the newly
# activated job; confirm only one survives in the merged .gitlab-ci.yml.
.check-jean-zay: # remove dot to activate job
stage: check
# CI job: build DYNAMICO with the NVIDIA/OpenACC toolchain and run the
# DCMIP4 test on 4 GPUs of jean-zay via a blocking sbatch submission.
check-jean-zay: # remove dot to activate job
allow_failure: true
stage: check
needs: ["build-nvidia"]
tags:
- jean-zay
variables:
# Prebuilt XIOS install used for linking (project space on jean-zay).
XIOS_DIR: /gpfsdswork/projects/rech/wuu/rdzt903/JEANZAY/XIOS/JEANZAY_NVIDIA_ACC/XIOS.2233
script:
- hostname
- whoami
- uname -a
- ls -l
- git branch -a
- pwd
# Point ../XIOS at the prebuilt install expected by make_icosa.
- rm -f ../XIOS && ln -s $XIOS_DIR ../XIOS && ls -l ../XIOS/bin
- ./make_icosa -arch JEANZAY_NVIDIA_ACC -parallel mpi -with_xios -job 16
- cd test/jeanzay && rm -f *.xml && ln -s ../../xml/*.xml .
- python ../../xml/file_def_dynamico.xml.py > file_def_dynamico.xml
# Submit the 4-GPU SLURM job and block until it completes (--wait).
- sbatch --wait job_JEANZAY_ACC.sh
# Fail the job if NaNs appear in the model log.
# NOTE(review): under the runner's `set -e`, `grep … && exit -1` also
# fails this line when NO NaN is found (grep returns 1) — presumably
# tolerated via allow_failure; consider `! grep ' NaN ' …` instead.
- grep ' NaN ' rundir/gcm.log && exit -1
- grep -B 5 GLOB rundir/gcm.log
- grep 'Throughput' rundir/gcm.log | tail -n 5
- grep 'dyn ' rundir/gcm.log | tail -n 1
# Keep SLURM output with the run directory and expose it as an artifact.
- mv DYNAMICO*.out rundir/ && mv rundir ../..
artifacts:
paths:
- rundir
check-dcmip41:
allow_failure: true
......
# Build environment for jean-zay with the NVIDIA (ex-PGI) OpenACC
# toolchain: reset modules, then load GCC (host toolchain), the NVIDIA
# compilers, CUDA-aware OpenMPI and parallel HDF5/NetCDF.
module purge
module load gcc/9.1.0
module load nvidia-compilers/21.3 openmpi/4.0.5-cuda
module load hdf5/1.12.0-mpi-cuda netcdf-c/4.7.4-mpi-cuda netcdf-fortran/4.5.3-mpi-cuda
# Generate a local NVIDIA compiler configuration that delegates host
# compilation to the GNU toolchain found in PATH, and point the NVIDIA
# compilers at it via NVLOCALRC.
makelocalrc -x -d . -gcc `which gcc` -gpp `which g++` -g77 `which gfortran`
export NVLOCALRC=$PWD/localrc
# Arch file (FCM-style %KEY entries): compiler and linker settings for
# the jean-zay NVIDIA OpenACC build. All compile/link steps go through
# the MPI wrapper mpif90.
%COMPILER mpif90
%LINK mpif90
%FPP mpif90 -E
%AR ar
%MAKE gmake
# Preprocessing: define OpenACC and the NetCDF4/MPI code paths.
%FPP_FLAGS -acc
%FPP_DEF KEY_NONE CPP_NETCDF4 CPP_USING_MPI_NEW
# Base Fortran flags: 4-byte integers, 8-byte reals, OpenACC for Tesla
# V100 (cc70), accelerator-kernel info at compile time, gprof (-pg).
%BASE_FFLAGS -i4 -r8 -acc -ta=tesla:cc70 -Minfo=accel -pg
%PROD_FFLAGS -gopt -traceback -fast
%DEV_FFLAGS -g -O1 -traceback
%DEBUG_FFLAGS -g -Mbounds -Kieee -O0 -traceback
%MPI_FFLAGS
%OMP_FFLAGS -mp
# Link flags: BLAS/LAPACK, OpenACC runtime, C++ runtime (XIOS is C++),
# and NVTX from the CUDA install for profiler annotations.
%BASE_LD -i4 -r8 -lblas -llapack -acc -lstdc++ -pg -L$CUDA_HOME/lib64 -lnvToolsExt
%MPI_LD
%OMP_LD -mp
# Include/library search paths consumed by the build system.
# NetCDF flags are queried from the loaded module via nc-config, so they
# always match the environment set up in the arch .env file.
NETCDF_INCDIR="$(nc-config --cflags) $(nc-config --fflags)"
NETCDF_LIBDIR="$(nc-config --libs) $(nc-config --flibs)"
NETCDF_LIB=""
# XIOS is expected as a sibling directory (or symlink) of this tree.
XIOS_ROOTDIR="$PWD/../XIOS"
XIOS_INCDIR="-I$XIOS_ROOTDIR/inc"
XIOS_LIBDIR="-L$XIOS_ROOTDIR/lib"
XIOS_LIB="-lxios"
......@@ -4,8 +4,6 @@ function build_dynamico_()
{
NAME=$1 ; shift
LOGFILE=$1 ; shift
echo "# command used to build DYNAMICO ($NAME) :" >> logs/build.sh
echo "./make_icosa $*" >> logs/build.sh
cd $ROOT/build_$NAME
echo "In $PWD : building DYNAMICO with options $*"
......@@ -26,21 +24,23 @@ function build_dynamico()
function build_xios()
{
    # Build XIOS against the arch files shipped with DYNAMICO.
    #   $1        : log file receiving the full build output
    #   remaining : options forwarded to make_xios (e.g. --arch, --job)
    # Globals (read): ROOT, arch
    # Side effects : creates $ROOT/arch-*.env symlink, XIOS/rebuild.sh,
    #                and changes the current directory to $ROOT/XIOS.
    # NOTE: the scraped diff contained BOTH the old direct make_xios
    # invocation and the new rebuild.sh path; only the rebuild.sh path
    # is kept here so XIOS is built exactly once.
    local LOGFILE=$1 ; shift
    # Expose the DYNAMICO arch env file where --arch_path expects it.
    ln -s "$ROOT/DYNAMICO/arch/arch-${arch}.env" "$ROOT/."
    cd -P "$ROOT/XIOS" || return 1   # fail early if the XIOS tree is missing
    echo "In $PWD : Building XIOS"
    # Record an executable transcript so the exact build can be replayed
    # by hand; printf %q keeps arguments containing spaces intact.
    {
        echo '# command used to build XIOS :'
        printf '%q ' ./make_xios "$@" --full --arch_path "$ROOT"
        echo
    } > rebuild.sh
    chmod a+x rebuild.sh
    echo "To watch build progress : tail -f $LOGFILE"
    cat rebuild.sh
    ./rebuild.sh > "$LOGFILE" 2>&1
}
function main()
{
rm -f tmp/build.sh
LOGFILE=$ROOT/logs/xios.log
build_xios $LOGFILE --arch $arch_XIOS $other_XIOS --job $job || ( tail $LOGFILE ; exit 1 )
......
#!/bin/bash
# SLURM job: run the DCMIP4 test of DYNAMICO on 4 GPUs of one jean-zay
# node under Nsight Systems profiling; all progress goes to gcm.log.
## Request name
#SBATCH --job-name=DCMIP41_mpi
#SBATCH --nodes=1                 # number of nodes
#SBATCH --ntasks=4                # number of MPI tasks (= number of GPUs here)
#SBATCH --ntasks-per-node=4       # number of MPI tasks per node (= number of GPUs here)
#SBATCH --gres=gpu:4              # number of GPUs per node
#SBATCH --cpus-per-task=10        # number of CPU cores per task
## computing project
#SBATCH -A wuu@gpu
## Elapsed time limit HH:MM:SS
#SBATCH --time=00:10:00
# do not use hyperthreading
#SBATCH --hint=nomultithread
# standard outputs
#SBATCH --output=DYNAMICO%j.out
#SBATCH --error=DYNAMICO%j.out
# Pure-MPI run: keep OpenMP to one thread per task.
export OMP_NUM_THREADS=1
# OpenMP binding
export OMP_PLACES=cores
# per-thread stack
export OMP_STACKSIZE=128M
ulimit -s unlimited
# move to submission directory (abort if unavailable)
cd "${SLURM_SUBMIT_DIR}" || exit 1
# load the same modules as during compilation
source ../../arch.env
module load nvidia-nsight-systems/2021.1.1
module list
# set up a clean execution directory
rm -rf rundir
mkdir rundir
cp *.def *.xml rundir/
# and run
cd rundir || exit 1
# Redirect temporary files (profiler scratch included) to job scratch;
# -sfn replaces a stale link so a resubmitted job does not fail here.
export TMPDIR=$JOBSCRATCH
ln -sfn "$JOBSCRATCH" /tmp/nvidia
echo "Run started : $(date)" > gcm.log
# One profile per MPI rank (nsys expands %q{SLURM_PROCID} itself).
srun --unbuffered nsys profile -t nvtx,openacc -b dwarf -o "profile_${CI_COMMIT_SHORT_SHA}_%q{SLURM_PROCID}" ../../../bin/icosa_gcm.exe >> gcm.log
echo "Run finished at $(date), now collecting profiling data (takes a couple of minutes)" >> gcm.log
nsys stats "profile_${CI_COMMIT_SHORT_SHA}_0.qdrep" >> gcm.log
echo "Finished : $(date)" >> gcm.log
#------------- Planet ---------------
# Earth constants: radius [m], rotation rate [s-1], gravity [m s-2],
# heat capacity cp [J kg-1 K-1], kappa = R/cp, reference pressure [Pa].
radius=6.37122e6
omega=7.292e-5
g=9.80616
cpp=1004.5
kappa=0.2857143
preff=1e5
#------------- Dynamics ------------
#-------------- Mesh ----------------
# nbp: horizontal resolution of the icosahedral mesh; optim_it: mesh
# optimization iterations; nsplit_i/j: domain splitting per rhombus.
# NOTE(review): meanings inferred from names — confirm in DYNAMICO docs.
nbp=40
optim_it=500
nsplit_i=1
nsplit_j=1
# 30 vertical levels on the 'ncarl30' vertical grid.
llm=30
disvert=ncarl30
#------------ Numerics --------------
# time step [s] and total number of tracers
dt=480
nqtot=2
#----------- Dissipation ------------
# Hyperdiffusion: iteration counts and damping time scales [s]
# for divergence, vorticity and scalar (div-grad) operators.
nitergdiv=2
tau_graddiv=10000
nitergrot=2
tau_gradrot=10000
niterdivgrad=2
tau_divgrad=10000
#-------------- Physics -------------
#---------------- Run ---------------
# run length and output period [s] (259200 s = 3 days, output 3-hourly)
run_length=259200
write_period=10800
# initial state: DCMIP test 4, variant 1 (presumably the baroclinic
# instability case — confirm against the DCMIP-2012 test document)
etat0=dcmip4
dcmip4_testcase=1
#------------ Diagnostics -----------
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment