StartDate: 2026-05-20 06:04:13+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 0eb17dafdc05d981e0d72c83881190b124fdd251 CommitTime: 2026-05-19 21:38:18 +0200 CommitAuthor: Matthias Krack CommitSubject: Update spack tester for OpenSUSE: 15.6 -> 16.0 (#5252) #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=0eb17dafdc05d981e0d72c83881190b124fdd251 SPACK_CACHE=gs://cp2k-spack-cache Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 419.5MB Step 1/42 : FROM ubuntu:26.04 26.04: Pulling from library/ubuntu 6f5c5aa4e145: Pulling fs layer 1c24335ddd46: Pulling fs layer 1c24335ddd46: Verifying Checksum 1c24335ddd46: Download complete 6f5c5aa4e145: Verifying Checksum 6f5c5aa4e145: Download complete 6f5c5aa4e145: Pull complete 1c24335ddd46: Pull complete Digest: sha256:f3d28607ddd78734bb7f71f117f3c6706c666b8b76cbff7c9ff6e5718d46ff64 Status: Downloaded newer image for ubuntu:26.04 ---> 30ba44506a6d Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> aeb614fb0c08 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 2c8c6a15e040 Step 4/42 : RUN ./install_requirements.sh ubuntu:26.04 ---> Using cache ---> 7648768d280d Step 5/42 : RUN mkdir scripts ---> Using cache ---> a27de00ed5f2 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 76d57c947aad Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> bfc63cf0e921 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-dbcsr --with-gcc=system --dry-run ---> Using cache ---> 3d0712f9f332 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> d0a30baf7d82 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 653dfa4164fb Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 31d0ed9d7889 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 0b2d113c6514 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 7e77889819c2 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> a7b0f39faa05 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> d45c3b800938 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 65224bd2613c Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> f255520f68c3 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 670928af25a8 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 25efd2edfb73 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 875923a0a1da Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> f8105301555f Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> ce9dd0d76a5b Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> e904378deb58 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 854b23532b31 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 0ebcdcd8d102 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 1b835e0b8374 Step 27/42 : COPY ./tools/toolchain/scripts/stage9/ ./scripts/stage9/ ---> Using cache ---> 3b0dcdafd558 Step 28/42 : RUN ./scripts/stage9/install_stage9.sh && rm -rf ./build ---> Using cache ---> 93704b1e8b23 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> c6fe83f761d6 Step 30/42 : COPY ./src ./src ---> Using cache ---> 7664b681ca26 Step 31/42 : COPY ./data ./data ---> Using cache ---> 62bda7780d10 Step 32/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 4bea4bb56b16 Step 33/42 : COPY ./cmake ./cmake ---> Using cache ---> 8148507440ec Step 34/42 : COPY ./CMakeLists.txt . ---> Using cache ---> fe70a184e7fd Step 35/42 : COPY ./tools/docker/scripts/build_cp2k.sh . ---> Using cache ---> 0ba9dcee5e53 Step 36/42 : RUN ./build_cp2k.sh toolchain psmp ---> Running in 724ed6169074 ==================== Building CP2K ==================== -- The Fortran compiler identification is GNU 15.2.0 -- The C compiler identification is GNU 15.2.0 -- The CXX compiler identification is GNU 15.2.0 -- Detecting Fortran compiler ABI info -- Detecting Fortran compiler ABI info - done -- Check for working Fortran compiler: /usr/bin/gfortran - skipped -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: /usr/bin/gcc - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: /usr/bin/g++ - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found PkgConfig: /usr/bin/pkg-config (found version "2.5.1") -- Found Python: /usr/bin/python3.14 (found version "3.14.4") found components: Interpreter -- Found MPI_C: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpi.so (found version "5.0") -- Found MPI_CXX: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpicxx.so (found version "5.0") -- Found MPI_Fortran: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpifort.so (found version "5.0") -- Found MPI: TRUE (found version "5.0") found components: C CXX Fortran -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Could NOT find MKL (missing: CP2K_MKL_INCLUDE_DIRS) -- Checking for module 'openblas' -- Found openblas, version 0.3.33 -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/include -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found Lapack: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Checking for module 'libxsmm-shared' -- Found libxsmm-shared, version 1.17.0 -- Checking for module 'libxsmmf-shared' -- Found libxsmmf-shared, version 1.17.0 -- Checking for module 'libxsmmext-shared' -- Found libxsmmext-shared, version 1.17.0 -- Checking for module 'libxsmmnoblas-shared' -- Found libxsmmnoblas-shared, version 1.17.0 -- Found LibXSMM: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include -- Using LIBXSMM for Small Matrix Multiplication -- Checking for module 'scalapack' -- Package 'mpi', required by 'scalapack', not found Package 'lapack', required by 'scalapack', not found Package 'blas', required by 'scalapack', not found -- Found SCALAPACK: /opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a ------------------------------------------------------------ - OPENMP - ------------------------------------------------------------ -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: Fortran C CXX ------------------------------------------------------------ - DBCSR - ------------------------------------------------------------ -- Found MPI: TRUE (found version "5.0") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Checking for module 'libxsmmf' -- Found libxsmmf, version 1.17.0 -- Checking for module 'libxsmmext' -- Found libxsmmext, version 1.17.0 ------------------------------------------------------------ - Other dependencies - ------------------------------------------------------------ -- Checking for one of the modules 'elpa_openmp' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a;:libopenblas.a -- BLAS_LIBRARIES Not Given: Will Perform Search -- Checking if OpenMP is GNU -- Checking if OpenMP is GNU -- YES -- Could NOT find IntelMKL (missing: IntelMKL_LIBRARIES IntelMKL_INCLUDE_DIR lp64) -- Could NOT find IBMESSL (missing: IBMESSL_LIBRARIES IBMESSL_INCLUDE_DIR lp64) -- Could NOT find BLIS (missing: BLIS_LIBRARIES BLIS_INCLUDE_DIR lp64) -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so found components: lp64 -- Performing Test BLAS_LOWER_UNDERSCORE -- Performing Test BLAS_LOWER_UNDERSCORE -- found -- Found BLAS: TRUE found components: lp64 -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Found Torch: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so -- Found HDF5: hdf5-shared;hdf5_fortran-shared (found version "2.1.1") found components: C Fortran -- Found MPI: TRUE (found version "5.0") found components: CXX -- Found OPENBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found LibVORI: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a -- Checking for one of the modules 'fftw3' -- Checking for one of the modules 'fftw3f' -- Checking for one of the modules 'fftw3l' -- Checking for one of the modules 'fftw3q' -- Found Fftw: /opt/cp2k-toolchain/install/fftw-3.3.11/include -- Checking for module 'libint2' -- Package 'libint2' not found -- Found Libint2: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/include -- Looking for Fortran cheev -- Looking for Fortran cheev - found -- Component omp of Spglib: NOT FOUND -- Component fortran of Spglib: FOUND (LIB_TYPE: static) -- Found package: Spglib -- Found libsmeagol: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a -- Found BLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- mctc-lib: Find installed package -- multicharge: Find installed package -- DFTD4: found version 4.2.0, using v4.2+ API -- Found ACE: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a -- DFTD4: found version 4.2.0, using v4.2+ API -- Found GSL: /opt/cp2k-toolchain/install/gsl-2.8/include (found version "2.8") -- Checking for one of the modules 'libxc>=3.0.0' -- Found LibXC: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a (Required is at least version "3.0.0") -- Found LibSPG: /opt/cp2k-toolchain/install/spglib-2.7.0/lib/libsymspg.a -- Found HDF5: hdf5-shared (found version "2.1.1") found components: C -- Found FFTW: /opt/cp2k-toolchain/install/fftw-3.3.11/include -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Checking for one of the modules 's-dftd3' -- Checking for one of the modules 'mctc-lib' -- Found DFTD3: /opt/cp2k-toolchain/install/tblite-0.6.0/lib/libs-dftd3.a -- Checking for one of the modules 'dftd4' -- Checking for one of the modules 'multicharge' -- Found DFTD4: /opt/cp2k-toolchain/install/tblite-0.6.0/lib/libdftd4.a -- Found LAPACK: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so;-lm;-ldl -- Checking for one of the modules 'elpa;elpa_openmp;elpa-openmp-2019.05.001;elpa_openmp-2019.11.001;elpa_openmp-2020.05.001;elpa-2019.05.001;elpa-2019.11.001;elpa-2020.05.001' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so -- Checking for module 'libvdwxc>=0.5.0' -- Found libvdwxc, version 0.5.0 -- Checking for module 'fftw3' -- Found fftw3, version 3.3.11 -- Found LibVDWXC: vdwxc;fftw3 (Required is at least version "0.5.0") -- Checking for one of the modules 'plumed;plumedInternals' -- Found Plumed: /opt/cp2k-toolchain/install/plumed-2.10.0/include -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Checking for modules 'mclf;mcl' -- Package 'mclf' not found -- Package 'mcl' not found -- Found MiMiC: True -- Checking for module 'trexio' -- Package 'trexio' not found -- Found TrexIO: /opt/cp2k-toolchain/install/trexio-2.6.1/include -- Setting build type to 'Release' as none was specified. -- Performing Test f2008-norm2 -- Performing Test f2008-norm2 - Success -- Performing Test f2008-block_construct -- Performing Test f2008-block_construct - Success -- Performing Test f2008-contiguous -- Performing Test f2008-contiguous - Success -- Performing Test f95-reshape-order-allocatable -- Performing Test f95-reshape-order-allocatable - Success -- FYPP preprocessor found. -------------------------------------------------------------------- - - - Summary of enabled dependencies - - - -------------------------------------------------------------------- - BLAS - vendor: OpenBLAS - include directories: /opt/cp2k-toolchain/install/openblas-0.3.33/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so - LAPACK - include directories: /opt/cp2k-toolchain/install/openblas-0.3.33/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so - MPI - include directories: /opt/cp2k-toolchain/install/mpich-5.0.1/include - libraries: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpicxx.so;/opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpi.so - MPI_F08: ON - ScaLAPACK - vendor: auto - include directories: - libraries: /opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a - LibXC - version: 7.0.0 - include directories: /opt/cp2k-toolchain/install/libxc-7.0.0/include/ - libraries: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxcf03.a;/opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a - GauXC - version: 1.0.0 - install directories: /opt/cp2k-toolchain/install/gauxc-1.1-skala-cp2k-fixes/lib/cmake/gauxc - Spglib - include directories: /opt/cp2k-toolchain/install/spglib-2.7.0/include;$ - LibTorch - extra CXX flags: -D_GLIBCXX_USE_CXX11_ABI=1 - include directories: /opt/cp2k-toolchain/install/libtorch-2.7.1/include;/opt/cp2k-toolchain/install/libtorch-2.7.1/include/torch/csrc/api/include - libraries: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so - HDF5 - version: 2.1.1 - include directories: /opt/cp2k-toolchain/install/hdf5-2.1.1/include - libraries: hdf5-shared - FFTW3 - include directories: /opt/cp2k-toolchain/install/fftw-3.3.11/include - libraries: /opt/cp2k-toolchain/install/fftw-3.3.11/lib/libfftw3.a - PLUMED - include directories: /opt/cp2k-toolchain/install/plumed-2.10.0/include - libraries: /opt/cp2k-toolchain/install/plumed-2.10.0/lib/libplumed.so - LIBXSMM - include directories: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include - libraries: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmext.so;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so;/opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmf.so;:libxsmmext.a;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so - SpLA - include directories: /opt/cp2k-toolchain/install/SpLA-1.6.1/include;/opt/cp2k-toolchain/install/SpLA-1.6.1/include/spla - libraries: $;$;MPI::MPI_CXX;MPI::MPI_C;MPI::MPI_Fortran - MiMiC - include directories: /opt/cp2k-toolchain/install/mcl-3.0.0/include/MiMiC/ - libraries: /opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmclf.so;/opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmcl.so - DFTD4 - include directories : /opt/cp2k-toolchain/install/tblite-0.6.0/include;/opt/cp2k-toolchain/install/tblite-0.6.0/include/dftd4/GNU-15.2.0 - libraries : - DeePMD - ACE - include directories: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/include - libraries: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a - LibSMEAGOL - include directories: /opt/cp2k-toolchain/install/libsmeagol-1.2/include - libraries: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a - TBLITE : - include directories : /opt/cp2k-toolchain/install/tblite-0.6.0/include;/opt/cp2k-toolchain/install/tblite-0.6.0/include/tblite/GNU-15.2.0 - tblite libraries : - SIRIUS - include directories: - libraries: - COSMA - include directories: /opt/cp2k-toolchain/install/COSMA-2.8.4/include - libraries: MPI::MPI_CXX;costa::costa;$;$;$<$:cosma::BLAS::blas>;$;$<$:Tiled-MM::Tiled-MM>;$<$:Tiled-MM::Tiled-MM>;$<$:semiprof::semiprof>;$<$:cosma::scalapack::scalapack> - Libint2 - include directories: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/include - libraries: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/lib/libint2.a - LibFCI - include directories: /opt/cp2k-toolchain/install/libfci-0.1.0/include - libraries: libfci::fci - Libvori - libraries: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a - ELPA - include directories: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/include/elpa_openmp-2026.02.001 - libraries: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a;:libopenblas.a - TREXIO - include directories: /opt/cp2k-toolchain/install/trexio-2.6.1/include - libraries: /opt/cp2k-toolchain/install/trexio-2.6.1/lib/libtrexio.so - GreenX - include directories: /opt/cp2k-toolchain/install/greenX-2.2/include/modules - libraries: /opt/cp2k-toolchain/install/greenX-2.2/lib/libGXCommon.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_minimax.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_ac.so.0.0.1 -------------------------------------------------------------------- - - - List of dependencies not included in this build - - - -------------------------------------------------------------------- - PEXSI - openPMD - GPU acceleration is disabled - DLA-Future After building and installing CP2K the regtests can be run with the following command: /opt/cp2k/tests/do_regtest.py /opt/cp2k/bin psmp -- Configuring done (8.9s) -- Generating done (0.4s) -- Build files have been written to: /opt/cp2k/build Compiling CP2K ... done ---> Removed intermediate container 724ed6169074 ---> 6f562776e0c2 Step 37/42 : COPY ./benchmarks ./benchmarks ---> 91e615d9d7c6 Step 38/42 : COPY ./tools/regtesting ./tools/regtesting ---> d2f85db22641 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 9bda9c51f98b Step 40/42 : RUN ./test_performance.sh "toolchain" 2>&1 | tee report.log ---> Running in 67c7df7979a6 ============== CP2K Binary Flags ============= cp2kflags: omp libint fftw3 libxc gauxc gauxc_mpi gauxc_onedft gauxc_host elpa parallel scalapack mpi_f08 cosma ace deepmd xsmm plumed2 spglib libdftd4 dftd4_v4_2 mctc-lib tblite sirius libvori libbqb libtorch mimic libvdwxc hdf5 trexio libfci libsmeagol greenx ========== Checking Benchmark Inputs ========= Found 83 input files and 0 errors. ========== Running Performance Test ========== Plot: name="total_timings_32omp", title="Total Timings with 32 OpenMP Threads", ylabel="time [s]" Plot: name="total_timings_32mpi", title="Total Timings with 32 MPI Ranks", ylabel="time [s]" Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.024 0.024 87.635 87.635 qs_mol_dyn_low 1 2.0 0.003 0.003 87.103 87.103 qs_forces 11 3.9 0.001 0.001 87.066 87.066 qs_energies 11 4.9 0.001 0.001 81.323 81.323 scf_env_do_scf 11 5.9 0.001 0.001 70.553 70.553 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 56.653 56.653 velocity_verlet 10 3.0 0.002 0.002 55.731 55.731 dbcsr_multiply_generic 2286 12.5 0.162 0.162 25.257 25.257 qs_scf_new_mos 108 7.5 0.001 0.001 22.853 22.853 qs_scf_loop_do_ot 108 8.5 0.001 0.001 22.852 22.852 ot_scf_mini 108 9.5 0.002 0.002 21.320 21.320 rebuild_ks_matrix 119 8.3 0.001 0.001 20.914 20.914 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.014 20.913 20.913 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.263 19.263 qs_rho_update_rho_low 119 7.7 0.001 0.001 18.520 18.520 calculate_rho_elec 119 8.7 0.969 0.969 18.520 18.520 make_m2s 4572 13.5 0.042 0.042 16.522 16.522 grid_collocate_task_list 119 9.7 13.975 13.975 13.975 13.975 init_scf_loop 11 6.9 0.000 0.000 13.793 13.793 ot_mini 108 10.5 0.001 0.001 13.627 13.627 sum_up_and_integrate 119 10.3 0.001 0.001 11.943 11.943 integrate_v_rspace 119 11.3 0.093 0.093 11.864 11.864 make_images 4572 14.5 2.003 2.003 11.754 11.754 prepare_preconditioner 11 7.9 0.000 0.000 10.703 10.703 make_preconditioner 11 8.9 0.000 0.000 10.703 10.703 grid_integrate_task_list 119 12.3 9.313 9.313 9.313 9.313 make_full_inverse_cholesky 11 9.9 0.038 0.038 9.180 9.180 hybrid_alltoall_any 4725 16.4 7.661 7.661 8.066 8.066 make_images_data 4572 15.5 0.034 0.034 7.777 7.777 ot_diis_step 108 11.5 0.004 0.004 7.423 7.423 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.786 6.786 apply_single 119 13.6 0.000 0.000 6.786 6.786 fft_wrap_pw1pw2 1201 11.6 0.010 0.010 6.711 6.711 qs_ot_get_derivative 108 11.5 0.001 0.001 6.192 6.192 fft_wrap_pw1pw2_140 487 12.2 1.236 1.236 5.668 5.668 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 5.654 5.654 dbcsr_make_dense_low 5837 15.5 0.029 0.029 5.138 5.138 make_dense_data 5837 16.5 4.497 4.497 5.097 5.097 multiply_cannon 2286 13.5 0.247 0.247 4.882 4.882 init_scf_run 11 5.9 0.000 0.000 4.469 4.469 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.468 4.468 multiply_cannon_loop 2286 14.5 0.045 0.045 4.421 4.421 dbcsr_make_images_dense 3978 14.8 0.015 0.015 4.396 4.396 multiply_cannon_multrec 2286 15.5 4.316 4.316 4.376 4.376 dbcsr_complete_redistribute 329 12.2 1.869 1.869 4.214 4.214 wfi_extrapolate 11 7.9 0.001 0.001 3.935 3.935 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.760 3.760 density_rs2pw 119 9.7 0.004 0.004 3.576 3.576 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.204 3.204 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.071 3.071 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.990 2.990 dbcsr_copy 2102 12.0 0.226 0.226 2.906 2.906 qs_ot_get_p 119 10.4 0.001 0.001 2.813 2.813 qs_create_task_list 11 7.9 0.000 0.000 2.791 2.791 generate_qs_task_list 11 8.9 1.824 1.824 2.791 2.791 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 2.750 2.750 dbcsr_data_release 278921 16.0 2.675 2.675 2.675 2.675 dbcsr_copy_into_existing 22 7.9 2.667 2.667 2.667 2.667 fft3d_s 1202 13.6 2.626 2.626 2.630 2.630 cp_fm_cholesky_invert 11 10.9 2.620 2.620 2.620 2.620 potential_pw2rs 119 12.3 0.045 0.045 2.458 2.458 dbcsr_finalize 5048 13.8 0.238 0.238 2.275 2.275 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.225 2.225 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.154 2.154 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.115 2.115 pw_poisson_solve 119 10.3 0.002 0.002 2.098 2.098 dbcsr_dot 1205 11.9 2.048 2.048 2.056 2.056 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.879 1.879 cp_fm_cholesky_decompose 22 10.9 1.846 1.846 1.846 1.846 calculate_dm_sparse 119 9.5 0.001 0.001 1.781 1.781 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.013 42.834 42.835 qs_mol_dyn_low 1 2.0 0.005 0.006 42.692 42.696 qs_forces 11 3.9 0.002 0.002 42.653 42.655 qs_energies 11 4.9 0.001 0.001 40.031 40.034 scf_env_do_scf 11 5.9 0.000 0.002 36.757 36.758 scf_env_do_scf_inner_loop 108 6.5 0.003 0.018 33.520 33.521 velocity_verlet 10 3.0 0.001 0.003 25.368 25.370 rebuild_ks_matrix 119 8.3 0.000 0.001 15.519 15.566 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.018 15.519 15.566 qs_ks_update_qs_env 119 7.6 0.001 0.002 13.806 13.850 dbcsr_multiply_generic 2286 12.5 0.079 0.082 13.369 13.456 qs_rho_update_rho_low 119 7.7 0.001 0.001 11.446 11.452 calculate_rho_elec 119 8.7 0.030 0.032 11.445 11.451 sum_up_and_integrate 119 10.3 0.002 0.003 11.004 11.026 integrate_v_rspace 119 11.3 0.004 0.005 10.979 11.001 qs_scf_new_mos 108 7.5 0.001 0.001 10.725 10.806 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.724 10.806 multiply_cannon 2286 13.5 0.135 0.152 9.864 10.182 ot_scf_mini 108 9.5 0.002 0.002 9.999 10.071 multiply_cannon_loop 2286 14.5 0.083 0.090 9.301 9.526 mp_waitall_1 158411 16.6 7.812 8.145 7.812 8.145 grid_collocate_task_list 119 9.7 7.185 7.430 7.185 7.430 grid_integrate_task_list 119 12.3 6.870 7.022 6.870 7.022 multiply_cannon_metrocomm3 18288 15.5 0.035 0.036 5.897 6.124 ot_mini 108 10.5 0.001 0.001 5.668 5.754 density_rs2pw 119 9.7 0.006 0.007 3.890 4.255 potential_pw2rs 119 12.3 0.007 0.007 3.353 3.364 multiply_cannon_multrec 18288 15.5 3.111 3.213 3.120 3.222 init_scf_loop 11 6.9 0.000 0.000 3.219 3.219 fft_wrap_pw1pw2 1201 11.6 0.021 0.023 3.149 3.199 qs_ot_get_derivative 108 11.5 0.001 0.001 2.985 3.056 transfer_rs2pw 487 10.6 0.007 0.008 2.373 2.750 fft_wrap_pw1pw2_140 487 12.2 0.170 0.179 2.624 2.726 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.680 2.724 apply_single 119 13.6 0.000 0.000 2.680 2.724 ot_diis_step 108 11.5 0.004 0.004 2.668 2.669 make_m2s 4572 13.5 0.040 0.044 2.588 2.664 transfer_pw2rs 487 13.2 0.005 0.007 2.455 2.461 make_images 4572 14.5 0.101 0.104 2.274 2.349 fft3d_ps 1201 13.6 1.150 1.293 2.267 2.340 mp_waitany 9880 13.7 1.886 2.335 1.886 2.335 init_scf_run 11 5.9 0.000 0.000 2.301 2.304 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.301 2.301 wfi_extrapolate 11 7.9 0.001 0.001 2.069 2.069 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.871 1.876 transfer_rs2pw_140 130 11.5 0.222 0.260 1.397 1.774 prepare_preconditioner 11 7.9 0.000 0.000 1.501 1.516 make_preconditioner 11 8.9 0.000 0.000 1.501 1.516 qs_ot_get_p 119 10.4 0.001 0.001 1.434 1.510 transfer_pw2rs_140 130 13.9 0.413 0.449 1.380 1.417 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.378 1.392 make_images_data 4572 15.5 0.032 0.036 1.244 1.376 hybrid_alltoall_any 4725 16.4 0.060 0.170 1.121 1.241 mp_alltoall_d11v 2130 13.8 1.022 1.161 1.022 1.161 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.962 1.010 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.929 0.954 mp_alltoall_z22v 1201 15.6 0.872 0.943 0.872 0.943 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.912 0.914 rs_gather_matrices 119 12.3 0.066 0.076 0.717 0.877 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64", label="H2O-64", y=87.635, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64", label="H2O-64", y=42.834, yerr=0.0 Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=47.873000000000005, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=13.975, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.313, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=7.661, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.497, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.316, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=15.910000000000004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=7.185, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=6.87, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.06, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.111, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.812, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.886, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 107.823 107.823 qs_mol_dyn_low 1 2.0 0.003 0.003 107.068 107.068 qs_forces 11 3.9 0.001 0.001 107.030 107.030 qs_energies 11 4.9 0.001 0.001 99.742 99.742 scf_env_do_scf 11 5.9 0.001 0.001 86.539 86.539 scf_env_do_scf_inner_loop 96 6.5 0.013 0.013 70.906 70.906 velocity_verlet 10 3.0 0.002 0.002 69.294 69.294 rebuild_ks_matrix 107 8.3 0.001 0.001 32.532 32.532 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.013 32.532 32.532 qs_ks_update_qs_env 107 7.6 0.001 0.001 29.264 29.264 qs_rho_update_rho_low 107 7.7 0.000 0.000 29.056 29.056 calculate_rho_elec 107 8.7 0.866 0.866 29.055 29.055 grid_collocate_task_list 107 9.7 24.777 24.777 24.777 24.777 sum_up_and_integrate 107 10.3 0.001 0.001 24.470 24.470 integrate_v_rspace 107 11.3 0.086 0.086 24.411 24.411 dbcsr_multiply_generic 1966 12.4 0.141 0.141 22.629 22.629 grid_integrate_task_list 107 12.3 22.041 22.041 22.041 22.041 qs_scf_new_mos 96 7.5 0.001 0.001 19.402 19.402 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.401 19.401 ot_scf_mini 96 9.5 0.002 0.002 18.082 18.082 init_scf_loop 11 6.9 0.000 0.000 15.507 15.507 make_m2s 3932 13.4 0.035 0.035 14.743 14.743 ot_mini 96 10.5 0.001 0.001 11.720 11.720 prepare_preconditioner 11 7.9 0.000 0.000 10.829 10.829 make_preconditioner 11 8.9 0.000 0.000 10.829 10.829 make_images 3932 14.4 1.801 1.801 10.499 10.499 make_full_inverse_cholesky 11 9.9 0.042 0.042 9.292 9.292 hybrid_alltoall_any 4079 16.3 6.642 6.642 7.053 7.053 make_images_data 3932 15.4 0.029 0.029 6.747 6.747 fft_wrap_pw1pw2 1081 11.6 0.009 0.009 6.413 6.413 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.302 6.302 ot_diis_step 96 11.5 0.003 0.003 6.194 6.194 init_scf_run 11 5.9 0.000 0.000 6.149 6.149 scf_env_initial_rho_setup 11 6.9 0.002 0.002 6.147 6.147 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.070 6.070 apply_single 107 13.6 0.000 0.000 6.070 6.070 fft_wrap_pw1pw2_140 439 12.2 1.341 1.341 5.538 5.538 qs_ot_get_derivative 96 11.5 0.001 0.001 5.515 5.515 wfi_extrapolate 11 7.9 0.001 0.001 5.504 5.504 dbcsr_make_dense_low 4961 15.5 0.025 0.025 4.622 4.622 make_dense_data 4961 16.5 4.085 4.085 4.587 4.587 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.503 4.503 multiply_cannon 1966 13.4 0.181 0.181 4.432 4.432 dbcsr_complete_redistribute 317 12.2 1.876 1.876 4.325 4.325 multiply_cannon_loop 1966 14.4 0.043 0.043 4.070 4.070 multiply_cannon_multrec 1966 15.4 3.972 3.972 4.026 4.026 dbcsr_make_images_dense 3386 14.7 0.013 0.013 3.951 3.951 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.834 3.834 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.765 3.765 density_rs2pw 107 9.7 0.004 0.004 3.411 3.411 qs_create_task_list 11 7.9 0.000 0.000 3.291 3.291 generate_qs_task_list 11 8.9 2.266 2.266 3.291 3.291 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.099 3.099 dbcsr_copy 1855 11.9 0.217 0.217 2.917 2.917 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 2.782 2.782 dbcsr_copy_into_existing 22 7.9 2.689 2.689 2.689 2.689 cp_fm_cholesky_invert 11 10.9 2.660 2.660 2.660 2.660 dbcsr_data_release 237968 15.9 2.626 2.626 2.626 2.626 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.547 2.547 fft3d_s 1082 13.6 2.452 2.452 2.457 2.457 copy_fm_to_dbcsr 170 11.1 0.001 0.001 2.349 2.349 qs_ot_get_p 107 10.4 0.001 0.001 2.287 2.287 potential_pw2rs 107 12.3 0.041 0.041 2.284 2.284 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.195 2.195 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.014 66.524 66.526 qs_mol_dyn_low 1 2.0 0.004 0.005 66.383 66.387 qs_forces 11 3.9 0.002 0.002 66.346 66.349 qs_energies 11 4.9 0.001 0.001 62.020 62.023 scf_env_do_scf 11 5.9 0.000 0.001 57.418 57.420 scf_env_do_scf_inner_loop 96 6.5 0.003 0.018 52.608 52.610 velocity_verlet 10 3.0 0.001 0.003 39.913 39.915 rebuild_ks_matrix 107 8.3 0.000 0.000 29.049 29.121 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.017 29.049 29.121 qs_ks_update_qs_env 107 7.6 0.001 0.001 25.645 25.716 sum_up_and_integrate 107 10.3 0.002 0.002 24.835 24.871 integrate_v_rspace 107 11.3 0.004 0.004 24.812 24.848 qs_rho_update_rho_low 107 7.7 0.001 0.001 22.531 22.535 calculate_rho_elec 107 8.7 0.027 0.028 22.530 22.535 grid_integrate_task_list 107 12.3 20.541 20.933 20.541 20.933 grid_collocate_task_list 107 9.7 18.688 18.972 18.688 18.972 dbcsr_multiply_generic 1966 12.4 0.071 0.074 12.247 12.378 qs_scf_new_mos 96 7.5 0.001 0.001 9.689 9.740 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.688 9.739 multiply_cannon 1966 13.4 0.122 0.134 9.011 9.413 ot_scf_mini 96 9.5 0.002 0.002 9.083 9.136 multiply_cannon_loop 1966 14.4 0.076 0.080 8.507 8.684 mp_waitall_1 136719 16.5 7.206 7.671 7.206 7.671 multiply_cannon_metrocomm3 15728 15.4 0.032 0.033 5.387 5.691 ot_mini 96 10.5 0.001 0.001 5.105 5.164 init_scf_loop 11 6.9 0.000 0.000 4.792 4.793 density_rs2pw 107 9.7 0.005 0.006 3.489 3.961 init_scf_run 11 5.9 0.000 0.000 3.596 3.599 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.596 3.596 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.555 3.558 potential_pw2rs 107 12.3 0.006 0.007 3.273 3.295 wfi_extrapolate 11 7.9 0.001 0.001 3.258 3.258 multiply_cannon_multrec 15728 15.4 2.841 2.942 2.850 2.951 fft_wrap_pw1pw2 1081 11.6 0.018 0.020 2.827 2.906 qs_ot_get_derivative 96 11.5 0.001 0.001 2.740 2.792 transfer_rs2pw 439 10.6 0.006 0.006 2.136 2.605 transfer_pw2rs 439 13.2 0.005 0.006 2.482 2.488 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.405 2.447 apply_single 107 13.6 0.000 0.000 2.405 2.446 fft_wrap_pw1pw2_140 439 12.2 0.151 0.163 2.347 2.394 make_m2s 3932 13.4 0.036 0.040 2.288 2.355 ot_diis_step 96 11.5 0.003 0.003 2.352 2.353 mp_waitany 8968 13.7 1.786 2.297 1.786 2.297 fft3d_ps 1081 13.6 1.031 1.118 2.041 2.224 make_images 3932 14.4 0.090 0.092 2.011 2.079 mp_alltoall_d11v 1998 13.7 1.245 1.702 1.245 1.702 transfer_rs2pw_140 118 11.5 0.188 0.208 1.219 1.691 prepare_preconditioner 11 7.9 0.000 0.000 1.543 1.579 make_preconditioner 11 8.9 0.000 0.000 1.543 1.579 qs_ot_get_p 107 10.4 0.001 0.001 1.405 1.483 transfer_pw2rs_140 118 13.9 0.389 0.445 1.401 1.475 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.421 1.451 rs_gather_matrices 107 12.3 0.061 0.070 0.958 1.419 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64_nonortho", label="H2O-64_nonortho", y=107.823, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64_nonortho", label="H2O-64_nonortho", y=66.524, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=46.30599999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.777, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.041, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=6.642, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.085, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.972, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=15.462000000000003, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.688, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=20.541, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.841, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.206, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.786, yerr=0.0 Running w64PBE.inp with 1 threads and 32 ranks... done. Running w64PBE.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/w64PBE_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.041 0.041 142.317 142.317 qs_mol_dyn_low 1 2.0 0.003 0.003 141.387 141.387 qs_forces 11 3.9 0.001 0.001 141.349 141.349 qs_energies 11 4.9 0.001 0.001 129.111 129.111 scf_env_do_scf 11 5.9 0.003 0.003 115.858 115.858 velocity_verlet 10 3.0 0.002 0.002 110.925 110.925 scf_env_do_scf_inner_loop 106 6.8 0.011 0.011 100.880 100.880 rebuild_ks_matrix 117 8.5 0.000 0.000 62.167 62.167 qs_ks_build_kohn_sham_matrix 117 9.5 0.014 0.014 62.166 62.166 qs_ks_update_qs_env 120 7.8 0.001 0.001 56.055 56.055 qs_rho_update_rho_low 117 7.9 0.001 0.001 44.320 44.320 calculate_rho_elec 117 8.9 1.533 1.533 44.319 44.319 grid_collocate_task_list 117 9.9 36.940 36.940 36.940 36.940 sum_up_and_integrate 117 10.5 0.002 0.002 29.826 29.826 integrate_v_rspace 117 11.5 0.070 0.070 29.731 29.731 grid_integrate_task_list 117 12.5 26.196 26.196 26.196 26.196 fft_wrap_pw1pw2 2000 12.9 0.026 0.026 25.416 25.416 qs_vxc_create 117 10.5 0.001 0.001 24.520 24.520 xc_vxc_pw_create 117 11.5 1.720 1.720 24.519 24.519 fft_wrap_pw1pw2_200 1298 14.3 3.835 3.835 23.775 23.775 xc_pw_derive 702 13.5 0.004 0.004 15.165 15.165 init_scf_loop 14 6.8 0.000 0.000 14.913 14.913 dbcsr_multiply_generic 2035 12.5 0.140 0.140 14.174 14.174 qs_scf_new_mos 106 7.8 0.001 0.001 12.536 12.536 qs_scf_loop_do_ot 106 8.8 0.001 0.001 12.535 12.535 xc_rho_set_and_dset_create 117 12.5 1.295 1.295 11.851 11.851 ot_scf_mini 106 9.8 0.002 0.002 11.484 11.484 fft3d_s 2001 14.9 11.016 11.016 11.024 11.024 xc_pw_divergence 117 12.5 0.002 0.002 10.804 10.804 make_m2s 4070 13.5 0.037 0.037 7.786 7.786 ot_mini 106 10.8 0.001 0.001 7.370 7.370 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.047 7.047 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.779 6.779 prepare_preconditioner 14 7.8 0.000 0.000 6.713 6.713 make_preconditioner 14 8.8 0.000 0.000 6.713 6.713 pw_scatter_s 1053 15.2 6.382 6.382 6.382 6.382 init_scf_run 11 5.9 0.000 0.000 6.051 6.051 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.049 6.049 density_rs2pw 117 9.9 0.004 0.004 5.846 5.846 make_images 4070 14.5 1.190 1.190 5.633 5.633 wfi_extrapolate 11 7.9 0.001 0.001 5.284 5.284 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.188 5.188 qs_ot_get_derivative 106 11.8 0.001 0.001 4.502 4.502 dbcsr_copy 4760 12.9 0.218 0.218 3.759 3.759 multiply_cannon 2035 13.5 0.250 0.250 3.739 3.739 make_full_inverse_cholesky 14 9.8 0.000 0.000 3.731 3.731 pw_poisson_solve 117 10.5 0.002 0.002 3.724 3.724 hybrid_alltoall_any 4213 16.4 3.564 3.564 3.681 3.681 make_images_data 4070 15.5 0.029 0.029 3.671 3.671 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.548 3.548 dbcsr_copy_into_existing 22 7.9 3.522 3.522 3.523 3.523 potential_pw2rs 117 12.5 0.074 0.074 3.465 3.465 multiply_cannon_loop 2035 14.5 0.046 0.046 3.287 3.287 xc_functional_eval 117 13.5 0.001 0.001 3.254 3.254 pbe_lda_eval 117 14.5 3.253 3.253 3.253 3.253 multiply_cannon_multrec 2035 15.5 3.192 3.192 3.240 3.240 qs_create_task_list 11 7.9 0.000 0.000 2.905 2.905 generate_qs_task_list 11 8.9 2.030 2.030 2.905 2.905 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.872 2.872 ot_diis_step 106 11.8 0.003 0.003 2.856 2.856 make_full_single_inverse 14 9.8 0.002 0.002 2.523 2.523 dbcsr_make_dense_low 4890 15.6 0.025 0.025 2.471 2.471 make_dense_data 4890 16.6 1.956 1.956 2.436 2.436 pw_gather_s 947 14.5 2.427 2.427 2.427 2.427 apply_preconditioner_dbcsr 120 12.8 0.000 0.000 2.314 2.314 apply_single 120 13.8 0.000 0.000 2.313 2.313 pw_integral_ab_c1d_c1d_gs 117 11.5 2.225 2.225 2.225 2.225 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 2.201 2.201 dbcsr_complete_redistribute 323 11.8 1.208 1.208 2.176 2.176 pw_derive 1053 13.8 2.042 2.042 2.042 2.042 dbcsr_make_images_dense 3364 14.9 0.013 0.013 1.843 1.843 pw_copy 1755 13.0 1.801 1.801 1.801 1.801 copy_dbcsr_to_fm 143 10.8 0.003 0.003 1.698 1.698 arnoldi_generalized_ev 14 10.8 0.000 0.000 1.670 1.670 dbcsr_sym_matrix_vector_mult 1269 12.5 0.026 0.026 1.643 1.643 gev_build_subspace 23 11.5 0.009 0.009 1.521 1.521 dbcsr_dot 1125 12.2 1.443 1.443 1.449 1.449 cp_fm_cholesky_invert 14 10.8 1.386 1.386 1.386 1.386 qs_ot_get_p 120 10.5 0.001 0.001 1.373 1.373 dbcsr_sym_matrix_vector_mult_l 1269 13.5 1.337 1.337 1.355 1.355 dbcsr_finalize 4628 13.9 0.099 0.099 1.312 1.312 cp_dbcsr_sm_fm_multiply 46 9.3 0.001 0.001 1.301 1.301 build_core_ppl_forces 11 5.9 1.299 1.299 1.299 1.299 calculate_dm_sparse 117 9.7 0.000 0.000 1.233 1.233 copy_fm_to_dbcsr 180 10.8 0.001 0.001 1.164 1.164 fft_wrap_pw1pw2_70 234 13.2 0.099 0.099 1.159 1.159 qs_ot_get_orbitals 106 10.8 0.000 0.000 1.121 1.121 pw_poisson_set 118 11.5 0.002 0.002 1.120 1.120 build_kinetic_matrix_low 22 6.9 1.027 1.027 1.119 1.119 transfer_dbcsr_to_fm 14 10.8 0.001 0.001 1.116 1.116 build_overlap_matrix_low 22 6.9 1.003 1.003 1.110 1.110 dbcsr_merge_all 4098 15.1 0.427 0.427 1.010 1.010 cp_fm_cholesky_decompose 28 10.5 0.992 0.992 0.992 0.992 pw_copy_to_array 947 14.5 0.945 0.945 0.945 0.945 evaluate_core_matrix_traces 117 8.5 0.001 0.001 0.934 0.934 calculate_ptrace_kp 234 9.5 0.001 0.001 0.933 0.933 qs_init_subsys 1 2.0 0.001 0.001 0.871 0.871 qs_env_setup 1 3.0 0.000 0.000 0.858 0.858 qs_env_rebuild_pw_env 23 5.3 0.000 0.000 0.858 0.858 pw_env_rebuild 1 5.0 0.000 0.000 0.858 0.858 pw_grid_setup 4 6.0 0.000 0.000 0.856 0.856 pw_grid_setup_internal 4 7.0 0.013 0.013 0.856 0.856 pw_axpy 1170 12.0 0.824 0.824 0.824 0.824 pw_grid_sort 4 8.0 0.557 0.557 0.729 0.729 dbcsr_make_undense 1526 14.1 0.616 0.616 0.726 0.726 dbcsr_sort_indices 6887 16.5 0.684 0.684 0.684 0.684 dbcsr_make_dense 1526 14.1 0.004 0.004 0.665 0.665 cp_dbcsr_sm_fm_multiply_core 46 10.3 0.000 0.000 0.665 0.665 pw_copy_from_array 1053 15.2 0.653 0.653 0.653 0.653 quick_finalize 4380 16.4 0.043 0.043 0.552 0.552 calculate_rho_core 11 7.9 0.246 0.246 0.550 0.550 dbcsr_special_finalize 4070 15.5 0.007 0.007 0.545 0.545 dbcsr_iterator_start 35255 15.3 0.516 0.516 0.542 0.542 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.538 0.538 qs_ot_p2m_diag 19 11.0 0.054 0.054 0.536 0.536 build_core_ppl 11 7.9 0.523 0.523 0.523 0.523 grid_create_task_list 11 9.9 0.508 0.508 0.508 0.508 dbcsr_add_d 1795 13.1 0.002 0.002 0.466 0.466 dbcsr_add_anytype 1795 14.1 0.169 0.169 0.464 0.464 transfer_fm_to_dbcsr 14 9.8 0.002 0.002 0.458 0.458 make_basis_sm 14 9.3 0.000 0.000 0.439 0.439 cp_dbcsr_syevd 19 12.0 0.001 0.001 0.410 0.410 cp_dbcsr_alloc_block_from_nbl 88 7.7 0.227 0.227 0.390 0.390 transfer_rs2pw 479 10.8 0.005 0.005 0.386 0.386 cp_dbcsr_plus_fm_fm_t 22 8.9 0.000 0.000 0.385 0.385 transfer_pw2rs 479 13.4 0.002 0.002 0.381 0.381 dbcsr_iterator_stop 35255 15.3 0.372 0.372 0.379 0.379 cp_fm_diag_elpa 19 13.0 0.000 0.000 0.377 0.377 cp_fm_diag_elpa_base 19 14.0 0.361 0.361 0.377 0.377 distribute_tasks 11 9.9 0.368 0.368 0.368 0.368 pw_zero 585 13.0 0.362 0.362 0.362 0.362 build_qs_neighbor_lists 11 6.9 0.001 0.001 0.358 0.358 fft_wrap_pw1pw2_30 234 13.2 0.020 0.020 0.347 0.347 compute_matrix_w 11 5.9 0.000 0.000 0.335 0.335 calculate_w_matrix_ot 11 6.9 0.007 0.007 0.335 0.335 dbcsr_data_copy_aa2 322 15.1 0.329 0.329 0.329 0.329 mp_alltoall_d11v 1899 13.8 0.326 0.326 0.326 0.326 reorthogonalize_vectors 10 9.0 0.000 0.000 0.313 0.313 transfer_pw2rs_200 128 14.1 0.308 0.308 0.308 0.308 transfer_rs2pw_200 128 11.7 0.305 0.305 0.305 0.305 dbcsr_desymmetrize_deep 143 11.8 0.169 0.169 0.303 0.303 ot_scf_init 14 7.8 0.001 0.001 0.282 0.282 dbcsr_set 7009 14.0 0.005 0.005 0.281 0.281 dbcsr_zero 7009 15.0 0.276 0.276 0.276 0.276 cp_fm_uplo_to_full 47 13.4 0.270 0.270 0.270 0.270 dbcsr_reserve_blocks 1093 14.3 0.205 0.205 0.240 0.240 build_core_ppnl_forces 11 5.9 0.238 0.238 0.238 0.238 dbcsr_make_index_list 4070 14.5 0.236 0.236 0.236 0.236 dbcsr_make_untransposed_blocks 2481 13.4 0.145 0.145 0.225 0.225 tree_to_linear_d 323 14.8 0.218 0.218 0.218 0.218 parallel_gemm_fm_cosma 96 8.9 0.203 0.203 0.203 0.203 dbcsr_create_new 25633 14.8 0.160 0.160 0.187 0.187 build_neighbor_lists_sab_all 11 7.9 0.184 0.184 0.184 0.184 calculate_first_density_matrix 1 7.0 0.000 0.000 0.182 0.182 dbcsr_work_destroy_all 8478 15.8 0.120 0.120 0.180 0.180 sort_shells 4 9.0 0.172 0.172 0.172 0.172 copy_fm_to_dbcsr_bc 180 11.8 0.070 0.070 0.171 0.171 dbcsr_data_new 194710 15.4 0.167 0.167 0.167 0.167 qs_ot_p2m_taylor 101 11.6 0.001 0.001 0.154 0.154 copy_dbcsr_to_fm_bc 143 11.8 0.144 0.144 0.152 0.152 dbcsr_data_release 257147 16.0 0.144 0.144 0.144 0.144 ------------------------------------------------------------------------------- From /workspace/artifacts/w64PBE_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.024 102.122 102.124 qs_mol_dyn_low 1 2.0 0.005 0.006 101.959 101.962 qs_forces 11 3.9 0.002 0.002 101.920 101.922 qs_energies 11 4.9 0.001 0.001 94.171 94.174 scf_env_do_scf 11 5.9 0.001 0.006 88.673 88.676 velocity_verlet 10 3.0 0.001 0.003 80.048 80.051 scf_env_do_scf_inner_loop 106 6.8 0.003 0.019 79.963 79.965 rebuild_ks_matrix 117 8.5 0.000 0.000 55.052 55.077 qs_ks_build_kohn_sham_matrix 117 9.5 0.019 0.020 55.052 55.076 qs_ks_update_qs_env 120 7.8 0.001 0.001 48.842 48.865 sum_up_and_integrate 117 10.5 0.002 0.003 34.889 34.910 integrate_v_rspace 117 11.5 0.005 0.005 34.812 34.842 qs_rho_update_rho_low 117 7.9 0.001 0.001 33.724 33.727 calculate_rho_elec 117 8.9 0.049 0.053 33.723 33.726 grid_collocate_task_list 117 9.9 25.819 27.229 25.819 27.229 grid_integrate_task_list 117 12.5 26.559 27.181 26.559 27.181 fft_wrap_pw1pw2 2000 12.9 0.043 0.051 16.873 17.510 fft_wrap_pw1pw2_200 1298 14.3 0.721 0.841 15.789 16.188 qs_vxc_create 117 10.5 0.003 0.004 15.037 15.239 xc_vxc_pw_create 117 11.5 0.127 0.164 15.034 15.236 fft3d_ps 2000 14.9 6.224 7.023 12.609 13.969 xc_pw_derive 702 13.5 0.010 0.011 10.177 11.039 dbcsr_multiply_generic 2035 12.5 0.066 0.069 9.302 9.410 init_scf_loop 14 6.8 0.000 0.001 8.693 8.695 density_rs2pw 117 9.9 0.006 0.006 7.564 8.321 xc_rho_set_and_dset_create 117 12.5 0.157 0.204 7.923 8.223 mp_alltoall_z22v 2000 16.9 5.139 7.869 5.139 7.869 xc_pw_divergence 117 12.5 0.003 0.004 6.751 7.277 qs_scf_new_mos 106 7.8 0.001 0.001 7.040 7.076 qs_scf_loop_do_ot 106 8.8 0.001 0.001 7.039 7.076 potential_pw2rs 117 12.5 0.009 0.009 6.947 6.975 multiply_cannon 2035 13.5 0.116 0.131 6.530 6.728 ot_scf_mini 106 9.8 0.002 0.002 6.586 6.617 multiply_cannon_loop 2035 14.5 0.061 0.066 6.153 6.393 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.297 6.300 mp_waitall_1 142145 16.6 5.742 5.992 5.742 5.992 transfer_rs2pw 479 10.8 0.007 0.009 4.433 5.309 mp_waitany 9728 13.9 4.212 5.301 4.212 5.301 transfer_pw2rs 479 13.4 0.006 0.007 5.152 5.166 yz_to_x 830 15.8 0.480 0.654 3.076 5.054 ot_mini 106 10.8 0.001 0.001 4.136 4.175 init_scf_run 11 5.9 0.000 0.000 4.143 4.146 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.142 4.143 multiply_cannon_metrocomm3 16280 15.5 0.026 0.028 3.752 3.975 transfer_rs2pw_200 128 11.7 0.464 0.508 3.041 3.927 wfi_extrapolate 11 7.9 0.001 0.001 3.764 3.764 x_to_yz 936 16.2 0.681 0.830 3.206 3.652 transfer_pw2rs_200 128 14.1 1.138 1.316 3.392 3.490 xc_functional_eval 117 13.5 0.002 0.004 2.575 2.804 pbe_lda_eval 117 14.5 2.572 2.801 2.572 2.801 qs_ot_get_derivative 106 11.8 0.001 0.001 2.678 2.711 multiply_cannon_multrec 16280 15.5 2.176 2.221 2.183 2.230 mp_alltoall_d11v 2144 13.6 1.459 2.087 1.459 2.087 rs_gather_matrices 117 12.5 0.049 0.057 1.279 1.917 pw_gather_p 947 14.5 1.442 1.889 1.442 1.889 prepare_preconditioner 14 7.8 0.000 0.000 1.775 1.780 make_preconditioner 14 8.8 0.000 0.000 1.775 1.780 make_m2s 4070 13.5 0.035 0.047 1.728 1.772 pw_derive 1053 13.8 1.522 1.733 1.522 1.733 pw_poisson_solve 117 10.5 0.003 0.003 1.550 1.612 apply_preconditioner_dbcsr 120 12.8 0.000 0.000 1.512 1.607 apply_single 120 13.8 0.000 0.000 1.512 1.606 make_images 4070 14.5 0.096 0.099 1.483 1.525 transfer_pw2rs_70 117 14.5 0.400 0.453 1.359 1.445 ot_diis_step 106 11.8 0.003 0.003 1.444 1.444 pw_copy 1755 13.0 1.331 1.430 1.331 1.430 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.335 1.411 pw_scatter_p 1053 15.2 1.159 1.280 1.159 1.280 fft_wrap_pw1pw2_70 234 13.2 0.017 0.022 0.823 1.211 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 1.206 1.207 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 1.099 1.128 pw_poisson_set 118 11.5 0.005 0.005 1.013 1.076 transfer_rs2pw_70 117 11.9 0.304 0.332 1.050 1.073 mp_sendrecv_dv 10881 12.9 0.980 1.030 0.980 1.030 qs_ot_get_p 120 10.5 0.001 0.001 0.839 0.908 mp_sum_d 3833 11.6 0.620 0.887 0.620 0.887 make_full_inverse_cholesky 14 9.8 0.000 0.000 0.856 0.883 make_full_single_inverse 14 9.8 0.001 0.001 0.849 0.854 build_core_ppl_forces 11 5.9 0.751 0.853 0.751 0.853 mp_sum_l 9540 13.6 0.656 0.837 0.656 0.837 make_images_data 4070 15.5 0.026 0.029 0.744 0.831 hybrid_alltoall_any 4213 16.4 0.037 0.088 0.665 0.761 cp_dbcsr_sm_fm_multiply 46 9.3 0.001 0.001 0.680 0.682 qs_env_update_s_mstruct 11 6.9 0.000 0.000 0.585 0.636 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 0.563 0.606 cp_dbcsr_sm_fm_multiply_core 46 10.3 0.000 0.000 0.576 0.586 pw_copy_to_array 947 14.5 0.464 0.574 0.464 0.574 rs_grid_zero 139 14.6 0.526 0.562 0.526 0.562 pw_axpy 1170 12.0 0.489 0.544 0.489 0.544 calculate_dm_sparse 117 9.7 0.000 0.000 0.511 0.518 cp_fm_cholesky_invert 14 10.8 0.497 0.511 0.497 0.511 qs_ot_get_orbitals 106 10.8 0.000 0.000 0.507 0.511 pw_copy_from_array 1053 15.2 0.411 0.491 0.411 0.491 calculate_rho_core 11 7.9 0.015 0.020 0.408 0.455 ot_scf_init 14 7.8 0.001 0.001 0.386 0.389 mp_allgather_i34 2035 14.5 0.165 0.381 0.165 0.381 transfer_pw2rs_30 117 14.5 0.097 0.104 0.340 0.366 make_images_sizes 4070 15.5 0.003 0.004 0.270 0.363 build_overlap_matrix_low 22 6.9 0.338 0.357 0.343 0.362 mp_alltoall_i44 4070 16.5 0.267 0.360 0.267 0.360 build_kinetic_matrix_low 22 6.9 0.331 0.348 0.334 0.351 dbcsr_sym_matrix_vector_mult 1269 12.5 0.011 0.014 0.336 0.337 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.329 0.337 arnoldi_generalized_ev 14 10.8 0.000 0.000 0.333 0.335 dbcsr_complete_redistribute 323 11.8 0.065 0.075 0.280 0.312 build_core_ppl 11 7.9 0.278 0.310 0.278 0.310 integrate_v_core_rspace 11 7.9 0.016 0.021 0.298 0.309 gev_build_subspace 23 11.5 0.002 0.003 0.303 0.303 transfer_rs2pw_30 117 11.9 0.075 0.083 0.276 0.298 make_images_pack 4070 15.5 0.270 0.287 0.277 0.294 pw_zero 585 13.0 0.234 0.288 0.234 0.288 copy_dbcsr_to_fm 143 10.8 0.003 0.003 0.251 0.280 make_basis_sm 14 9.3 0.000 0.000 0.274 0.279 dbcsr_dot 1125 12.2 0.096 0.101 0.205 0.268 pw_integral_ab_c1d_c1d_gs 117 11.5 0.189 0.193 0.243 0.268 parallel_gemm_fm_cosma 96 8.9 0.256 0.261 0.256 0.261 cp_fm_cholesky_decompose 28 10.5 0.245 0.252 0.245 0.252 dbcsr_sym_matrix_vector_mult_l 1269 13.5 0.204 0.222 0.206 0.223 qs_ot_p2m_diag 19 11.0 0.002 0.002 0.210 0.210 mp_alltoall_i22 633 13.6 0.159 0.209 0.159 0.209 dbcsr_make_dense_low 6572 15.7 0.021 0.021 0.195 0.206 rs_scatter_matrices 128 9.9 0.048 0.056 0.184 0.201 copy_fm_to_dbcsr 180 10.8 0.001 0.001 0.162 0.200 reorthogonalize_vectors 10 9.0 0.000 0.000 0.194 0.197 calculate_ecore_overlap 22 5.9 0.000 0.000 0.129 0.197 cp_dbcsr_cholesky_decompose 14 12.8 0.021 0.022 0.181 0.194 dbcsr_make_images_dense 3364 14.9 0.017 0.017 0.180 0.192 qs_create_task_list 11 7.9 0.000 0.000 0.172 0.191 generate_qs_task_list 11 8.9 0.066 0.072 0.172 0.191 fft_wrap_pw1pw2_30 234 13.2 0.006 0.007 0.146 0.185 make_dense_data 6572 16.7 0.132 0.142 0.166 0.177 cp_dbcsr_syevd 19 12.0 0.001 0.001 0.172 0.172 mp_sum_iv 224 11.0 0.134 0.163 0.134 0.163 cp_fm_diag_elpa 19 13.0 0.000 0.000 0.157 0.158 cp_fm_diag_elpa_base 19 14.0 0.154 0.155 0.156 0.157 build_core_ppnl_forces 11 5.9 0.122 0.133 0.122 0.133 compute_matrix_w 11 5.9 0.000 0.000 0.130 0.131 calculate_w_matrix_ot 11 6.9 0.000 0.001 0.130 0.131 dbcsr_desymmetrize_deep 143 11.8 0.011 0.012 0.101 0.122 cp_dbcsr_plus_fm_fm_t 22 8.9 0.000 0.000 0.117 0.121 pw_multiply_with 117 11.5 0.103 0.111 0.103 0.111 dbcsr_copy 4760 12.9 0.078 0.083 0.105 0.111 transfer_fm_to_dbcsr 14 9.8 0.000 0.001 0.070 0.105 mp_sum_dv 7068 14.3 0.069 0.105 0.069 0.105 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="w64PBE", label="w64PBE", y=142.317, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="w64PBE", label="w64PBE", y=102.122, yerr=0.0 Plot: name="w64PBE_timings_32omp", title="Timings of w64PBE with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="w64PBE_timings_32omp", name="rest", label="rest", y=57.94800000000001, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=36.94, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.196, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.016, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.382, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="fft_wrap_pw1pw2_200", label="fft_wrap_pw1pw2_200", y=3.835, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="w64PBE_timings_32mpi", title="Timings of w64PBE with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="w64PBE_timings_32mpi", name="rest", label="rest", y=31.918000000000006, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=25.819, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.559, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="fft_wrap_pw1pw2_200", label="fft_wrap_pw1pw2_200", y=0.721, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=5.742, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.224, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.139, yerr=0.0 Running w64SCAN.inp with 1 threads and 32 ranks... done. Running w64SCAN.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/w64SCAN_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.170 0.170 470.666 470.666 qs_mol_dyn_low 1 2.0 0.003 0.003 467.552 467.552 qs_forces 11 3.9 0.001 0.001 467.515 467.515 qs_energies 11 4.9 0.001 0.001 431.752 431.752 scf_env_do_scf 11 5.9 0.002 0.002 406.233 406.233 velocity_verlet 10 3.0 0.001 0.001 385.508 385.508 scf_env_do_scf_inner_loop 106 6.8 0.012 0.012 367.859 367.859 rebuild_ks_matrix 117 8.5 0.001 0.001 273.290 273.290 qs_ks_build_kohn_sham_matrix 117 9.5 0.016 0.016 273.289 273.289 qs_ks_update_qs_env 119 7.8 0.001 0.001 242.866 242.866 fft_wrap_pw1pw2 3053 12.6 0.050 0.050 214.666 214.666 fft_wrap_pw1pw2_400 1649 13.9 60.113 60.113 208.108 208.108 qs_rho_update_rho_low 117 7.9 0.001 0.001 156.396 156.396 calculate_rho_elec 234 8.9 9.311 9.311 156.394 156.394 qs_vxc_create 117 10.5 0.002 0.002 153.813 153.813 xc_vxc_pw_create 117 11.5 5.658 5.658 153.811 153.811 xc_pw_derive 702 13.5 0.007 0.007 97.062 97.062 sum_up_and_integrate 117 10.5 0.003 0.003 86.640 86.640 integrate_v_rspace 234 11.5 0.253 0.253 86.033 86.033 grid_collocate_task_list 234 9.9 82.123 82.123 82.123 82.123 xc_rho_set_and_dset_create 117 12.5 8.276 8.276 81.482 81.482 fft3d_s 3054 14.6 76.746 76.746 76.772 76.772 xc_pw_divergence 117 12.5 0.003 0.003 65.919 65.919 density_rs2pw 234 9.9 0.011 0.011 64.961 64.961 pw_scatter_s 1521 15.1 55.797 55.797 55.797 55.797 grid_integrate_task_list 234 12.5 48.135 48.135 48.135 48.135 init_scf_loop 13 6.8 0.000 0.000 38.311 38.311 potential_pw2rs 234 12.5 0.545 0.545 37.645 37.645 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 31.654 31.654 xc_functional_eval 234 13.5 0.003 0.003 24.814 24.814 libxc_lda_eval 234 14.5 24.806 24.806 24.811 24.811 init_scf_run 11 5.9 0.000 0.000 16.001 16.001 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.000 16.000 pw_poisson_solve 117 10.5 0.002 0.002 15.057 15.057 dbcsr_multiply_generic 2100 12.6 0.176 0.176 14.992 14.992 pw_gather_s 1532 14.1 14.584 14.584 14.584 14.584 wfi_extrapolate 11 7.9 0.001 0.001 14.209 14.209 qs_scf_new_mos 106 7.8 0.001 0.001 13.445 13.445 qs_scf_loop_do_ot 106 8.8 0.001 0.001 13.444 13.444 ot_scf_mini 106 9.8 0.002 0.002 12.286 12.286 pw_integral_ab_c1d_c1d_gs 117 11.5 9.378 9.378 9.378 9.378 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 9.101 9.101 make_m2s 4200 13.6 0.038 0.038 8.468 8.468 pw_derive 1053 13.8 7.918 7.918 7.918 7.918 ot_mini 106 10.8 0.001 0.001 7.823 7.823 prepare_preconditioner 13 7.8 0.000 0.000 7.212 7.212 make_preconditioner 13 8.8 0.000 0.000 7.212 7.212 make_images 4200 14.6 1.369 1.369 6.264 6.264 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.234 6.234 pw_copy 2223 13.1 6.200 6.200 6.200 6.200 qs_ot_get_derivative 106 11.8 0.001 0.001 4.653 4.653 fft_wrap_pw1pw2_140 468 13.2 0.551 0.551 4.551 4.551 pw_poisson_set 118 11.5 0.003 0.003 4.176 4.176 hybrid_alltoall_any 4338 16.5 4.006 4.006 4.148 4.148 make_images_data 4200 15.6 0.030 0.030 4.122 4.122 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.107 4.107 pw_axpy 1638 11.7 3.853 3.853 3.853 3.853 make_full_inverse_cholesky 13 9.8 0.000 0.000 3.848 3.848 multiply_cannon 2100 13.6 0.227 0.227 3.832 3.832 qs_create_task_list 11 7.9 0.000 0.000 3.633 3.633 generate_qs_task_list 11 8.9 1.367 1.367 3.633 3.633 pw_copy_to_array 1532 14.1 3.452 3.452 3.452 3.452 multiply_cannon_loop 2100 14.6 0.038 0.038 3.418 3.418 multiply_cannon_multrec 2100 15.6 3.333 3.333 3.379 3.379 pw_copy_from_array 1521 15.1 3.212 3.212 3.212 3.212 ot_diis_step 106 11.8 0.003 0.003 3.157 3.157 dbcsr_copy 4666 12.8 0.275 0.275 3.038 3.038 dbcsr_copy_into_existing 22 7.9 2.744 2.744 2.744 2.744 qs_init_subsys 1 2.0 0.001 0.001 2.713 2.713 qs_env_setup 1 3.0 0.000 0.000 2.700 2.700 qs_env_rebuild_pw_env 23 5.3 0.000 0.000 2.700 2.700 pw_env_rebuild 1 5.0 0.000 0.000 2.700 2.700 pw_grid_setup 4 6.0 0.000 0.000 2.698 2.698 pw_grid_setup_internal 4 7.0 0.036 0.036 2.698 2.698 make_full_single_inverse 13 9.8 0.002 0.002 2.644 2.644 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.547 2.547 dbcsr_make_dense_low 5108 15.7 0.026 0.026 2.499 2.499 transfer_rs2pw 947 10.9 0.011 0.011 2.480 2.480 transfer_pw2rs 947 13.5 0.006 0.006 2.475 2.475 make_dense_data 5108 16.7 1.967 1.967 2.463 2.463 dbcsr_complete_redistribute 312 11.8 1.446 1.446 2.446 2.446 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 2.336 2.336 pw_grid_sort 4 8.0 1.793 1.793 2.335 2.335 calculate_rho_core 11 7.9 0.690 0.690 2.308 2.308 apply_preconditioner_dbcsr 119 12.8 0.000 0.000 2.257 2.257 apply_single 119 13.8 0.000 0.000 2.257 2.257 transfer_pw2rs_400 245 14.3 2.013 2.013 2.013 2.013 grid_create_task_list 11 9.9 2.012 2.012 2.012 2.012 transfer_rs2pw_400 245 11.8 2.005 2.005 2.005 2.005 dbcsr_dot 1134 12.2 1.972 1.972 1.979 1.979 copy_dbcsr_to_fm 138 10.8 0.003 0.003 1.873 1.873 dbcsr_make_images_dense 3508 14.9 0.013 0.013 1.850 1.850 arnoldi_generalized_ev 13 10.8 0.000 0.000 1.804 1.804 dbcsr_sym_matrix_vector_mult 1206 12.5 0.027 0.027 1.759 1.759 gev_build_subspace 22 11.5 0.008 0.008 1.638 1.638 qs_ot_get_p 119 10.6 0.001 0.001 1.509 1.509 fft_wrap_pw1pw2_50 468 13.2 0.125 0.125 1.483 1.483 pw_zero 702 12.6 1.439 1.439 1.439 1.439 copy_fm_to_dbcsr 174 10.8 0.001 0.001 1.382 1.382 calculate_dm_sparse 117 9.7 0.001 0.001 1.381 1.381 dbcsr_sym_matrix_vector_mult_l 1206 13.5 1.365 1.365 1.380 1.380 cp_dbcsr_sm_fm_multiply 45 9.4 0.001 0.001 1.353 1.353 transfer_dbcsr_to_fm 13 10.8 0.001 0.001 1.336 1.336 dbcsr_finalize 4788 14.0 0.110 0.110 1.332 1.332 cp_fm_cholesky_invert 13 10.8 1.316 1.316 1.316 1.316 evaluate_core_matrix_traces 117 8.5 0.001 0.001 1.229 1.229 calculate_ptrace_kp 234 9.5 0.001 0.001 1.229 1.229 qs_ot_get_orbitals 106 10.8 0.001 0.001 1.143 1.143 build_core_ppl_forces 11 5.9 1.123 1.123 1.123 1.123 dbcsr_merge_all 4261 15.2 0.403 0.403 0.990 0.990 build_kinetic_matrix_low 22 6.9 0.887 0.887 0.971 0.971 build_overlap_matrix_low 22 6.9 0.869 0.869 0.964 0.964 cp_fm_cholesky_decompose 26 10.6 0.949 0.949 0.949 0.949 cp_dbcsr_sm_fm_multiply_core 45 10.4 0.000 0.000 0.793 0.793 pw_scale 585 11.9 0.773 0.773 0.773 0.773 dbcsr_make_undense 1600 14.2 0.647 0.647 0.749 0.749 transfer_fm_to_dbcsr 13 9.8 0.012 0.012 0.719 0.719 dbcsr_make_dense 1600 14.2 0.005 0.005 0.690 0.690 dbcsr_sort_indices 6981 16.6 0.679 0.679 0.679 0.679 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.614 0.614 qs_ot_p2m_diag 19 11.0 0.056 0.056 0.589 0.589 dbcsr_add_d 1902 13.1 0.002 0.002 0.571 0.571 dbcsr_add_anytype 1902 14.1 0.181 0.181 0.569 0.569 quick_finalize 4507 16.5 0.044 0.044 0.553 0.553 dbcsr_special_finalize 4200 15.6 0.008 0.008 0.548 0.548 sort_shells 4 9.0 0.542 0.542 0.542 0.542 dbcsr_iterator_start 35386 15.4 0.513 0.513 0.540 0.540 pw_multiply_with 117 11.5 0.484 0.484 0.484 0.484 fft_wrap_pw1pw2_20 468 13.2 0.025 0.025 0.473 0.473 ------------------------------------------------------------------------------- From /workspace/artifacts/w64SCAN_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.059 0.073 261.547 261.551 qs_mol_dyn_low 1 2.0 0.003 0.005 261.220 261.225 qs_forces 11 3.9 0.002 0.002 261.182 261.187 qs_energies 11 4.9 0.001 0.001 238.399 238.404 scf_env_do_scf 11 5.9 0.001 0.006 227.907 227.912 velocity_verlet 10 3.0 0.001 0.003 208.213 208.218 scf_env_do_scf_inner_loop 106 6.8 0.003 0.019 208.182 208.186 rebuild_ks_matrix 117 8.5 0.000 0.001 168.524 168.555 qs_ks_build_kohn_sham_matrix 117 9.5 0.021 0.022 168.524 168.555 qs_ks_update_qs_env 119 7.8 0.001 0.001 146.878 146.907 fft_wrap_pw1pw2 3053 12.6 0.071 0.079 84.919 88.071 fft_wrap_pw1pw2_400 1649 13.9 3.345 3.617 79.996 82.603 sum_up_and_integrate 117 10.5 0.004 0.004 82.519 82.591 integrate_v_rspace 234 11.5 0.012 0.013 82.045 82.122 qs_rho_update_rho_low 117 7.9 0.001 0.001 80.358 80.363 calculate_rho_elec 234 8.9 0.314 0.326 80.357 80.362 qs_vxc_create 117 10.5 0.003 0.004 71.342 71.849 xc_vxc_pw_create 117 11.5 0.579 0.745 71.339 71.846 fft3d_ps 3053 14.6 30.950 32.472 59.206 63.108 grid_integrate_task_list 234 12.5 45.724 46.560 45.724 46.560 xc_pw_derive 702 13.5 0.015 0.016 40.853 44.466 xc_rho_set_and_dset_create 117 12.5 0.728 0.944 42.067 43.341 density_rs2pw 234 9.9 0.014 0.016 40.951 42.208 grid_collocate_task_list 234 9.9 37.998 39.118 37.998 39.118 potential_pw2rs 234 12.5 0.032 0.035 33.236 33.326 xc_pw_divergence 117 12.5 0.006 0.007 27.666 30.709 mp_alltoall_z22v 3053 16.6 20.339 28.029 20.339 28.029 xc_functional_eval 234 13.5 0.004 0.005 20.825 21.813 libxc_lda_eval 234 14.5 20.815 21.802 20.822 21.809 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 21.734 21.737 mp_waitany 35424 14.1 18.449 20.414 18.449 20.414 yz_to_x 1532 15.1 4.028 4.469 14.872 19.902 init_scf_loop 13 6.8 0.000 0.000 19.708 19.708 transfer_pw2rs 947 13.5 0.014 0.017 19.535 19.572 transfer_rs2pw 947 10.9 0.017 0.019 15.902 17.242 x_to_yz 1521 16.1 3.822 4.413 13.316 15.032 transfer_pw2rs_400 245 14.3 4.764 5.216 12.374 12.955 transfer_rs2pw_400 245 11.8 3.417 3.758 10.680 12.083 pw_gather_p 1532 14.1 9.594 10.515 9.594 10.515 dbcsr_multiply_generic 2100 12.6 0.068 0.071 8.465 8.535 init_scf_run 11 5.9 0.000 0.000 8.528 8.531 scf_env_initial_rho_setup 11 6.9 0.000 0.003 8.528 8.528 pw_scatter_p 1521 15.1 7.957 8.301 7.957 8.301 wfi_extrapolate 11 7.9 0.001 0.001 7.703 7.703 pw_derive 1053 13.8 6.654 7.103 6.654 7.103 mp_waitall_1 152250 16.6 6.061 6.572 6.061 6.572 qs_scf_new_mos 106 7.8 0.001 0.001 6.361 6.394 qs_scf_loop_do_ot 106 8.8 0.001 0.001 6.361 6.394 multiply_cannon 2100 13.6 0.120 0.131 6.065 6.300 pw_poisson_solve 117 10.5 0.004 0.005 5.925 6.158 ot_scf_mini 106 9.8 0.002 0.002 5.911 5.948 multiply_cannon_loop 2100 14.6 0.064 0.067 5.667 5.813 transfer_pw2rs_140 234 14.5 1.186 1.333 5.154 5.812 pw_copy 2223 13.1 4.728 5.093 4.728 5.093 fft_wrap_pw1pw2_140 468 13.2 0.087 0.095 3.987 4.864 mp_alltoall_d11v 2347 13.5 3.287 4.456 3.287 4.456 rs_gather_matrices 234 12.5 0.131 0.171 3.014 4.193 pw_poisson_set 118 11.5 0.007 0.008 3.819 4.051 transfer_rs2pw_140 234 11.9 1.152 1.324 3.607 3.706 ot_mini 106 10.8 0.001 0.001 3.568 3.616 multiply_cannon_metrocomm3 16800 15.6 0.027 0.029 3.342 3.517 pw_copy_to_array 1532 14.1 2.749 3.060 2.749 3.060 mp_sum_d 3895 11.5 1.901 2.945 1.901 2.945 pw_axpy 1638 11.7 2.647 2.856 2.647 2.856 rs_grid_zero 490 15.3 2.250 2.364 2.250 2.364 qs_ot_get_derivative 106 11.8 0.001 0.001 2.172 2.209 multiply_cannon_multrec 16800 15.6 2.097 2.168 2.104 2.176 pw_copy_from_array 1521 15.1 1.878 2.109 1.878 2.109 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 1.815 1.815 transfer_pw2rs_50 234 14.5 0.350 0.394 1.586 1.767 make_m2s 4200 13.6 0.036 0.042 1.703 1.750 prepare_preconditioner 13 7.8 0.000 0.000 1.627 1.634 make_preconditioner 13 8.8 0.000 0.000 1.627 1.634 make_images 4200 14.6 0.098 0.100 1.452 1.496 apply_preconditioner_dbcsr 119 12.8 0.000 0.000 1.351 1.388 apply_single 119 13.8 0.000 0.000 1.351 1.388 ot_diis_step 106 11.8 0.003 0.003 1.382 1.382 qs_env_update_s_mstruct 11 6.9 0.000 0.000 1.317 1.368 mp_sendrecv_dv 14508 12.9 1.225 1.266 1.225 1.266 transfer_rs2pw_50 234 11.9 0.274 0.357 1.192 1.223 calculate_rho_core 11 7.9 0.041 0.050 1.140 1.189 pw_zero 702 12.6 1.019 1.152 1.019 1.152 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 1.085 1.114 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.001 0.956 1.018 pw_integral_ab_c1d_c1d_gs 117 11.5 0.595 0.613 0.815 0.938 make_full_single_inverse 13 9.8 0.001 0.001 0.811 0.819 make_images_data 4200 15.6 0.027 0.030 0.726 0.814 qs_ot_get_p 119 10.6 0.001 0.001 0.698 0.785 make_full_inverse_cholesky 13 9.8 0.000 0.001 0.752 0.775 fft_wrap_pw1pw2_50 468 13.2 0.024 0.027 0.681 0.751 hybrid_alltoall_any 4338 16.5 0.037 0.081 0.641 0.723 pw_scale 585 11.9 0.537 0.668 0.537 0.668 cp_dbcsr_sm_fm_multiply 45 9.4 0.001 0.001 0.661 0.663 integrate_v_core_rspace 11 7.9 0.033 0.041 0.578 0.616 mp_sum_l 9876 13.7 0.353 0.593 0.353 0.593 cp_dbcsr_sm_fm_multiply_core 45 10.4 0.000 0.000 0.556 0.568 build_core_ppl_forces 11 5.9 0.464 0.531 0.464 0.531 rs_scatter_matrices 256 9.9 0.125 0.139 0.442 0.526 calculate_dm_sparse 117 9.7 0.000 0.000 0.509 0.518 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 0.429 0.462 qs_ot_get_orbitals 106 10.8 0.000 0.000 0.456 0.460 pw_multiply_with 117 11.5 0.434 0.453 0.434 0.453 transfer_pw2rs_20 234 14.5 0.117 0.123 0.407 0.427 transfer_rs2pw_20 234 11.9 0.098 0.103 0.405 0.419 cp_fm_cholesky_invert 13 10.8 0.410 0.419 0.410 0.419 mp_allgather_i34 2100 14.6 0.149 0.363 0.149 0.363 arnoldi_generalized_ev 13 10.8 0.000 0.000 0.343 0.344 dbcsr_sym_matrix_vector_mult 1206 12.5 0.011 0.013 0.340 0.342 make_images_sizes 4200 15.6 0.003 0.004 0.240 0.318 mp_alltoall_i44 4200 16.6 0.237 0.315 0.237 0.315 gev_build_subspace 22 11.5 0.002 0.003 0.307 0.308 make_images_pack 4200 15.6 0.283 0.298 0.290 0.305 dbcsr_dot 1134 12.2 0.101 0.105 0.220 0.304 build_overlap_matrix_low 22 6.9 0.278 0.294 0.284 0.300 build_kinetic_matrix_low 22 6.9 0.273 0.289 0.276 0.293 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.274 0.284 dbcsr_complete_redistribute 312 11.8 0.064 0.073 0.252 0.279 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="w64SCAN", label="w64SCAN", y=470.666, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="w64SCAN", label="w64SCAN", y=261.547, yerr=0.0 Plot: name="w64SCAN_timings_32omp", title="Timings of w64SCAN with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="w64SCAN_timings_32omp", name="rest", label="rest", y=122.94600000000003, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=82.123, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft3d_s", label="fft3d_s", y=76.746, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft_wrap_pw1pw2_400", label="fft_wrap_pw1pw2_400", y=60.113, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=55.797, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.135, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="libxc_lda_eval", label="libxc_lda_eval", y=24.806, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="w64SCAN_timings_32mpi", title="Timings of w64SCAN with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="w64SCAN_timings_32mpi", name="rest", label="rest", y=102.37600000000003, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=37.998, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft_wrap_pw1pw2_400", label="fft_wrap_pw1pw2_400", y=3.345, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=45.724, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="libxc_lda_eval", label="libxc_lda_eval", y=20.815, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=30.95, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=20.339, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.211 0.211 93.196 93.196 qs_energies 1 2.0 0.000 0.000 92.379 92.379 scf_env_do_scf 1 3.0 0.000 0.000 91.026 91.026 qs_ks_update_qs_env 8 5.0 0.000 0.000 85.759 85.759 rebuild_ks_matrix 7 6.0 0.000 0.000 85.681 85.681 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 85.681 85.681 hfx_ks_matrix 7 8.0 0.000 0.000 76.456 76.456 integrate_four_center 7 9.0 0.760 0.760 76.306 76.306 integrate_four_center_main 7 10.0 0.730 0.730 69.943 69.943 integrate_four_center_bin 447 11.0 69.212 69.212 69.212 69.212 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 48.956 48.956 init_scf_loop 1 4.0 0.000 0.000 42.057 42.057 integrate_four_center_load 7 10.0 0.000 0.000 5.387 5.387 hfx_load_balance 1 11.0 0.010 0.010 5.387 5.387 qs_vxc_create 14 8.0 0.000 0.000 3.655 3.655 xc_vxc_pw_create 14 9.0 0.117 0.117 3.655 3.655 fft_wrap_pw1pw2 237 10.7 0.003 0.003 2.732 2.732 hfx_load_balance_bin 1 12.0 2.683 2.683 2.683 2.683 hfx_load_balance_count 1 12.0 2.678 2.678 2.678 2.678 calculate_rho_elec 15 7.4 0.119 0.119 2.650 2.650 fft_wrap_pw1pw2_140 150 12.1 0.904 0.904 2.603 2.603 xc_rho_set_and_dset_create 14 10.0 0.138 0.138 2.395 2.395 dbcsr_multiply_generic 165 10.0 0.013 0.013 2.201 2.201 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.043 2.043 prepare_preconditioner 1 5.0 0.000 0.000 2.008 2.008 make_preconditioner 1 6.0 0.000 0.000 2.008 2.008 grid_collocate_task_list 15 8.4 1.935 1.935 1.935 1.935 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.218 0.229 82.502 82.505 qs_energies 1 2.0 0.000 0.000 82.084 82.086 scf_env_do_scf 1 3.0 0.000 0.000 81.766 81.768 qs_ks_update_qs_env 8 5.0 0.000 0.000 79.975 79.977 rebuild_ks_matrix 7 6.0 0.000 0.000 79.965 79.968 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 79.965 79.968 hfx_ks_matrix 7 8.0 0.000 0.000 75.004 75.006 integrate_four_center 7 9.0 0.053 0.254 74.994 74.996 integrate_four_center_main 7 10.0 0.003 0.004 67.375 68.554 integrate_four_center_bin 448 11.0 67.372 68.551 67.372 68.551 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 43.552 43.554 init_scf_loop 1 4.0 0.000 0.000 38.212 38.213 integrate_four_center_load 7 10.0 0.000 0.000 5.289 5.290 hfx_load_balance 1 11.0 0.001 0.001 5.289 5.290 mp_sync 56 11.2 1.758 2.900 1.758 2.900 hfx_load_balance_count 1 12.0 2.590 2.648 2.590 2.648 hfx_load_balance_bin 1 12.0 2.582 2.639 2.582 2.639 qs_vxc_create 14 8.0 0.000 0.000 2.068 2.068 xc_vxc_pw_create 14 9.0 0.007 0.008 2.068 2.068 xc_rho_set_and_dset_create 14 10.0 0.010 0.012 1.621 1.712 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-hyb", label="H2O-hyb", y=93.196, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-hyb", label="H2O-hyb", y=82.502, yerr=0.0 Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=15.572999999999993, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=69.212, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.683, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.678, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=1.935, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=0.904, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.211, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=7.981999999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=67.372, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.582, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.59, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.218, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=1.758, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 76.960 76.960 qs_energies 1 2.0 0.000 0.000 76.619 76.619 mp2_main 1 3.0 0.000 0.000 73.881 73.881 mp2_gpw_main 1 4.0 0.000 0.000 73.779 73.779 rpa_ri_compute_en 1 5.0 0.000 0.000 70.821 70.821 rpa_num_int 1 6.0 0.001 0.001 70.812 70.812 compute_mat_P_omega 1 7.0 0.003 0.003 61.367 61.367 compute_mat_P_omega_contract 10 8.0 8.243 8.243 61.142 61.142 dbt_total 2336 9.6 0.014 0.014 49.747 49.747 dbt_contract 787 11.0 0.042 0.042 42.529 42.529 dbt_tas_total 1149 12.2 0.267 0.267 41.098 41.098 dbt_tas_multiply 807 12.1 0.002 0.002 39.802 39.802 dbt_tas_dbm 807 14.1 0.003 0.003 33.308 33.308 dbm_multiply 807 16.1 33.297 33.297 33.297 33.297 dbt_tas_mm_1N 524 15.1 0.002 0.002 24.965 24.965 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 22.605 22.605 compute_mat_P_omega_calc_M_occ 250 9.0 8.247 8.247 17.287 17.287 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.722 7.722 dbt_tas_mm_2 251 15.0 0.002 0.002 6.810 6.810 dbt_copy 1103 10.7 0.140 0.140 5.903 5.903 compute_QP_energies 1 7.0 0.000 0.000 5.587 5.587 compute_self_energy_cubic_gw 1 8.0 0.153 0.153 5.586 5.586 contract_cubic_gw 21 9.0 0.000 0.000 4.229 4.229 dbm_reserve_blocks 3628 15.3 3.957 3.957 3.957 3.957 dbt_tas_reserve_blocks_index 3261 14.3 0.102 0.102 3.921 3.921 dbt_reserve_blocks_index 2280 13.1 0.057 0.057 2.979 2.979 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.948 2.948 dbt_reserve_blocks_index_array 2222 12.2 0.011 0.011 2.926 2.926 scf_env_do_scf 1 3.0 0.000 0.000 2.632 2.632 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.632 2.632 dbt_crop 1042 12.0 1.356 1.356 2.590 2.590 dbt_tas_reshape 367 15.0 0.014 0.014 2.287 2.287 dbt_tas_copy 574 11.4 1.312 1.312 2.243 2.243 dbt_reshape 278 11.9 1.109 1.109 1.836 1.836 convert_to_new_pgrid 2421 14.1 0.081 0.081 1.780 1.780 dbm_copy 1614 15.1 1.699 1.699 1.699 1.699 reshape_mm_small 367 14.1 0.041 0.041 1.664 1.664 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.018 34.083 34.087 qs_energies 1 2.0 0.000 0.000 33.951 33.953 mp2_main 1 3.0 0.000 0.000 33.015 33.017 mp2_gpw_main 1 4.0 0.000 0.000 32.976 32.978 rpa_ri_compute_en 1 5.0 0.000 0.000 31.898 31.900 rpa_num_int 1 6.0 0.000 0.001 31.898 31.899 dbt_total 2336 9.6 0.019 0.020 28.181 28.209 compute_mat_P_omega 1 7.0 0.001 0.004 26.801 26.807 compute_mat_P_omega_contract 10 8.0 0.449 0.464 26.639 26.647 dbt_contract 787 11.0 0.038 0.040 20.582 20.589 dbt_tas_total 1149 12.2 0.086 0.091 18.141 18.144 dbt_tas_multiply 807 12.1 0.002 0.003 18.078 18.087 dbt_tas_dbm 807 14.1 0.003 0.003 12.517 12.520 dbm_multiply 807 16.1 9.789 10.424 9.789 10.424 compute_mat_P_omega_calc_M_occ 250 9.0 0.425 0.438 8.093 8.097 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.801 7.805 mp_sync 8688 11.6 5.242 6.734 5.242 6.734 dbt_copy 1149 10.8 0.018 0.019 6.422 6.722 dbt_reshape 1136 11.8 2.550 2.704 5.948 6.242 dbt_tas_mm_2 251 15.0 0.002 0.002 5.720 5.723 dbt_tas_mm_1N 524 15.1 0.002 0.002 4.775 5.313 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.200 5.203 compute_QP_energies 1 7.0 0.000 0.000 3.255 3.255 compute_self_energy_cubic_gw 1 8.0 0.006 0.007 3.255 3.255 mp_waitall_2 3812 15.3 2.886 3.119 2.886 3.119 dbt_communicate_buffer 1136 12.8 0.051 0.055 2.629 2.830 contract_cubic_gw 21 9.0 0.000 0.000 2.448 2.448 dbt_reserve_blocks_index 2887 13.1 0.080 0.088 1.904 2.203 dbm_reserve_blocks 3752 15.4 1.908 2.196 1.908 2.196 dbt_reserve_blocks_index_array 2829 12.2 0.010 0.011 1.893 2.189 dbt_tas_reserve_blocks_index 3347 14.5 0.068 0.072 1.866 2.163 dbt_crop 1042 12.0 0.890 0.978 1.462 1.625 convert_to_new_pgrid 2421 14.1 0.021 0.023 1.358 1.467 dbm_copy 1608 15.1 1.330 1.441 1.330 1.441 dbt_tas_replicate 405 14.1 0.527 0.670 1.181 1.254 mp2_ri_gpw_compute_in 1 5.0 0.003 0.003 1.076 1.076 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.051 1.052 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.033 1.035 parallel_gemm_fm_cosma 105 8.4 0.977 0.991 0.977 0.991 scf_env_do_scf 1 3.0 0.000 0.000 0.900 0.900 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.900 0.900 compute_W_cubic_GW 10 7.0 0.001 0.001 0.821 0.828 dbm_add 807 14.1 0.709 0.740 0.709 0.740 mp_max_i 2068 9.5 0.517 0.710 0.517 0.710 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=76.96, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=34.083, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=20.407999999999994, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=33.297, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.247, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.243, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.957, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=1.699, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.109, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=9.503999999999998, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=9.789, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.425, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.449, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.908, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.33, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.55, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.242, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.886, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 235.609 235.609 qs_forces 1 2.0 0.000 0.000 235.119 235.119 rebuild_ks_matrix 7 6.6 0.000 0.000 233.852 233.852 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 233.852 233.852 hfx_ks_matrix 7 8.6 0.000 0.000 231.891 231.891 hfx_ri_update_ks 7 9.6 0.000 0.000 187.993 187.993 hfx_ri_update_ks_Pmat 7 10.6 28.945 28.945 187.987 187.987 dbt_total 849 11.0 0.006 0.006 183.121 183.121 qs_energies 1 3.0 0.000 0.000 166.990 166.990 scf_env_do_scf 1 4.0 0.000 0.000 166.674 166.674 qs_ks_update_qs_env 8 6.0 0.000 0.000 165.769 165.769 dbt_tas_total 369 13.4 0.599 0.599 164.762 164.762 dbt_contract 207 12.4 0.462 0.462 163.854 163.854 dbt_tas_multiply 216 13.5 0.001 0.001 159.753 159.753 dbt_tas_dbm 216 15.5 0.001 0.001 147.335 147.335 dbm_multiply 216 17.5 147.331 147.331 147.331 147.331 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 136.202 136.202 dbt_tas_mm_2 91 16.5 0.001 0.001 129.947 129.947 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 103.704 103.704 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 68.092 68.092 init_scf_loop 2 5.0 0.000 0.000 62.968 62.968 hfx_ri_update_forces 1 7.0 1.490 1.490 43.896 43.896 hfx_ri_forces_Pmat_3c 1 8.0 4.326 4.326 28.460 28.460 dbt_copy 423 11.8 0.066 0.066 14.250 14.250 precalc_derivatives 1 8.0 2.292 2.292 11.188 11.188 dbt_reshape 132 13.2 7.360 7.360 10.132 10.132 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.848 9.848 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 8.973 8.973 dbt_tas_mm_3N 37 15.4 0.000 0.000 7.068 7.068 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.867 6.867 dbt_tas_reshape 168 14.5 0.013 0.013 6.142 6.142 dbm_reserve_blocks 1491 16.2 5.709 5.709 5.709 5.709 dbt_tas_reserve_blocks_index 1323 15.4 0.183 0.183 5.640 5.640 build_3c_derivatives 3 9.0 1.717 1.717 5.467 5.467 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.015 44.612 44.615 qs_forces 1 2.0 0.000 0.000 44.419 44.420 rebuild_ks_matrix 7 6.6 0.000 0.000 43.554 43.555 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 43.554 43.555 hfx_ks_matrix 7 8.6 0.000 0.000 42.560 42.561 dbt_total 849 11.0 0.007 0.007 37.929 37.940 dbt_contract 207 12.4 0.025 0.026 28.983 28.990 dbt_tas_total 369 13.4 0.043 0.047 25.751 25.753 dbt_tas_multiply 216 13.5 0.001 0.001 25.392 25.393 hfx_ri_update_ks 7 9.6 0.000 0.000 24.090 24.090 hfx_ri_update_ks_Pmat 7 10.6 1.272 1.349 24.089 24.089 qs_energies 1 3.0 0.000 0.000 22.960 22.960 scf_env_do_scf 1 4.0 0.000 0.001 22.827 22.827 qs_ks_update_qs_env 8 6.0 0.000 0.000 22.104 22.104 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 21.452 21.452 dbt_tas_dbm 216 15.5 0.001 0.001 19.431 19.436 hfx_ri_update_forces 1 7.0 0.058 0.062 18.470 18.470 dbm_multiply 216 17.5 17.341 18.189 17.341 18.189 hfx_ri_forces_Pmat_3c 1 8.0 0.191 0.205 13.792 13.808 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 12.837 12.837 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 10.275 10.276 init_scf_loop 2 5.0 0.000 0.000 9.989 9.989 dbt_tas_mm_2 91 16.5 0.001 0.001 8.445 8.445 dbt_copy 539 12.5 0.011 0.013 7.956 8.219 dbt_reshape 393 13.9 2.967 3.061 5.919 6.094 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.155 5.581 mp_sync 2901 12.8 4.142 5.214 4.142 5.214 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.807 4.807 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.183 4.565 precalc_derivatives 1 8.0 0.089 0.093 3.524 3.524 dbm_reserve_blocks 1641 16.6 2.864 3.156 2.864 3.156 dbt_tas_reserve_blocks_index 1471 15.8 0.132 0.135 2.803 3.100 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.085 3.085 mp_waitall_2 1318 16.2 2.958 3.083 2.958 3.083 dbt_reserve_blocks_index 1107 14.8 0.116 0.120 2.321 2.558 dbt_reserve_blocks_index_array 1077 13.9 0.005 0.006 2.293 2.526 dbt_crop 372 13.7 1.653 1.686 2.358 2.446 dbt_communicate_buffer 393 14.9 0.011 0.012 2.060 2.178 convert_to_new_pgrid 648 15.5 0.030 0.077 1.935 2.032 build_3c_derivatives 3 9.0 0.127 0.137 1.920 1.925 dbm_copy 452 16.3 1.741 1.832 1.741 1.832 dbt_tas_replicate 170 15.1 0.699 0.725 1.634 1.669 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.588 1.591 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.407 1.418 dbt_tas_copy 146 12.6 0.741 0.784 1.299 1.401 dbt_tas_communicate_buffer 370 16.3 0.012 0.012 0.958 1.013 dbm_add 216 15.5 0.884 0.931 0.884 0.931 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=235.609, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=44.612, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=41.93800000000002, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=147.331, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=28.945, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.36, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=5.709, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.326, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=12.876999999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.341, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.272, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.967, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.864, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.191, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.142, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.958, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.011 190.251 190.251 qs_energies 1 2.0 0.000 0.000 190.082 190.082 mp2_main 1 3.0 0.000 0.000 186.173 186.173 mp2_gpw_main 1 4.0 0.001 0.001 185.716 185.716 mp2_ri_gpw_compute_in 1 5.0 0.571 0.571 143.151 143.151 mp2_ri_gpw_compute_in_loop 1 6.0 0.010 0.010 134.795 134.795 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 96.941 96.941 integrate_v_rspace 2666 8.0 0.684 0.684 83.658 83.658 grid_integrate_task_list 2666 9.0 80.504 80.504 80.504 80.504 mp2_ri_gpw_compute_en 1 5.0 0.125 0.125 42.546 42.546 mp2_ri_gpw_compute_en_RI_loop 1 6.0 10.831 10.831 39.937 39.937 dbcsr_multiply_generic 5322 8.0 0.249 0.249 31.700 31.700 ao_to_mo_and_store_B_mult_1 2656 7.0 0.012 0.012 31.687 31.687 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.220 2.220 19.904 19.904 make_m2s 10644 9.0 0.051 0.051 18.135 18.135 local_gemm 2080 8.0 17.684 17.684 17.684 17.684 make_images 10644 10.0 3.646 3.646 17.659 17.659 hybrid_alltoall_any 13323 11.6 12.609 12.609 12.912 12.912 make_images_data 10644 11.0 0.086 0.086 12.873 12.873 fft_wrap_pw1pw2 53228 10.4 0.118 0.118 11.929 11.929 multiply_cannon 5322 9.0 0.679 0.679 11.387 11.387 collocate_function 2656 8.0 6.015 6.015 9.984 9.984 multiply_cannon_loop 5322 10.0 0.150 0.150 9.961 9.961 fft_wrap_pw1pw2_20 21271 11.4 0.815 0.815 8.264 8.264 multiply_cannon_multrec 5322 11.0 7.915 7.915 7.946 7.946 get_2c_integrals 1 6.0 0.000 0.000 7.784 7.784 compute_2c_integrals 1 7.0 0.005 0.005 7.363 7.363 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 7.354 7.354 mp2_eri_2c_integrate_gpw 1 9.0 0.719 0.719 7.343 7.343 fft3d_s 53229 12.4 7.152 7.152 7.175 7.175 mp2_ri_gpw_compute_en_ener 2080 7.0 7.134 7.134 7.134 7.134 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.769 2.769 6.065 6.065 potential_pw2rs 5322 10.0 0.146 0.146 4.486 4.486 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.016 33.138 33.139 qs_energies 1 2.0 0.000 0.000 32.986 32.987 mp2_main 1 3.0 0.000 0.001 31.156 31.157 mp2_gpw_main 1 4.0 0.001 0.001 31.047 31.048 mp2_ri_gpw_compute_en 1 5.0 0.251 0.256 16.232 16.557 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.886 2.011 15.204 15.207 mp2_ri_gpw_compute_in 1 5.0 0.046 0.046 14.747 14.963 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 13.547 13.763 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 11.220 11.468 integrate_v_rspace 93 8.1 0.120 0.138 11.140 11.402 grid_integrate_task_list 93 9.1 10.869 11.135 10.869 11.135 mp2_ri_gpw_compute_en_expansio 65 7.0 0.111 0.129 10.727 10.922 local_gemm 65 8.0 10.616 10.800 10.616 10.800 mp2_ri_gpw_compute_en_comm 17 7.0 0.064 0.076 2.165 2.566 dbcsr_multiply_generic 176 8.0 0.009 0.010 1.998 2.248 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.981 2.229 mp_sendrecv_dm3 1054 8.0 1.716 2.172 1.716 2.172 scf_env_do_scf 1 3.0 0.000 0.000 1.724 1.726 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 1.724 1.726 multiply_cannon 176 9.0 0.019 0.020 1.118 1.181 get_2c_integrals 1 6.0 0.000 0.000 1.137 1.150 multiply_cannon_loop 176 10.0 0.002 0.003 1.057 1.119 make_m2s 352 9.0 0.003 0.003 0.840 1.049 make_images 352 10.0 0.038 0.039 0.830 1.039 multiply_cannon_multrec 246 11.0 0.885 0.925 0.891 0.931 qs_scf_new_mos 10 5.0 0.000 0.000 0.911 0.915 eigensolver 11 5.8 0.001 0.001 0.859 0.861 compute_2c_integrals 1 7.0 0.002 0.003 0.788 0.799 compute_2c_integrals_loop_lm 1 8.0 0.001 0.002 0.678 0.708 mp2_eri_2c_integrate_gpw 1 9.0 0.165 0.177 0.678 0.707 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=190.251, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=33.138, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=60.708, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=80.504, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=17.684, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=12.609, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=10.831, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.915, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.166, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.869, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.616, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.886, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.885, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.716, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.135 0.135 136.471 136.471 qs_energies 1 2.0 0.000 0.000 135.196 135.196 scf_env_do_scf 1 3.0 0.000 0.000 129.754 129.754 scf_env_do_scf_inner_loop 15 4.0 0.001 0.001 129.754 129.754 qs_ks_update_qs_env 15 5.0 0.000 0.000 56.416 56.416 rebuild_ks_matrix 15 6.0 0.000 0.000 56.162 56.162 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 56.162 56.162 qs_scf_new_mos 15 5.0 0.000 0.000 55.466 55.466 eigensolver 15 6.0 0.002 0.002 45.283 45.283 qs_vxc_create 15 8.0 0.038 0.038 43.204 43.204 cp_fm_diag_elpa 15 7.0 0.000 0.000 38.603 38.603 cp_fm_diag_elpa_base 15 8.0 37.292 37.292 38.602 38.602 calculate_dispersion_nonloc 15 9.0 6.798 6.798 37.731 37.731 fft_wrap_pw1pw2 1086 10.0 0.017 0.017 31.847 31.847 fft_wrap_pw1pw2_150 765 11.0 8.964 8.964 23.651 23.651 qs_rho_update_rho_low 16 5.0 0.000 0.000 13.848 13.848 calculate_rho_elec 16 6.0 0.216 0.216 13.848 13.848 grid_collocate_task_list 16 7.0 12.115 12.115 12.115 12.115 sum_up_and_integrate 15 8.0 0.000 0.000 11.852 11.852 integrate_v_rspace 15 9.0 0.020 0.020 11.838 11.838 grid_integrate_task_list 15 10.0 11.086 11.086 11.086 11.086 fft3d_s 1087 12.0 9.975 9.975 9.982 9.982 pw_scatter_s 585 12.1 8.912 8.912 8.912 8.912 fft_wrap_pw1pw2_200 197 11.3 2.112 2.112 7.986 7.986 copy_dbcsr_to_fm 16 5.9 0.001 0.001 7.594 7.594 dbcsr_complete_redistribute 46 8.3 2.192 2.192 7.106 7.106 vdW_energy 15 10.0 5.644 5.644 5.644 5.644 xc_vxc_pw_create 15 9.0 0.207 0.207 5.435 5.435 cp_fm_cholesky_restore 45 7.0 5.350 5.350 5.350 5.350 gspace_mixing 14 5.0 0.171 0.171 4.671 4.671 broyden_mixing 14 6.0 3.983 3.983 3.983 3.983 xc_pw_derive 90 11.0 0.001 0.001 3.900 3.900 dbcsr_finalize 159 9.9 0.020 0.020 3.481 3.481 dbcsr_merge_all 91 11.1 0.127 0.127 3.329 3.329 calculate_dm_sparse 15 6.0 0.017 0.017 3.117 3.117 cp_dbcsr_plus_fm_fm_t 15 7.0 0.001 0.001 3.022 3.022 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.017 50.283 50.284 qs_energies 1 2.0 0.000 0.000 49.973 49.975 scf_env_do_scf 1 3.0 0.000 0.000 46.539 46.540 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 46.539 46.540 qs_ks_update_qs_env 15 5.0 0.000 0.000 20.885 20.891 rebuild_ks_matrix 15 6.0 0.000 0.000 20.847 20.853 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 20.847 20.853 qs_scf_new_mos 15 5.0 0.000 0.001 13.519 13.537 eigensolver 15 6.0 0.001 0.002 12.422 12.518 qs_rho_update_rho_low 16 5.0 0.000 0.000 12.261 12.265 calculate_rho_elec 16 6.0 0.007 0.007 12.261 12.265 sum_up_and_integrate 15 8.0 0.000 0.001 11.425 11.473 integrate_v_rspace 15 9.0 0.001 0.001 11.416 11.468 grid_collocate_task_list 16 7.0 10.991 11.339 10.991 11.339 grid_integrate_task_list 15 10.0 10.553 10.674 10.553 10.674 qs_vxc_create 15 8.0 0.001 0.001 9.013 9.023 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.504 8.508 cp_fm_diag_elpa_base 15 8.0 8.364 8.398 8.500 8.501 calculate_dispersion_nonloc 15 9.0 0.735 0.855 7.183 7.214 fft_wrap_pw1pw2 1086 10.0 0.024 0.027 6.794 7.006 fft3d_ps 1086 12.0 2.315 2.593 5.112 5.703 fft_wrap_pw1pw2_150 765 11.0 0.144 0.172 4.411 4.458 cp_fm_cholesky_restore 45 7.0 3.768 3.827 3.768 3.827 mp_alltoall_z22v 1086 14.0 2.302 3.372 2.302 3.372 fft_wrap_pw1pw2_200 197 11.3 0.109 0.125 2.250 2.386 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.289 2.289 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.029 2.189 yz_to_x 501 12.9 0.190 0.209 1.422 1.880 x_to_yz 585 13.1 0.278 0.319 1.348 1.868 xc_vxc_pw_create 15 9.0 0.016 0.024 1.829 1.861 build_core_ppnl 1 5.0 1.274 1.392 1.274 1.392 vdW_energy 15 10.0 1.285 1.337 1.285 1.337 density_rs2pw 16 7.0 0.001 0.001 1.139 1.324 xc_pw_derive 90 11.0 0.001 0.002 1.237 1.321 transfer_rs2pw 82 8.0 0.001 0.001 0.874 1.216 mp_waitany 520 11.3 0.819 1.211 0.819 1.211 transfer_rs2pw_200 18 8.8 0.027 0.033 0.606 1.114 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="diag_cu144_broy", label="diag_cu144_broy", y=136.471, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="diag_cu144_broy", label="diag_cu144_broy", y=50.283, yerr=0.0 Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=51.68900000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=37.292, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=12.115, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.086, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.975, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=8.964, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=5.35, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=14.148000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.364, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.991, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.553, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.144, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.768, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.315, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 1.242 1.242 151.214 151.214 qs_energies 1 2.0 0.000 0.000 149.916 149.916 ls_scf 1 3.0 0.000 0.000 140.262 140.262 ls_scf_main 1 4.0 0.001 0.001 131.508 131.508 density_matrix_trs4 5 5.0 0.003 0.003 75.778 75.778 ls_scf_dm_to_ks 5 5.0 0.000 0.000 53.566 53.566 dbcsr_multiply_generic 95 6.2 0.346 0.346 38.076 38.076 matrix_ls_to_qs 5 6.0 0.000 0.000 32.897 32.897 arnoldi_extremal 6 6.2 0.000 0.000 30.547 30.547 arnoldi_normal_ev 6 7.2 0.007 0.007 30.547 30.547 build_subspace 12 8.2 0.033 0.033 30.006 30.006 dbcsr_matrix_vector_mult 310 9.0 0.089 0.089 29.525 29.525 dbcsr_matrix_vector_mult_local 310 10.0 28.889 28.889 28.896 28.896 qs_ks_update_qs_env 6 6.2 0.000 0.000 24.704 24.704 rebuild_ks_matrix 6 7.2 0.000 0.000 24.394 24.394 build_dftb_ks_matrix 6 8.2 0.000 0.000 24.394 24.394 build_dftb_coulomb 6 9.2 1.069 1.069 24.048 24.048 tb_ewald_overlap 6 10.2 22.904 22.904 22.904 22.904 multiply_cannon 95 7.2 0.163 0.163 21.708 21.708 dbcsr_complete_redistribute 11 7.5 14.274 14.274 18.537 18.537 multiply_cannon_loop 95 8.2 0.140 0.140 17.226 17.226 matrix_decluster 5 7.0 0.000 0.000 16.828 16.828 dbcsr_copy 443 8.0 0.635 0.635 16.712 16.712 dbcsr_copy_into_existing 5 8.0 16.069 16.069 16.069 16.069 make_m2s 190 7.2 0.009 0.009 14.160 14.160 make_images 190 8.2 3.070 3.070 13.311 13.311 multiply_cannon_multrec 95 9.2 12.210 12.210 12.227 12.227 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 9.579 9.579 build_qs_neighbor_lists 1 4.0 0.000 0.000 8.830 8.830 build_neighbor_lists_sab_tbe 1 5.0 8.548 8.548 8.548 8.548 ls_scf_init_scf 1 4.0 0.000 0.000 7.938 7.938 dbcsr_finalize 277 7.6 0.140 0.140 7.155 7.155 dbcsr_merge_all 247 8.6 2.122 2.122 6.542 6.542 make_images_data 190 9.2 0.004 0.004 5.912 5.912 hybrid_alltoall_any 201 10.0 5.441 5.441 5.568 5.568 dbcsr_sort_indices 443 10.1 5.383 5.383 5.383 5.383 calculate_norms 190 9.2 4.859 4.859 4.859 4.859 quick_finalize 203 10.1 0.091 0.091 4.507 4.507 dbcsr_dot 66 6.3 4.366 4.366 4.367 4.367 setup_rec_index_2d 190 8.2 4.277 4.277 4.277 4.277 dbcsr_special_finalize 190 9.2 0.001 0.001 4.160 4.160 ls_scf_initial_guess 1 5.0 0.000 0.000 4.076 4.076 ls_scf_qs_atomic_guess 1 6.0 0.000 0.000 4.076 4.076 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.852 3.852 tree_to_linear_d 11 10.5 3.709 3.709 3.709 3.709 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 3.242 3.242 dbcsr_add_d 130 6.0 0.000 0.000 3.085 3.085 dbcsr_add_anytype 130 7.0 0.649 0.649 3.084 3.084 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.056 37.930 37.932 qs_energies 1 2.0 0.000 0.000 37.766 37.768 ls_scf 1 3.0 0.000 0.000 37.441 37.443 ls_scf_main 1 4.0 0.000 0.003 34.483 34.484 density_matrix_trs4 5 5.0 0.003 0.003 32.669 32.717 dbcsr_multiply_generic 95 6.2 0.026 0.033 32.444 32.569 multiply_cannon 95 7.2 0.018 0.019 26.896 27.294 multiply_cannon_loop 95 8.2 0.065 0.068 25.637 26.191 multiply_cannon_multrec 760 9.2 14.415 15.573 14.543 15.703 mp_waitall_1 6128 10.4 10.360 12.754 10.360 12.754 multiply_cannon_metrocomm3 760 9.2 0.006 0.006 8.146 10.845 make_m2s 190 7.2 0.017 0.018 3.448 3.506 make_images 190 8.2 0.236 0.249 3.396 3.455 calculate_norms 1520 9.2 2.111 2.229 2.111 2.229 ls_scf_init_scf 1 4.0 0.000 0.000 2.213 2.214 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.072 2.079 mp_sum_l 421 7.0 1.665 2.012 1.665 2.012 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 1.890 1.893 make_images_data 190 9.2 0.004 0.004 1.552 1.750 ls_scf_dm_to_ks 5 5.0 0.000 0.000 1.669 1.713 hybrid_alltoall_any 201 10.0 0.076 0.347 1.299 1.513 multiply_cannon_metrocomm1 760 9.2 0.003 0.003 0.756 1.485 dbcsr_multiply_generic_mpsum_f 71 7.2 0.000 0.000 1.157 1.399 arnoldi_extremal 6 6.2 0.000 0.000 1.166 1.180 arnoldi_normal_ev 6 7.2 0.001 0.002 1.166 1.180 build_subspace 12 8.2 0.010 0.013 1.120 1.121 dbcsr_complete_redistribute 11 7.5 0.559 0.630 0.937 1.005 matrix_ls_to_qs 5 6.0 0.000 0.000 0.939 1.001 make_images_pack 190 9.2 0.812 0.945 0.814 0.947 qs_ks_update_qs_env 6 6.2 0.000 0.000 0.861 0.939 dbcsr_matrix_vector_mult 310 9.0 0.006 0.024 0.899 0.930 matrix_decluster 5 7.0 0.000 0.000 0.849 0.914 buffer_matrices_ensure_size 190 8.2 0.720 0.866 0.720 0.866 dbcsr_finalize 277 7.6 0.004 0.005 0.700 0.791 dbcsr_matrix_vector_mult_local 310 10.0 0.722 0.788 0.722 0.789 rebuild_ks_matrix 6 7.2 0.000 0.000 0.787 0.788 build_dftb_ks_matrix 6 8.2 0.000 0.000 0.787 0.788 build_dftb_coulomb 6 9.2 0.038 0.040 0.760 0.761 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="bench_dftb", label="bench_dftb", y=151.214, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="bench_dftb", label="bench_dftb", y=37.93, yerr=0.0 Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=52.009, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=28.889, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="tb_ewald_overlap", label="tb_ewald_overlap", y=22.904, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=16.069, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=14.274, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.21, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=4.859, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=7.286000000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=0.722, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="tb_ewald_overlap", label="tb_ewald_overlap", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=0.559, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.415, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=2.111, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.36, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=0.812, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.665, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.003 51.436 51.436 lib_test 1 2.0 0.000 0.000 51.432 51.432 dbcsr_run_tests 3 3.0 0.000 0.000 51.432 51.432 test_multiplies_multiproc 3 4.0 0.001 0.001 30.288 30.288 dbcsr_multiply_generic 9 5.0 0.001 0.001 21.991 21.991 dbcsr_make_random_matrix 9 4.0 17.251 17.251 21.000 21.000 multiply_cannon 9 6.0 0.002 0.002 11.717 11.717 multiply_cannon_loop 9 7.0 0.011 0.011 11.467 11.467 multiply_cannon_multrec 9 8.0 11.455 11.455 11.456 11.456 dbcsr_finalize 27 5.7 0.025 0.025 7.636 7.636 dbcsr_merge_all 18 6.5 3.130 3.130 6.661 6.661 dbcsr_redistribute 9 5.0 4.473 4.473 6.637 6.637 make_m2s 18 6.0 0.000 0.000 5.690 5.690 make_images 18 7.0 0.456 0.456 5.638 5.638 make_images_data 18 8.0 0.000 0.000 4.550 4.550 hybrid_alltoall_any 18 9.0 4.521 4.521 4.522 4.522 dbcsr_data_release 975 7.6 3.773 3.773 3.773 3.773 tree_to_linear_d 9 7.0 2.317 2.317 2.317 2.317 dbcsr_destroy 93 5.8 0.000 0.000 1.871 1.871 mp_alltoall_d11v 27 6.0 1.691 1.691 1.691 1.691 dbcsr_data_copy_aa2 9 7.0 1.179 1.179 1.179 1.179 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 19.654 19.655 lib_test 1 2.0 0.000 0.000 19.629 19.645 dbcsr_run_tests 3 3.0 0.000 0.001 19.628 19.644 test_multiplies_multiproc 3 4.0 0.000 0.002 18.913 18.974 dbcsr_multiply_generic 9 5.0 0.001 0.001 17.521 17.606 multiply_cannon 9 6.0 0.002 0.002 15.293 15.794 multiply_cannon_loop 9 7.0 0.002 0.002 14.980 15.461 multiply_cannon_multrec 72 8.0 11.100 11.581 11.101 11.581 mp_waitall_1 576 9.2 4.251 5.170 4.251 5.170 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.758 4.552 mp_sum_l 70 5.1 0.902 1.517 0.902 1.517 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.898 1.514 make_m2s 18 6.0 0.001 0.001 0.735 0.791 make_images 18 7.0 0.016 0.017 0.732 0.789 dbcsr_data_release 444 7.6 0.668 0.778 0.668 0.778 dbcsr_finalize 27 5.7 0.000 0.000 0.657 0.732 dbcsr_make_random_matrix 9 4.0 0.488 0.512 0.680 0.712 dbcsr_destroy 111 5.9 0.000 0.000 0.587 0.695 dbcsr_merge_all 18 6.5 0.103 0.116 0.573 0.672 dbcsr_redistribute 9 5.0 0.223 0.287 0.480 0.517 make_images_data 18 8.0 0.000 0.000 0.408 0.482 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.117 0.466 hybrid_alltoall_any 18 9.0 0.029 0.137 0.357 0.413 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="dbcsr", label="dbcsr", y=51.436, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="dbcsr", label="dbcsr", y=19.654, yerr=0.0 Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.963000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=17.251, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.455, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=4.521, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=4.473, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.773, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=1.9929999999999986, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.488, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.1, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.029, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.223, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.668, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.902, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.251, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.080 0.080 157.538 157.538 qs_mol_dyn_low 1 2.0 0.004 0.004 156.238 156.238 qs_forces 6 3.8 0.001 0.001 101.081 101.081 qs_energies 6 4.8 0.000 0.000 96.433 96.433 scf_env_do_scf 6 5.8 0.000 0.000 93.796 93.796 scf_env_do_scf_inner_loop 113 6.2 0.012 0.012 89.549 89.549 velocity_verlet 5 3.0 0.003 0.003 74.505 74.505 rebuild_ks_matrix 119 8.1 0.000 0.000 73.146 73.146 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.014 73.146 73.146 qs_ks_update_qs_env 119 7.3 0.001 0.001 68.964 68.964 fft_wrap_pw1pw2 2059 12.4 0.030 0.030 63.598 63.598 fft_wrap_pw1pw2_150 1321 13.9 19.169 19.169 62.487 62.487 qmmm_el_coupling 6 3.8 0.000 0.000 47.806 47.806 qmmm_elec_with_gaussian 6 4.8 0.014 0.014 47.799 47.799 qs_vxc_create 119 10.1 0.001 0.001 47.375 47.375 xc_vxc_pw_create 119 11.1 1.725 1.725 47.374 47.374 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 47.203 47.203 qmmm_elec_gaussian_low_G 6 6.8 46.326 46.326 46.326 46.326 xc_pw_derive 714 13.1 0.005 0.005 36.049 36.049 xc_pw_divergence 119 12.1 0.002 0.002 24.591 24.591 qs_rho_update_rho_low 119 7.3 0.001 0.001 22.158 22.158 calculate_rho_elec 119 8.3 1.490 1.490 22.157 22.157 fft3d_s 2060 14.4 20.933 20.933 20.968 20.968 xc_rho_set_and_dset_create 119 12.1 1.563 1.563 20.891 20.891 pw_scatter_s 1095 14.8 19.233 19.233 19.233 19.233 density_rs2pw 119 9.3 0.004 0.004 12.214 12.214 sum_up_and_integrate 119 10.1 0.001 0.001 10.414 10.414 integrate_v_rspace 119 11.1 0.024 0.024 10.260 10.260 qs_ks_ddapc 119 10.1 0.001 0.001 9.191 9.191 grid_collocate_task_list 119 9.3 8.453 8.453 8.453 8.453 potential_pw2rs 119 12.1 0.041 0.041 6.938 6.938 qmmm_forces 6 3.8 0.001 0.001 5.193 5.193 qmmm_forces_with_gaussian 6 4.8 0.020 0.020 4.815 4.815 pw_integral_ab_c1d_c1d_gs 280 11.0 4.513 4.513 4.513 4.513 init_scf_loop 6 6.8 0.000 0.000 4.244 4.244 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.219 4.219 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.081 4.081 cp_ddapc_apply_CD 119 11.1 0.020 0.020 3.946 3.946 pw_poisson_solve 125 9.9 0.002 0.002 3.935 3.935 qs_scf_new_mos 113 7.2 0.001 0.001 3.374 3.374 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.373 3.373 qmmm_forces_gaussian_low_G 6 6.8 3.364 3.364 3.364 3.364 grid_integrate_task_list 119 12.1 3.298 3.298 3.298 3.298 ot_scf_mini 113 9.2 0.001 0.001 3.230 3.230 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.042 0.049 53.700 53.701 qs_mol_dyn_low 1 2.0 0.005 0.006 52.586 52.634 qs_forces 6 3.8 0.001 0.001 39.524 39.525 qs_energies 6 4.8 0.000 0.000 37.752 37.753 scf_env_do_scf 6 5.8 0.000 0.001 36.880 36.881 scf_env_do_scf_inner_loop 113 6.2 0.003 0.021 35.244 35.247 rebuild_ks_matrix 119 8.1 0.000 0.001 26.447 26.454 qs_ks_build_kohn_sham_matrix 119 9.1 0.018 0.019 26.446 26.453 qs_ks_update_qs_env 119 7.3 0.001 0.001 24.903 24.909 velocity_verlet 5 3.0 0.002 0.004 21.706 21.709 fft_wrap_pw1pw2 2059 12.4 0.040 0.043 16.604 17.269 fft_wrap_pw1pw2_150 1321 13.9 0.725 0.803 15.854 16.415 fft3d_ps 2059 14.4 7.216 7.883 12.556 13.697 qs_vxc_create 119 10.1 0.002 0.002 13.572 13.576 xc_vxc_pw_create 119 11.1 0.167 0.231 13.570 13.574 xc_pw_derive 714 13.1 0.010 0.011 10.133 10.539 qs_rho_update_rho_low 119 7.3 0.001 0.001 9.544 9.545 calculate_rho_elec 119 8.3 0.049 0.056 9.543 9.545 sum_up_and_integrate 119 10.1 0.002 0.002 8.627 8.640 integrate_v_rspace 119 11.1 0.004 0.005 8.563 8.587 qmmm_forces 6 3.8 0.002 0.003 7.390 7.391 qmmm_forces_with_gaussian 6 4.8 0.009 0.010 7.190 7.299 xc_pw_divergence 119 12.1 0.004 0.005 6.761 7.096 xc_rho_set_and_dset_create 119 12.1 0.379 0.510 6.426 6.595 density_rs2pw 119 9.3 0.005 0.007 5.879 6.258 potential_pw2rs 119 12.1 0.006 0.008 5.400 5.417 qmmm_el_coupling 6 3.8 0.000 0.000 5.207 5.265 qmmm_elec_with_gaussian 6 4.8 0.027 0.029 5.206 5.263 mp_alltoall_z22v 2059 16.4 3.899 4.613 3.899 4.613 grid_collocate_task_list 119 9.3 3.489 3.876 3.489 3.876 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.731 3.761 transfer_pw2rs 500 12.8 0.005 0.006 3.643 3.674 mp_waitany 4028 12.8 2.720 3.354 2.720 3.354 transfer_rs2pw 488 10.2 0.006 0.008 2.812 3.256 x_to_yz 1095 15.8 0.816 0.908 2.949 3.249 grid_integrate_task_list 119 12.1 2.909 3.122 2.909 3.122 qmmm_forces_gaussian_low_G 6 6.8 3.044 3.065 3.044 3.065 yz_to_x 964 15.0 0.575 0.663 2.341 2.948 pw_restrict_s3 18 5.8 1.345 1.367 2.837 2.929 transfer_pw2rs_150 125 13.9 0.857 0.952 2.560 2.628 qs_scf_new_mos 113 7.2 0.001 0.001 2.493 2.501 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.492 2.501 transfer_rs2pw_150 125 11.2 0.546 0.766 2.024 2.443 ot_scf_mini 113 9.2 0.001 0.001 2.405 2.411 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.288 2.345 pw_prolongate_s3 18 6.8 1.101 1.125 2.288 2.345 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 2.109 2.131 qs_ks_ddapc 119 10.1 0.003 0.003 1.928 2.018 dbcsr_multiply_generic 2598 12.3 0.062 0.064 1.857 1.887 pw_derive 1089 13.4 1.518 1.687 1.518 1.687 pw_gather_p 964 14.0 1.346 1.639 1.346 1.639 init_scf_loop 6 6.8 0.000 0.000 1.629 1.630 qmmm_elec_gaussian_low_G 6 6.8 1.591 1.614 1.591 1.614 pw_poisson_solve 125 9.9 0.003 0.004 1.533 1.586 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.559 1.560 pw_copy 2027 12.4 1.451 1.536 1.451 1.536 mp_waitall_1 178435 16.4 1.180 1.351 1.180 1.351 pw_integral_ab_r3d_r3d_rs 2481 7.4 1.051 1.105 1.165 1.261 mp_sum_dm3 33 5.7 1.189 1.231 1.189 1.231 ot_mini 113 10.2 0.000 0.001 1.223 1.228 pw_scatter_p 1095 14.8 1.133 1.220 1.133 1.220 pw_axpy 2529 9.3 1.114 1.185 1.114 1.185 pw_zero 2653 8.4 1.125 1.175 1.125 1.175 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="MQAE_single_node", label="MQAE_single_node", y=157.538, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="MQAE_single_node", label="MQAE_single_node", y=53.7, yerr=0.0 Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=36.762000000000015, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=46.326, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=20.933, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=19.233, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=19.169, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.453, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.364, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.298, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=30.827, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=1.591, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.725, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.489, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.044, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=2.909, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.899, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.216, yerr=0.0 Summary: Performance test took 45 minutes. Status: OK ---> Removed intermediate container 67c7df7979a6 ---> c62e5c1f4dd4 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in cbde16a8533d ---> Removed intermediate container cbde16a8533d ---> 7841274c6736 Step 42/42 : ENTRYPOINT [] ---> Running in a6996ec1a164 ---> Removed intermediate container a6996ec1a164 ---> 23e9c0b8ea0c [Warning] One or more build-args [GIT_COMMIT_SHA SPACK_CACHE] were not consumed Successfully built 23e9c0b8ea0c Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2026-05-20 07:01:38+00:00