StartDate: 2026-01-18 06:18:11+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 2dab80f166a95d4c662edfadd9dc5087979e3a7f CommitTime: 2026-01-17 17:11:27 +0100 CommitAuthor: Matthias Krack CommitSubject: Use the same libxsmm version with Spack as with the toolchain #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=2dab80f166a95d4c662edfadd9dc5087979e3a7f SPACK_CACHE=gs://cp2k-spack-cache Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 408.9MB Step 1/42 : FROM ubuntu:24.04 24.04: Pulling from library/ubuntu a3629ac5b9f4: Pulling fs layer a3629ac5b9f4: Verifying Checksum a3629ac5b9f4: Download complete a3629ac5b9f4: Pull complete Digest: sha256:7a398144c5a2fa7dbd9362e460779dc6659bd9b19df50f724250c62ca7812eb3 Status: Downloaded newer image for ubuntu:24.04 ---> 493218ed0f40 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> b5a2adc045ab Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f3ab567f9bed Step 4/42 : RUN ./install_requirements.sh ubuntu:24.04 ---> Using cache ---> 8b7f9aca6fec Step 5/42 : RUN mkdir scripts ---> Using cache ---> d33a5a389203 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 8849986f9440 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> cb619c7777b8 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-dbcsr --with-gcc=system --dry-run ---> Using cache ---> 322651c26fc1 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 9db5da4c33f1 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> fe0a8f651b12 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> ed12ec744f10 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e161dc3a9146 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 2a7f04dd9bc7 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 18b23a1f9358 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> c9f2fe82d412 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 300419f80c6a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 77c449f17beb Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 7776dfce4f9d Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> a1c2025e26dd Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> f6f29edc4309 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 8761a2de07f0 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> f4f35a110f6b Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> aa22411f1712 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> abcbc0c67669 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 35c9e4f558a0 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 628761a00bb9 Step 27/42 : COPY ./tools/toolchain/scripts/stage9/ ./scripts/stage9/ ---> Using cache ---> 1e64b6fe05f6 Step 28/42 : RUN ./scripts/stage9/install_stage9.sh && rm -rf ./build ---> Using cache ---> 89f145bdf9a4 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> d86602aec156 Step 30/42 : COPY ./src ./src ---> Using cache ---> 5a9f4686832c Step 31/42 : COPY ./data ./data ---> Using cache ---> 65d5215ef6c5 Step 32/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> a7ad64253e01 Step 33/42 : COPY ./cmake ./cmake ---> Using cache ---> 7c0eb2f8d8b5 Step 34/42 : COPY ./CMakeLists.txt . ---> Using cache ---> f502916da73c Step 35/42 : COPY ./tools/docker/scripts/build_cp2k.sh . ---> Using cache ---> 760df92fbff9 Step 36/42 : RUN ./build_cp2k.sh toolchain psmp ---> Running in a129f29ec8be ==================== Building CP2K ==================== -- The Fortran compiler identification is GNU 13.3.0 -- The C compiler identification is GNU 13.3.0 -- The CXX compiler identification is GNU 13.3.0 -- Detecting Fortran compiler ABI info -- Detecting Fortran compiler ABI info - done -- Check for working Fortran compiler: /usr/bin/gfortran - skipped -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: /usr/bin/gcc - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: /usr/bin/g++ - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found PkgConfig: /usr/bin/pkg-config (found version "1.8.1") -- Found Python: /usr/bin/python3.12 (found version "3.12.3") found components: Interpreter -- Found MPI_C: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpi.so (found version "4.1") -- Found MPI_CXX: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpicxx.so (found version "4.1") -- Found MPI_Fortran: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpifort.so (found version "4.1") -- Found MPI: TRUE (found version "4.1") found components: C CXX Fortran -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE -- Found MPI: TRUE (found version "4.1") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Could NOT find MKL (missing: CP2K_MKL_INCLUDE_DIRS) -- Checking for module 'openblas' -- Found openblas, version 0.3.30 -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/include -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found Lapack: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Checking for module 'libxsmm-shared' -- Found libxsmm-shared, version 1.17.0 -- Checking for module 'libxsmmf-shared' -- Found libxsmmf-shared, version 1.17.0 -- Checking for module 'libxsmmext-shared' -- Found libxsmmext-shared, version 1.17.0 -- Checking for module 'libxsmmnoblas-shared' -- Found libxsmmnoblas-shared, version 1.17.0 -- Found LibXSMM: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include -- Using LIBXSMM for Small Matrix Multiplication -- Checking for module 'scalapack' -- Package 'mpi', required by 'scalapack', not found Package 'lapack', required by 'scalapack', not found Package 'blas', required by 'scalapack', not found -- Found SCALAPACK: /opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a ------------------------------------------------------------ - OPENMP - ------------------------------------------------------------ -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: Fortran C CXX ------------------------------------------------------------ - DBCSR - ------------------------------------------------------------ -- Found MPI: TRUE (found version "4.1") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Checking for module 'libxsmmf' -- Found libxsmmf, version 1.17.0 -- Checking for module 'libxsmmext' -- Found libxsmmext, version 1.17.0 ------------------------------------------------------------ - Other dependencies - ------------------------------------------------------------ -- Checking for one of the modules 'elpa_openmp' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a -- Found HDF5: hdf5-shared;hdf5_fortran-shared (found version "1.14.6") found components: C Fortran -- Found MPI: TRUE (found version "4.1") found components: CXX -- Found OPENBLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found LibVORI: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a -- Checking for one of the modules 'fftw3' -- Checking for one of the modules 'fftw3f' -- Checking for one of the modules 'fftw3l' -- Checking for one of the modules 'fftw3q' -- Found Fftw: /opt/cp2k-toolchain/install/fftw-3.3.10/include -- Checking for module 'libint2' -- Found libint2, version 2.6.0 -- Found Libint2: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include;/opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include/libint2 -- Component omp of Spglib: NOT FOUND -- Component fortran of Spglib: FOUND (LIB_TYPE: static) -- Found package: Spglib -- Found libsmeagol: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a -- Looking for Fortran sgemm -- Looking for Fortran sgemm - found -- mctc-lib: Find installed package -- multicharge: Find installed package -- Found ACE: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a -- Checking for one of the modules 'plumed;plumedInternals' -- Found Plumed: /opt/cp2k-toolchain/install/plumed-2.9.3/include -- Found MPI: TRUE (found version "4.1") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Found Torch: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so -- Checking for modules 'mclf;mcl' -- Package 'mclf', required by 'virtual:world', not found -- Package 'mcl', required by 'virtual:world', not found -- Found MiMiC: True -- Checking for module 'trexio' -- Found trexio, version 2.6.0 -- Found TrexIO: /opt/cp2k-toolchain/install/trexio-2.6.0/include;/opt/cp2k-toolchain/install/hdf5-1.14.6/include -- Looking for Fortran sgemm -- Looking for Fortran sgemm - not found -- Found BLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Looking for Fortran cheev -- Looking for Fortran cheev - found -- Found LAPACK: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so;-lm;-ldl -- Setting build type to 'Release' as none was specified. -- Performing Test f2008-norm2 -- Performing Test f2008-norm2 - Success -- Performing Test f2008-block_construct -- Performing Test f2008-block_construct - Success -- Performing Test f2008-contiguous -- Performing Test f2008-contiguous - Success -- Performing Test f95-reshape-order-allocatable -- Performing Test f95-reshape-order-allocatable - Success -- FYPP preprocessor found. -------------------------------------------------------------------- - - - Summary of enabled dependencies - - - -------------------------------------------------------------------- - BLAS - vendor: OpenBLAS - include directories: /opt/cp2k-toolchain/install/openblas-0.3.30/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so - LAPACK - include directories: /opt/cp2k-toolchain/install/openblas-0.3.30/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so - MPI - include directories: /opt/cp2k-toolchain/install/mpich-4.3.2/include - libraries: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpicxx.so;/opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpi.so - MPI_F08: ON - ScaLAPACK - vendor: auto - include directories: - libraries: /opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a - LibXC - version: 7.0.0 - include directories: /opt/cp2k-toolchain/install/libxc-7.0.0/include/ - libraries: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxcf03.a;/opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a - Spglib - include directories: /opt/cp2k-toolchain/install/spglib-2.5.0/include - LibTorch - extra CXX flags: -D_GLIBCXX_USE_CXX11_ABI=1 - include directories: /opt/cp2k-toolchain/install/libtorch-2.7.1/include;/opt/cp2k-toolchain/install/libtorch-2.7.1/include/torch/csrc/api/include - libraries: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so - HDF5 - version: 1.14.6 - include directories: /opt/cp2k-toolchain/install/hdf5-1.14.6/include - libraries: hdf5-shared;hdf5_fortran-shared - FFTW3 - include directories: /opt/cp2k-toolchain/install/fftw-3.3.10/include - libraries: /opt/cp2k-toolchain/install/fftw-3.3.10/lib/libfftw3.a - PLUMED - include directories: /opt/cp2k-toolchain/install/plumed-2.9.3/include - libraries: /opt/cp2k-toolchain/install/plumed-2.9.3/lib/libplumed.so - LIBXSMM - include directories: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include - libraries: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmext.so;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so;/opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmf.so;:libxsmmext.a;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so - SpLA - include directories: /opt/cp2k-toolchain/install/SpLA-1.6.1/include;/opt/cp2k-toolchain/install/SpLA-1.6.1/include/spla - libraries: $;$;MPI::MPI_CXX;MPI::MPI_C;MPI::MPI_Fortran - MiMiC - include directories: /opt/cp2k-toolchain/install/mcl-3.0.0/include/MiMiC/ - libraries: /opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmclf.so;/opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmcl.so - DFTD4 - include directories : /opt/cp2k-toolchain/install/tblite-0.5.0/include;/opt/cp2k-toolchain/install/tblite-0.5.0/include/dftd4/GNU-13.3.0 - libraries : - DeePMD - ACE - include directories: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/include - libraries: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a - LibSMEAGOL - include directories: /opt/cp2k-toolchain/install/libsmeagol-1.2/include - libraries: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a - TBLITE : - include directories : /opt/cp2k-toolchain/install/tblite-0.5.0/include;/opt/cp2k-toolchain/install/tblite-0.5.0/include/tblite/GNU-13.3.0 - tblite libraries : - COSMA - include directories: /opt/cp2k-toolchain/install/COSMA-2.7.0/include - libraries: MPI::MPI_CXX;costa::costa;$;$;cosma::BLAS::blas;cosma::scalapack::scalapack - Libint2 - include directories: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include;/opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include/libint2 - libraries: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/lib/libint2.a - Libvori - libraries: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a - ELPA - include directories: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/include/elpa_openmp-2024.05.001 - libraries: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a - TREXIO - include directories: /opt/cp2k-toolchain/install/trexio-2.6.0/include;/opt/cp2k-toolchain/install/hdf5-1.14.6/include - libraries: /opt/cp2k-toolchain/install/trexio-2.6.0/lib/libtrexio.so;/opt/cp2k-toolchain/install/hdf5-1.14.6/lib/libhdf5.so - GreenX - include directories: /opt/cp2k-toolchain/install/greenX-2.2/include/modules - libraries: /opt/cp2k-toolchain/install/greenX-2.2/lib/libGXCommon.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_minimax.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_ac.so.0.0.1 -------------------------------------------------------------------- - - - List of dependencies not included in this build - - - -------------------------------------------------------------------- - PEXSI - SIRIUS - openPMD - GPU acceleration is disabled - DLA-Future After building CP2K the regtests can be run with the following command: ./tests/do_regtest.py /opt/cp2k/build/bin psmp -- Configuring done (7.3s) -- Generating done (0.4s) -- Build files have been written to: /opt/cp2k/build Compiling CP2K ... done ---> Removed intermediate container a129f29ec8be ---> 29f368fe9603 Step 37/42 : COPY ./benchmarks ./benchmarks ---> c21620aa7a29 Step 38/42 : COPY ./tools/regtesting ./tools/regtesting ---> a9d96e0f8889 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 7f8d87d103b0 Step 40/42 : RUN ./test_performance.sh "toolchain" 2>&1 | tee report.log ---> Running in ff00060c3d9f ============== CP2K Binary Flags ============= cp2kflags: omp libint fftw3 libxc elpa parallel scalapack mpi_f08 cosma ace deepmd xsmm plumed2 spglib libdftd4 mctc-lib tblite libvori libbqb libtorch mimic hdf5 trexio libsmeagol greenx ========== Checking Benchmark Inputs ========= Found 77 input files and 0 errors. ========== Running Performance Test ========== Plot: name="total_timings_32omp", title="Total Timings with 32 OpenMP Threads", ylabel="time [s]" Plot: name="total_timings_32mpi", title="Total Timings with 32 MPI Ranks", ylabel="time [s]" Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 96.094 96.094 qs_mol_dyn_low 1 2.0 0.003 0.003 95.493 95.493 qs_forces 11 3.9 0.001 0.001 95.451 95.451 qs_energies 11 4.9 0.001 0.001 88.947 88.947 scf_env_do_scf 11 5.9 0.001 0.001 76.796 76.796 velocity_verlet 10 3.0 0.002 0.002 62.618 62.618 scf_env_do_scf_inner_loop 108 6.5 0.014 0.014 61.991 61.991 dbcsr_multiply_generic 2286 12.5 0.178 0.178 28.161 28.161 qs_scf_new_mos 108 7.5 0.001 0.001 25.137 25.137 qs_scf_loop_do_ot 108 8.5 0.001 0.001 25.136 25.136 ot_scf_mini 108 9.5 0.002 0.002 23.396 23.396 rebuild_ks_matrix 119 8.3 0.001 0.001 22.667 22.667 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.015 22.666 22.666 qs_ks_update_qs_env 119 7.6 0.001 0.001 20.957 20.957 qs_rho_update_rho_low 119 7.7 0.001 0.001 20.116 20.116 calculate_rho_elec 119 8.7 0.966 0.966 20.115 20.115 make_m2s 4572 13.5 0.051 0.051 17.627 17.627 grid_collocate_task_list 119 9.7 15.411 15.411 15.411 15.411 ot_mini 108 10.5 0.001 0.001 14.907 14.907 init_scf_loop 11 6.9 0.000 0.000 14.671 14.671 sum_up_and_integrate 119 10.3 0.002 0.002 13.301 13.301 integrate_v_rspace 119 11.3 0.115 0.115 13.249 13.249 make_images 4572 14.5 2.038 2.038 12.307 12.307 prepare_preconditioner 11 7.9 0.000 0.000 11.567 11.567 make_preconditioner 11 8.9 0.000 0.000 11.567 11.567 grid_integrate_task_list 119 12.3 10.873 10.873 10.873 10.873 make_full_inverse_cholesky 11 9.9 0.034 0.034 10.054 10.054 hybrid_alltoall_any 4725 16.4 8.001 8.001 8.404 8.404 make_images_data 4572 15.5 0.038 0.038 8.122 8.122 ot_diis_step 108 11.5 0.004 0.004 7.902 7.902 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 7.578 7.578 apply_single 119 13.6 0.000 0.000 7.578 7.578 qs_ot_get_derivative 108 11.5 0.001 0.001 6.988 6.988 fft_wrap_pw1pw2 1201 11.6 0.013 0.013 6.696 6.696 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.695 6.695 multiply_cannon 2286 13.5 0.368 0.368 6.273 6.273 fft_wrap_pw1pw2_140 487 12.2 1.121 1.121 5.757 5.757 multiply_cannon_loop 2286 14.5 0.061 0.061 5.607 5.607 dbcsr_make_dense_low 5837 15.5 0.032 0.032 5.593 5.593 make_dense_data 5837 16.5 4.879 4.879 5.545 5.545 multiply_cannon_multrec 2286 15.5 5.482 5.482 5.545 5.545 dbcsr_make_images_dense 3978 14.8 0.018 0.018 4.846 4.846 init_scf_run 11 5.9 0.000 0.000 4.758 4.758 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.757 4.757 dbcsr_complete_redistribute 329 12.2 2.146 2.146 4.420 4.420 wfi_extrapolate 11 7.9 0.001 0.001 4.210 4.210 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.962 3.962 dbcsr_copy 2102 12.0 0.229 0.229 3.750 3.750 density_rs2pw 119 9.7 0.004 0.004 3.738 3.738 dbcsr_copy_into_existing 22 7.9 3.507 3.507 3.508 3.508 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.491 3.491 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.394 3.394 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.133 3.133 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.107 3.107 qs_create_task_list 11 7.9 0.000 0.000 3.035 3.035 generate_qs_task_list 11 8.9 1.882 1.882 3.035 3.035 qs_ot_get_p 119 10.4 0.001 0.001 3.015 3.015 cp_fm_cholesky_invert 11 10.9 2.959 2.959 2.959 2.959 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.807 2.807 fft3d_s 1202 13.6 2.757 2.757 2.762 2.762 dbcsr_data_release 278921 16.0 2.749 2.749 2.749 2.749 cp_fm_cholesky_decompose 22 10.9 2.336 2.336 2.336 2.336 potential_pw2rs 119 12.3 0.049 0.049 2.261 2.261 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.195 2.195 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.184 2.184 dbcsr_dot 1205 11.9 2.112 2.112 2.122 2.122 qs_ot_get_derivative_diag 49 12.0 0.002 0.002 2.081 2.081 calculate_dm_sparse 119 9.5 0.001 0.001 2.054 2.054 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.007 2.007 pw_poisson_solve 119 10.3 0.002 0.002 1.996 1.996 dbcsr_finalize 5048 13.8 0.109 0.109 1.985 1.985 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.014 53.332 53.336 qs_mol_dyn_low 1 2.0 0.003 0.003 53.182 53.187 qs_forces 11 3.9 0.002 0.002 53.126 53.129 qs_energies 11 4.9 0.001 0.001 49.852 49.854 scf_env_do_scf 11 5.9 0.000 0.002 45.689 45.692 scf_env_do_scf_inner_loop 108 6.5 0.003 0.019 41.211 41.219 velocity_verlet 10 3.0 0.001 0.003 32.680 32.682 rebuild_ks_matrix 119 8.3 0.000 0.001 18.407 18.451 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.020 18.406 18.451 dbcsr_multiply_generic 2286 12.5 0.080 0.088 17.549 17.641 qs_ks_update_qs_env 119 7.6 0.001 0.001 16.417 16.462 qs_scf_new_mos 108 7.5 0.001 0.001 14.409 14.452 qs_scf_loop_do_ot 108 8.5 0.001 0.001 14.408 14.452 ot_scf_mini 108 9.5 0.002 0.002 13.590 13.634 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.388 13.398 calculate_rho_elec 119 8.7 0.031 0.032 13.388 13.398 sum_up_and_integrate 119 10.3 0.002 0.003 13.179 13.211 integrate_v_rspace 119 11.3 0.004 0.005 13.153 13.187 multiply_cannon 2286 13.5 0.152 0.158 11.520 11.832 multiply_cannon_loop 2286 14.5 0.099 0.106 10.685 10.787 mp_waitall_1 158411 16.6 8.879 9.248 8.879 9.248 grid_collocate_task_list 119 9.7 8.698 8.955 8.698 8.955 grid_integrate_task_list 119 12.3 8.342 8.562 8.342 8.562 ot_mini 108 10.5 0.001 0.001 8.435 8.477 multiply_cannon_metrocomm3 18288 15.5 0.041 0.044 6.539 6.755 qs_ot_get_derivative 108 11.5 0.001 0.001 5.130 5.174 density_rs2pw 119 9.7 0.005 0.005 4.221 4.611 init_scf_loop 11 6.9 0.000 0.000 4.448 4.451 multiply_cannon_multrec 18288 15.5 3.708 3.801 3.719 3.813 potential_pw2rs 119 12.3 0.006 0.007 3.764 3.784 fft_wrap_pw1pw2 1201 11.6 0.018 0.020 3.366 3.430 make_m2s 4572 13.5 0.045 0.052 3.307 3.395 ot_diis_step 108 11.5 0.004 0.004 3.267 3.270 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.206 3.243 apply_single 119 13.6 0.000 0.001 3.206 3.243 transfer_rs2pw 487 10.6 0.007 0.009 2.687 3.092 make_images 4572 14.5 0.113 0.118 2.953 3.044 init_scf_run 11 5.9 0.000 0.000 2.889 2.892 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.889 2.891 transfer_pw2rs 487 13.2 0.006 0.007 2.803 2.815 fft_wrap_pw1pw2_140 487 12.2 0.135 0.152 2.695 2.774 fft3d_ps 1201 13.6 1.046 1.221 2.588 2.712 mp_waitany 9880 13.7 2.181 2.701 2.181 2.701 wfi_extrapolate 11 7.9 0.001 0.001 2.640 2.642 prepare_preconditioner 11 7.9 0.000 0.000 2.284 2.300 make_preconditioner 11 8.9 0.000 0.000 2.284 2.300 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.182 2.192 make_full_inverse_cholesky 11 9.9 0.000 0.000 2.147 2.167 transfer_rs2pw_140 130 11.5 0.194 0.214 1.561 1.976 mp_sum_l 10898 13.6 1.870 1.969 1.870 1.969 qs_ot_get_p 119 10.4 0.001 0.001 1.772 1.811 qs_ot_get_derivative_taylor 59 13.0 0.001 0.002 1.720 1.752 make_images_data 4572 15.5 0.037 0.044 1.436 1.610 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.578 1.600 mp_alltoall_d11v 2130 13.8 1.366 1.565 1.366 1.565 mp_alltoall_z22v 1201 15.6 1.298 1.505 1.298 1.505 transfer_pw2rs_140 130 13.9 0.360 0.412 1.454 1.479 cp_fm_cholesky_invert 11 10.9 1.392 1.412 1.392 1.412 hybrid_alltoall_any 4725 16.4 0.069 0.210 1.215 1.336 rs_gather_matrices 119 12.3 0.075 0.086 1.002 1.214 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 1.129 1.132 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64", label="H2O-64", y=96.094, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64", label="H2O-64", y=53.332, yerr=0.0 Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=51.44799999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.411, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.873, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=8.001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.482, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.879, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=21.455, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.698, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.342, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.069, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.708, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.879, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.181, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.031 120.424 120.424 qs_mol_dyn_low 1 2.0 0.003 0.003 119.809 119.809 qs_forces 11 3.9 0.002 0.002 119.767 119.767 qs_energies 11 4.9 0.001 0.001 111.801 111.801 scf_env_do_scf 11 5.9 0.001 0.001 96.708 96.708 scf_env_do_scf_inner_loop 96 6.5 0.014 0.014 79.877 79.877 velocity_verlet 10 3.0 0.002 0.002 76.686 76.686 rebuild_ks_matrix 107 8.3 0.001 0.001 34.498 34.498 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.014 34.497 34.497 qs_rho_update_rho_low 107 7.7 0.001 0.001 33.412 33.412 calculate_rho_elec 107 8.7 0.875 0.875 33.412 33.412 qs_ks_update_qs_env 107 7.6 0.001 0.001 31.194 31.194 grid_collocate_task_list 107 9.7 28.915 28.915 28.915 28.915 sum_up_and_integrate 107 10.3 0.002 0.002 26.046 26.046 integrate_v_rspace 107 11.3 0.099 0.099 25.993 25.993 dbcsr_multiply_generic 1966 12.4 0.170 0.170 25.802 25.802 grid_integrate_task_list 107 12.3 23.788 23.788 23.788 23.788 qs_scf_new_mos 96 7.5 0.001 0.001 22.639 22.639 qs_scf_loop_do_ot 96 8.5 0.001 0.001 22.638 22.638 ot_scf_mini 96 9.5 0.002 0.002 20.991 20.991 init_scf_loop 11 6.9 0.000 0.000 16.664 16.664 make_m2s 3932 13.4 0.045 0.045 15.730 15.730 ot_mini 96 10.5 0.001 0.001 13.485 13.485 prepare_preconditioner 11 7.9 0.000 0.000 11.905 11.905 make_preconditioner 11 8.9 0.000 0.000 11.905 11.905 make_images 3932 14.4 1.910 1.910 11.034 11.034 make_full_inverse_cholesky 11 9.9 0.036 0.036 10.358 10.358 hybrid_alltoall_any 4079 16.3 7.234 7.234 7.635 7.635 make_images_data 3932 15.4 0.033 0.033 7.341 7.341 qs_ot_get_derivative 96 11.5 0.001 0.001 7.280 7.280 init_scf_run 11 5.9 0.000 0.000 7.246 7.246 scf_env_initial_rho_setup 11 6.9 0.002 0.002 7.244 7.244 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.066 7.066 wfi_extrapolate 11 7.9 0.001 0.001 6.556 6.556 fft_wrap_pw1pw2 1081 11.6 0.013 0.013 6.393 6.393 ot_diis_step 96 11.5 0.004 0.004 6.188 6.188 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.098 6.098 apply_single 107 13.6 0.000 0.000 6.098 6.098 multiply_cannon 1966 13.4 0.327 0.327 6.028 6.028 fft_wrap_pw1pw2_140 439 12.2 1.002 1.002 5.488 5.488 multiply_cannon_loop 1966 14.4 0.083 0.083 5.417 5.417 multiply_cannon_multrec 1966 15.4 5.268 5.268 5.333 5.333 dbcsr_make_dense_low 4961 15.5 0.028 0.028 4.959 4.959 make_dense_data 4961 16.5 4.282 4.282 4.917 4.917 dbcsr_complete_redistribute 317 12.2 2.200 2.200 4.770 4.770 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.574 4.574 copy_dbcsr_to_fm 147 11.2 0.003 0.003 4.233 4.233 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.229 4.229 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.064 4.064 density_rs2pw 107 9.7 0.004 0.004 3.621 3.621 qs_create_task_list 11 7.9 0.000 0.000 3.614 3.614 generate_qs_task_list 11 8.9 2.463 2.463 3.614 3.614 dbcsr_copy 1855 11.9 0.220 0.220 3.485 3.485 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.390 3.390 dbcsr_copy_into_existing 22 7.9 3.251 3.251 3.252 3.252 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.183 3.183 cp_fm_cholesky_invert 11 10.9 3.096 3.096 3.096 3.096 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.976 2.976 qs_ot_get_p 107 10.4 0.001 0.001 2.790 2.790 fft3d_s 1082 13.6 2.654 2.654 2.659 2.659 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.606 2.606 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.016 76.293 76.294 qs_mol_dyn_low 1 2.0 0.003 0.004 76.142 76.146 qs_forces 11 3.9 0.002 0.002 76.101 76.102 qs_energies 11 4.9 0.001 0.001 71.231 71.235 scf_env_do_scf 11 5.9 0.000 0.001 65.568 65.570 scf_env_do_scf_inner_loop 96 6.5 0.003 0.017 60.107 60.108 velocity_verlet 10 3.0 0.001 0.003 46.796 46.798 rebuild_ks_matrix 107 8.3 0.000 0.001 31.463 31.494 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.018 31.462 31.493 qs_ks_update_qs_env 107 7.6 0.001 0.002 27.673 27.702 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.957 26.961 calculate_rho_elec 107 8.7 0.028 0.029 26.956 26.960 sum_up_and_integrate 107 10.3 0.002 0.003 26.726 26.746 integrate_v_rspace 107 11.3 0.004 0.005 26.700 26.724 grid_collocate_task_list 107 9.7 22.196 23.415 22.196 23.415 grid_integrate_task_list 107 12.3 21.850 22.235 21.850 22.235 dbcsr_multiply_generic 1966 12.4 0.075 0.078 14.926 14.988 qs_scf_new_mos 96 7.5 0.001 0.001 11.699 11.737 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.698 11.737 ot_scf_mini 96 9.5 0.002 0.002 11.021 11.050 multiply_cannon 1966 13.4 0.141 0.151 10.767 11.015 multiply_cannon_loop 1966 14.4 0.099 0.109 10.080 10.206 mp_waitall_1 136719 16.5 8.300 8.660 8.300 8.660 ot_mini 96 10.5 0.001 0.001 6.631 6.663 multiply_cannon_metrocomm3 15728 15.4 0.039 0.043 6.182 6.434 init_scf_loop 11 6.9 0.000 0.001 5.440 5.440 density_rs2pw 107 9.7 0.005 0.005 4.343 4.939 init_scf_run 11 5.9 0.000 0.000 4.447 4.449 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.446 4.447 wfi_extrapolate 11 7.9 0.001 0.001 4.045 4.045 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.938 3.942 qs_ot_get_derivative 96 11.5 0.001 0.001 3.615 3.646 multiply_cannon_multrec 15728 15.4 3.491 3.629 3.502 3.641 potential_pw2rs 107 12.3 0.006 0.006 3.613 3.640 transfer_rs2pw 439 10.6 0.007 0.007 2.892 3.495 fft_wrap_pw1pw2 1081 11.6 0.018 0.019 3.132 3.192 mp_waitany 8968 13.7 2.579 3.166 2.579 3.166 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.008 3.053 apply_single 107 13.6 0.000 0.001 3.008 3.053 ot_diis_step 96 11.5 0.004 0.004 2.986 2.989 make_m2s 3932 13.4 0.041 0.047 2.707 2.794 transfer_pw2rs 439 13.2 0.006 0.007 2.751 2.757 fft_wrap_pw1pw2_140 439 12.2 0.138 0.156 2.573 2.616 transfer_rs2pw_140 118 11.5 0.167 0.182 1.922 2.527 fft3d_ps 1081 13.6 1.021 1.158 2.366 2.510 make_images 3932 14.4 0.102 0.106 2.384 2.476 mp_alltoall_d11v 1998 13.7 1.527 2.125 1.527 2.125 rs_gather_matrices 107 12.3 0.080 0.090 1.189 1.783 prepare_preconditioner 11 7.9 0.000 0.000 1.613 1.629 make_preconditioner 11 8.9 0.000 0.000 1.613 1.629 transfer_pw2rs_140 118 13.9 0.383 0.462 1.501 1.545 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64_nonortho", label="H2O-64_nonortho", y=120.424, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64_nonortho", label="H2O-64_nonortho", y=76.293, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=50.93700000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=28.915, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=23.788, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=7.234, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.268, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.282, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=17.877000000000002, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.196, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.85, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.491, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.579, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.3, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.225 0.225 104.915 104.915 qs_energies 1 2.0 0.000 0.000 104.052 104.052 scf_env_do_scf 1 3.0 0.000 0.000 102.721 102.721 qs_ks_update_qs_env 8 5.0 0.000 0.000 95.662 95.662 rebuild_ks_matrix 7 6.0 0.000 0.000 95.582 95.582 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 95.582 95.582 hfx_ks_matrix 7 8.0 0.000 0.000 85.678 85.678 integrate_four_center 7 9.0 0.772 0.772 85.631 85.631 integrate_four_center_main 7 10.0 0.629 0.629 78.113 78.113 integrate_four_center_bin 451 11.0 77.484 77.484 77.484 77.484 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 53.322 53.322 init_scf_loop 1 4.0 0.000 0.000 49.381 49.381 integrate_four_center_load 7 10.0 0.001 0.001 6.496 6.496 hfx_load_balance 1 11.0 0.001 0.001 6.496 6.496 qs_vxc_create 14 8.0 0.000 0.000 3.787 3.787 xc_vxc_pw_create 14 9.0 0.134 0.134 3.787 3.787 prepare_preconditioner 1 5.0 0.000 0.000 3.600 3.600 make_preconditioner 1 6.0 0.000 0.000 3.600 3.600 hfx_load_balance_bin 1 12.0 3.279 3.279 3.279 3.279 hfx_load_balance_count 1 12.0 3.200 3.200 3.200 3.200 fft_wrap_pw1pw2 237 10.7 0.004 0.004 2.892 2.892 calculate_rho_elec 15 7.4 0.120 0.120 2.821 2.821 fft_wrap_pw1pw2_140 150 12.1 0.926 0.926 2.763 2.763 xc_rho_set_and_dset_create 14 10.0 0.135 0.135 2.439 2.439 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.244 2.244 dbcsr_multiply_generic 165 10.0 0.016 0.016 2.232 2.232 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.223 0.231 92.932 92.934 qs_energies 1 2.0 0.000 0.000 92.505 92.507 scf_env_do_scf 1 3.0 0.000 0.000 92.134 92.136 qs_ks_update_qs_env 8 5.0 0.000 0.000 89.947 89.949 rebuild_ks_matrix 7 6.0 0.000 0.000 89.933 89.936 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 89.933 89.936 hfx_ks_matrix 7 8.0 0.000 0.000 84.379 84.381 integrate_four_center 7 9.0 0.055 0.278 84.369 84.372 integrate_four_center_main 7 10.0 0.004 0.004 75.361 77.100 integrate_four_center_bin 448 11.0 75.357 77.096 75.357 77.096 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 47.980 47.981 init_scf_loop 1 4.0 0.000 0.000 44.152 44.153 integrate_four_center_load 7 10.0 0.000 0.000 6.187 6.188 hfx_load_balance 1 11.0 0.001 0.001 6.187 6.188 mp_sync 56 11.2 2.178 3.515 2.178 3.515 hfx_load_balance_bin 1 12.0 2.815 3.110 2.815 3.110 hfx_load_balance_count 1 12.0 2.806 3.072 2.806 3.072 qs_vxc_create 14 8.0 0.000 0.000 2.169 2.171 xc_vxc_pw_create 14 9.0 0.008 0.008 2.169 2.170 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-hyb", label="H2O-hyb", y=104.915, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-hyb", label="H2O-hyb", y=92.932, yerr=0.0 Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=19.02900000000001, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=77.484, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.279, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.2, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=0.926, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=0.772, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.225, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.498000000000005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=75.357, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.815, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.806, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.055, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.223, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=2.178, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 78.207 78.207 qs_energies 1 2.0 0.000 0.000 77.840 77.840 mp2_main 1 3.0 0.000 0.000 74.883 74.883 mp2_gpw_main 1 4.0 0.000 0.000 74.769 74.769 rpa_ri_compute_en 1 5.0 0.000 0.000 71.530 71.530 rpa_num_int 1 6.0 0.001 0.001 71.520 71.520 compute_mat_P_omega 1 7.0 0.003 0.003 61.065 61.065 compute_mat_P_omega_contract 10 8.0 8.488 8.488 60.814 60.814 dbt_total 2336 9.6 0.014 0.014 49.205 49.205 dbt_contract 787 11.0 0.042 0.042 41.893 41.893 dbt_tas_total 1149 12.2 0.255 0.255 40.189 40.189 dbt_tas_multiply 807 12.1 0.002 0.002 38.860 38.860 dbt_tas_dbm 807 14.1 0.005 0.005 32.129 32.129 dbm_multiply 807 16.1 32.117 32.117 32.117 32.117 dbt_tas_mm_1N 524 15.1 0.002 0.002 23.515 23.515 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 21.458 21.458 compute_mat_P_omega_calc_M_occ 250 9.0 8.490 8.490 17.377 17.377 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.099 8.099 dbt_tas_mm_2 251 15.0 0.002 0.002 7.054 7.054 dbt_copy 1103 10.7 0.117 0.117 5.964 5.964 compute_QP_energies 1 7.0 0.000 0.000 5.947 5.947 compute_self_energy_cubic_gw 1 8.0 0.167 0.167 5.947 5.947 contract_cubic_gw 21 9.0 0.000 0.000 4.393 4.393 dbm_reserve_blocks 3628 15.3 4.102 4.102 4.102 4.102 dbt_tas_reserve_blocks_index 3261 14.3 0.108 0.108 4.048 4.048 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.228 3.228 dbt_reserve_blocks_index 2280 13.1 0.069 0.069 3.151 3.151 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 3.092 3.092 dbt_crop 1042 12.0 1.605 1.605 2.914 2.914 scf_env_do_scf 1 3.0 0.000 0.000 2.854 2.854 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.854 2.854 dbt_tas_reshape 367 15.0 0.015 0.015 2.333 2.333 dbt_tas_copy 574 11.4 1.309 1.309 2.119 2.119 dbt_reshape 278 11.9 1.166 1.166 1.939 1.939 convert_to_new_pgrid 2421 14.1 0.107 0.107 1.886 1.886 dbm_copy 1614 15.1 1.778 1.778 1.778 1.778 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 1.689 1.689 reshape_mm_small 367 14.1 0.041 0.041 1.681 1.681 compute_W_cubic_GW 10 7.0 0.003 0.003 1.574 1.574 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.012 38.030 38.031 qs_energies 1 2.0 0.000 0.000 37.544 37.546 mp2_main 1 3.0 0.000 0.000 36.380 36.383 mp2_gpw_main 1 4.0 0.000 0.000 36.326 36.328 rpa_ri_compute_en 1 5.0 0.000 0.000 34.750 34.752 rpa_num_int 1 6.0 0.000 0.002 34.749 34.751 dbt_total 2336 9.6 0.015 0.016 30.812 30.840 compute_mat_P_omega 1 7.0 0.001 0.005 29.201 29.244 compute_mat_P_omega_contract 10 8.0 0.411 0.434 29.046 29.050 dbt_contract 787 11.0 0.037 0.038 22.928 22.946 dbt_tas_total 1149 12.2 0.083 0.090 20.403 20.421 dbt_tas_multiply 807 12.1 0.002 0.002 20.334 20.355 dbt_tas_dbm 807 14.1 0.003 0.003 13.951 13.970 dbm_multiply 807 16.1 10.658 11.377 10.658 11.377 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.250 9.274 compute_mat_P_omega_calc_M_occ 250 9.0 0.402 0.420 8.425 8.429 mp_sync 8688 11.6 6.551 7.901 6.551 7.901 dbt_copy 1149 10.8 0.022 0.023 6.474 6.814 dbt_tas_mm_2 251 15.0 0.002 0.002 6.571 6.578 dbt_reshape 1136 11.8 2.365 2.562 5.977 6.310 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.676 5.679 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.960 5.431 compute_QP_energies 1 7.0 0.000 0.000 3.614 3.617 compute_self_energy_cubic_gw 1 8.0 0.007 0.007 3.593 3.596 mp_waitall_2 3812 15.3 3.011 3.224 3.011 3.224 dbt_communicate_buffer 1136 12.8 0.055 0.061 2.800 2.976 contract_cubic_gw 21 9.0 0.000 0.000 2.647 2.649 dbm_reserve_blocks 3752 15.4 1.780 2.022 1.780 2.022 dbt_reserve_blocks_index 2887 13.1 0.079 0.085 1.769 2.017 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.009 1.757 2.002 dbt_tas_reserve_blocks_index 3347 14.5 0.068 0.073 1.729 1.975 dbt_crop 1042 12.0 0.887 0.987 1.430 1.610 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.574 1.574 convert_to_new_pgrid 2421 14.1 0.023 0.027 1.092 1.247 dbm_copy 1608 15.1 1.063 1.217 1.063 1.217 dbt_tas_replicate 405 14.1 0.540 0.699 1.139 1.197 scf_env_do_scf 1 3.0 0.000 0.000 1.124 1.124 scf_env_do_scf_inner_loop 17 4.0 0.001 0.003 1.124 1.124 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.084 1.087 parallel_gemm_fm_cosma 105 8.4 1.047 1.061 1.047 1.061 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.040 1.043 mp_sum_l 5765 13.7 0.816 0.956 0.816 0.956 dbt_tas_create_split 2550 15.2 0.005 0.005 0.912 0.924 dbt_tas_create_split_rows_or_c 2550 16.2 0.013 0.014 0.907 0.920 compute_W_cubic_GW 10 7.0 0.001 0.001 0.860 0.870 mp_max_i 2044 9.6 0.663 0.861 0.663 0.861 get_2c_integrals 1 6.0 0.000 0.000 0.804 0.804 reshape_mm_compatible 807 14.1 0.020 0.021 0.783 0.798 dbm_add 807 14.1 0.681 0.773 0.681 0.773 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=78.207, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=38.03, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.065999999999995, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=32.117, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.49, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.488, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=4.102, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=1.778, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.166, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.789000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.658, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.402, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.411, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.78, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.063, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.365, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.551, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.011, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.021 0.021 211.106 211.106 qs_forces 1 2.0 0.000 0.000 210.558 210.558 rebuild_ks_matrix 7 6.6 0.000 0.000 209.118 209.118 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 209.118 209.118 hfx_ks_matrix 7 8.6 0.000 0.000 207.047 207.047 hfx_ri_update_ks 7 9.6 0.000 0.000 160.259 160.259 hfx_ri_update_ks_Pmat 7 10.6 30.508 30.508 160.251 160.251 dbt_total 849 11.0 0.006 0.006 154.746 154.746 qs_energies 1 3.0 0.000 0.000 143.082 143.082 scf_env_do_scf 1 4.0 0.000 0.000 142.729 142.729 qs_ks_update_qs_env 8 6.0 0.000 0.000 141.704 141.704 dbt_tas_total 369 13.4 0.430 0.430 134.888 134.888 dbt_contract 207 12.4 0.297 0.297 134.820 134.820 dbt_tas_multiply 216 13.5 0.001 0.001 130.302 130.302 dbt_tas_dbm 216 15.5 0.001 0.001 118.047 118.047 dbm_multiply 216 17.5 118.042 118.042 118.042 118.042 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 105.383 105.383 dbt_tas_mm_2 91 16.5 0.001 0.001 99.784 99.784 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 87.844 87.844 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 67.426 67.426 init_scf_loop 2 5.0 0.000 0.000 54.882 54.882 hfx_ri_update_forces 1 7.0 1.629 1.629 46.785 46.785 hfx_ri_forces_Pmat_3c 1 8.0 4.522 4.522 29.721 29.721 dbt_copy 423 11.8 0.052 0.052 15.330 15.330 precalc_derivatives 1 8.0 2.499 2.499 12.515 12.515 dbt_reshape 132 13.2 7.718 7.718 10.662 10.662 dbt_tas_mm_3T 77 17.1 0.000 0.000 10.291 10.291 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.376 9.376 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.438 7.438 dbt_tas_mm_3N 37 15.4 0.000 0.000 7.420 7.420 dbm_reserve_blocks 1491 16.2 6.323 6.323 6.323 6.323 dbt_tas_reserve_blocks_index 1323 15.4 0.201 0.201 6.272 6.272 build_3c_derivatives 3 9.0 1.942 1.942 6.238 6.238 dbt_tas_reshape 168 14.5 0.003 0.003 6.124 6.124 dbt_reserve_blocks_index 846 14.4 0.119 0.119 4.898 4.898 dbt_reserve_blocks_index_array 816 13.5 0.009 0.009 4.789 4.789 dbt_crop 372 13.7 2.987 2.987 4.394 4.394 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 4.358 4.358 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.020 47.309 47.312 qs_forces 1 2.0 0.000 0.000 46.944 46.945 rebuild_ks_matrix 7 6.6 0.000 0.000 46.043 46.045 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 46.043 46.045 hfx_ks_matrix 7 8.6 0.000 0.000 45.028 45.029 dbt_total 849 11.0 0.007 0.007 39.952 39.955 dbt_contract 207 12.4 0.026 0.026 30.458 30.470 dbt_tas_total 369 13.4 0.046 0.047 27.060 27.061 dbt_tas_multiply 216 13.5 0.001 0.001 26.659 26.663 hfx_ri_update_ks 7 9.6 0.000 0.000 25.598 25.599 hfx_ri_update_ks_Pmat 7 10.6 1.322 1.402 25.597 25.597 qs_energies 1 3.0 0.000 0.000 24.369 24.370 scf_env_do_scf 1 4.0 0.000 0.001 24.163 24.163 qs_ks_update_qs_env 8 6.0 0.000 0.000 23.481 23.482 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 22.564 22.564 dbt_tas_dbm 216 15.5 0.001 0.001 20.324 20.330 hfx_ri_update_forces 1 7.0 0.062 0.066 19.429 19.429 dbm_multiply 216 17.5 17.879 18.648 17.879 18.648 hfx_ri_forces_Pmat_3c 1 8.0 0.179 0.188 14.289 14.304 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.458 13.458 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 11.004 11.005 init_scf_loop 2 5.0 0.000 0.000 10.704 10.704 dbt_tas_mm_2 91 16.5 0.001 0.001 8.980 8.984 dbt_copy 539 12.5 0.014 0.015 8.254 8.598 mp_sync 2901 12.8 5.171 6.720 5.171 6.720 dbt_reshape 393 13.9 3.042 3.199 6.243 6.487 dbt_tas_mm_3T 77 17.1 0.000 0.000 4.891 5.228 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.522 4.850 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.795 4.795 precalc_derivatives 1 8.0 0.098 0.107 3.880 3.880 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.498 3.498 mp_waitall_2 1318 16.2 3.279 3.434 3.279 3.434 dbm_reserve_blocks 1641 16.6 2.857 3.186 2.857 3.186 dbt_tas_reserve_blocks_index 1471 15.8 0.138 0.147 2.782 3.092 dbt_reserve_blocks_index 1107 14.8 0.126 0.131 2.301 2.563 dbt_crop 372 13.7 1.772 1.809 2.448 2.548 dbt_reserve_blocks_index_array 1077 13.9 0.006 0.006 2.277 2.538 dbt_communicate_buffer 393 14.9 0.013 0.014 2.271 2.403 build_3c_derivatives 3 9.0 0.198 0.213 2.165 2.171 convert_to_new_pgrid 648 15.5 0.039 0.076 1.781 1.957 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.862 1.866 dbt_tas_replicate 170 15.1 0.658 0.693 1.707 1.776 dbm_copy 452 16.3 1.578 1.745 1.578 1.745 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.524 1.524 dbt_tas_copy 146 12.6 0.695 0.715 1.266 1.369 dbt_tas_communicate_buffer 370 16.3 0.012 0.013 1.090 1.135 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=211.106, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=47.309, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=43.992999999999995, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=118.042, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=30.508, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.718, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.323, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.522, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=13.579999999999998, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.879, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.322, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.042, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.857, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.179, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.171, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.279, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 192.410 192.410 qs_energies 1 2.0 0.000 0.000 192.216 192.216 mp2_main 1 3.0 0.000 0.000 187.452 187.452 mp2_gpw_main 1 4.0 0.001 0.001 187.100 187.100 mp2_ri_gpw_compute_in 1 5.0 0.621 0.621 141.553 141.553 mp2_ri_gpw_compute_in_loop 1 6.0 0.015 0.015 131.410 131.410 mp2_eri_3c_integrate_gpw 2656 7.0 0.019 0.019 85.611 85.611 integrate_v_rspace 2666 8.0 0.816 0.816 68.504 68.504 grid_integrate_task_list 2666 9.0 64.223 64.223 64.223 64.223 mp2_ri_gpw_compute_en 1 5.0 0.123 0.123 45.527 45.527 mp2_ri_gpw_compute_en_RI_loop 1 6.0 10.859 10.859 42.768 42.768 dbcsr_multiply_generic 5322 8.0 0.379 0.379 38.108 38.108 ao_to_mo_and_store_B_mult_1 2656 7.0 0.021 0.021 38.101 38.101 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.171 2.171 23.828 23.828 local_gemm 2080 8.0 21.656 21.656 21.656 21.656 make_m2s 10644 9.0 0.085 0.085 19.675 19.675 make_images 10644 10.0 3.990 3.990 18.958 18.958 multiply_cannon 5322 9.0 1.032 1.032 15.086 15.086 fft_wrap_pw1pw2 53228 10.4 0.129 0.129 15.026 15.026 hybrid_alltoall_any 13323 11.6 13.329 13.329 13.711 13.711 make_images_data 10644 11.0 0.093 0.093 13.580 13.580 collocate_function 2656 8.0 7.980 7.980 13.534 13.534 multiply_cannon_loop 5322 10.0 0.245 0.245 12.959 12.959 multiply_cannon_multrec 5322 11.0 9.949 9.949 10.009 10.009 fft_wrap_pw1pw2_20 21271 11.4 0.817 0.817 9.742 9.742 get_2c_integrals 1 6.0 0.000 0.000 9.521 9.521 compute_2c_integrals 1 7.0 0.006 0.006 9.023 9.023 compute_2c_integrals_loop_lm 1 8.0 0.009 0.009 9.011 9.011 mp2_eri_2c_integrate_gpw 1 9.0 0.929 0.929 9.001 9.001 fft3d_s 53229 12.4 8.850 8.850 8.881 8.881 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 3.026 3.026 7.588 7.588 potential_pw2rs 5322 10.0 0.157 0.157 6.037 6.037 mp2_ri_gpw_compute_en_ener 2080 7.0 6.028 6.028 6.028 6.028 copy_dbcsr_to_fm 2679 8.0 0.054 0.054 5.161 5.161 fft_wrap_pw1pw2_10 31893 11.5 0.201 0.201 4.914 4.914 scf_env_do_scf 1 3.0 0.000 0.000 4.359 4.359 scf_env_do_scf_inner_loop 10 4.0 0.002 0.002 4.359 4.359 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.004 0.004 4.131 4.131 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.115 0.120 37.689 37.690 qs_energies 1 2.0 0.000 0.000 37.280 37.281 mp2_main 1 3.0 0.000 0.001 35.169 35.170 mp2_gpw_main 1 4.0 0.001 0.001 35.050 35.051 mp2_ri_gpw_compute_en 1 5.0 0.219 0.226 17.611 17.762 mp2_ri_gpw_compute_in 1 5.0 0.051 0.052 17.368 17.574 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.738 1.937 16.610 16.614 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.129 16.339 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.779 14.040 integrate_v_rspace 93 8.1 0.113 0.124 13.747 14.019 grid_integrate_task_list 93 9.1 13.460 13.737 13.460 13.737 mp2_ri_gpw_compute_en_expansio 65 7.0 0.108 0.128 11.924 12.025 local_gemm 65 8.0 11.815 11.925 11.815 11.925 mp2_ri_gpw_compute_en_comm 17 7.0 0.073 0.099 2.584 2.891 mp_sendrecv_dm3 1054 8.0 2.067 2.515 2.067 2.515 dbcsr_multiply_generic 176 8.0 0.010 0.010 2.031 2.194 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.013 2.173 scf_env_do_scf 1 3.0 0.000 0.000 1.993 1.993 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 1.993 1.993 multiply_cannon 176 9.0 0.021 0.022 1.250 1.318 multiply_cannon_loop 176 10.0 0.002 0.003 1.189 1.257 get_2c_integrals 1 6.0 0.000 0.001 1.179 1.191 multiply_cannon_multrec 246 11.0 1.034 1.069 1.039 1.075 qs_scf_new_mos 10 5.0 0.000 0.000 0.997 1.001 eigensolver 11 5.8 0.001 0.001 0.944 0.946 make_m2s 352 9.0 0.003 0.004 0.739 0.845 compute_2c_integrals 1 7.0 0.002 0.004 0.829 0.838 make_images 352 10.0 0.042 0.043 0.728 0.834 compute_2c_integrals_loop_lm 1 8.0 0.001 0.001 0.726 0.761 mp2_eri_2c_integrate_gpw 1 9.0 0.209 0.218 0.726 0.761 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=192.41, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=37.689, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=72.394, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=64.223, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=21.656, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=13.329, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=10.859, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=9.949, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.574999999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.46, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.815, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.738, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=1.034, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.067, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.098 0.098 133.947 133.947 qs_energies 1 2.0 0.000 0.000 132.601 132.601 scf_env_do_scf 1 3.0 0.000 0.000 126.339 126.339 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 126.339 126.339 qs_ks_update_qs_env 15 5.0 0.000 0.000 62.871 62.871 rebuild_ks_matrix 15 6.0 0.000 0.000 62.621 62.621 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 62.621 62.621 qs_vxc_create 15 8.0 0.000 0.000 46.100 46.100 qs_scf_new_mos 15 5.0 0.000 0.000 42.110 42.110 calculate_dispersion_nonloc 15 9.0 7.115 7.115 39.960 39.960 fft_wrap_pw1pw2 1086 10.0 0.024 0.024 34.309 34.309 eigensolver 15 6.0 0.002 0.002 31.698 31.698 fft_wrap_pw1pw2_150 765 11.0 9.643 9.643 24.870 24.870 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.353 23.353 cp_fm_diag_elpa_base 15 8.0 22.030 22.030 23.352 23.352 qs_rho_update_rho_low 16 5.0 0.000 0.000 17.337 17.337 calculate_rho_elec 16 6.0 0.226 0.226 17.337 17.337 grid_collocate_task_list 16 7.0 15.355 15.355 15.355 15.355 sum_up_and_integrate 15 8.0 0.000 0.000 15.298 15.298 integrate_v_rspace 15 9.0 0.021 0.021 15.284 15.284 grid_integrate_task_list 15 10.0 14.407 14.407 14.407 14.407 fft3d_s 1087 12.0 10.677 10.677 10.686 10.686 pw_scatter_s 585 12.1 9.485 9.485 9.485 9.485 fft_wrap_pw1pw2_200 197 11.3 2.474 2.474 9.194 9.194 copy_dbcsr_to_fm 16 5.9 0.001 0.001 7.899 7.899 dbcsr_complete_redistribute 46 8.3 2.478 2.478 7.201 7.201 cp_fm_cholesky_restore 45 7.0 7.004 7.004 7.004 7.004 vdW_energy 15 10.0 6.181 6.181 6.181 6.181 xc_vxc_pw_create 15 9.0 0.211 0.211 6.140 6.140 gspace_mixing 14 5.0 0.171 0.171 4.872 4.872 xc_pw_derive 90 11.0 0.001 0.001 4.478 4.478 broyden_mixing 14 6.0 4.095 4.095 4.095 4.095 dbcsr_finalize 159 9.9 0.011 0.011 3.388 3.388 dbcsr_merge_all 91 11.1 0.066 0.066 3.235 3.235 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.154 3.154 xc_pw_divergence 15 10.0 0.000 0.000 3.066 3.066 calculate_dm_sparse 15 6.0 0.019 0.019 3.064 3.064 cp_dbcsr_plus_fm_fm_t 15 7.0 0.001 0.001 2.948 2.948 xc_rho_set_and_dset_create 15 10.0 0.180 0.180 2.848 2.848 init_scf_run 1 3.0 0.000 0.000 2.687 2.687 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.018 61.458 61.460 qs_energies 1 2.0 0.000 0.000 61.161 61.162 scf_env_do_scf 1 3.0 0.000 0.000 57.141 57.143 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 57.141 57.143 qs_ks_update_qs_env 15 5.0 0.000 0.001 26.416 26.424 rebuild_ks_matrix 15 6.0 0.000 0.000 26.373 26.380 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 26.372 26.380 qs_scf_new_mos 15 5.0 0.001 0.001 15.832 15.852 qs_rho_update_rho_low 16 5.0 0.000 0.000 15.094 15.100 calculate_rho_elec 16 6.0 0.007 0.007 15.094 15.100 sum_up_and_integrate 15 8.0 0.000 0.001 14.846 14.888 integrate_v_rspace 15 9.0 0.001 0.001 14.836 14.883 eigensolver 15 6.0 0.002 0.002 14.555 14.636 grid_collocate_task_list 16 7.0 13.910 14.217 13.910 14.217 grid_integrate_task_list 15 10.0 13.857 14.080 13.857 14.080 qs_vxc_create 15 8.0 0.001 0.001 11.062 11.076 cp_fm_diag_elpa 15 7.0 0.000 0.000 10.277 10.283 cp_fm_diag_elpa_base 15 8.0 10.114 10.143 10.271 10.273 calculate_dispersion_nonloc 15 9.0 0.816 1.201 9.001 9.034 fft_wrap_pw1pw2 1086 10.0 0.026 0.029 8.610 8.795 fft3d_ps 1086 12.0 2.574 2.943 6.836 7.266 fft_wrap_pw1pw2_150 765 11.0 0.157 0.188 5.870 5.955 mp_alltoall_z22v 1086 14.0 3.708 4.541 3.708 4.541 cp_fm_cholesky_restore 45 7.0 4.091 4.181 4.091 4.181 yz_to_x 501 12.9 0.220 0.265 2.343 2.781 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.714 2.714 fft_wrap_pw1pw2_200 197 11.3 0.111 0.135 2.585 2.701 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.415 2.607 x_to_yz 585 13.1 0.304 0.341 1.890 2.235 xc_vxc_pw_create 15 9.0 0.017 0.023 2.060 2.094 build_core_ppnl 1 5.0 1.465 1.614 1.465 1.614 vdW_energy 15 10.0 1.428 1.501 1.428 1.501 xc_pw_derive 90 11.0 0.001 0.001 1.389 1.477 density_rs2pw 16 7.0 0.001 0.001 1.015 1.323 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="diag_cu144_broy", label="diag_cu144_broy", y=133.947, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="diag_cu144_broy", label="diag_cu144_broy", y=61.458, yerr=0.0 Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=54.831, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.03, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.355, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.407, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.677, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=9.643, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=7.004, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.620999999999995, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=10.114, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=13.91, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.857, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.157, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.091, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.708, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.085 0.085 278.069 278.069 qs_energies 1 2.0 0.000 0.000 277.919 277.919 ls_scf 1 3.0 0.000 0.000 276.372 276.372 ls_scf_main 1 4.0 0.001 0.001 268.466 268.466 density_matrix_trs4 11 5.0 0.006 0.006 174.110 174.110 ls_scf_dm_to_ks 11 5.0 0.000 0.000 88.557 88.557 dbcsr_multiply_generic 185 6.1 0.835 0.835 88.525 88.525 matrix_ls_to_qs 11 6.0 0.000 0.000 84.777 84.777 arnoldi_extremal 12 6.1 0.000 0.000 64.708 64.708 arnoldi_normal_ev 12 7.1 0.031 0.031 64.707 64.707 build_subspace 23 8.1 0.080 0.080 63.462 63.462 dbcsr_matrix_vector_mult 652 9.0 0.319 0.319 62.871 62.871 dbcsr_matrix_vector_mult_local 652 10.0 61.212 61.212 61.224 61.224 multiply_cannon 185 7.1 0.367 0.367 46.818 46.818 dbcsr_complete_redistribute 23 7.5 32.234 32.234 45.620 45.620 dbcsr_copy 761 7.5 1.912 1.912 45.257 45.257 dbcsr_copy_into_existing 11 8.0 43.313 43.313 43.313 43.313 matrix_decluster 11 7.0 0.000 0.000 41.463 41.463 make_m2s 370 7.1 0.021 0.021 35.720 35.720 multiply_cannon_loop 185 8.1 0.384 0.384 35.197 35.197 make_images 370 8.1 8.093 8.093 32.824 32.824 multiply_cannon_multrec 185 9.1 25.250 25.250 25.301 25.301 dbcsr_finalize 559 7.6 0.332 0.332 18.727 18.727 dbcsr_merge_all 510 8.6 3.509 3.509 17.082 17.082 dbcsr_sort_indices 892 10.0 14.263 14.263 14.263 14.263 make_images_data 370 9.1 0.010 0.010 13.325 13.325 hybrid_alltoall_any 393 9.9 12.283 12.283 12.560 12.560 quick_finalize 395 10.0 0.381 0.381 12.143 12.143 tree_to_linear_d 23 10.5 11.740 11.740 11.740 11.740 dbcsr_dot 144 6.3 11.607 11.607 11.610 11.610 dbcsr_special_finalize 370 9.1 0.002 0.002 11.220 11.220 setup_rec_index_2d 370 8.1 11.147 11.147 11.147 11.147 calculate_norms 370 9.1 9.512 9.512 9.512 9.512 ls_scf_init_scf 1 4.0 0.000 0.000 7.125 7.125 ls_scf_init_matrix_S 1 5.0 0.000 0.000 6.585 6.585 dbcsr_frobenius_norm 142 6.1 6.172 6.172 6.175 6.175 matrix_qs_to_ls 12 5.1 0.000 0.000 6.158 6.158 matrix_cluster 12 6.1 0.000 0.000 6.158 6.158 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 5.618 5.618 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.014 85.411 85.413 qs_energies 1 2.0 0.000 0.000 85.277 85.280 ls_scf 1 3.0 0.000 0.000 85.224 85.227 ls_scf_main 1 4.0 0.001 0.007 82.152 82.155 density_matrix_trs4 11 5.0 0.007 0.008 78.923 79.022 dbcsr_multiply_generic 185 6.1 0.070 0.085 73.653 73.964 multiply_cannon 185 7.1 0.042 0.046 60.809 61.567 multiply_cannon_loop 185 8.1 0.177 0.188 58.019 58.723 multiply_cannon_multrec 1480 9.1 33.140 35.167 33.549 35.598 mp_waitall_1 11936 10.3 22.177 25.550 22.177 25.550 multiply_cannon_metrocomm3 1480 9.1 0.017 0.023 16.805 19.827 make_m2s 370 7.1 0.039 0.042 8.411 8.482 make_images 370 8.1 0.567 0.596 8.277 8.351 calculate_norms 2960 9.1 5.681 5.925 5.681 5.925 make_images_data 370 9.1 0.012 0.013 3.908 4.329 mp_sum_l 799 7.0 3.405 4.272 3.405 4.272 arnoldi_extremal 12 6.1 0.001 0.001 3.771 3.786 arnoldi_normal_ev 12 7.1 0.001 0.006 3.771 3.785 build_subspace 23 8.1 0.026 0.031 3.651 3.663 hybrid_alltoall_any 393 9.9 0.245 1.458 3.330 3.585 multiply_cannon_metrocomm1 1480 9.1 0.008 0.009 1.762 3.458 dbcsr_matrix_vector_mult 652 9.0 0.021 0.079 3.037 3.130 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.282 3.114 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.782 2.910 dbcsr_complete_redistribute 23 7.5 1.407 1.496 2.466 2.606 matrix_ls_to_qs 11 6.0 0.000 0.000 2.449 2.590 dbcsr_matrix_vector_mult_local 652 10.0 2.256 2.377 2.259 2.380 matrix_decluster 11 7.0 0.000 0.000 2.235 2.371 ls_scf_init_scf 1 4.0 0.000 0.000 2.250 2.251 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.220 2.227 make_images_pack 370 9.1 2.026 2.189 2.031 2.194 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 2.023 2.025 buffer_matrices_ensure_size 370 8.1 1.625 1.864 1.625 1.864 dbcsr_finalize 559 7.6 0.010 0.011 1.625 1.770 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="bench_dftb", label="bench_dftb", y=278.069, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="bench_dftb", label="bench_dftb", y=85.411, yerr=0.0 Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=92.285, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=61.212, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=43.313, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=32.234, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=25.25, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_sort_indices", label="dbcsr_sort_indices", y=14.263, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.512, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=17.345, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.256, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.407, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=33.14, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_sort_indices", label="dbcsr_sort_indices", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.681, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.405, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=22.177, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.004 46.011 46.011 lib_test 1 2.0 0.000 0.000 46.005 46.005 dbcsr_run_tests 3 3.0 0.000 0.000 46.005 46.005 test_multiplies_multiproc 3 4.0 0.001 0.001 31.711 31.711 dbcsr_multiply_generic 9 5.0 0.002 0.002 23.064 23.064 dbcsr_make_random_matrix 9 4.0 10.146 10.146 14.149 14.149 multiply_cannon 9 6.0 0.002 0.002 12.586 12.586 multiply_cannon_loop 9 7.0 0.015 0.015 12.292 12.292 multiply_cannon_multrec 9 8.0 12.277 12.277 12.277 12.277 dbcsr_finalize 27 5.7 0.025 0.025 7.992 7.992 dbcsr_redistribute 9 5.0 4.691 4.691 6.925 6.925 dbcsr_merge_all 18 6.5 3.123 3.123 6.870 6.870 make_m2s 18 6.0 0.001 0.001 5.757 5.757 make_images 18 7.0 0.500 0.500 5.697 5.697 make_images_data 18 8.0 0.000 0.000 4.553 4.553 hybrid_alltoall_any 18 9.0 4.526 4.526 4.526 4.526 dbcsr_data_release 975 7.6 3.906 3.906 3.906 3.906 tree_to_linear_d 9 7.0 2.493 2.493 2.493 2.493 dbcsr_destroy 93 5.8 0.001 0.001 1.863 1.863 mp_alltoall_d11v 27 6.0 1.754 1.754 1.754 1.754 dbcsr_data_copy_aa2 9 7.0 1.216 1.216 1.216 1.216 dbcsr_work_destroy_all 45 7.6 0.001 0.001 1.068 1.068 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.008 20.730 20.731 lib_test 1 2.0 0.000 0.000 20.699 20.719 dbcsr_run_tests 3 3.0 0.000 0.001 20.698 20.718 test_multiplies_multiproc 3 4.0 0.000 0.003 19.974 20.027 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.600 18.705 multiply_cannon 9 6.0 0.002 0.002 16.376 16.825 multiply_cannon_loop 9 7.0 0.003 0.003 16.041 16.476 multiply_cannon_multrec 72 8.0 11.955 12.579 11.956 12.580 mp_waitall_1 576 9.2 4.464 5.594 4.464 5.594 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 3.982 5.080 mp_sum_l 70 5.1 0.837 1.525 0.837 1.525 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.832 1.520 make_m2s 18 6.0 0.001 0.001 0.770 0.820 make_images 18 7.0 0.019 0.020 0.767 0.817 dbcsr_finalize 27 5.7 0.000 0.000 0.666 0.808 dbcsr_merge_all 18 6.5 0.106 0.124 0.573 0.739 dbcsr_make_random_matrix 9 4.0 0.523 0.531 0.693 0.730 dbcsr_data_release 444 7.6 0.593 0.686 0.593 0.686 dbcsr_destroy 111 5.9 0.002 0.049 0.504 0.586 dbcsr_redistribute 9 5.0 0.220 0.276 0.502 0.562 make_images_data 18 8.0 0.001 0.001 0.425 0.499 dbcsr_data_copy_aa2 18 7.5 0.334 0.453 0.334 0.453 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.098 0.450 hybrid_alltoall_any 18 9.0 0.037 0.156 0.372 0.445 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="dbcsr", label="dbcsr", y=46.011, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="dbcsr", label="dbcsr", y=20.73, yerr=0.0 Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=10.465000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.277, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=10.146, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=4.691, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=4.526, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.906, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.100999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.955, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.523, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.22, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.037, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.593, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.464, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.837, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.081 0.081 160.376 160.376 qs_mol_dyn_low 1 2.0 0.004 0.004 159.006 159.006 qs_forces 6 3.8 0.001 0.001 104.448 104.448 qs_energies 6 4.8 0.000 0.000 100.057 100.057 scf_env_do_scf 6 5.8 0.001 0.001 97.337 97.337 scf_env_do_scf_inner_loop 113 6.2 0.014 0.014 93.042 93.042 rebuild_ks_matrix 119 8.1 0.000 0.000 74.814 74.814 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.016 74.813 74.813 velocity_verlet 5 3.0 0.003 0.003 74.238 74.238 qs_ks_update_qs_env 119 7.3 0.001 0.001 70.837 70.837 fft_wrap_pw1pw2 2059 12.4 0.036 0.036 64.681 64.681 fft_wrap_pw1pw2_150 1321 13.9 19.232 19.232 63.570 63.570 qs_vxc_create 119 10.1 0.002 0.002 48.109 48.109 xc_vxc_pw_create 119 11.1 1.754 1.754 48.107 48.107 qmmm_el_coupling 6 3.8 0.000 0.000 46.642 46.642 qmmm_elec_with_gaussian 6 4.8 0.013 0.013 46.639 46.639 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 46.056 46.056 qmmm_elec_gaussian_low_G 6 6.8 45.221 45.221 45.221 45.221 xc_pw_derive 714 13.1 0.005 0.005 36.564 36.564 xc_pw_divergence 119 12.1 0.002 0.002 24.885 24.885 qs_rho_update_rho_low 119 7.3 0.001 0.001 23.005 23.005 calculate_rho_elec 119 8.3 1.493 1.493 23.004 23.004 fft3d_s 2060 14.4 22.011 22.011 22.023 22.023 xc_rho_set_and_dset_create 119 12.1 1.755 1.755 21.295 21.295 pw_scatter_s 1095 14.8 19.111 19.111 19.111 19.111 density_rs2pw 119 9.3 0.004 0.004 12.594 12.594 sum_up_and_integrate 119 10.1 0.002 0.002 11.268 11.268 integrate_v_rspace 119 11.1 0.029 0.029 11.162 11.162 qs_ks_ddapc 119 10.1 0.002 0.002 9.298 9.298 grid_collocate_task_list 119 9.3 8.917 8.917 8.917 8.917 potential_pw2rs 119 12.1 0.044 0.044 6.937 6.937 qmmm_forces 6 3.8 0.001 0.001 5.322 5.322 qmmm_forces_with_gaussian 6 4.8 0.020 0.020 4.952 4.952 pw_integral_ab_c1d_c1d_gs 280 11.0 4.595 4.595 4.595 4.595 init_scf_loop 6 6.8 0.000 0.000 4.291 4.291 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.249 4.249 grid_integrate_task_list 119 12.1 4.196 4.196 4.196 4.196 qs_scf_new_mos 113 7.2 0.001 0.001 4.187 4.187 qs_scf_loop_do_ot 113 8.2 0.001 0.001 4.186 4.186 ot_scf_mini 113 9.2 0.001 0.001 4.024 4.024 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.020 4.020 cp_ddapc_apply_CD 119 11.1 0.022 0.022 3.941 3.941 pw_poisson_solve 125 9.9 0.002 0.002 3.781 3.781 dbcsr_multiply_generic 2598 12.3 0.099 0.099 3.632 3.632 qmmm_forces_gaussian_low_G 6 6.8 3.507 3.507 3.507 3.507 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.043 60.160 60.161 qs_mol_dyn_low 1 2.0 0.003 0.004 58.895 58.952 qs_forces 6 3.8 0.001 0.001 45.159 45.160 qs_energies 6 4.8 0.000 0.000 43.276 43.277 scf_env_do_scf 6 5.8 0.000 0.001 42.292 42.293 scf_env_do_scf_inner_loop 113 6.2 0.003 0.022 40.651 40.652 rebuild_ks_matrix 119 8.1 0.000 0.000 29.725 29.734 qs_ks_build_kohn_sham_matrix 119 9.1 0.019 0.035 29.725 29.733 qs_ks_update_qs_env 119 7.3 0.001 0.002 28.124 28.132 velocity_verlet 5 3.0 0.002 0.004 22.710 22.712 fft_wrap_pw1pw2 2059 12.4 0.043 0.047 17.611 18.238 fft_wrap_pw1pw2_150 1321 13.9 0.629 0.708 16.746 17.614 fft3d_ps 2059 14.4 6.380 7.084 13.848 15.016 qs_vxc_create 119 10.1 0.002 0.003 14.228 14.234 xc_vxc_pw_create 119 11.1 0.163 0.217 14.226 14.231 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.642 11.644 calculate_rho_elec 119 8.3 0.050 0.056 11.642 11.644 xc_pw_derive 714 13.1 0.009 0.012 10.742 11.224 sum_up_and_integrate 119 10.1 0.002 0.002 10.915 10.949 integrate_v_rspace 119 11.1 0.004 0.005 10.848 10.882 mp_alltoall_z22v 2059 16.4 6.185 7.777 6.185 7.777 qmmm_forces 6 3.8 0.002 0.003 7.664 7.665 qmmm_forces_with_gaussian 6 4.8 0.006 0.007 7.286 7.498 xc_pw_divergence 119 12.1 0.004 0.004 7.158 7.456 density_rs2pw 119 9.3 0.005 0.006 7.086 7.342 xc_rho_set_and_dset_create 119 12.1 0.372 0.459 6.651 6.796 potential_pw2rs 119 12.1 0.006 0.007 6.693 6.716 qmmm_el_coupling 6 3.8 0.000 0.000 5.351 5.480 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 5.350 5.478 transfer_pw2rs 500 12.8 0.006 0.006 4.871 4.889 yz_to_x 964 15.0 0.569 0.703 3.581 4.587 grid_collocate_task_list 119 9.3 4.316 4.537 4.316 4.537 x_to_yz 1095 15.8 0.666 0.789 3.839 4.487 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.060 4.113 transfer_rs2pw 488 10.2 0.008 0.008 3.871 4.076 grid_integrate_task_list 119 12.1 3.869 3.979 3.869 3.979 mp_waitany 4028 12.8 3.416 3.911 3.416 3.911 transfer_pw2rs_150 125 13.9 1.246 1.334 3.389 3.435 qmmm_forces_gaussian_low_G 6 6.8 3.322 3.373 3.322 3.373 transfer_rs2pw_150 125 11.2 1.144 1.285 2.857 3.057 pw_restrict_s3 18 5.8 1.489 1.528 2.643 2.849 qs_scf_new_mos 113 7.2 0.001 0.001 2.777 2.789 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.776 2.788 dbcsr_multiply_generic 2598 12.3 0.063 0.065 2.649 2.701 ot_scf_mini 113 9.2 0.001 0.001 2.679 2.686 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 2.319 2.467 qs_ks_ddapc 119 10.1 0.002 0.003 2.216 2.362 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.162 2.291 pw_prolongate_s3 18 6.8 1.218 1.251 2.162 2.291 mp_sum_d 5830 12.2 1.462 2.079 1.462 2.079 pw_gather_p 964 14.0 1.281 2.037 1.281 2.037 qmmm_elec_gaussian_low_G 6 6.8 1.792 1.937 1.792 1.937 ot_mini 113 10.2 0.001 0.001 1.864 1.869 init_scf_loop 6 6.8 0.000 0.000 1.637 1.637 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.628 1.628 mp_waitall_1 178435 16.4 1.463 1.579 1.463 1.579 qs_ot_get_derivative 113 11.2 0.001 0.001 1.552 1.560 pw_copy 2027 12.4 1.331 1.508 1.331 1.508 pw_derive 1089 13.4 1.273 1.443 1.273 1.443 pw_poisson_solve 125 9.9 0.003 0.003 1.299 1.393 mp_sum_dm3 33 5.7 1.252 1.333 1.252 1.333 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="MQAE_single_node", label="MQAE_single_node", y=160.376, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="MQAE_single_node", label="MQAE_single_node", y=60.16, yerr=0.0 Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=41.68800000000002, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=45.221, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=22.011, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=19.232, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=19.111, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.917, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=4.196, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=33.57299999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=1.792, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.629, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.316, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.869, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.416, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=6.185, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.38, yerr=0.0 Summary: Performance test took 33 minutes. Status: OK ---> Removed intermediate container ff00060c3d9f ---> 584ea211051a Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 1c9268505e52 ---> Removed intermediate container 1c9268505e52 ---> 912712b337d1 Step 42/42 : ENTRYPOINT [] ---> Running in 86cc0a679945 ---> Removed intermediate container 86cc0a679945 ---> f1e6127f7ab8 [Warning] One or more build-args [GIT_COMMIT_SHA SPACK_CACHE] were not consumed Successfully built f1e6127f7ab8 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2026-01-18 07:01:48+00:00