StartDate: 2026-01-08 06:15:47+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 1f1a7a2a435155a403097f4c56526f55ad971118 CommitTime: 2026-01-07 16:23:01 +0100 CommitAuthor: Juerg Hutter CommitSubject: Response force error calculation from external sampling (#4672) #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=1f1a7a2a435155a403097f4c56526f55ad971118 SPACK_CACHE=gs://cp2k-spack-cache Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 408.9MB Step 1/43 : FROM ubuntu:24.04 24.04: Pulling from library/ubuntu 20043066d3d5: Pulling fs layer 20043066d3d5: Verifying Checksum 20043066d3d5: Download complete 20043066d3d5: Pull complete Digest: sha256:c35e29c9450151419d9448b0fd75374fec4fff364a27f176fb458d472dfc9e54 Status: Downloaded newer image for ubuntu:24.04 ---> c3a134f2ace4 Step 2/43 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> cf2ccec2f0d0 Step 3/43 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> b0e2ce66f72f Step 4/43 : RUN ./install_requirements.sh ubuntu:24.04 ---> Using cache ---> 726340c67757 Step 5/43 : RUN mkdir scripts ---> Using cache ---> 1cd30608746f Step 6/43 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> febf7993997c Step 7/43 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 76050c213933 Step 8/43 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-dbcsr --with-gcc=system --dry-run ---> Using cache ---> 7be52ef4af6d Step 9/43 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 8b6dda310588 Step 10/43 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> e6b7b0beab4f Step 11/43 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 7f56c08f60f2 Step 12/43 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 01af32f31136 Step 13/43 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 6bd07b18e93d Step 14/43 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> d4722658ddd8 Step 15/43 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 0b1f5d34e655 Step 16/43 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> b2d619724ba1 Step 17/43 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 5c34e4e0675b Step 18/43 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 38e2672e68db Step 19/43 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> dfb7d10114d6 Step 20/43 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> c19566fa37f6 Step 21/43 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 54b6c4b07180 Step 22/43 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 487295823632 Step 23/43 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> fad150ed270a Step 24/43 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> d2af71cff7cf Step 25/43 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 7f396c29bb81 Step 26/43 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 49f494a23fe3 Step 27/43 : COPY ./tools/toolchain/scripts/stage9/ ./scripts/stage9/ ---> Using cache ---> 1044cfeb8acf Step 28/43 : RUN ./scripts/stage9/install_stage9.sh && rm -rf ./build ---> Using cache ---> 48572348bb25 Step 29/43 : WORKDIR /opt/cp2k ---> Using cache ---> a58a9797bb7e Step 30/43 : COPY ./src ./src ---> Using cache ---> 0de31b85bdd1 Step 31/43 : COPY ./data ./data ---> Using cache ---> bc09c6f1be9d Step 32/43 : COPY ./tests ./tests ---> Using cache ---> 3cd38fae3e41 Step 33/43 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> a8ac1cfc8e10 Step 34/43 : COPY ./cmake ./cmake ---> Using cache ---> 8f87fb3a5048 Step 35/43 : COPY ./CMakeLists.txt . ---> Using cache ---> c789a2cacebc Step 36/43 : COPY ./tools/docker/scripts/build_cp2k.sh . ---> Using cache ---> 6e8c756f86b0 Step 37/43 : RUN ./build_cp2k.sh toolchain psmp ---> Running in c4904e8a5a11 ==================== Building CP2K ==================== -- The Fortran compiler identification is GNU 13.3.0 -- The C compiler identification is GNU 13.3.0 -- The CXX compiler identification is GNU 13.3.0 -- Detecting Fortran compiler ABI info -- Detecting Fortran compiler ABI info - done -- Check for working Fortran compiler: /usr/bin/gfortran - skipped -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: /usr/bin/gcc - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: /usr/bin/g++ - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found PkgConfig: /usr/bin/pkg-config (found version "1.8.1") -- Found Python: /usr/bin/python3.12 (found version "3.12.3") found components: Interpreter -- Found MPI_C: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpi.so (found version "4.1") -- Found MPI_CXX: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpicxx.so (found version "4.1") -- Found MPI_Fortran: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpifort.so (found version "4.1") -- Found MPI: TRUE (found version "4.1") found components: C CXX Fortran -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE -- Found MPI: TRUE (found version "4.1") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Could NOT find MKL (missing: CP2K_MKL_INCLUDE_DIRS) -- Checking for module 'openblas' -- Found openblas, version 0.3.30 -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/include -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found Lapack: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Checking for module 'libxsmm-shared' -- Found libxsmm-shared, version 1.17.0 -- Checking for module 'libxsmmf-shared' -- Found libxsmmf-shared, version 1.17.0 -- Checking for module 'libxsmmext-shared' -- Found libxsmmext-shared, version 1.17.0 -- Checking for module 'libxsmmnoblas-shared' -- Found libxsmmnoblas-shared, version 1.17.0 -- Found LibXSMM: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include -- Using LIBXSMM for Small Matrix Multiplication -- Checking for module 'scalapack' -- Package 'mpi', required by 'scalapack', not found Package 'lapack', required by 'scalapack', not found Package 'blas', required by 'scalapack', not found -- Found SCALAPACK: /opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a ------------------------------------------------------------ - OPENMP - ------------------------------------------------------------ -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: Fortran C CXX ------------------------------------------------------------ - DBCSR - ------------------------------------------------------------ -- Found MPI: TRUE (found version "4.1") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Checking for module 'libxsmmf' -- Found libxsmmf, version 1.17.0 -- Checking for module 'libxsmmext' -- Found libxsmmext, version 1.17.0 ------------------------------------------------------------ - Other dependencies - ------------------------------------------------------------ -- Checking for one of the modules 'elpa_openmp' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a;:libopenblas.a -- Found HDF5: hdf5-shared;hdf5_fortran-shared (found version "1.14.6") found components: C Fortran -- Found MPI: TRUE (found version "4.1") found components: CXX -- Found OPENBLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Found LibVORI: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a -- Checking for one of the modules 'fftw3' -- Checking for one of the modules 'fftw3f' -- Checking for one of the modules 'fftw3l' -- Checking for one of the modules 'fftw3q' -- Found Fftw: /opt/cp2k-toolchain/install/fftw-3.3.10/include -- Checking for module 'libint2' -- Found libint2, version 2.6.0 -- Found Libint2: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include;/opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include/libint2 -- Component omp of Spglib: NOT FOUND -- Component fortran of Spglib: FOUND (LIB_TYPE: static) -- Found package: Spglib -- Found libsmeagol: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a -- Looking for Fortran sgemm -- Looking for Fortran sgemm - found -- mctc-lib: Find installed package -- multicharge: Find installed package -- Looking for Fortran sgemm -- Looking for Fortran sgemm - not found -- Found BLAS: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so -- Looking for Fortran cheev -- Looking for Fortran cheev - found -- Found LAPACK: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so;-lm;-ldl -- Found ACE: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a -- Checking for one of the modules 'plumed;plumedInternals' -- Found Plumed: /opt/cp2k-toolchain/install/plumed-2.9.3/include -- Found MPI: TRUE (found version "4.1") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Found Torch: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so -- Checking for modules 'mclf;mcl' -- Package 'mclf', required by 'virtual:world', not found -- Package 'mcl', required by 'virtual:world', not found -- Found MiMiC: True -- Checking for module 'trexio' -- Found trexio, version 2.6.0 -- Found TrexIO: /opt/cp2k-toolchain/install/trexio-2.6.0/include;/opt/cp2k-toolchain/install/hdf5-1.14.6/include -- Setting build type to 'Release' as none was specified. -- Performing Test f2008-norm2 -- Performing Test f2008-norm2 - Success -- Performing Test f2008-block_construct -- Performing Test f2008-block_construct - Success -- Performing Test f2008-contiguous -- Performing Test f2008-contiguous - Success -- Performing Test f95-reshape-order-allocatable -- Performing Test f95-reshape-order-allocatable - Success -- FYPP preprocessor found. -------------------------------------------------------------------- - - - Summary of enabled dependencies - - - -------------------------------------------------------------------- - BLAS - vendor: OpenBLAS - include directories: /opt/cp2k-toolchain/install/openblas-0.3.30/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so - LAPACK - include directories: /opt/cp2k-toolchain/install/openblas-0.3.30/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.30/lib/libopenblas.so - MPI - include directories: /opt/cp2k-toolchain/install/mpich-4.3.2/include - libraries: /opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpicxx.so;/opt/cp2k-toolchain/install/mpich-4.3.2/lib/libmpi.so - MPI_F08: ON - ScaLAPACK - vendor: auto - include directories: - libraries: /opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a - LibXC - version: 7.0.0 - include directories: /opt/cp2k-toolchain/install/libxc-7.0.0/include/ - libraries: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxcf03.a;/opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a - Spglib - include directories: /opt/cp2k-toolchain/install/spglib-2.5.0/include - LibTorch - extra CXX flags: -D_GLIBCXX_USE_CXX11_ABI=1 - include directories: /opt/cp2k-toolchain/install/libtorch-2.7.1/include;/opt/cp2k-toolchain/install/libtorch-2.7.1/include/torch/csrc/api/include - libraries: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so - HDF5 - version: 1.14.6 - include directories: /opt/cp2k-toolchain/install/hdf5-1.14.6/include - libraries: hdf5-shared;hdf5_fortran-shared - FFTW3 - include directories: /opt/cp2k-toolchain/install/fftw-3.3.10/include - libraries: /opt/cp2k-toolchain/install/fftw-3.3.10/lib/libfftw3.a - PLUMED - include directories: /opt/cp2k-toolchain/install/plumed-2.9.3/include - libraries: /opt/cp2k-toolchain/install/plumed-2.9.3/lib/libplumed.so - LIBXSMM - include directories: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/include - libraries: /opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmext.so;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so;/opt/cp2k-toolchain/install/libxsmm-e0c4a2389afba36c453233ad7de07bd92c715bec/lib/libxsmmf.so;:libxsmmext.a;:libxsmm.a;/usr/lib/x86_64-linux-gnu/libpthread.a;/usr/lib/x86_64-linux-gnu/librt.a;/usr/lib/x86_64-linux-gnu/libdl.a;/usr/lib/x86_64-linux-gnu/libm.so;/usr/lib/x86_64-linux-gnu/libc.so - SpLA - include directories: /opt/cp2k-toolchain/install/SpLA-1.6.1/include;/opt/cp2k-toolchain/install/SpLA-1.6.1/include/spla - libraries: $;$;MPI::MPI_CXX;MPI::MPI_C;MPI::MPI_Fortran - MiMiC - include directories: /opt/cp2k-toolchain/install/mcl-3.0.0/include/MiMiC/ - libraries: /opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmclf.so;/opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmcl.so - DFTD4 - include directories : /opt/cp2k-toolchain/install/tblite-0.5.0/include;/opt/cp2k-toolchain/install/tblite-0.5.0/include/dftd4/GNU-13.3.0 - libraries : - DeePMD - ACE - include directories: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/include - libraries: /opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libpace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libyaml-cpp-pace.a;/opt/cp2k-toolchain/install/lammps-user-pace-v.2023.11.25.fix2/lib/libcnpy.a - LibSMEAGOL - include directories: /opt/cp2k-toolchain/install/libsmeagol-1.2/include - libraries: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a - TBLITE : - include directories : /opt/cp2k-toolchain/install/tblite-0.5.0/include;/opt/cp2k-toolchain/install/tblite-0.5.0/include/tblite/GNU-13.3.0 - tblite libraries : - COSMA - include directories: /opt/cp2k-toolchain/install/COSMA-2.7.0/include - libraries: MPI::MPI_CXX;costa::costa;$;$;cosma::BLAS::blas;cosma::scalapack::scalapack - Libint2 - include directories: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include;/opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/include/libint2 - libraries: /opt/cp2k-toolchain/install/libint-v2.6.0-cp2k-lmax-5/lib/libint2.a - Libvori - libraries: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a - ELPA - include directories: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/include/elpa_openmp-2024.05.001 - libraries: /opt/cp2k-toolchain/install/elpa-2024.05.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.2/lib/libscalapack.a;:libopenblas.a - TREXIO - include directories: /opt/cp2k-toolchain/install/trexio-2.6.0/include;/opt/cp2k-toolchain/install/hdf5-1.14.6/include - libraries: /opt/cp2k-toolchain/install/trexio-2.6.0/lib/libtrexio.so;/opt/cp2k-toolchain/install/hdf5-1.14.6/lib/libhdf5.so - GreenX - include directories: /opt/cp2k-toolchain/install/greenX-2.2/include/modules - libraries: /opt/cp2k-toolchain/install/greenX-2.2/lib/libGXCommon.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_minimax.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_ac.so.0.0.1 -------------------------------------------------------------------- - - - List of dependencies not included in this build - - - -------------------------------------------------------------------- - PEXSI - SIRIUS - openPMD - GPU acceleration is disabled - DLA-Future After building CP2K the regtests can be run with the following command: ./tests/do_regtest.py /opt/cp2k/build/bin psmp -- Configuring done (6.9s) -- Generating done (0.4s) -- Build files have been written to: /opt/cp2k/build Compiling CP2K ... done ---> Removed intermediate container c4904e8a5a11 ---> f63761a2ea73 Step 38/43 : COPY ./benchmarks ./benchmarks ---> ebf0bb8999d4 Step 39/43 : COPY ./tools/regtesting ./tools/regtesting ---> 663077adb36b Step 40/43 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 7adcf2c861db Step 41/43 : RUN ./test_performance.sh "toolchain" 2>&1 | tee report.log ---> Running in f276ac2e3c99 ============== CP2K Binary Flags ============= cp2kflags: omp libint fftw3 libxc elpa parallel scalapack mpi_f08 cosma ace deepmd xsmm plumed2 spglib libdftd4 mctc-lib tblite libvori libbqb libtorch mimic hdf5 trexio libsmeagol greenx ========== Checking Benchmark Inputs ========= Found 77 input files and 0 errors. ========== Running Performance Test ========== Plot: name="total_timings_32omp", title="Total Timings with 32 OpenMP Threads", ylabel="time [s]" Plot: name="total_timings_32mpi", title="Total Timings with 32 MPI Ranks", ylabel="time [s]" Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 113.227 113.227 qs_mol_dyn_low 1 2.0 0.003 0.003 112.633 112.633 qs_forces 11 3.9 0.002 0.002 112.581 112.581 qs_energies 11 4.9 0.001 0.001 104.758 104.758 scf_env_do_scf 11 5.9 0.001 0.001 90.984 90.984 scf_env_do_scf_inner_loop 108 6.5 0.016 0.016 73.920 73.920 velocity_verlet 10 3.0 0.003 0.003 71.142 71.142 dbcsr_multiply_generic 2286 12.5 0.250 0.250 33.644 33.644 qs_scf_new_mos 108 7.5 0.001 0.001 31.152 31.152 qs_scf_loop_do_ot 108 8.5 0.001 0.001 31.151 31.151 ot_scf_mini 108 9.5 0.003 0.003 29.038 29.038 rebuild_ks_matrix 119 8.3 0.001 0.001 26.096 26.096 qs_ks_build_kohn_sham_matrix 119 9.3 0.020 0.020 26.095 26.095 qs_ks_update_qs_env 119 7.6 0.001 0.001 23.795 23.795 qs_rho_update_rho_low 119 7.7 0.001 0.001 23.754 23.754 calculate_rho_elec 119 8.7 1.001 1.001 23.753 23.753 make_m2s 4572 13.5 0.073 0.073 21.832 21.832 ot_mini 108 10.5 0.001 0.001 18.374 18.374 grid_collocate_task_list 119 9.7 17.675 17.675 17.675 17.675 init_scf_loop 11 6.9 0.000 0.000 16.885 16.885 sum_up_and_integrate 119 10.3 0.003 0.003 15.451 15.451 integrate_v_rspace 119 11.3 0.157 0.157 15.333 15.333 make_images 4572 14.5 2.752 2.752 15.292 15.292 prepare_preconditioner 11 7.9 0.000 0.000 13.470 13.470 make_preconditioner 11 8.9 0.000 0.000 13.470 13.470 grid_integrate_task_list 119 12.3 11.983 11.983 11.983 11.983 make_full_inverse_cholesky 11 9.9 0.031 0.031 11.896 11.896 hybrid_alltoall_any 4725 16.4 10.240 10.240 10.694 10.694 make_images_data 4572 15.5 0.048 0.048 10.389 10.389 qs_ot_get_derivative 108 11.5 0.002 0.002 9.670 9.670 fft_wrap_pw1pw2 1201 11.6 0.019 0.019 9.046 9.046 ot_diis_step 108 11.5 0.005 0.005 8.687 8.687 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 8.205 8.205 apply_single 119 13.6 0.001 0.001 8.205 8.205 fft_wrap_pw1pw2_140 487 12.2 1.011 1.011 7.663 7.663 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.195 7.195 dbcsr_make_dense_low 5837 15.5 0.045 0.045 6.965 6.965 make_dense_data 5837 16.5 5.796 5.796 6.901 6.901 multiply_cannon 2286 13.5 0.418 0.418 6.296 6.296 dbcsr_make_images_dense 3978 14.8 0.024 0.024 5.870 5.870 init_scf_run 11 5.9 0.000 0.000 5.687 5.687 scf_env_initial_rho_setup 11 6.9 0.002 0.002 5.684 5.684 multiply_cannon_loop 2286 14.5 0.075 0.075 5.548 5.548 multiply_cannon_multrec 2286 15.5 5.383 5.383 5.471 5.471 density_rs2pw 119 9.7 0.005 0.005 5.078 5.078 wfi_extrapolate 11 7.9 0.001 0.001 5.040 5.040 dbcsr_copy 2102 12.0 0.306 0.306 4.948 4.948 dbcsr_complete_redistribute 329 12.2 2.276 2.276 4.892 4.892 dbcsr_copy_into_existing 22 7.9 4.623 4.623 4.623 4.623 qs_ot_get_p 119 10.4 0.002 0.002 4.375 4.375 copy_dbcsr_to_fm 153 11.3 0.004 0.004 4.336 4.336 fft3d_s 1202 13.6 4.137 4.137 4.144 4.144 build_core_hamiltonian_matrix_ 11 4.9 0.002 0.002 4.060 4.060 cp_fm_cholesky_invert 11 10.9 3.986 3.986 3.986 3.986 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.759 3.759 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.496 3.496 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.290 3.290 qs_ot_get_derivative_diag 49 12.0 0.002 0.002 3.247 3.247 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.196 3.196 potential_pw2rs 119 12.3 0.060 0.060 3.192 3.192 cp_fm_cholesky_decompose 22 10.9 3.129 3.129 3.129 3.129 qs_create_task_list 11 7.9 0.000 0.000 2.997 2.997 generate_qs_task_list 11 8.9 1.878 1.878 2.997 2.997 dbcsr_data_release 278921 16.0 2.885 2.885 2.885 2.885 qs_ot_get_derivative_taylor 59 13.0 0.003 0.003 2.757 2.757 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 2.663 2.663 dbcsr_finalize 5048 13.8 0.186 0.186 2.612 2.612 copy_fm_to_dbcsr 176 11.2 0.002 0.002 2.491 2.491 qs_ot_p2m_diag 50 11.0 0.149 0.149 2.484 2.484 calculate_dm_sparse 119 9.5 0.001 0.001 2.470 2.470 pw_poisson_solve 119 10.3 0.003 0.003 2.334 2.334 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.020 63.124 63.125 qs_mol_dyn_low 1 2.0 0.005 0.006 62.933 62.937 qs_forces 11 3.9 0.002 0.003 62.882 62.884 qs_energies 11 4.9 0.001 0.001 59.396 59.403 scf_env_do_scf 11 5.9 0.001 0.002 54.471 54.473 scf_env_do_scf_inner_loop 108 6.5 0.004 0.024 49.958 49.960 velocity_verlet 10 3.0 0.001 0.004 37.500 37.502 dbcsr_multiply_generic 2286 12.5 0.110 0.125 21.618 21.866 rebuild_ks_matrix 119 8.3 0.001 0.004 21.497 21.657 qs_ks_build_kohn_sham_matrix 119 9.3 0.023 0.024 21.496 21.657 qs_ks_update_qs_env 119 7.6 0.002 0.002 19.192 19.340 qs_scf_new_mos 108 7.5 0.001 0.001 17.809 17.913 qs_scf_loop_do_ot 108 8.5 0.001 0.001 17.808 17.912 ot_scf_mini 108 9.5 0.003 0.003 16.826 16.932 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.393 16.429 calculate_rho_elec 119 8.7 0.032 0.034 16.392 16.429 sum_up_and_integrate 119 10.3 0.003 0.004 15.190 15.234 integrate_v_rspace 119 11.3 0.006 0.007 15.155 15.200 multiply_cannon 2286 13.5 0.197 0.221 14.594 15.187 multiply_cannon_loop 2286 14.5 0.131 0.152 13.388 13.967 mp_waitall_1 158411 16.6 12.048 12.757 12.048 12.757 grid_collocate_task_list 119 9.7 9.449 10.236 9.449 10.236 ot_mini 108 10.5 0.001 0.001 9.964 10.071 multiply_cannon_metrocomm3 18288 15.5 0.051 0.059 8.942 9.767 grid_integrate_task_list 119 12.3 8.858 9.415 8.858 9.415 density_rs2pw 119 9.7 0.006 0.006 6.355 7.000 qs_ot_get_derivative 108 11.5 0.001 0.002 5.489 5.603 transfer_rs2pw 487 10.6 0.009 0.010 4.358 5.012 potential_pw2rs 119 12.3 0.007 0.008 4.654 4.696 init_scf_loop 11 6.9 0.000 0.001 4.488 4.490 fft_wrap_pw1pw2 1201 11.6 0.023 0.025 4.274 4.409 mp_waitany 9880 13.7 3.842 4.398 3.842 4.398 ot_diis_step 108 11.5 0.005 0.006 4.374 4.377 make_m2s 4572 13.5 0.056 0.063 4.209 4.368 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.180 4.325 apply_single 119 13.6 0.001 0.001 4.180 4.325 multiply_cannon_multrec 18288 15.5 3.756 4.041 3.774 4.062 make_images 4572 14.5 0.127 0.136 3.790 3.969 init_scf_run 11 5.9 0.000 0.000 3.695 3.696 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.695 3.696 transfer_rs2pw_140 130 11.5 0.190 0.204 3.017 3.672 fft3d_ps 1201 13.6 1.288 1.416 3.324 3.557 fft_wrap_pw1pw2_140 487 12.2 0.181 0.221 3.401 3.532 transfer_pw2rs 487 13.2 0.008 0.010 3.413 3.425 wfi_extrapolate 11 7.9 0.001 0.002 3.336 3.336 mp_alltoall_d11v 2130 13.8 2.046 2.605 2.046 2.605 qs_ot_get_p 119 10.4 0.002 0.002 2.460 2.561 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.527 2.540 mp_sum_l 10898 13.6 2.096 2.381 2.096 2.381 rs_gather_matrices 119 12.3 0.098 0.110 1.588 2.090 prepare_preconditioner 11 7.9 0.000 0.000 2.016 2.043 make_preconditioner 11 8.9 0.000 0.000 2.016 2.043 mp_alltoall_z22v 1201 15.6 1.736 2.028 1.736 2.028 make_images_data 4572 15.5 0.043 0.048 1.812 2.023 transfer_pw2rs_140 130 13.9 0.424 0.586 1.774 1.887 make_full_inverse_cholesky 11 9.9 0.000 0.001 1.848 1.885 qs_ot_get_derivative_diag 49 12.0 0.002 0.002 1.728 1.816 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.747 1.814 hybrid_alltoall_any 4725 16.4 0.091 0.274 1.518 1.693 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 1.649 1.656 mp_sum_d 4143 11.9 1.207 1.412 1.207 1.412 transfer_pw2rs_50 119 14.3 0.332 0.364 1.241 1.355 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.255 1.316 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64", label="H2O-64", y=113.227, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64", label="H2O-64", y=63.124, yerr=0.0 Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=62.150000000000006, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.675, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.983, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=10.24, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.796, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.383, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=25.080000000000005, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.449, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.858, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.091, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.756, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.842, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=12.048, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.037 133.525 133.525 qs_mol_dyn_low 1 2.0 0.003 0.003 132.910 132.910 qs_forces 11 3.9 0.002 0.002 132.867 132.867 qs_energies 11 4.9 0.001 0.001 123.563 123.563 scf_env_do_scf 11 5.9 0.001 0.001 107.455 107.455 scf_env_do_scf_inner_loop 96 6.5 0.014 0.014 88.597 88.597 velocity_verlet 10 3.0 0.003 0.003 84.362 84.362 rebuild_ks_matrix 107 8.3 0.001 0.001 37.476 37.476 qs_ks_build_kohn_sham_matrix 107 9.3 0.018 0.018 37.476 37.476 qs_rho_update_rho_low 107 7.7 0.001 0.001 36.529 36.529 calculate_rho_elec 107 8.7 0.902 0.902 36.529 36.529 qs_ks_update_qs_env 107 7.6 0.001 0.001 33.704 33.704 grid_collocate_task_list 107 9.7 30.854 30.854 30.854 30.854 dbcsr_multiply_generic 1966 12.4 0.206 0.206 29.214 29.214 sum_up_and_integrate 107 10.3 0.002 0.002 27.651 27.651 integrate_v_rspace 107 11.3 0.130 0.130 27.541 27.541 qs_scf_new_mos 96 7.5 0.001 0.001 26.377 26.377 qs_scf_loop_do_ot 96 8.5 0.001 0.001 26.376 26.376 ot_scf_mini 96 9.5 0.003 0.003 24.511 24.511 grid_integrate_task_list 107 12.3 24.460 24.460 24.460 24.460 make_m2s 3932 13.4 0.059 0.059 18.866 18.866 init_scf_loop 11 6.9 0.000 0.000 18.676 18.676 ot_mini 96 10.5 0.001 0.001 15.443 15.443 prepare_preconditioner 11 7.9 0.000 0.000 13.485 13.485 make_preconditioner 11 8.9 0.000 0.000 13.485 13.485 make_images 3932 14.4 2.270 2.270 13.330 13.330 make_full_inverse_cholesky 11 9.9 0.038 0.038 11.899 11.899 hybrid_alltoall_any 4079 16.3 9.039 9.039 9.484 9.484 make_images_data 3932 15.4 0.040 0.040 9.167 9.167 fft_wrap_pw1pw2 1081 11.6 0.016 0.016 8.714 8.714 qs_ot_get_derivative 96 11.5 0.002 0.002 8.166 8.166 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.885 7.885 fft_wrap_pw1pw2_140 439 12.2 1.266 1.266 7.469 7.469 init_scf_run 11 5.9 0.000 0.000 7.348 7.348 scf_env_initial_rho_setup 11 6.9 0.002 0.002 7.346 7.346 ot_diis_step 96 11.5 0.004 0.004 7.261 7.261 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 7.059 7.059 apply_single 107 13.6 0.001 0.001 7.059 7.059 wfi_extrapolate 11 7.9 0.001 0.001 6.592 6.592 dbcsr_make_dense_low 4961 15.5 0.036 0.036 5.871 5.871 make_dense_data 4961 16.5 4.963 4.963 5.819 5.819 multiply_cannon 1966 13.4 0.334 0.334 5.614 5.614 dbcsr_copy 1855 11.9 0.267 0.267 5.158 5.158 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.080 5.080 dbcsr_make_images_dense 3386 14.7 0.020 0.020 5.007 5.007 multiply_cannon_loop 1966 14.4 0.056 0.056 4.994 4.994 dbcsr_complete_redistribute 317 12.2 2.222 2.222 4.954 4.954 multiply_cannon_multrec 1966 15.4 4.856 4.856 4.937 4.937 dbcsr_copy_into_existing 22 7.9 4.873 4.873 4.874 4.874 density_rs2pw 107 9.7 0.005 0.005 4.773 4.773 copy_dbcsr_to_fm 147 11.2 0.004 0.004 4.415 4.415 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.221 4.221 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.097 4.097 cp_fm_cholesky_invert 11 10.9 3.909 3.909 3.909 3.909 fft3d_s 1082 13.6 3.794 3.794 3.800 3.800 qs_ot_get_p 107 10.4 0.001 0.001 3.715 3.715 qs_create_task_list 11 7.9 0.000 0.000 3.594 3.594 generate_qs_task_list 11 8.9 2.476 2.476 3.594 3.594 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.394 3.394 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.250 3.250 cp_fm_cholesky_decompose 22 10.9 3.083 3.083 3.083 3.083 potential_pw2rs 107 12.3 0.054 0.054 2.951 2.951 dbcsr_data_release 237968 15.9 2.849 2.849 2.849 2.849 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 2.815 2.815 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.013 91.611 91.612 qs_mol_dyn_low 1 2.0 0.003 0.006 91.432 91.436 qs_forces 11 3.9 0.002 0.002 91.378 91.380 qs_energies 11 4.9 0.001 0.001 86.059 86.063 scf_env_do_scf 11 5.9 0.001 0.001 79.408 79.410 scf_env_do_scf_inner_loop 96 6.5 0.004 0.025 73.294 73.296 velocity_verlet 10 3.0 0.001 0.004 54.471 54.473 rebuild_ks_matrix 107 8.3 0.001 0.001 36.401 36.478 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.022 36.400 36.477 qs_rho_update_rho_low 107 7.7 0.001 0.001 32.253 32.284 calculate_rho_elec 107 8.7 0.028 0.030 32.252 32.283 qs_ks_update_qs_env 107 7.6 0.001 0.002 32.213 32.280 sum_up_and_integrate 107 10.3 0.003 0.003 30.693 30.734 integrate_v_rspace 107 11.3 0.006 0.006 30.662 30.701 grid_collocate_task_list 107 9.7 23.946 25.959 23.946 25.959 grid_integrate_task_list 107 12.3 23.124 24.598 23.124 24.598 dbcsr_multiply_generic 1966 12.4 0.098 0.110 19.484 19.646 qs_scf_new_mos 96 7.5 0.001 0.001 15.804 15.875 qs_scf_loop_do_ot 96 8.5 0.001 0.001 15.803 15.874 ot_scf_mini 96 9.5 0.002 0.003 14.915 14.990 multiply_cannon 1966 13.4 0.177 0.200 13.572 13.888 multiply_cannon_loop 1966 14.4 0.119 0.139 12.528 12.927 mp_waitall_1 136719 16.5 11.366 12.004 11.366 12.004 density_rs2pw 107 9.7 0.005 0.006 7.794 9.454 ot_mini 96 10.5 0.001 0.001 8.864 8.947 multiply_cannon_metrocomm3 15728 15.4 0.046 0.053 8.460 8.909 transfer_rs2pw 439 10.6 0.009 0.012 6.055 7.713 mp_waitany 8968 13.7 5.597 7.116 5.597 7.116 transfer_rs2pw_140 118 11.5 0.174 0.191 4.758 6.422 init_scf_loop 11 6.9 0.000 0.001 6.090 6.090 init_scf_run 11 5.9 0.000 0.000 5.264 5.267 scf_env_initial_rho_setup 11 6.9 0.000 0.005 5.264 5.265 qs_ot_get_derivative 96 11.5 0.001 0.002 4.879 4.955 wfi_extrapolate 11 7.9 0.001 0.002 4.770 4.770 potential_pw2rs 107 12.3 0.007 0.008 4.522 4.549 mp_alltoall_d11v 1998 13.7 3.383 4.407 3.383 4.407 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.356 4.368 fft_wrap_pw1pw2 1081 11.6 0.021 0.022 3.882 3.997 rs_gather_matrices 107 12.3 0.094 0.109 2.961 3.972 ot_diis_step 96 11.5 0.004 0.005 3.912 3.912 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.794 3.889 apply_single 107 13.6 0.001 0.001 3.794 3.889 make_m2s 3932 13.4 0.050 0.055 3.672 3.800 multiply_cannon_multrec 15728 15.4 3.462 3.758 3.478 3.776 make_images 3932 14.4 0.113 0.122 3.299 3.441 transfer_pw2rs 439 13.2 0.008 0.009 3.387 3.394 fft3d_ps 1081 13.6 1.159 1.321 3.021 3.187 fft_wrap_pw1pw2_140 439 12.2 0.163 0.197 3.054 3.177 qs_ot_get_p 107 10.4 0.001 0.002 2.048 2.137 prepare_preconditioner 11 7.9 0.000 0.000 1.906 1.925 make_preconditioner 11 8.9 0.000 0.000 1.906 1.925 mp_sum_l 9346 13.5 1.665 1.844 1.665 1.844 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64_nonortho", label="H2O-64_nonortho", y=133.525, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64_nonortho", label="H2O-64_nonortho", y=91.611, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=54.480000000000004, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=30.854, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.46, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=9.039, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.963, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=4.873, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.856, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=24.116, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.946, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=23.124, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.462, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=5.597, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=11.366, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.233 0.233 106.763 106.763 qs_energies 1 2.0 0.000 0.000 105.897 105.897 scf_env_do_scf 1 3.0 0.000 0.000 104.403 104.403 qs_ks_update_qs_env 8 5.0 0.000 0.000 97.519 97.519 rebuild_ks_matrix 7 6.0 0.000 0.000 97.434 97.434 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 97.434 97.434 hfx_ks_matrix 7 8.0 0.000 0.000 86.188 86.188 integrate_four_center 7 9.0 0.846 0.846 86.139 86.139 integrate_four_center_main 7 10.0 0.516 0.516 79.168 79.168 integrate_four_center_bin 447 11.0 78.652 78.652 78.652 78.652 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 55.044 55.044 init_scf_loop 1 4.0 0.000 0.000 49.342 49.342 integrate_four_center_load 7 10.0 0.001 0.001 5.873 5.873 hfx_load_balance 1 11.0 0.001 0.001 5.872 5.872 qs_vxc_create 14 8.0 0.000 0.000 4.396 4.396 xc_vxc_pw_create 14 9.0 0.142 0.142 4.396 4.396 fft_wrap_pw1pw2 237 10.7 0.005 0.005 3.665 3.665 fft_wrap_pw1pw2_140 150 12.1 1.017 1.017 3.512 3.512 calculate_rho_elec 15 7.4 0.123 0.123 3.223 3.223 prepare_preconditioner 1 5.0 0.000 0.000 3.005 3.005 make_preconditioner 1 6.0 0.000 0.000 3.005 3.005 hfx_load_balance_bin 1 12.0 2.932 2.932 2.932 2.932 hfx_load_balance_count 1 12.0 2.924 2.924 2.924 2.924 xc_rho_set_and_dset_create 14 10.0 0.153 0.153 2.637 2.637 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.607 2.607 dbcsr_multiply_generic 165 10.0 0.018 0.018 2.365 2.365 grid_collocate_task_list 15 8.4 2.310 2.310 2.310 2.310 xc_pw_derive 84 11.0 0.001 0.001 2.177 2.177 qs_scf_new_mos 7 5.0 0.000 0.000 2.147 2.147 qs_scf_loop_do_ot 7 6.0 0.000 0.000 2.147 2.147 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.325 0.341 101.117 101.122 qs_energies 1 2.0 0.000 0.000 100.551 100.556 scf_env_do_scf 1 3.0 0.000 0.000 100.109 100.112 qs_ks_update_qs_env 8 5.0 0.000 0.000 96.285 96.287 rebuild_ks_matrix 7 6.0 0.000 0.000 96.270 96.272 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 96.270 96.272 hfx_ks_matrix 7 8.0 0.000 0.001 89.829 89.833 integrate_four_center 7 9.0 0.065 0.281 89.818 89.822 integrate_four_center_main 7 10.0 0.005 0.006 78.011 81.935 integrate_four_center_bin 448 11.0 78.007 81.930 78.007 81.930 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 53.351 53.352 init_scf_loop 1 4.0 0.000 0.000 46.756 46.757 mp_sync 56 11.2 4.895 8.087 4.895 8.087 integrate_four_center_load 7 10.0 0.000 0.000 6.217 6.221 hfx_load_balance 1 11.0 0.001 0.001 6.217 6.221 hfx_load_balance_bin 1 12.0 2.966 3.228 2.966 3.228 hfx_load_balance_count 1 12.0 2.910 2.981 2.910 2.981 qs_vxc_create 14 8.0 0.000 0.001 2.268 2.270 xc_vxc_pw_create 14 9.0 0.008 0.009 2.268 2.269 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-hyb", label="H2O-hyb", y=106.763, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-hyb", label="H2O-hyb", y=101.117, yerr=0.0 Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=18.695000000000007, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=78.652, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.932, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.924, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=2.31, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=1.017, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.233, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.013999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=78.007, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.966, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.91, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="fft_wrap_pw1pw2_140", label="fft_wrap_pw1pw2_140", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.325, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=4.895, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 92.886 92.886 qs_energies 1 2.0 0.000 0.000 92.510 92.510 mp2_main 1 3.0 0.000 0.000 88.750 88.750 mp2_gpw_main 1 4.0 0.000 0.000 88.615 88.615 rpa_ri_compute_en 1 5.0 0.000 0.000 85.281 85.281 rpa_num_int 1 6.0 0.001 0.001 85.272 85.272 compute_mat_P_omega 1 7.0 0.003 0.003 72.404 72.404 compute_mat_P_omega_contract 10 8.0 8.740 8.740 72.100 72.100 dbt_total 2336 9.6 0.019 0.019 61.334 61.334 dbt_contract 787 11.0 0.151 0.151 52.254 52.254 dbt_tas_total 1149 12.2 0.406 0.406 49.934 49.934 dbt_tas_multiply 807 12.1 0.003 0.003 48.511 48.511 dbt_tas_dbm 807 14.1 0.006 0.006 39.890 39.890 dbm_multiply 807 16.1 39.873 39.873 39.873 39.873 dbt_tas_mm_1N 524 15.1 0.003 0.003 28.193 28.193 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 25.911 25.911 compute_mat_P_omega_calc_M_occ 250 9.0 8.803 8.803 19.526 19.526 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.166 11.166 dbt_tas_mm_2 251 15.0 0.003 0.003 9.610 9.610 compute_QP_energies 1 7.0 0.000 0.000 7.645 7.645 compute_self_energy_cubic_gw 1 8.0 0.164 0.164 7.644 7.644 dbt_copy 1103 10.7 0.119 0.119 7.631 7.631 contract_cubic_gw 21 9.0 0.000 0.000 5.730 5.730 dbm_reserve_blocks 3628 15.3 5.338 5.338 5.338 5.338 dbt_tas_reserve_blocks_index 3261 14.3 0.114 0.114 5.194 5.194 dbt_reserve_blocks_index 2280 13.1 0.073 0.073 4.014 4.014 dbt_reserve_blocks_index_array 2222 12.2 0.012 0.012 3.942 3.942 scf_env_do_scf 1 3.0 0.000 0.000 3.645 3.645 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.644 3.644 dbt_crop 1042 12.0 1.944 1.944 3.490 3.490 mp2_ri_gpw_compute_in 1 5.0 0.002 0.002 3.322 3.322 dbt_tas_reshape 367 15.0 0.020 0.020 2.779 2.779 dbt_tas_copy 574 11.4 1.668 1.668 2.723 2.723 dbt_reshape 278 11.9 1.583 1.583 2.557 2.557 convert_to_new_pgrid 2421 14.1 0.170 0.170 2.373 2.373 dbm_copy 1614 15.1 2.204 2.204 2.204 2.204 reshape_mm_small 367 14.1 0.042 0.042 2.086 2.086 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 1.983 1.983 dbt_tas_replicate 367 14.1 1.532 1.532 1.884 1.884 compute_W_cubic_GW 10 7.0 0.006 0.006 1.881 1.881 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.017 47.310 47.312 qs_energies 1 2.0 0.000 0.000 47.131 47.133 mp2_main 1 3.0 0.000 0.000 45.717 45.719 mp2_gpw_main 1 4.0 0.000 0.001 45.662 45.664 rpa_ri_compute_en 1 5.0 0.000 0.000 44.343 44.345 rpa_num_int 1 6.0 0.000 0.003 44.342 44.344 dbt_total 2336 9.6 0.022 0.024 39.170 39.183 compute_mat_P_omega 1 7.0 0.001 0.007 36.365 36.381 compute_mat_P_omega_contract 10 8.0 0.476 0.513 36.121 36.127 dbt_contract 787 11.0 0.050 0.055 28.343 28.354 dbt_tas_total 1149 12.2 0.116 0.130 25.034 25.050 dbt_tas_multiply 807 12.1 0.003 0.003 24.991 25.007 dbt_tas_dbm 807 14.1 0.005 0.007 16.578 16.599 dbm_multiply 807 16.1 12.210 13.179 12.210 13.179 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.002 11.320 11.327 mp_sync 8688 11.6 9.231 10.613 9.231 10.613 compute_mat_P_omega_calc_M_occ 250 9.0 0.473 0.502 10.539 10.545 dbt_copy 1149 10.8 0.033 0.037 8.658 9.125 dbt_reshape 1136 11.8 2.940 3.133 7.739 8.223 dbt_tas_mm_2 251 15.0 0.003 0.004 8.043 8.047 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.684 6.687 dbt_tas_mm_1N 524 15.1 0.002 0.003 5.539 6.033 compute_QP_energies 1 7.0 0.000 0.000 5.605 5.605 compute_self_energy_cubic_gw 1 8.0 0.008 0.011 5.605 5.605 mp_waitall_2 3812 15.3 4.052 4.374 4.052 4.374 dbt_communicate_buffer 1136 12.8 0.064 0.073 3.709 4.020 contract_cubic_gw 21 9.0 0.000 0.000 3.867 3.870 dbm_reserve_blocks 3752 15.4 2.060 2.434 2.060 2.434 dbt_reserve_blocks_index 2887 13.1 0.087 0.096 2.044 2.399 dbt_reserve_blocks_index_array 2829 12.2 0.011 0.013 2.033 2.385 dbt_tas_reserve_blocks_index 3347 14.5 0.074 0.080 2.002 2.363 dbt_crop 1042 12.0 1.044 1.149 1.690 1.872 convert_to_new_pgrid 2421 14.1 0.031 0.034 1.335 1.560 dbt_tas_replicate 405 14.1 0.581 0.739 1.477 1.545 dbm_copy 1608 15.1 1.296 1.522 1.296 1.522 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.002 1.415 1.419 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.376 1.379 scf_env_do_scf 1 3.0 0.000 0.000 1.360 1.361 scf_env_do_scf_inner_loop 17 4.0 0.001 0.003 1.360 1.360 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.316 1.316 parallel_gemm_fm_cosma 105 8.4 1.288 1.308 1.288 1.308 dbt_tas_create_split 2550 15.2 0.006 0.006 1.260 1.280 dbt_tas_create_split_rows_or_c 2550 16.2 0.015 0.017 1.254 1.275 mp_sum_l 5765 13.7 1.112 1.192 1.112 1.192 mp_max_i 2044 9.6 0.949 1.135 0.949 1.135 compute_W_cubic_GW 10 7.0 0.001 0.001 1.063 1.077 dbm_add 807 14.1 0.840 0.976 0.840 0.976 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=92.886, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=47.31, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=26.345, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=39.873, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.803, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.74, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=5.338, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.204, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.583, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=14.572000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=12.21, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.473, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.476, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.06, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.296, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.94, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=4.052, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=9.231, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 252.172 252.172 qs_forces 1 2.0 0.000 0.000 251.638 251.638 rebuild_ks_matrix 7 6.6 0.000 0.000 249.853 249.853 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 249.852 249.852 hfx_ks_matrix 7 8.6 0.000 0.000 247.402 247.402 hfx_ri_update_ks 7 9.6 0.000 0.000 194.525 194.525 hfx_ri_update_ks_Pmat 7 10.6 35.538 35.538 194.518 194.518 dbt_total 849 11.0 0.009 0.009 186.102 186.102 qs_energies 1 3.0 0.000 0.000 175.035 175.035 scf_env_do_scf 1 4.0 0.001 0.001 174.659 174.659 qs_ks_update_qs_env 8 6.0 0.000 0.000 173.315 173.315 dbt_tas_total 369 13.4 0.493 0.493 162.005 162.005 dbt_contract 207 12.4 0.268 0.268 161.730 161.730 dbt_tas_multiply 216 13.5 0.001 0.001 156.437 156.437 dbt_tas_dbm 216 15.5 0.002 0.002 142.088 142.088 dbm_multiply 216 17.5 142.082 142.082 142.082 142.082 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 125.517 125.517 dbt_tas_mm_2 91 16.5 0.001 0.001 119.165 119.165 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 101.004 101.004 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 76.550 76.550 init_scf_loop 2 5.0 0.000 0.000 73.652 73.652 hfx_ri_update_forces 1 7.0 1.579 1.579 52.872 52.872 hfx_ri_forces_Pmat_3c 1 8.0 4.521 4.521 34.644 34.644 dbt_copy 423 11.8 0.079 0.079 18.791 18.791 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 14.726 14.726 precalc_derivatives 1 8.0 2.523 2.523 13.349 13.349 dbt_reshape 132 13.2 9.659 9.659 13.325 13.325 dbt_tas_mm_3T 77 17.1 0.000 0.000 12.783 12.783 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.677 9.677 dbt_tas_mm_3N 37 15.4 0.000 0.000 9.510 9.510 dbt_tas_reshape 168 14.5 0.006 0.006 7.281 7.281 dbm_reserve_blocks 1491 16.2 7.266 7.266 7.266 7.266 dbt_tas_reserve_blocks_index 1323 15.4 0.207 0.207 7.138 7.138 build_3c_derivatives 3 9.0 1.968 1.968 6.756 6.756 hfx_ri_pre_scf_Pmat_copy_2 9 13.0 5.440 5.440 6.624 6.624 dbt_reserve_blocks_index 846 14.4 0.132 0.132 5.540 5.540 dbt_reserve_blocks_index_array 816 13.5 0.010 0.010 5.417 5.417 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.001 0.001 5.405 5.405 dbt_crop 372 13.7 3.562 3.562 5.218 5.218 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.022 62.557 62.558 qs_forces 1 2.0 0.000 0.000 62.298 62.299 rebuild_ks_matrix 7 6.6 0.000 0.000 61.092 61.094 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 61.092 61.094 hfx_ks_matrix 7 8.6 0.000 0.000 59.759 59.761 dbt_total 849 11.0 0.010 0.012 53.539 53.547 dbt_contract 207 12.4 0.035 0.038 40.143 40.163 dbt_tas_total 369 13.4 0.062 0.069 35.738 35.741 dbt_tas_multiply 216 13.5 0.001 0.001 35.228 35.232 hfx_ri_update_ks 7 9.6 0.000 0.000 32.093 32.093 hfx_ri_update_ks_Pmat 7 10.6 1.512 1.661 32.089 32.090 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 31.606 31.607 qs_energies 1 3.0 0.000 0.000 30.680 30.681 scf_env_do_scf 1 4.0 0.000 0.001 30.495 30.496 qs_ks_update_qs_env 8 6.0 0.000 0.000 29.489 29.491 hfx_ri_update_forces 1 7.0 0.096 0.109 27.665 27.667 dbt_tas_dbm 216 15.5 0.002 0.003 26.928 26.932 dbm_multiply 216 17.5 22.202 23.658 22.202 23.658 hfx_ri_forces_Pmat_3c 1 8.0 0.218 0.242 20.127 20.147 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 17.188 17.188 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 14.102 14.105 init_scf_loop 2 5.0 0.000 0.000 13.306 13.306 mp_sync 2901 12.8 9.842 11.951 9.842 11.951 dbt_copy 539 12.5 0.020 0.023 10.613 11.376 dbt_tas_mm_2 91 16.5 0.001 0.002 11.305 11.309 dbt_reshape 393 13.9 3.638 3.867 8.024 8.652 dbt_tas_mm_3T 77 17.1 0.000 0.001 6.457 7.111 dbt_tas_mm_3N 37 15.4 0.000 0.000 5.828 6.466 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.130 6.133 precalc_derivatives 1 8.0 0.120 0.128 5.715 5.715 mp_waitall_2 1318 16.2 4.376 4.683 4.376 4.683 dbm_reserve_blocks 1641 16.6 3.389 4.136 3.389 4.136 dbt_tas_reserve_blocks_index 1471 15.8 0.149 0.156 3.321 4.029 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.999 3.999 build_3c_derivatives 3 9.0 0.202 0.222 3.373 3.382 dbt_communicate_buffer 393 14.9 0.016 0.024 3.074 3.353 dbt_reserve_blocks_index 1107 14.8 0.130 0.136 2.749 3.314 dbt_reserve_blocks_index_array 1077 13.9 0.008 0.010 2.723 3.286 dbt_crop 372 13.7 2.061 2.192 2.815 2.968 convert_to_new_pgrid 648 15.5 0.080 0.165 2.212 2.553 dbt_tas_replicate 170 15.1 0.741 0.804 2.031 2.181 dbm_copy 452 16.3 1.804 2.120 1.804 2.120 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.110 2.115 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.001 1.900 1.901 dbt_tas_copy 146 12.6 0.864 0.906 1.526 1.713 dbt_tas_communicate_buffer 370 16.3 0.014 0.016 1.404 1.521 mp_sum_l 5985 14.5 1.225 1.399 1.225 1.399 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=252.172, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=62.557, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=52.18700000000001, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=142.082, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=35.538, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=9.659, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.266, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_pre_scf_Pmat_copy_2", label="hfx_ri_pre_scf_Pmat_copy_2", y=5.44, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=17.598, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=22.202, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.512, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.638, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.389, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_pre_scf_Pmat_copy_2", label="hfx_ri_pre_scf_Pmat_copy_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=4.376, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=9.842, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 244.406 244.406 qs_energies 1 2.0 0.000 0.000 244.215 244.215 mp2_main 1 3.0 0.000 0.000 238.742 238.742 mp2_gpw_main 1 4.0 0.001 0.001 238.297 238.297 mp2_ri_gpw_compute_in 1 5.0 0.626 0.626 183.252 183.252 mp2_ri_gpw_compute_in_loop 1 6.0 0.038 0.038 166.389 166.389 mp2_eri_3c_integrate_gpw 2656 7.0 0.041 0.041 104.707 104.707 integrate_v_rspace 2666 8.0 1.010 1.010 84.404 84.404 grid_integrate_task_list 2666 9.0 79.080 79.080 79.080 79.080 mp2_ri_gpw_compute_en 1 5.0 0.122 0.122 55.019 55.019 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.905 11.905 52.244 52.244 ao_to_mo_and_store_B_mult_1 2656 7.0 0.041 0.041 51.664 51.664 dbcsr_multiply_generic 5322 8.0 0.547 0.547 51.660 51.660 make_m2s 10644 9.0 0.157 0.157 31.001 31.001 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.948 2.948 30.483 30.483 make_images 10644 10.0 4.563 4.563 29.908 29.908 local_gemm 2080 8.0 27.534 27.534 27.534 27.534 hybrid_alltoall_any 13323 11.6 23.064 23.064 23.590 23.590 make_images_data 10644 11.0 0.149 0.149 23.499 23.499 fft_wrap_pw1pw2 53228 10.4 0.217 0.217 22.237 22.237 get_2c_integrals 1 6.0 0.000 0.000 16.236 16.236 multiply_cannon 5322 9.0 1.579 1.579 16.034 16.034 collocate_function 2656 8.0 7.884 7.884 15.836 15.836 compute_2c_integrals 1 7.0 0.005 0.005 15.552 15.552 compute_2c_integrals_loop_lm 1 8.0 0.010 0.010 15.544 15.544 mp2_eri_2c_integrate_gpw 1 9.0 1.116 1.116 15.533 15.533 fft_wrap_pw1pw2_20 21271 11.4 1.282 1.282 13.542 13.542 multiply_cannon_loop 5322 10.0 0.359 0.359 12.972 12.972 fft3d_s 53229 12.4 12.543 12.543 12.593 12.593 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 3.751 3.751 9.880 9.880 multiply_cannon_multrec 5322 11.0 9.671 9.671 9.771 9.771 potential_pw2rs 5322 10.0 0.218 0.218 8.864 8.864 fft_wrap_pw1pw2_10 31893 11.5 0.278 0.278 8.176 8.176 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.009 0.009 7.490 7.490 mp2_ri_gpw_compute_en_ener 2080 7.0 7.317 7.317 7.317 7.317 copy_dbcsr_to_fm 2679 8.0 0.090 0.090 6.822 6.822 collocate_single_gaussian 2656 10.0 0.166 0.166 6.811 6.811 calc_potential_gpw 5312 9.5 0.028 0.028 6.011 6.011 scf_env_do_scf 1 3.0 0.000 0.000 5.048 5.048 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 5.048 5.048 dbcsr_finalize 10708 9.5 0.383 0.383 4.897 4.897 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.019 42.252 42.254 qs_energies 1 2.0 0.000 0.000 42.013 42.014 mp2_main 1 3.0 0.000 0.001 39.488 39.489 mp2_gpw_main 1 4.0 0.002 0.004 39.334 39.334 mp2_ri_gpw_compute_en 1 5.0 0.303 0.329 21.017 21.409 mp2_ri_gpw_compute_in 1 5.0 0.057 0.058 18.234 19.204 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.787 2.195 18.941 18.971 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.771 17.744 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.002 14.099 14.790 integrate_v_rspace 93 8.1 0.131 0.142 14.101 14.770 grid_integrate_task_list 93 9.1 13.722 14.385 13.722 14.385 mp2_ri_gpw_compute_en_expansio 65 7.0 0.131 0.171 12.311 13.025 local_gemm 65 8.0 12.180 12.886 12.180 12.886 mp2_ri_gpw_compute_en_comm 17 7.0 0.113 0.140 4.416 4.948 mp_sendrecv_dm3 1054 8.0 3.730 4.344 3.730 4.344 dbcsr_multiply_generic 176 8.0 0.012 0.013 2.304 2.560 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.279 2.533 scf_env_do_scf 1 3.0 0.000 0.000 2.380 2.381 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.380 2.381 multiply_cannon 176 9.0 0.025 0.033 1.308 1.471 mp_min_d 2 7.0 1.051 1.448 1.051 1.448 get_2c_integrals 1 6.0 0.000 0.001 1.382 1.405 multiply_cannon_loop 176 10.0 0.003 0.004 1.236 1.386 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 0.973 1.365 qs_scf_new_mos 10 5.0 0.000 0.000 1.259 1.266 eigensolver 11 5.8 0.001 0.002 1.178 1.180 multiply_cannon_multrec 246 11.0 1.011 1.126 1.019 1.136 make_m2s 352 9.0 0.004 0.005 0.941 1.045 make_images 352 10.0 0.045 0.048 0.928 1.031 compute_2c_integrals 1 7.0 0.003 0.005 0.943 0.959 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.905 0.907 cp_fm_diag_elpa_base 11 7.8 0.890 0.895 0.902 0.903 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.806 0.857 mp2_eri_2c_integrate_gpw 1 9.0 0.213 0.222 0.805 0.856 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=244.406, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=42.252, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=90.28, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=79.08, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=27.534, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=23.064, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.543, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.905, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=9.782000000000004, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.722, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=12.18, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.787, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.73, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=1.051, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.103 0.103 146.764 146.764 qs_energies 1 2.0 0.000 0.000 145.433 145.433 scf_env_do_scf 1 3.0 0.000 0.000 138.844 138.844 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 138.844 138.844 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.064 68.064 rebuild_ks_matrix 15 6.0 0.000 0.000 67.810 67.810 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 67.810 67.810 qs_vxc_create 15 8.0 0.031 0.031 51.915 51.915 qs_scf_new_mos 15 5.0 0.000 0.000 49.987 49.987 calculate_dispersion_nonloc 15 9.0 8.079 8.079 45.502 45.502 eigensolver 15 6.0 0.002 0.002 39.095 39.095 fft_wrap_pw1pw2 1086 10.0 0.027 0.027 39.023 39.023 fft_wrap_pw1pw2_150 765 11.0 10.185 10.185 29.393 29.393 cp_fm_diag_elpa 15 7.0 0.000 0.000 27.947 27.947 cp_fm_diag_elpa_base 15 8.0 26.508 26.508 27.947 27.947 qs_rho_update_rho_low 16 5.0 0.000 0.000 16.813 16.813 calculate_rho_elec 16 6.0 0.224 0.224 16.813 16.813 grid_collocate_task_list 16 7.0 14.843 14.843 14.843 14.843 sum_up_and_integrate 15 8.0 0.000 0.000 14.643 14.643 integrate_v_rspace 15 9.0 0.025 0.025 14.628 14.628 fft3d_s 1087 12.0 14.152 14.152 14.162 14.162 grid_integrate_task_list 15 10.0 13.729 13.729 13.729 13.729 cp_fm_cholesky_restore 45 7.0 9.801 9.801 9.801 9.801 pw_scatter_s 585 12.1 9.755 9.755 9.755 9.755 fft_wrap_pw1pw2_200 197 11.3 1.971 1.971 9.367 9.367 copy_dbcsr_to_fm 16 5.9 0.001 0.001 8.006 8.006 dbcsr_complete_redistribute 46 8.3 2.600 2.600 7.489 7.489 xc_vxc_pw_create 15 9.0 0.221 0.221 6.383 6.383 vdW_energy 15 10.0 6.136 6.136 6.136 6.136 gspace_mixing 14 5.0 0.172 0.172 4.833 4.833 xc_pw_derive 90 11.0 0.001 0.001 4.622 4.622 broyden_mixing 14 6.0 4.098 4.098 4.099 4.099 dbcsr_finalize 159 9.9 0.021 0.021 3.510 3.510 calculate_dm_sparse 15 6.0 0.022 0.022 3.450 3.450 dbcsr_merge_all 91 11.1 0.132 0.132 3.346 3.346 cp_dbcsr_plus_fm_fm_t 15 7.0 0.001 0.001 3.311 3.311 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.275 3.275 xc_pw_divergence 15 10.0 0.000 0.000 3.228 3.228 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.025 0.029 67.982 67.984 qs_energies 1 2.0 0.000 0.000 67.438 67.441 scf_env_do_scf 1 3.0 0.000 0.000 63.357 63.359 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 63.357 63.359 qs_ks_update_qs_env 15 5.0 0.000 0.000 27.292 27.298 rebuild_ks_matrix 15 6.0 0.000 0.000 27.246 27.252 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 27.246 27.252 qs_scf_new_mos 15 5.0 0.001 0.001 21.053 21.086 eigensolver 15 6.0 0.002 0.003 19.609 19.665 qs_rho_update_rho_low 16 5.0 0.000 0.000 15.156 15.163 calculate_rho_elec 16 6.0 0.007 0.007 15.156 15.163 sum_up_and_integrate 15 8.0 0.001 0.001 15.064 15.108 integrate_v_rspace 15 9.0 0.001 0.001 15.053 15.099 grid_collocate_task_list 16 7.0 13.310 13.943 13.310 13.943 grid_integrate_task_list 15 10.0 13.378 13.918 13.378 13.918 cp_fm_diag_elpa 15 7.0 0.000 0.000 13.459 13.472 cp_fm_diag_elpa_base 15 8.0 13.251 13.293 13.444 13.447 qs_vxc_create 15 8.0 0.001 0.001 11.697 11.707 calculate_dispersion_nonloc 15 9.0 0.833 1.013 9.463 9.500 fft_wrap_pw1pw2 1086 10.0 0.031 0.035 9.326 9.473 fft3d_ps 1086 12.0 2.714 2.940 7.480 7.762 fft_wrap_pw1pw2_150 765 11.0 0.178 0.207 6.212 6.265 cp_fm_cholesky_restore 45 7.0 5.951 6.094 5.951 6.094 mp_alltoall_z22v 1086 14.0 4.158 4.781 4.158 4.781 fft_wrap_pw1pw2_200 197 11.3 0.120 0.154 2.904 3.007 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.703 2.703 yz_to_x 501 12.9 0.234 0.286 2.342 2.686 x_to_yz 585 13.1 0.340 0.391 2.391 2.616 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.409 2.578 xc_vxc_pw_create 15 9.0 0.019 0.028 2.233 2.266 density_rs2pw 16 7.0 0.001 0.001 1.656 2.179 mp_waitany 520 11.3 1.269 1.989 1.269 1.989 transfer_rs2pw 82 8.0 0.001 0.002 1.263 1.924 mp_alltoall_d11v 217 9.2 1.279 1.666 1.279 1.666 build_core_ppnl 1 5.0 1.489 1.646 1.489 1.646 transfer_rs2pw_200 18 8.8 0.036 0.041 0.990 1.633 xc_pw_derive 90 11.0 0.001 0.002 1.542 1.632 vdW_energy 15 10.0 1.440 1.507 1.440 1.507 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="diag_cu144_broy", label="diag_cu144_broy", y=146.764, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="diag_cu144_broy", label="diag_cu144_broy", y=67.982, yerr=0.0 Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=57.54600000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=26.508, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.843, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=14.152, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.729, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=10.185, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.801, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.756, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=13.251, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=13.31, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.378, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.178, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=5.951, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.158, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.075 0.075 788.144 788.144 qs_energies 1 2.0 0.000 0.000 787.989 787.989 ls_scf 1 3.0 0.000 0.000 786.685 786.685 ls_scf_main 1 4.0 0.002 0.002 774.252 774.252 ls_scf_dm_to_ks 11 5.0 0.000 0.000 570.994 570.994 matrix_ls_to_qs 11 6.0 0.000 0.000 567.153 567.153 dbcsr_copy 761 7.5 2.172 2.172 530.105 530.105 dbcsr_copy_into_existing 11 8.0 527.908 527.908 527.909 527.909 density_matrix_trs4 11 5.0 0.009 0.009 197.969 197.969 dbcsr_multiply_generic 185 6.1 0.880 0.880 95.681 95.681 arnoldi_extremal 12 6.1 0.000 0.000 86.051 86.051 arnoldi_normal_ev 12 7.1 0.022 0.022 86.051 86.051 build_subspace 23 8.1 0.086 0.086 84.438 84.438 dbcsr_matrix_vector_mult 652 9.0 0.255 0.255 80.203 80.203 dbcsr_matrix_vector_mult_local 652 10.0 77.498 77.498 77.511 77.511 multiply_cannon 185 7.1 0.778 0.778 56.428 56.428 multiply_cannon_loop 185 8.1 0.518 0.518 45.096 45.096 dbcsr_complete_redistribute 23 7.5 30.402 30.402 42.907 42.907 matrix_decluster 11 7.0 0.000 0.000 39.243 39.243 make_m2s 370 7.1 0.019 0.019 32.321 32.321 multiply_cannon_multrec 185 9.1 30.804 30.804 30.855 30.855 make_images 370 8.1 7.628 7.628 29.883 29.883 dbcsr_finalize 559 7.6 1.311 1.311 21.161 21.161 dbcsr_merge_all 510 8.6 5.672 5.672 18.705 18.705 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 1.431 1.437 125.267 125.270 qs_energies 1 2.0 0.000 0.000 122.984 122.987 ls_scf 1 3.0 0.000 0.000 122.917 122.922 ls_scf_main 1 4.0 0.001 0.009 118.901 118.909 density_matrix_trs4 11 5.0 0.009 0.013 112.891 113.127 dbcsr_multiply_generic 185 6.1 0.073 0.088 99.393 99.749 multiply_cannon 185 7.1 0.053 0.061 75.120 76.717 multiply_cannon_loop 185 8.1 0.168 0.187 71.369 72.733 multiply_cannon_multrec 1480 9.1 36.012 38.768 36.307 39.086 mp_waitall_1 11936 10.3 34.095 37.907 34.095 37.907 multiply_cannon_metrocomm3 1480 9.1 0.021 0.025 25.869 28.979 mp_sum_l 799 7.0 12.552 13.877 12.552 13.877 make_m2s 370 7.1 0.039 0.046 11.010 11.264 make_images 370 8.1 0.536 0.578 10.881 11.140 arnoldi_extremal 12 6.1 0.000 0.001 10.913 10.949 arnoldi_normal_ev 12 7.1 0.002 0.007 10.912 10.949 build_subspace 23 8.1 0.032 0.040 10.636 10.644 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 9.060 10.093 dbcsr_matrix_vector_mult 652 9.0 0.030 0.076 7.772 8.442 mp_sum_dv 2907 10.4 6.553 7.222 6.553 7.222 calculate_norms 2960 9.1 5.538 5.827 5.538 5.827 make_images_data 370 9.1 0.014 0.017 5.128 5.710 ls_scf_dm_to_ks 11 5.0 0.000 0.000 5.273 5.464 multiply_cannon_metrocomm1 1480 9.1 0.008 0.010 3.435 4.671 hybrid_alltoall_any 393 9.9 0.275 1.431 3.954 4.351 mp_sum_d 1407 6.7 3.619 4.001 3.619 4.001 dbcsr_complete_redistribute 23 7.5 1.521 1.752 3.430 3.699 matrix_ls_to_qs 11 6.0 0.000 0.000 3.139 3.402 matrix_decluster 11 7.0 0.000 0.003 2.908 3.167 dbcsr_matrix_vector_mult_local 652 10.0 2.474 2.895 2.479 2.900 Gram_Schmidt_ortho 702 9.1 0.019 0.080 2.381 2.893 ls_scf_init_scf 1 4.0 0.000 0.000 2.841 2.843 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.806 2.815 dbcsr_dot 144 6.3 0.513 0.560 2.503 2.665 make_images_sizes 370 9.1 0.001 0.002 2.128 2.590 mp_alltoall_i44 370 10.1 2.126 2.589 2.126 2.589 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.574 2.578 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="bench_dftb", label="bench_dftb", y=788.144, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="bench_dftb", label="bench_dftb", y=125.267, yerr=0.0 Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=113.904, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=527.908, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=77.498, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=30.804, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=30.402, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=7.628, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_dv", label="mp_sum_dv", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=25.98599999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.474, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=36.012, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.521, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.536, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.538, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_dv", label="mp_sum_dv", y=6.553, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=12.552, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=34.095, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.004 49.931 49.931 lib_test 1 2.0 0.000 0.000 49.926 49.926 dbcsr_run_tests 3 3.0 0.000 0.000 49.926 49.926 test_multiplies_multiproc 3 4.0 0.002 0.002 35.466 35.466 dbcsr_multiply_generic 9 5.0 0.002 0.002 26.452 26.452 multiply_cannon 9 6.0 0.002 0.002 15.530 15.530 multiply_cannon_loop 9 7.0 0.031 0.031 15.226 15.226 multiply_cannon_multrec 9 8.0 15.195 15.195 15.196 15.196 dbcsr_make_random_matrix 9 4.0 10.195 10.195 14.313 14.313 dbcsr_finalize 27 5.7 0.048 0.048 8.361 8.361 dbcsr_merge_all 18 6.5 3.332 3.332 7.213 7.213 dbcsr_redistribute 9 5.0 4.786 4.786 7.188 7.188 make_m2s 18 6.0 0.001 0.001 5.940 5.940 make_images 18 7.0 0.559 0.559 5.877 5.877 make_images_data 18 8.0 0.001 0.001 4.670 4.670 hybrid_alltoall_any 18 9.0 4.641 4.641 4.642 4.642 dbcsr_data_release 975 7.6 3.965 3.965 3.965 3.965 tree_to_linear_d 9 7.0 2.534 2.534 2.534 2.534 mp_alltoall_d11v 27 6.0 1.907 1.907 1.907 1.907 dbcsr_destroy 93 5.8 0.001 0.001 1.894 1.894 dbcsr_data_copy_aa2 9 7.0 1.301 1.301 1.301 1.301 dbcsr_work_destroy_all 45 7.6 0.002 0.002 1.071 1.071 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.008 25.826 25.827 lib_test 1 2.0 0.000 0.000 25.784 25.813 dbcsr_run_tests 3 3.0 0.000 0.001 25.782 25.811 test_multiplies_multiproc 3 4.0 0.001 0.003 24.987 25.044 dbcsr_multiply_generic 9 5.0 0.001 0.002 23.095 23.199 multiply_cannon 9 6.0 0.003 0.003 20.213 21.083 multiply_cannon_loop 9 7.0 0.004 0.004 19.842 20.735 multiply_cannon_multrec 72 8.0 14.507 15.449 14.508 15.449 mp_waitall_1 576 9.2 5.814 7.433 5.814 7.433 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 5.147 6.821 mp_sum_l 70 5.1 1.281 2.241 1.281 2.241 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 1.272 2.232 dbcsr_make_random_matrix 9 4.0 0.554 1.006 0.752 1.098 make_m2s 18 6.0 0.001 0.001 0.930 0.987 make_images 18 7.0 0.017 0.019 0.927 0.984 dbcsr_finalize 27 5.7 0.001 0.001 0.738 0.877 dbcsr_merge_all 18 6.5 0.119 0.147 0.657 0.785 dbcsr_data_release 444 7.6 0.632 0.728 0.632 0.728 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.180 0.694 dbcsr_redistribute 9 5.0 0.242 0.317 0.586 0.646 make_images_data 18 8.0 0.001 0.001 0.538 0.636 dbcsr_destroy 111 5.9 0.001 0.001 0.554 0.630 hybrid_alltoall_any 18 9.0 0.045 0.234 0.469 0.591 dbcsr_data_copy_aa2 18 7.5 0.387 0.525 0.387 0.525 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="dbcsr", label="dbcsr", y=49.931, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="dbcsr", label="dbcsr", y=25.826, yerr=0.0 Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=11.149000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.195, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=10.195, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=4.786, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=4.641, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.965, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.7510000000000012, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.507, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.554, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.242, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.045, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.632, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=5.814, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.281, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.084 0.084 188.711 188.711 qs_mol_dyn_low 1 2.0 0.005 0.005 186.928 186.928 qs_forces 6 3.8 0.001 0.001 131.692 131.692 qs_energies 6 4.8 0.001 0.001 126.326 126.326 scf_env_do_scf 6 5.8 0.001 0.001 123.197 123.197 scf_env_do_scf_inner_loop 113 6.2 0.016 0.016 117.830 117.830 rebuild_ks_matrix 119 8.1 0.000 0.000 95.166 95.166 qs_ks_build_kohn_sham_matrix 119 9.1 0.022 0.022 95.166 95.166 qs_ks_update_qs_env 119 7.3 0.001 0.001 90.306 90.306 velocity_verlet 5 3.0 0.005 0.005 79.943 79.943 fft_wrap_pw1pw2 2059 12.4 0.047 0.047 79.625 79.625 fft_wrap_pw1pw2_150 1321 13.9 21.601 21.601 78.222 78.222 qs_vxc_create 119 10.1 0.002 0.002 61.201 61.201 xc_vxc_pw_create 119 11.1 2.071 2.071 61.199 61.199 qmmm_el_coupling 6 3.8 0.000 0.000 46.424 46.424 qmmm_elec_with_gaussian 6 4.8 0.024 0.024 46.420 46.420 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 45.612 45.612 xc_pw_derive 714 13.1 0.010 0.010 45.106 45.106 qmmm_elec_gaussian_low_G 6 6.8 44.730 44.730 44.730 44.730 xc_pw_divergence 119 12.1 0.004 0.004 30.815 30.815 fft3d_s 2060 14.4 29.314 29.314 29.330 29.330 xc_rho_set_and_dset_create 119 12.1 3.085 3.085 28.068 28.068 qs_rho_update_rho_low 119 7.3 0.001 0.001 27.400 27.400 calculate_rho_elec 119 8.3 1.673 1.673 27.399 27.399 pw_scatter_s 1095 14.8 21.694 21.694 21.694 21.694 density_rs2pw 119 9.3 0.007 0.007 15.740 15.740 sum_up_and_integrate 119 10.1 0.003 0.003 12.945 12.945 integrate_v_rspace 119 11.1 0.037 0.037 12.769 12.769 qs_ks_ddapc 119 10.1 0.003 0.003 12.260 12.260 grid_collocate_task_list 119 9.3 9.986 9.986 9.986 9.986 potential_pw2rs 119 12.1 0.101 0.101 8.557 8.557 pw_integral_ab_c1d_c1d_gs 280 11.0 7.903 7.903 7.903 7.903 qs_scf_new_mos 113 7.2 0.001 0.001 6.185 6.185 qs_scf_loop_do_ot 113 8.2 0.001 0.001 6.184 6.184 ot_scf_mini 113 9.2 0.002 0.002 5.962 5.962 pw_poisson_solve 125 9.9 0.003 0.003 5.913 5.913 qmmm_forces 6 3.8 0.003 0.003 5.722 5.722 cp_ddapc_apply_CD 119 11.1 0.027 0.027 5.697 5.697 init_scf_loop 6 6.8 0.000 0.000 5.362 5.362 dbcsr_multiply_generic 2598 12.3 0.155 0.155 5.352 5.352 qmmm_forces_with_gaussian 6 4.8 0.027 0.027 5.287 5.287 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.925 4.925 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.458 4.458 pw_gather_s 964 14.0 4.406 4.406 4.406 4.406 grid_integrate_task_list 119 12.1 4.174 4.174 4.174 4.174 ot_mini 113 10.2 0.001 0.001 4.072 4.072 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.058 72.764 72.767 qs_mol_dyn_low 1 2.0 0.004 0.004 71.286 71.347 qs_forces 6 3.8 0.001 0.001 54.873 54.875 qs_energies 6 4.8 0.000 0.001 52.607 52.608 scf_env_do_scf 6 5.8 0.000 0.001 51.361 51.362 scf_env_do_scf_inner_loop 113 6.2 0.004 0.023 49.240 49.241 rebuild_ks_matrix 119 8.1 0.000 0.001 34.880 34.909 qs_ks_build_kohn_sham_matrix 119 9.1 0.024 0.027 34.880 34.909 qs_ks_update_qs_env 119 7.3 0.002 0.002 32.907 32.936 velocity_verlet 5 3.0 0.002 0.005 28.225 28.228 fft_wrap_pw1pw2 2059 12.4 0.056 0.067 21.366 21.946 fft_wrap_pw1pw2_150 1321 13.9 0.797 1.006 20.135 20.662 fft3d_ps 2059 14.4 7.375 8.435 16.786 17.709 qs_vxc_create 119 10.1 0.004 0.004 16.962 16.971 xc_vxc_pw_create 119 11.1 0.196 0.275 16.958 16.968 qs_rho_update_rho_low 119 7.3 0.001 0.001 13.604 13.610 calculate_rho_elec 119 8.3 0.051 0.059 13.604 13.609 xc_pw_derive 714 13.1 0.011 0.013 12.893 13.224 sum_up_and_integrate 119 10.1 0.003 0.003 12.650 12.702 integrate_v_rspace 119 11.1 0.006 0.007 12.566 12.623 density_rs2pw 119 9.3 0.006 0.007 8.900 9.262 mp_alltoall_z22v 2059 16.4 7.920 9.012 7.920 9.012 xc_pw_divergence 119 12.1 0.005 0.006 8.641 8.952 qmmm_forces 6 3.8 0.003 0.004 8.910 8.910 qmmm_forces_with_gaussian 6 4.8 0.007 0.011 8.303 8.731 potential_pw2rs 119 12.1 0.007 0.009 8.180 8.208 xc_rho_set_and_dset_create 119 12.1 0.428 0.571 7.825 8.141 qmmm_el_coupling 6 3.8 0.000 0.000 6.520 6.854 qmmm_elec_with_gaussian 6 4.8 0.005 0.007 6.518 6.852 transfer_pw2rs 500 12.8 0.008 0.010 5.902 5.938 transfer_rs2pw 488 10.2 0.010 0.012 4.935 5.317 x_to_yz 1095 15.8 0.824 0.977 4.840 5.278 mp_waitany 4028 12.8 4.450 5.257 4.450 5.257 yz_to_x 964 15.0 0.606 0.768 4.511 5.229 qs_scf_new_mos 113 7.2 0.001 0.001 5.057 5.078 qs_scf_loop_do_ot 113 8.2 0.001 0.001 5.057 5.078 ot_scf_mini 113 9.2 0.002 0.002 4.859 4.875 grid_collocate_task_list 119 9.3 4.430 4.807 4.430 4.807 dbcsr_multiply_generic 2598 12.3 0.079 0.088 4.389 4.500 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.131 4.200 grid_integrate_task_list 119 12.1 3.971 4.165 3.971 4.165 transfer_pw2rs_150 125 13.9 1.444 1.701 4.063 4.161 transfer_rs2pw_150 125 11.2 1.194 1.391 3.507 3.901 pw_restrict_s3 18 5.8 1.679 1.818 3.293 3.750 qmmm_forces_gaussian_low_G 6 6.8 3.363 3.432 3.363 3.432 ot_mini 113 10.2 0.001 0.001 2.910 2.927 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.458 2.793 pw_prolongate_s3 18 6.8 1.229 1.328 2.458 2.793 qs_ks_ddapc 119 10.1 0.003 0.004 2.479 2.611 mp_sum_dm3 33 5.7 2.437 2.504 2.437 2.504 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 2.277 2.465 mp_waitall_1 178435 16.4 1.994 2.251 1.994 2.251 qs_ot_get_derivative 113 11.2 0.001 0.001 2.236 2.250 mp_sum_d 5830 12.2 1.792 2.148 1.792 2.148 init_scf_loop 6 6.8 0.000 0.000 2.115 2.115 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.013 2.014 pw_copy 2027 12.4 1.523 1.980 1.523 1.980 pw_derive 1089 13.4 1.480 1.918 1.480 1.918 qmmm_elec_gaussian_low_G 6 6.8 1.712 1.806 1.712 1.806 pw_poisson_solve 125 9.9 0.004 0.004 1.473 1.718 pw_gather_p 964 14.0 1.539 1.717 1.539 1.717 transfer_pw2rs_40 119 14.1 0.272 0.307 1.399 1.642 pw_integral_ab_r3d_r3d_rs 2481 7.4 1.095 1.299 1.347 1.494 pw_zero 2653 8.4 1.148 1.486 1.148 1.486 qs_ot_get_derivative_diag 85 12.0 0.002 0.002 1.466 1.484 pw_axpy 2529 9.3 1.179 1.456 1.179 1.456 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="MQAE_single_node", label="MQAE_single_node", y=188.711, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="MQAE_single_node", label="MQAE_single_node", y=72.764, yerr=0.0 Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=57.21200000000002, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=44.73, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=29.314, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=21.694, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=21.601, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.986, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=4.174, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=42.108999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=1.712, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.797, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.43, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.971, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_waitany", label="mp_waitany", y=4.45, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=7.92, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.375, yerr=0.0 Summary: Performance test took 47 minutes. Status: OK ---> Removed intermediate container f276ac2e3c99 ---> 7fc40529b50d Step 42/43 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 34e82d45efbb ---> Removed intermediate container 34e82d45efbb ---> ef36329e849e Step 43/43 : ENTRYPOINT [] ---> Running in 45988b874f8c ---> Removed intermediate container 45988b874f8c ---> ec14f32df086 [Warning] One or more build-args [GIT_COMMIT_SHA SPACK_CACHE] were not consumed Successfully built ec14f32df086 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2026-01-08 07:13:18+00:00