StartDate: 2026-06-18 06:04:11+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 9ca34697db2ebb6305462e6856aa6c1bb4721204 CommitTime: 2026-06-17 11:56:47 +0200 CommitAuthor: SY Wang CommitSubject: Improve CP2K CMake package metadata exports (#5400) #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=9ca34697db2ebb6305462e6856aa6c1bb4721204 SPACK_CACHE=gs://cp2k-spack-cache Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 420.6MB Step 1/42 : FROM ubuntu:26.04 26.04: Pulling from library/ubuntu 6f5c5aa4e145: Pulling fs layer 1c24335ddd46: Pulling fs layer 1c24335ddd46: Verifying Checksum 1c24335ddd46: Download complete 6f5c5aa4e145: Verifying Checksum 6f5c5aa4e145: Download complete 6f5c5aa4e145: Pull complete 1c24335ddd46: Pull complete Digest: sha256:f3d28607ddd78734bb7f71f117f3c6706c666b8b76cbff7c9ff6e5718d46ff64 Status: Downloaded newer image for ubuntu:26.04 ---> 30ba44506a6d Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> aeb614fb0c08 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 2c8c6a15e040 Step 4/42 : RUN ./install_requirements.sh ubuntu:26.04 ---> Using cache ---> 7648768d280d Step 5/42 : RUN mkdir scripts ---> Using cache ---> a27de00ed5f2 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./tools/build_utils/fypp ./scripts/ ---> Using cache ---> 9b85db74c91a Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> a95f6cfc719a Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-dbcsr --with-gcc=system --dry-run ---> Using cache ---> 808541ef644d Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> bb6b264e3dd8 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> d725fad0b6e5 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> abff97ca0b1e Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 219c187a1447 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 31764bddc3a4 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 6383897a7400 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 86fa3cda1bee Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 658cc2cdb851 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 985a05ad730e Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> e1ac08a06610 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> ce479c48ed12 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 90c2e8f38398 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> e6e84f632a4c Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> fd2970a74861 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 2756b39ca8e3 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> fb8cb8008253 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> a817ab41ada8 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> b42fb889ee58 Step 27/42 : COPY ./tools/toolchain/scripts/stage9/ ./scripts/stage9/ ---> Using cache ---> b31fe6120819 Step 28/42 : RUN ./scripts/stage9/install_stage9.sh && rm -rf ./build ---> Using cache ---> 803d0e8fcc47 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> d73157c882d9 Step 30/42 : COPY ./src ./src ---> Using cache ---> 004fbf01fc82 Step 31/42 : COPY ./data ./data ---> Using cache ---> 5f5f2d6ad790 Step 32/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> bfe430a8cc07 Step 33/42 : COPY ./cmake ./cmake ---> Using cache ---> 46672457fdea Step 34/42 : COPY ./CMakeLists.txt . ---> Using cache ---> 5db1fc2039ef Step 35/42 : COPY ./tools/docker/scripts/build_cp2k.sh . ---> Using cache ---> 4bfbebf3b955 Step 36/42 : RUN ./build_cp2k.sh toolchain psmp ---> Running in 4ee4295fedfd ==================== Building CP2K ==================== -- The Fortran compiler identification is GNU 15.2.0 -- The C compiler identification is GNU 15.2.0 -- The CXX compiler identification is GNU 15.2.0 -- Detecting Fortran compiler ABI info -- Detecting Fortran compiler ABI info - done -- Check for working Fortran compiler: /usr/bin/gfortran - skipped -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: /usr/bin/gcc - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: /usr/bin/g++ - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found PkgConfig: /usr/bin/pkg-config (found version "2.5.1") -- Found Python: /usr/bin/python3.14 (found version "3.14.4") found components: Interpreter -- Found MPI_C: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpi.so (found version "5.0") -- Found MPI_CXX: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpicxx.so (found version "5.0") -- Found MPI_Fortran: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpifort.so (found version "5.0") -- Found MPI: TRUE (found version "5.0") found components: C CXX Fortran -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Could NOT find MKL (missing: CP2K_MKL_INCLUDE_DIRS) -- Checking for module 'openblas' -- Found openblas, version 0.3.33 -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/include -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found Lapack: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so ------------------------------------------------------------ - DBCSR - ------------------------------------------------------------ -- Found MPI: TRUE (found version "5.0") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Using LIBXS + LIBXSMM for Small Matrix Multiplication -- Checking for module 'scalapack' -- Package 'mpi', required by 'scalapack', not found Package 'lapack', required by 'scalapack', not found Package 'blas', required by 'scalapack', not found -- Found SCALAPACK: /opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a ------------------------------------------------------------ - OPENMP - ------------------------------------------------------------ -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: Fortran C CXX ------------------------------------------------------------ - Other dependencies - ------------------------------------------------------------ -- Checking for one of the modules 'elpa_openmp' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a;:libopenblas.a -- BLAS_LIBRARIES Not Given: Will Perform Search -- Checking if OpenMP is GNU -- Checking if OpenMP is GNU -- YES -- Could NOT find IntelMKL (missing: IntelMKL_LIBRARIES IntelMKL_INCLUDE_DIR lp64) -- Could NOT find IBMESSL (missing: IBMESSL_LIBRARIES IBMESSL_INCLUDE_DIR lp64) -- Could NOT find BLIS (missing: BLIS_LIBRARIES BLIS_INCLUDE_DIR lp64) -- Found OpenBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so found components: lp64 -- Performing Test BLAS_LOWER_UNDERSCORE -- Performing Test BLAS_LOWER_UNDERSCORE -- found -- Found BLAS: TRUE found components: lp64 -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Found Torch: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so -- Found HDF5: hdf5-shared;hdf5_fortran-shared (found version "2.1.1") found components: C Fortran -- Found MPI: TRUE (found version "5.0") found components: CXX -- Found OPENBLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found Blas: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- Found LibVORI: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a -- Checking for one of the modules 'fftw3' -- Checking for one of the modules 'fftw3f' -- Checking for one of the modules 'fftw3l' -- Checking for one of the modules 'fftw3q' -- Found Fftw: /opt/cp2k-toolchain/install/fftw-3.3.11/include -- Checking for module 'libint2' -- Package 'libint2' not found -- Found Libint2: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/include -- Looking for Fortran cheev -- Looking for Fortran cheev - found -- Component omp of Spglib: NOT FOUND -- Component fortran of Spglib: FOUND (LIB_TYPE: static) -- Found package: Spglib -- Found libsmeagol: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a -- Found BLAS: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so -- mctc-lib: Find installed package -- multicharge: Find installed package -- DFTD4: found version 4.2.0, using v4.2+ API -- toml-f: Find installed package -- s-dftd3: Find installed package -- DFTD4: found version 4.2.0, using v4.2+ API -- Found GSL: /opt/cp2k-toolchain/install/gsl-2.8/include (found version "2.8") -- Checking for one of the modules 'libxc>=3.0.0' -- Found LibXC: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a (Required is at least version "3.0.0") -- Found LibSPG: /opt/cp2k-toolchain/install/spglib-2.7.0/lib/libsymspg.a -- Found HDF5: hdf5-shared (found version "2.1.1") found components: C -- Found FFTW: /opt/cp2k-toolchain/install/fftw-3.3.11/include -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") -- Checking for one of the modules 's-dftd3' -- Checking for one of the modules 'mctc-lib' -- Found DFTD3: /opt/cp2k-toolchain/install/tblite-0.6.0/lib/libs-dftd3.a -- Checking for one of the modules 'dftd4' -- Checking for one of the modules 'multicharge' -- Found DFTD4: /opt/cp2k-toolchain/install/tblite-0.6.0/lib/libdftd4.a -- Found LAPACK: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so;-lm;-ldl -- Checking for one of the modules 'elpa;elpa_openmp;elpa-openmp-2019.05.001;elpa_openmp-2019.11.001;elpa_openmp-2020.05.001;elpa-2019.05.001;elpa-2019.11.001;elpa-2020.05.001' -- Found Elpa: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so -- Checking for module 'libvdwxc>=0.5.0' -- Found libvdwxc, version 0.5.0 -- Checking for module 'fftw3' -- Found fftw3, version 3.3.11 -- Found LibVDWXC: vdwxc;fftw3 (Required is at least version "0.5.0") -- Checking for one of the modules 'plumed;plumedInternals' -- Found Plumed: /opt/cp2k-toolchain/install/plumed-2.10.0/include -- Found MPI: TRUE (found version "5.0") found components: CXX C Fortran -- Found OpenMP_CXX: -fopenmp (found version "4.5") -- Found OpenMP_C: -fopenmp (found version "4.5") -- Found OpenMP_Fortran: -fopenmp (found version "4.5") -- Found OpenMP: TRUE (found version "4.5") found components: CXX C Fortran -- Checking for modules 'mclf;mcl' -- Package 'mclf' not found -- Package 'mcl' not found -- Found MiMiC: True -- Checking for module 'trexio' -- Package 'trexio' not found -- Found TrexIO: /opt/cp2k-toolchain/install/trexio-2.6.1/include -- Setting build type to 'Release' as none was specified. -- Performing Test f2008-norm2 -- Performing Test f2008-norm2 - Success -- Performing Test f2008-block_construct -- Performing Test f2008-block_construct - Success -- Performing Test f2008-contiguous -- Performing Test f2008-contiguous - Success -- Performing Test f95-reshape-order-allocatable -- Performing Test f95-reshape-order-allocatable - Success -- FYPP preprocessor found. -- Adding libxs_jit.F from dependency libxs for compilation -------------------------------------------------------------------- - - - Summary of enabled dependencies - - - -------------------------------------------------------------------- - BLAS - vendor: OpenBLAS - include directories: /opt/cp2k-toolchain/install/openblas-0.3.33/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so - LAPACK - include directories: /opt/cp2k-toolchain/install/openblas-0.3.33/include - libraries: /opt/cp2k-toolchain/install/openblas-0.3.33/lib/libopenblas.so - MPI - include directories: /opt/cp2k-toolchain/install/mpich-5.0.1/include - libraries: /opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpicxx.so;/opt/cp2k-toolchain/install/mpich-5.0.1/lib/libmpi.so - MPI_F08: ON - ScaLAPACK - vendor: auto - include directories: - libraries: /opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a - LibXC - version: 7.0.0 - include directories: /opt/cp2k-toolchain/install/libxc-7.0.0/include/ - libraries: /opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxcf03.a;/opt/cp2k-toolchain/install/libxc-7.0.0/lib/libxc.a - GauXC - version: 1.0.0 - install directories: /opt/cp2k-toolchain/install/gauxc-1.1-skala-cp2k-fixes/lib/cmake/gauxc - Spglib - include directories: /opt/cp2k-toolchain/install/spglib-2.7.0/include;$ - LibTorch - extra CXX flags: -D_GLIBCXX_USE_CXX11_ABI=1 - include directories: /opt/cp2k-toolchain/install/libtorch-2.7.1/include;/opt/cp2k-toolchain/install/libtorch-2.7.1/include/torch/csrc/api/include - libraries: /opt/cp2k-toolchain/install/libtorch-2.7.1/lib/libtorch.so - HDF5 - version: 2.1.1 - include directories: /opt/cp2k-toolchain/install/hdf5-2.1.1/include - libraries: hdf5-shared - FFTW3 - include directories: /opt/cp2k-toolchain/install/fftw-3.3.11/include - libraries: /opt/cp2k-toolchain/install/fftw-3.3.11/lib/libfftw3.a - PLUMED - include directories: /opt/cp2k-toolchain/install/plumed-2.10.0/include - libraries: /opt/cp2k-toolchain/install/plumed-2.10.0/lib/libplumed.so - LIBXS - include directories: - libraries: - SpLA - include directories: /opt/cp2k-toolchain/install/SpLA-1.6.1/include;/opt/cp2k-toolchain/install/SpLA-1.6.1/include/spla - libraries: $;$;MPI::MPI_CXX;MPI::MPI_C;MPI::MPI_Fortran - MiMiC - include directories: /opt/cp2k-toolchain/install/mcl-3.0.0/include/MiMiC/ - libraries: /opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmclf.so;/opt/cp2k-toolchain/install/mcl-3.0.0/lib/MiMiC/libmcl.so - DFTD4 - include directories : /opt/cp2k-toolchain/install/tblite-0.6.0/include;/opt/cp2k-toolchain/install/tblite-0.6.0/include/dftd4/GNU-15.2.0 - libraries : - DeePMD - ACE - include directories: /opt/cp2k-toolchain/install/lammps-user-pace-v.2025.12.4.p1/include/pace - libraries: pace::yaml-cpp-pace;pace::cnpy-static;; - LibSMEAGOL - include directories: /opt/cp2k-toolchain/install/libsmeagol-1.2/include - libraries: /opt/cp2k-toolchain/install/libsmeagol-1.2/lib/libsmeagol.a - TBLITE : - include directories : /opt/cp2k-toolchain/install/tblite-0.6.0/include;/opt/cp2k-toolchain/install/tblite-0.6.0/include/tblite/GNU-15.2.0 - tblite libraries : - SIRIUS - include directories: - libraries: - COSMA - include directories: /opt/cp2k-toolchain/install/COSMA-2.8.4/include - libraries: MPI::MPI_CXX;costa::costa;$;$;$<$:cosma::BLAS::blas>;$;$<$:Tiled-MM::Tiled-MM>;$<$:Tiled-MM::Tiled-MM>;$<$:semiprof::semiprof>;$<$:cosma::scalapack::scalapack> - Libint2 - include directories: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/include - libraries: /opt/cp2k-toolchain/install/libint-v2.13.1-cp2k-lmax-5/lib/libint2.a - LibFCI - include directories: /opt/cp2k-toolchain/install/libfci-0.1.0/include - libraries: libfci::fci - Libvori - libraries: /opt/cp2k-toolchain/install/libvori-220621/lib/libvori.a - ELPA - include directories: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/include/elpa_openmp-2026.02.001 - libraries: /opt/cp2k-toolchain/install/elpa-2026.02.001/cpu/lib/libelpa_openmp.so;/opt/cp2k-toolchain/install/scalapack-2.2.3/lib/libscalapack.a;:libopenblas.a - TREXIO - include directories: /opt/cp2k-toolchain/install/trexio-2.6.1/include - libraries: /opt/cp2k-toolchain/install/trexio-2.6.1/lib/libtrexio.so - GreenX - include directories: /opt/cp2k-toolchain/install/greenX-2.2/include/modules - libraries: /opt/cp2k-toolchain/install/greenX-2.2/lib/libGXCommon.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_minimax.so.0.0.1;/opt/cp2k-toolchain/install/greenX-2.2/lib/libgx_ac.so.0.0.1 -------------------------------------------------------------------- - - - List of dependencies not included in this build - - - -------------------------------------------------------------------- - PEXSI - openPMD - GPU acceleration is disabled - DLA-Future After building and installing CP2K the regtests can be run with the following command: /opt/cp2k/tests/do_regtest.py /opt/cp2k/bin psmp -- Configuring done (7.4s) -- Generating done (0.4s) -- Build files have been written to: /opt/cp2k/build Compiling CP2K ... done ---> Removed intermediate container 4ee4295fedfd ---> ff94888adacc Step 37/42 : COPY ./benchmarks ./benchmarks ---> 165c110ee217 Step 38/42 : COPY ./tools/regtesting ./tools/regtesting ---> b910c64a6e54 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 427c7950bdfd Step 40/42 : RUN ./test_performance.sh "toolchain" 2>&1 | tee report.log ---> Running in 60097fa32564 ============== CP2K Binary Flags ============= cp2kflags: omp libint fftw3 libxc gauxc gauxc_mpi gauxc_onedft gauxc_host elpa parallel scalapack mpi_f08 cosma ace deepmd libxs libxsmm plumed2 spglib libdftd4 dftd4_v4_2 s_dftd3 mctc-lib tblite sirius libvori libbqb libtorch mimic libvdwxc hdf5 trexio libfci libsmeagol greenx ========== Checking Benchmark Inputs ========= Found 83 input files and 0 errors. ========== Running Performance Test ========== Plot: name="total_timings_32omp", title="Total Timings with 32 OpenMP Threads", ylabel="time [s]" Plot: name="total_timings_32mpi", title="Total Timings with 32 MPI Ranks", ylabel="time [s]" Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.028 0.028 88.774 88.774 qs_mol_dyn_low 1 2.0 0.004 0.004 88.231 88.231 qs_forces 11 3.9 0.001 0.001 88.191 88.191 qs_energies 11 4.9 0.001 0.001 82.641 82.641 scf_env_do_scf 11 5.9 0.001 0.001 71.827 71.827 scf_env_do_scf_inner_loop 108 6.5 0.013 0.013 58.183 58.183 velocity_verlet 10 3.0 0.002 0.002 57.456 57.456 dbcsr_multiply_generic 2286 12.5 0.179 0.179 27.824 27.824 qs_scf_new_mos 108 7.5 0.001 0.001 25.513 25.513 qs_scf_loop_do_ot 108 8.5 0.001 0.001 25.512 25.512 ot_scf_mini 108 9.5 0.002 0.002 23.983 23.983 rebuild_ks_matrix 119 8.3 0.001 0.001 20.018 20.018 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.014 20.018 20.018 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.645 18.645 qs_rho_update_rho_low 119 7.7 0.001 0.001 17.825 17.825 calculate_rho_elec 119 8.7 0.962 0.962 17.824 17.824 make_m2s 4572 13.5 0.044 0.044 16.520 16.520 ot_mini 108 10.5 0.001 0.001 16.294 16.294 grid_collocate_task_list 119 9.7 13.566 13.566 13.566 13.566 init_scf_loop 11 6.9 0.000 0.000 13.545 13.545 make_images 4572 14.5 1.940 1.940 11.583 11.583 sum_up_and_integrate 119 10.3 0.001 0.001 11.558 11.558 integrate_v_rspace 119 11.3 0.111 0.111 11.502 11.502 prepare_preconditioner 11 7.9 0.000 0.000 10.380 10.380 make_preconditioner 11 8.9 0.000 0.000 10.380 10.380 grid_integrate_task_list 119 12.3 9.250 9.250 9.250 9.250 ot_diis_step 108 11.5 0.004 0.004 9.197 9.197 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 9.189 9.189 apply_single 119 13.6 0.000 0.000 9.189 9.189 make_full_inverse_cholesky 11 9.9 0.023 0.023 8.956 8.956 hybrid_alltoall_any 4725 16.4 7.398 7.398 7.806 7.806 multiply_cannon 2286 13.5 0.296 0.296 7.533 7.533 make_images_data 4572 15.5 0.037 0.037 7.524 7.524 qs_ot_get_derivative 108 11.5 0.001 0.001 7.084 7.084 multiply_cannon_loop 2286 14.5 0.059 0.059 7.008 7.008 multiply_cannon_multrec 2286 15.5 6.897 6.897 6.948 6.948 fft_wrap_pw1pw2 1201 11.6 0.010 0.010 6.000 6.000 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 5.939 5.939 dbcsr_make_dense_low 5837 15.5 0.031 0.031 5.221 5.221 make_dense_data 5837 16.5 4.579 4.579 5.176 5.176 fft_wrap_pw1pw2_140 487 12.2 0.905 0.905 5.078 5.078 dbcsr_make_images_dense 3978 14.8 0.016 0.016 4.500 4.500 init_scf_run 11 5.9 0.000 0.000 4.264 4.264 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.262 4.262 dbcsr_complete_redistribute 329 12.2 1.773 1.773 4.238 4.238 wfi_extrapolate 11 7.9 0.001 0.001 3.775 3.775 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.726 3.726 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.348 3.348 density_rs2pw 119 9.7 0.004 0.004 3.296 3.296 dbcsr_copy 2102 12.0 0.244 0.244 3.056 3.056 qs_ot_get_p 119 10.4 0.001 0.001 3.056 3.056 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.019 3.019 qs_create_task_list 11 7.9 0.000 0.000 2.858 2.858 generate_qs_task_list 11 8.9 1.819 1.819 2.858 2.858 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 2.820 2.820 dbcsr_copy_into_existing 22 7.9 2.799 2.799 2.800 2.800 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.726 2.726 fft3d_s 1202 13.6 2.576 2.576 2.590 2.590 dbcsr_data_release 278921 16.0 2.577 2.577 2.577 2.577 cp_fm_cholesky_invert 11 10.9 2.559 2.559 2.559 2.559 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.257 2.257 dbcsr_finalize 5048 13.8 0.130 0.130 2.220 2.220 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.150 2.150 potential_pw2rs 119 12.3 0.046 0.046 2.142 2.142 dbcsr_dot 1205 11.9 2.060 2.060 2.067 2.067 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.054 2.054 pw_poisson_solve 119 10.3 0.002 0.002 2.044 2.044 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.032 2.032 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 1.977 1.977 cp_fm_cholesky_decompose 22 10.9 1.870 1.870 1.870 1.870 calculate_dm_sparse 119 9.5 0.001 0.001 1.777 1.777 dbcsr_merge_all 4530 15.0 0.574 0.574 1.776 1.776 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.012 41.342 41.343 qs_mol_dyn_low 1 2.0 0.003 0.005 41.207 41.212 qs_forces 11 3.9 0.002 0.002 41.163 41.164 qs_energies 11 4.9 0.001 0.001 38.599 38.600 scf_env_do_scf 11 5.9 0.000 0.001 35.452 35.453 scf_env_do_scf_inner_loop 108 6.5 0.003 0.019 32.346 32.347 velocity_verlet 10 3.0 0.001 0.003 24.550 24.552 rebuild_ks_matrix 119 8.3 0.001 0.003 15.078 15.109 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.018 15.078 15.109 qs_ks_update_qs_env 119 7.6 0.001 0.001 13.436 13.466 dbcsr_multiply_generic 2286 12.5 0.077 0.080 13.157 13.233 sum_up_and_integrate 119 10.3 0.002 0.003 10.659 10.700 integrate_v_rspace 119 11.3 0.004 0.005 10.636 10.677 qs_scf_new_mos 108 7.5 0.001 0.001 10.631 10.666 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.630 10.665 qs_rho_update_rho_low 119 7.7 0.001 0.001 10.643 10.645 calculate_rho_elec 119 8.7 0.030 0.031 10.642 10.645 multiply_cannon 2286 13.5 0.128 0.137 9.658 10.097 ot_scf_mini 108 9.5 0.002 0.002 9.969 10.004 multiply_cannon_loop 2286 14.5 0.083 0.088 9.085 9.206 mp_waitall_1 158411 16.6 7.409 7.823 7.409 7.823 grid_collocate_task_list 119 9.7 6.855 7.035 6.855 7.035 grid_integrate_task_list 119 12.3 6.785 7.000 6.785 7.000 ot_mini 108 10.5 0.001 0.001 5.760 5.799 multiply_cannon_metrocomm3 18288 15.5 0.035 0.037 5.510 5.717 density_rs2pw 119 9.7 0.006 0.007 3.428 3.763 multiply_cannon_multrec 18288 15.5 3.284 3.450 3.293 3.458 potential_pw2rs 119 12.3 0.007 0.007 3.232 3.264 init_scf_loop 11 6.9 0.000 0.000 3.088 3.089 fft_wrap_pw1pw2 1201 11.6 0.018 0.022 2.957 3.012 qs_ot_get_derivative 108 11.5 0.001 0.001 2.911 2.943 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.791 2.844 apply_single 119 13.6 0.000 0.000 2.791 2.844 ot_diis_step 108 11.5 0.004 0.004 2.830 2.832 make_m2s 4572 13.5 0.041 0.044 2.553 2.648 fft_wrap_pw1pw2_140 487 12.2 0.146 0.164 2.424 2.534 transfer_pw2rs 487 13.2 0.005 0.006 2.400 2.412 make_images 4572 14.5 0.105 0.108 2.252 2.352 transfer_rs2pw 487 10.6 0.006 0.007 2.018 2.339 fft3d_ps 1201 13.6 1.001 1.078 2.161 2.305 init_scf_run 11 5.9 0.000 0.000 2.233 2.235 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.233 2.233 wfi_extrapolate 11 7.9 0.001 0.001 2.011 2.011 mp_waitany 9880 13.7 1.594 1.898 1.594 1.898 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.794 1.798 qs_ot_get_p 119 10.4 0.001 0.001 1.422 1.476 prepare_preconditioner 11 7.9 0.000 0.000 1.390 1.401 make_preconditioner 11 8.9 0.000 0.000 1.390 1.401 make_images_data 4572 15.5 0.034 0.039 1.223 1.380 transfer_pw2rs_140 130 13.9 0.347 0.410 1.341 1.369 transfer_rs2pw_140 130 11.5 0.190 0.261 1.035 1.357 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.278 1.294 mp_alltoall_z22v 1201 15.6 0.954 1.245 0.954 1.245 hybrid_alltoall_any 4725 16.4 0.060 0.175 1.081 1.211 mp_alltoall_d11v 2130 13.8 0.849 1.018 0.849 1.018 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.924 0.953 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.920 0.924 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.864 0.886 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64", label="H2O-64", y=88.774, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64", label="H2O-64", y=41.342, yerr=0.0 Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=47.084, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=13.566, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.25, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=7.398, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.897, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.579, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=15.354999999999997, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.855, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=6.785, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.06, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.284, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.594, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.409, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 114.092 114.092 qs_mol_dyn_low 1 2.0 0.004 0.004 113.413 113.413 qs_forces 11 3.9 0.002 0.002 113.372 113.372 qs_energies 11 4.9 0.001 0.001 105.889 105.889 scf_env_do_scf 11 5.9 0.001 0.001 92.853 92.853 scf_env_do_scf_inner_loop 96 6.5 0.013 0.013 76.880 76.880 velocity_verlet 10 3.0 0.002 0.002 73.715 73.715 rebuild_ks_matrix 107 8.3 0.001 0.001 33.140 33.140 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.013 33.139 33.139 qs_ks_update_qs_env 107 7.6 0.001 0.001 29.839 29.839 qs_rho_update_rho_low 107 7.7 0.001 0.001 29.411 29.411 calculate_rho_elec 107 8.7 0.878 0.878 29.411 29.411 dbcsr_multiply_generic 1966 12.4 0.146 0.146 27.904 27.904 grid_collocate_task_list 107 9.7 25.264 25.264 25.264 25.264 sum_up_and_integrate 107 10.3 0.001 0.001 24.609 24.609 integrate_v_rspace 107 11.3 0.093 0.093 24.550 24.550 qs_scf_new_mos 96 7.5 0.001 0.001 24.465 24.465 qs_scf_loop_do_ot 96 8.5 0.001 0.001 24.464 24.464 ot_scf_mini 96 9.5 0.002 0.002 23.129 23.129 grid_integrate_task_list 107 12.3 22.361 22.361 22.361 22.361 make_m2s 3932 13.4 0.039 0.039 16.535 16.535 init_scf_loop 11 6.9 0.000 0.000 15.837 15.837 ot_mini 96 10.5 0.001 0.001 15.791 15.791 make_images 3932 14.4 1.688 1.688 12.059 12.059 prepare_preconditioner 11 7.9 0.000 0.000 11.167 11.167 make_preconditioner 11 8.9 0.000 0.000 11.167 11.167 make_full_inverse_cholesky 11 9.9 0.036 0.036 9.634 9.634 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 9.351 9.351 apply_single 107 13.6 0.000 0.000 9.351 9.351 ot_diis_step 96 11.5 0.004 0.004 9.165 9.165 hybrid_alltoall_any 4079 16.3 8.272 8.272 8.693 8.693 make_images_data 3932 15.4 0.032 0.032 8.384 8.384 multiply_cannon 1966 13.4 0.241 0.241 7.776 7.776 multiply_cannon_loop 1966 14.4 0.047 0.047 7.334 7.334 multiply_cannon_multrec 1966 15.4 7.237 7.237 7.287 7.287 qs_ot_get_derivative 96 11.5 0.001 0.001 6.614 6.614 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.169 6.169 init_scf_run 11 5.9 0.000 0.000 6.159 6.159 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.157 6.157 fft_wrap_pw1pw2 1081 11.6 0.009 0.009 5.978 5.978 wfi_extrapolate 11 7.9 0.001 0.001 5.565 5.565 fft_wrap_pw1pw2_140 439 12.2 1.029 1.029 5.070 5.070 dbcsr_make_dense_low 4961 15.5 0.027 0.027 4.765 4.765 make_dense_data 4961 16.5 4.214 4.214 4.726 4.726 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.526 4.526 dbcsr_complete_redistribute 317 12.2 1.787 1.787 4.289 4.289 dbcsr_make_images_dense 3386 14.7 0.014 0.014 4.169 4.169 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.803 3.803 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.711 3.711 qs_create_task_list 11 7.9 0.000 0.000 3.281 3.281 generate_qs_task_list 11 8.9 2.269 2.269 3.281 3.281 density_rs2pw 107 9.7 0.004 0.004 3.269 3.269 transfer_dbcsr_to_fm 11 10.9 0.001 0.001 3.053 3.053 dbcsr_copy 1855 11.9 0.217 0.217 3.003 3.003 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 2.954 2.954 dbcsr_copy_into_existing 22 7.9 2.775 2.775 2.775 2.775 dbcsr_data_release 237968 15.9 2.763 2.763 2.763 2.763 qs_ot_get_p 107 10.4 0.001 0.001 2.701 2.701 cp_fm_cholesky_invert 11 10.9 2.623 2.623 2.623 2.623 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.461 2.461 fft3d_s 1082 13.6 2.402 2.402 2.407 2.407 cp_fm_cholesky_decompose 22 10.9 2.365 2.365 2.365 2.365 copy_fm_to_dbcsr 170 11.1 0.001 0.001 2.312 2.312 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.013 68.713 68.716 qs_mol_dyn_low 1 2.0 0.003 0.004 68.578 68.582 qs_forces 11 3.9 0.002 0.002 68.538 68.538 qs_energies 11 4.9 0.001 0.001 64.105 64.106 scf_env_do_scf 11 5.9 0.000 0.001 59.383 59.384 scf_env_do_scf_inner_loop 96 6.5 0.003 0.018 54.597 54.599 velocity_verlet 10 3.0 0.001 0.003 41.033 41.033 rebuild_ks_matrix 107 8.3 0.000 0.000 29.442 29.485 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.018 29.442 29.485 qs_ks_update_qs_env 107 7.6 0.001 0.001 25.928 25.967 sum_up_and_integrate 107 10.3 0.002 0.002 25.128 25.149 integrate_v_rspace 107 11.3 0.004 0.005 25.105 25.126 qs_rho_update_rho_low 107 7.7 0.001 0.001 23.575 23.579 calculate_rho_elec 107 8.7 0.027 0.028 23.574 23.579 grid_integrate_task_list 107 12.3 20.898 21.257 20.898 21.257 grid_collocate_task_list 107 9.7 19.255 20.156 19.255 20.156 dbcsr_multiply_generic 1966 12.4 0.073 0.075 13.169 13.298 qs_scf_new_mos 96 7.5 0.001 0.001 10.445 10.508 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.444 10.508 multiply_cannon 1966 13.4 0.120 0.129 9.766 10.234 ot_scf_mini 96 9.5 0.002 0.002 9.814 9.875 multiply_cannon_loop 1966 14.4 0.081 0.085 9.273 9.497 mp_waitall_1 136719 16.5 7.343 7.768 7.343 7.768 ot_mini 96 10.5 0.001 0.001 5.950 6.008 multiply_cannon_metrocomm3 15728 15.4 0.033 0.034 5.507 5.804 init_scf_loop 11 6.9 0.000 0.000 4.760 4.761 density_rs2pw 107 9.7 0.006 0.007 3.982 4.518 init_scf_run 11 5.9 0.000 0.000 3.722 3.724 scf_env_initial_rho_setup 11 6.9 0.000 0.005 3.722 3.722 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.662 3.666 multiply_cannon_multrec 15728 15.4 3.473 3.612 3.481 3.620 wfi_extrapolate 11 7.9 0.001 0.001 3.377 3.377 potential_pw2rs 107 12.3 0.006 0.007 3.265 3.283 transfer_rs2pw 439 10.6 0.006 0.007 2.621 3.155 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.073 3.149 apply_single 107 13.6 0.000 0.000 3.073 3.149 ot_diis_step 96 11.5 0.004 0.004 3.012 3.012 qs_ot_get_derivative 96 11.5 0.001 0.001 2.923 2.984 fft_wrap_pw1pw2 1081 11.6 0.019 0.021 2.866 2.911 mp_waitany 8968 13.7 2.292 2.845 2.292 2.845 make_m2s 3932 13.4 0.038 0.043 2.430 2.500 transfer_pw2rs 439 13.2 0.005 0.006 2.453 2.486 fft_wrap_pw1pw2_140 439 12.2 0.152 0.162 2.389 2.472 transfer_rs2pw_140 118 11.5 0.170 0.190 1.696 2.230 make_images 3932 14.4 0.096 0.098 2.146 2.220 fft3d_ps 1081 13.6 1.016 1.121 2.069 2.131 mp_alltoall_d11v 1998 13.7 1.187 1.804 1.187 1.804 prepare_preconditioner 11 7.9 0.000 0.000 1.498 1.516 make_preconditioner 11 8.9 0.000 0.000 1.498 1.516 rs_gather_matrices 107 12.3 0.070 0.082 0.900 1.511 transfer_pw2rs_140 118 13.9 0.387 0.443 1.411 1.456 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.378 1.397 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-64_nonortho", label="H2O-64_nonortho", y=114.092, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-64_nonortho", label="H2O-64_nonortho", y=68.713, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=46.744, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=25.264, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.361, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=8.272, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.237, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.214, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=15.451999999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.255, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=20.898, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.473, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.343, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.292, yerr=0.0 Running w64PBE.inp with 1 threads and 32 ranks... done. Running w64PBE.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/w64PBE_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.045 0.045 148.467 148.467 qs_mol_dyn_low 1 2.0 0.004 0.004 147.537 147.537 qs_forces 11 3.9 0.001 0.001 147.497 147.497 qs_energies 11 4.9 0.001 0.001 135.613 135.613 scf_env_do_scf 11 5.9 0.004 0.004 122.175 122.175 velocity_verlet 10 3.0 0.002 0.002 115.726 115.726 scf_env_do_scf_inner_loop 106 6.8 0.016 0.016 106.660 106.660 rebuild_ks_matrix 117 8.5 0.001 0.001 64.484 64.484 qs_ks_build_kohn_sham_matrix 117 9.5 0.015 0.015 64.484 64.484 qs_ks_update_qs_env 120 7.8 0.001 0.001 58.182 58.182 qs_rho_update_rho_low 117 7.9 0.001 0.001 46.551 46.551 calculate_rho_elec 117 8.9 1.540 1.540 46.551 46.551 grid_collocate_task_list 117 9.9 39.023 39.023 39.023 39.023 sum_up_and_integrate 117 10.5 0.002 0.002 31.219 31.219 integrate_v_rspace 117 11.5 0.090 0.090 31.081 31.081 grid_integrate_task_list 117 12.5 27.411 27.411 27.411 27.411 fft_wrap_pw1pw2 2000 12.9 0.029 0.029 25.832 25.832 qs_vxc_create 117 10.5 0.003 0.003 24.906 24.906 xc_vxc_pw_create 117 11.5 1.676 1.676 24.903 24.903 fft_wrap_pw1pw2_200 1298 14.3 3.804 3.804 24.203 24.203 dbcsr_multiply_generic 2035 12.5 0.189 0.189 16.293 16.293 init_scf_loop 14 6.8 0.000 0.000 15.446 15.446 xc_pw_derive 702 13.5 0.005 0.005 15.401 15.401 qs_scf_new_mos 106 7.8 0.001 0.001 14.470 14.470 qs_scf_loop_do_ot 106 8.8 0.001 0.001 14.469 14.469 ot_scf_mini 106 9.8 0.002 0.002 13.350 13.350 xc_rho_set_and_dset_create 117 12.5 1.347 1.347 12.280 12.280 fft3d_s 2001 14.9 11.418 11.418 11.428 11.428 xc_pw_divergence 117 12.5 0.002 0.002 10.835 10.835 ot_mini 106 10.8 0.001 0.001 9.051 9.051 make_m2s 4070 13.5 0.040 0.040 8.200 8.200 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.270 7.270 prepare_preconditioner 14 7.8 0.000 0.000 6.865 6.865 make_preconditioner 14 8.8 0.000 0.000 6.865 6.865 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.817 6.817 pw_scatter_s 1053 15.2 6.485 6.485 6.485 6.485 init_scf_run 11 5.9 0.000 0.000 6.187 6.187 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.185 6.185 density_rs2pw 117 9.9 0.005 0.005 5.987 5.987 make_images 4070 14.5 1.449 1.449 5.871 5.871 wfi_extrapolate 11 7.9 0.001 0.001 5.463 5.463 multiply_cannon 2035 13.5 0.241 0.241 5.309 5.309 qs_ot_get_derivative 106 11.8 0.001 0.001 5.085 5.085 multiply_cannon_loop 2035 14.5 0.050 0.050 4.867 4.867 multiply_cannon_multrec 2035 15.5 4.771 4.771 4.816 4.816 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.611 4.611 ot_diis_step 106 11.8 0.004 0.004 3.952 3.952 pw_poisson_solve 117 10.5 0.002 0.002 3.934 3.934 make_full_inverse_cholesky 14 9.8 0.000 0.000 3.793 3.793 hybrid_alltoall_any 4213 16.4 3.525 3.525 3.643 3.643 make_images_data 4070 15.5 0.031 0.031 3.640 3.640 apply_preconditioner_dbcsr 120 12.8 0.000 0.000 3.599 3.599 apply_single 120 13.8 0.000 0.000 3.599 3.599 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.596 3.596 potential_pw2rs 117 12.5 0.078 0.078 3.580 3.580 xc_functional_eval 117 13.5 0.001 0.001 3.346 3.346 pbe_lda_eval 117 14.5 3.345 3.345 3.345 3.345 dbcsr_copy 4760 12.9 0.237 0.237 3.123 3.123 qs_create_task_list 11 7.9 0.000 0.000 2.941 2.941 generate_qs_task_list 11 8.9 2.035 2.035 2.941 2.941 dbcsr_copy_into_existing 22 7.9 2.867 2.867 2.867 2.867 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.853 2.853 dbcsr_make_dense_low 4890 15.6 0.026 0.026 2.659 2.659 make_dense_data 4890 16.6 2.068 2.068 2.621 2.621 make_full_single_inverse 14 9.8 0.002 0.002 2.576 2.576 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 2.356 2.356 pw_gather_s 947 14.5 2.331 2.331 2.331 2.331 pw_integral_ab_c1d_c1d_gs 117 11.5 2.270 2.270 2.270 2.270 dbcsr_complete_redistribute 323 11.8 1.149 1.149 2.190 2.190 pw_derive 1053 13.8 2.173 2.173 2.173 2.173 dbcsr_make_images_dense 3364 14.9 0.014 0.014 1.982 1.982 pw_copy 1755 13.0 1.977 1.977 1.977 1.977 arnoldi_generalized_ev 14 10.8 0.000 0.000 1.709 1.709 dbcsr_sym_matrix_vector_mult 1269 12.5 0.028 0.028 1.680 1.680 copy_dbcsr_to_fm 143 10.8 0.003 0.003 1.667 1.667 gev_build_subspace 23 11.5 0.009 0.009 1.553 1.553 dbcsr_dot 1125 12.2 1.475 1.475 1.481 1.481 qs_ot_get_p 120 10.5 0.001 0.001 1.474 1.474 cp_fm_cholesky_invert 14 10.8 1.449 1.449 1.449 1.449 dbcsr_sym_matrix_vector_mult_l 1269 13.5 1.368 1.368 1.385 1.385 dbcsr_finalize 4628 13.9 0.102 0.102 1.367 1.367 build_core_ppl_forces 11 5.9 1.355 1.355 1.355 1.355 calculate_dm_sparse 117 9.7 0.001 0.001 1.306 1.306 cp_dbcsr_sm_fm_multiply 46 9.3 0.001 0.001 1.285 1.285 pw_poisson_set 118 11.5 0.003 0.003 1.208 1.208 copy_fm_to_dbcsr 180 10.8 0.001 0.001 1.189 1.189 fft_wrap_pw1pw2_70 234 13.2 0.105 0.105 1.161 1.161 qs_ot_get_orbitals 106 10.8 0.001 0.001 1.152 1.152 build_overlap_matrix_low 22 6.9 0.999 0.999 1.109 1.109 build_kinetic_matrix_low 22 6.9 0.998 0.998 1.096 1.096 transfer_dbcsr_to_fm 14 10.8 0.001 0.001 1.076 1.076 dbcsr_merge_all 4098 15.1 0.434 0.434 1.057 1.057 cp_fm_cholesky_decompose 28 10.5 1.016 1.016 1.016 1.016 evaluate_core_matrix_traces 117 8.5 0.001 0.001 0.966 0.966 calculate_ptrace_kp 234 9.5 0.001 0.001 0.966 0.966 pw_copy_to_array 947 14.5 0.938 0.938 0.938 0.938 pw_axpy 1170 12.0 0.932 0.932 0.932 0.932 qs_init_subsys 1 2.0 0.001 0.001 0.868 0.868 qs_env_setup 1 3.0 0.000 0.000 0.856 0.856 qs_env_rebuild_pw_env 23 5.3 0.000 0.000 0.856 0.856 pw_env_rebuild 1 5.0 0.000 0.000 0.855 0.855 pw_grid_setup 4 6.0 0.000 0.000 0.854 0.854 pw_grid_setup_internal 4 7.0 0.013 0.013 0.854 0.854 pw_grid_sort 4 8.0 0.554 0.554 0.726 0.726 dbcsr_make_dense 1526 14.1 0.005 0.005 0.717 0.717 dbcsr_make_undense 1526 14.1 0.602 0.602 0.716 0.716 dbcsr_sort_indices 6887 16.5 0.702 0.702 0.702 0.702 pw_copy_from_array 1053 15.2 0.687 0.687 0.687 0.687 cp_dbcsr_sm_fm_multiply_core 46 10.3 0.000 0.000 0.684 0.684 dbcsr_iterator_start 35256 15.3 0.561 0.561 0.595 0.595 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.569 0.569 quick_finalize 4380 16.4 0.044 0.044 0.567 0.567 dbcsr_special_finalize 4070 15.5 0.008 0.008 0.562 0.562 calculate_rho_core 11 7.9 0.251 0.251 0.560 0.560 qs_ot_p2m_diag 19 11.0 0.055 0.055 0.552 0.552 grid_create_task_list 11 9.9 0.529 0.529 0.529 0.529 build_core_ppl 11 7.9 0.524 0.524 0.524 0.524 transfer_fm_to_dbcsr 14 9.8 0.002 0.002 0.496 0.496 dbcsr_add_d 1795 13.1 0.002 0.002 0.467 0.467 dbcsr_add_anytype 1795 14.1 0.170 0.170 0.465 0.465 make_basis_sm 14 9.3 0.000 0.000 0.464 0.464 cp_dbcsr_syevd 19 12.0 0.001 0.001 0.422 0.422 transfer_rs2pw 479 10.8 0.005 0.005 0.409 0.409 cp_dbcsr_alloc_block_from_nbl 88 7.7 0.233 0.233 0.406 0.406 transfer_pw2rs 479 13.4 0.002 0.002 0.400 0.400 dbcsr_iterator_stop 35256 15.3 0.389 0.389 0.399 0.399 cp_dbcsr_plus_fm_fm_t 22 8.9 0.000 0.000 0.394 0.394 cp_fm_diag_elpa 19 13.0 0.000 0.000 0.384 0.384 cp_fm_diag_elpa_base 19 14.0 0.369 0.369 0.384 0.384 distribute_tasks 11 9.9 0.374 0.374 0.374 0.374 pw_zero 585 13.0 0.367 0.367 0.367 0.367 build_qs_neighbor_lists 11 6.9 0.001 0.001 0.367 0.367 dbcsr_data_copy_aa2 322 15.1 0.365 0.365 0.365 0.365 compute_matrix_w 11 5.9 0.000 0.000 0.343 0.343 calculate_w_matrix_ot 11 6.9 0.007 0.007 0.343 0.343 fft_wrap_pw1pw2_30 234 13.2 0.022 0.022 0.336 0.336 mp_alltoall_d11v 1899 13.8 0.335 0.335 0.335 0.335 transfer_pw2rs_200 128 14.1 0.325 0.325 0.325 0.325 reorthogonalize_vectors 10 9.0 0.000 0.000 0.323 0.323 transfer_rs2pw_200 128 11.7 0.322 0.322 0.322 0.322 ot_scf_init 14 7.8 0.001 0.001 0.300 0.300 build_core_ppnl_forces 11 5.9 0.293 0.293 0.293 0.293 dbcsr_set 7009 14.0 0.005 0.005 0.289 0.289 cp_fm_uplo_to_full 47 13.4 0.284 0.284 0.284 0.284 dbcsr_zero 7009 15.0 0.284 0.284 0.284 0.284 dbcsr_desymmetrize_deep 143 11.8 0.143 0.143 0.279 0.279 dbcsr_reserve_blocks 1093 14.3 0.215 0.215 0.252 0.252 dbcsr_make_untransposed_blocks 2481 13.4 0.171 0.171 0.241 0.241 dbcsr_create_new 25633 14.8 0.202 0.202 0.230 0.230 dbcsr_make_index_list 4070 14.5 0.223 0.223 0.223 0.223 tree_to_linear_d 323 14.8 0.223 0.223 0.223 0.223 parallel_gemm_fm_cosma 96 8.9 0.195 0.195 0.195 0.195 build_neighbor_lists_sab_all 11 7.9 0.189 0.189 0.189 0.189 dbcsr_work_destroy_all 8478 15.8 0.121 0.121 0.187 0.187 calculate_first_density_matrix 1 7.0 0.000 0.000 0.181 0.181 copy_fm_to_dbcsr_bc 180 11.8 0.072 0.072 0.175 0.175 sort_shells 4 9.0 0.172 0.172 0.172 0.172 dbcsr_data_new 194711 15.4 0.163 0.163 0.163 0.163 copy_dbcsr_to_fm_bc 143 11.8 0.148 0.148 0.160 0.160 qs_ot_p2m_taylor 101 11.6 0.001 0.001 0.157 0.157 pw_multiply_with 117 11.5 0.154 0.154 0.154 0.154 dbcsr_add_wm_from_matrix 446 13.1 0.117 0.117 0.153 0.153 ------------------------------------------------------------------------------- From /workspace/artifacts/w64PBE_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.017 106.353 106.354 qs_mol_dyn_low 1 2.0 0.003 0.005 106.203 106.207 qs_forces 11 3.9 0.002 0.002 106.162 106.162 qs_energies 11 4.9 0.001 0.001 97.951 97.952 scf_env_do_scf 11 5.9 0.001 0.006 92.243 92.244 velocity_verlet 10 3.0 0.001 0.003 83.863 83.864 scf_env_do_scf_inner_loop 106 6.8 0.003 0.018 83.289 83.289 rebuild_ks_matrix 117 8.5 0.000 0.001 58.709 58.732 qs_ks_build_kohn_sham_matrix 117 9.5 0.021 0.024 58.708 58.731 qs_ks_update_qs_env 120 7.8 0.001 0.002 52.098 52.118 sum_up_and_integrate 117 10.5 0.003 0.003 37.500 37.527 integrate_v_rspace 117 11.5 0.005 0.006 37.417 37.457 qs_rho_update_rho_low 117 7.9 0.001 0.001 34.807 34.810 calculate_rho_elec 117 8.9 0.049 0.053 34.806 34.810 grid_integrate_task_list 117 12.5 28.573 29.403 28.573 29.403 grid_collocate_task_list 117 9.9 26.759 27.804 26.759 27.804 fft_wrap_pw1pw2 2000 12.9 0.044 0.050 17.787 18.328 fft_wrap_pw1pw2_200 1298 14.3 0.736 0.835 16.602 16.873 qs_vxc_create 117 10.5 0.007 0.008 15.853 16.057 xc_vxc_pw_create 117 11.5 0.130 0.181 15.846 16.049 fft3d_ps 2000 14.9 6.185 7.168 13.571 14.768 xc_pw_derive 702 13.5 0.010 0.011 10.821 11.669 dbcsr_multiply_generic 2035 12.5 0.070 0.073 8.896 8.950 density_rs2pw 117 9.9 0.007 0.007 7.630 8.937 init_scf_loop 14 6.8 0.000 0.000 8.935 8.936 xc_rho_set_and_dset_create 117 12.5 0.159 0.201 8.294 8.524 mp_alltoall_z22v 2000 16.9 6.126 8.510 6.126 8.510 xc_pw_divergence 117 12.5 0.004 0.005 7.140 7.649 potential_pw2rs 117 12.5 0.009 0.010 7.313 7.351 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.709 6.713 multiply_cannon 2035 13.5 0.116 0.123 6.425 6.568 qs_scf_new_mos 106 7.8 0.001 0.001 6.513 6.543 qs_scf_loop_do_ot 106 8.8 0.001 0.001 6.512 6.542 multiply_cannon_loop 2035 14.5 0.066 0.068 6.052 6.174 mp_waitall_1 142145 16.6 5.733 6.074 5.733 6.074 ot_scf_mini 106 9.8 0.002 0.002 6.040 6.067 transfer_rs2pw 479 10.8 0.007 0.009 4.423 5.858 mp_waitany 9728 13.9 4.272 5.778 4.272 5.778 transfer_pw2rs 479 13.4 0.006 0.008 5.400 5.417 yz_to_x 830 15.8 0.464 0.645 3.463 5.172 transfer_rs2pw_200 128 11.7 0.468 0.499 3.016 4.460 init_scf_run 11 5.9 0.000 0.000 4.264 4.266 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.263 4.264 x_to_yz 936 16.2 0.722 0.889 3.831 4.256 wfi_extrapolate 11 7.9 0.001 0.001 3.887 3.887 ot_mini 106 10.8 0.001 0.001 3.631 3.660 multiply_cannon_metrocomm3 16280 15.5 0.028 0.029 3.534 3.650 transfer_pw2rs_200 128 14.1 1.105 1.291 3.475 3.613 xc_functional_eval 117 13.5 0.003 0.003 2.583 2.795 pbe_lda_eval 117 14.5 2.581 2.793 2.581 2.793 mp_alltoall_d11v 2144 13.6 1.702 2.441 1.702 2.441 multiply_cannon_multrec 16280 15.5 2.290 2.362 2.297 2.369 rs_gather_matrices 117 12.5 0.060 0.067 1.503 2.240 qs_ot_get_derivative 106 11.8 0.001 0.001 2.176 2.202 make_m2s 4070 13.5 0.037 0.040 1.783 1.832 pw_derive 1053 13.8 1.583 1.815 1.583 1.815 prepare_preconditioner 14 7.8 0.000 0.000 1.784 1.788 make_preconditioner 14 8.8 0.000 0.000 1.784 1.788 transfer_pw2rs_70 117 14.5 0.411 0.449 1.504 1.677 pw_poisson_solve 117 10.5 0.003 0.004 1.567 1.658 pw_gather_p 947 14.5 1.353 1.630 1.353 1.630 make_images 4070 14.5 0.102 0.104 1.532 1.580 pw_copy 1755 13.0 1.417 1.546 1.417 1.546 apply_preconditioner_dbcsr 120 12.8 0.000 0.000 1.460 1.489 apply_single 120 13.8 0.000 0.000 1.460 1.489 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.407 1.483 ot_diis_step 106 11.8 0.003 0.004 1.443 1.443 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 1.284 1.284 fft_wrap_pw1pw2_70 234 13.2 0.019 0.025 0.916 1.257 pw_scatter_p 1053 15.2 1.123 1.193 1.123 1.193 mp_sum_d 3833 11.6 0.740 1.152 0.740 1.152 pw_poisson_set 118 11.5 0.005 0.006 1.018 1.109 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 1.050 1.072 transfer_rs2pw_70 117 11.9 0.308 0.331 1.051 1.072 mp_sendrecv_dv 10881 12.9 0.990 1.029 0.990 1.029 make_images_data 4070 15.5 0.029 0.032 0.811 0.917 make_full_single_inverse 14 9.8 0.001 0.001 0.907 0.912 build_core_ppl_forces 11 5.9 0.796 0.890 0.796 0.890 hybrid_alltoall_any 4213 16.4 0.041 0.094 0.724 0.833 make_full_inverse_cholesky 14 9.8 0.000 0.000 0.800 0.831 qs_ot_get_p 120 10.5 0.001 0.001 0.671 0.709 cp_dbcsr_sm_fm_multiply 46 9.3 0.001 0.002 0.699 0.702 qs_env_update_s_mstruct 11 6.9 0.000 0.000 0.635 0.687 pw_copy_to_array 947 14.5 0.501 0.632 0.501 0.632 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 0.591 0.630 cp_dbcsr_sm_fm_multiply_core 46 10.3 0.000 0.000 0.592 0.600 rs_grid_zero 139 14.6 0.546 0.590 0.546 0.590 pw_axpy 1170 12.0 0.517 0.571 0.517 0.571 calculate_dm_sparse 117 9.7 0.000 0.000 0.534 0.549 pw_copy_from_array 1053 15.2 0.432 0.517 0.432 0.517 qs_ot_get_orbitals 106 10.8 0.000 0.000 0.498 0.502 calculate_rho_core 11 7.9 0.015 0.018 0.435 0.489 cp_fm_cholesky_invert 14 10.8 0.437 0.446 0.437 0.446 mp_sum_l 9540 13.6 0.341 0.442 0.341 0.442 mp_allgather_i34 2035 14.5 0.152 0.394 0.152 0.394 build_overlap_matrix_low 22 6.9 0.349 0.368 0.354 0.373 build_kinetic_matrix_low 22 6.9 0.347 0.366 0.350 0.369 transfer_pw2rs_30 117 14.5 0.100 0.107 0.349 0.369 dbcsr_sym_matrix_vector_mult 1269 12.5 0.013 0.015 0.365 0.368 arnoldi_generalized_ev 14 10.8 0.000 0.000 0.365 0.366 gev_build_subspace 23 11.5 0.003 0.004 0.330 0.330 make_images_sizes 4070 15.5 0.003 0.004 0.239 0.327 integrate_v_core_rspace 11 7.9 0.017 0.021 0.311 0.326 build_core_ppl 11 7.9 0.295 0.324 0.295 0.324 mp_alltoall_i44 4070 16.5 0.236 0.324 0.236 0.324 dbcsr_complete_redistribute 323 11.8 0.067 0.076 0.296 0.320 pw_zero 585 13.0 0.248 0.302 0.248 0.302 make_basis_sm 14 9.3 0.000 0.001 0.296 0.301 make_images_pack 4070 15.5 0.269 0.291 0.276 0.298 transfer_rs2pw_30 117 11.9 0.075 0.080 0.283 0.297 pw_integral_ab_c1d_c1d_gs 117 11.5 0.174 0.186 0.260 0.288 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.275 0.280 parallel_gemm_fm_cosma 96 8.9 0.270 0.275 0.270 0.275 dbcsr_dot 1125 12.2 0.098 0.104 0.209 0.260 cp_fm_cholesky_decompose 28 10.5 0.249 0.256 0.249 0.256 copy_dbcsr_to_fm 143 10.8 0.003 0.003 0.243 0.256 rs_scatter_matrices 128 9.9 0.050 0.059 0.213 0.241 dbcsr_sym_matrix_vector_mult_l 1269 13.5 0.209 0.227 0.211 0.229 reorthogonalize_vectors 10 9.0 0.000 0.000 0.217 0.220 qs_create_task_list 11 7.9 0.000 0.000 0.195 0.216 generate_qs_task_list 11 8.9 0.067 0.075 0.195 0.216 qs_ot_p2m_diag 19 11.0 0.002 0.002 0.215 0.215 fft_wrap_pw1pw2_30 234 13.2 0.007 0.007 0.159 0.209 ot_scf_init 14 7.8 0.001 0.001 0.204 0.205 copy_fm_to_dbcsr 180 10.8 0.001 0.001 0.171 0.203 dbcsr_make_dense_low 6572 15.7 0.022 0.023 0.195 0.201 dbcsr_make_images_dense 3364 14.9 0.018 0.018 0.182 0.188 calculate_ecore_overlap 22 5.9 0.000 0.000 0.106 0.177 cp_dbcsr_syevd 19 12.0 0.001 0.001 0.174 0.174 make_dense_data 6572 16.7 0.128 0.133 0.163 0.169 mp_alltoall_i22 633 13.6 0.137 0.165 0.137 0.165 cp_fm_diag_elpa 19 13.0 0.000 0.000 0.158 0.159 cp_fm_diag_elpa_base 19 14.0 0.155 0.157 0.157 0.158 build_core_ppnl_forces 11 5.9 0.128 0.142 0.128 0.142 compute_matrix_w 11 5.9 0.000 0.000 0.140 0.141 calculate_w_matrix_ot 11 6.9 0.001 0.001 0.140 0.141 cp_dbcsr_plus_fm_fm_t 22 8.9 0.000 0.000 0.125 0.131 dbcsr_copy 4760 12.9 0.083 0.090 0.112 0.120 distribute_tasks 11 9.9 0.020 0.021 0.104 0.118 pw_multiply_with 117 11.5 0.100 0.114 0.100 0.114 mp_sum_dv 7068 14.3 0.078 0.110 0.078 0.110 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="w64PBE", label="w64PBE", y=148.467, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="w64PBE", label="w64PBE", y=106.353, yerr=0.0 Plot: name="w64PBE_timings_32omp", title="Timings of w64PBE with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="w64PBE_timings_32omp", name="rest", label="rest", y=59.35900000000001, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=39.023, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.411, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.418, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.485, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.771, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="w64PBE_timings_32mpi", title="Timings of w64PBE with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="w64PBE_timings_32mpi", name="rest", label="rest", y=30.686999999999998, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=26.759, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=28.573, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.29, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=6.126, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.185, yerr=0.0 PlotPoint: plot="w64PBE_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=5.733, yerr=0.0 Running w64SCAN.inp with 1 threads and 32 ranks... done. Running w64SCAN.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/w64SCAN_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.188 0.188 463.236 463.236 qs_mol_dyn_low 1 2.0 0.004 0.004 460.116 460.116 qs_forces 11 3.9 0.001 0.001 460.078 460.078 qs_energies 11 4.9 0.001 0.001 424.808 424.808 scf_env_do_scf 11 5.9 0.002 0.002 399.164 399.164 velocity_verlet 10 3.0 0.001 0.001 386.436 386.436 scf_env_do_scf_inner_loop 106 6.8 0.013 0.013 361.878 361.878 rebuild_ks_matrix 117 8.5 0.001 0.001 265.279 265.279 qs_ks_build_kohn_sham_matrix 117 9.5 0.016 0.016 265.279 265.279 qs_ks_update_qs_env 119 7.8 0.001 0.001 235.283 235.283 fft_wrap_pw1pw2 3053 12.6 0.048 0.048 209.608 209.608 fft_wrap_pw1pw2_400 1649 13.9 60.380 60.380 202.612 202.612 qs_rho_update_rho_low 117 7.9 0.001 0.001 156.331 156.331 calculate_rho_elec 234 8.9 8.997 8.997 156.330 156.330 qs_vxc_create 117 10.5 0.003 0.003 148.851 148.851 xc_vxc_pw_create 117 11.5 5.485 5.485 148.848 148.848 xc_pw_derive 702 13.5 0.007 0.007 94.607 94.607 sum_up_and_integrate 117 10.5 0.003 0.003 85.674 85.674 integrate_v_rspace 234 11.5 0.192 0.192 85.057 85.057 grid_collocate_task_list 234 9.9 83.685 83.685 83.685 83.685 xc_rho_set_and_dset_create 117 12.5 7.018 7.018 78.335 78.335 fft3d_s 3054 14.6 71.886 71.886 71.897 71.897 xc_pw_divergence 117 12.5 0.003 0.003 64.307 64.307 density_rs2pw 234 9.9 0.011 0.011 63.649 63.649 pw_scatter_s 1521 15.1 55.622 55.622 55.622 55.622 grid_integrate_task_list 234 12.5 48.395 48.395 48.395 48.395 init_scf_loop 13 6.8 0.000 0.000 37.189 37.189 potential_pw2rs 234 12.5 0.376 0.376 36.470 36.470 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 30.920 30.920 xc_functional_eval 234 13.5 0.003 0.003 24.080 24.080 libxc_lda_eval 234 14.5 24.073 24.073 24.077 24.077 dbcsr_multiply_generic 2100 12.6 0.168 0.168 16.157 16.157 init_scf_run 11 5.9 0.000 0.000 15.995 15.995 scf_env_initial_rho_setup 11 6.9 0.001 0.001 15.994 15.994 qs_scf_new_mos 106 7.8 0.001 0.001 14.456 14.456 qs_scf_loop_do_ot 106 8.8 0.001 0.001 14.456 14.456 wfi_extrapolate 11 7.9 0.001 0.001 14.300 14.300 pw_gather_s 1532 14.1 14.045 14.045 14.045 14.045 pw_poisson_solve 117 10.5 0.002 0.002 13.582 13.582 ot_scf_mini 106 9.8 0.002 0.002 13.278 13.278 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 9.243 9.243 ot_mini 106 10.8 0.001 0.001 8.735 8.735 make_m2s 4200 13.6 0.039 0.039 8.335 8.335 pw_derive 1053 13.8 8.148 8.148 8.148 8.148 pw_integral_ab_c1d_c1d_gs 117 11.5 7.780 7.780 7.780 7.780 prepare_preconditioner 13 7.8 0.000 0.000 6.801 6.801 make_preconditioner 13 8.8 0.000 0.000 6.801 6.801 pw_copy 2223 13.1 6.387 6.387 6.387 6.387 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.244 6.244 make_images 4200 14.6 1.264 1.264 6.134 6.134 multiply_cannon 2100 13.6 0.181 0.181 5.167 5.167 fft_wrap_pw1pw2_140 468 13.2 0.669 0.669 4.949 4.949 multiply_cannon_loop 2100 14.6 0.036 0.036 4.819 4.819 multiply_cannon_multrec 2100 15.6 4.739 4.739 4.783 4.783 qs_ot_get_derivative 106 11.8 0.001 0.001 4.771 4.771 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.347 4.347 pw_poisson_set 118 11.5 0.003 0.003 4.230 4.230 hybrid_alltoall_any 4338 16.5 3.971 3.971 4.103 4.103 make_images_data 4200 15.6 0.031 0.031 4.083 4.083 pw_axpy 1638 11.7 3.993 3.993 3.993 3.993 ot_diis_step 106 11.8 0.004 0.004 3.951 3.951 make_full_inverse_cholesky 13 9.8 0.000 0.000 3.750 3.750 qs_create_task_list 11 7.9 0.000 0.000 3.662 3.662 generate_qs_task_list 11 8.9 1.381 1.381 3.662 3.662 pw_copy_to_array 1532 14.1 3.615 3.615 3.615 3.615 apply_preconditioner_dbcsr 119 12.8 0.000 0.000 3.557 3.557 apply_single 119 13.8 0.000 0.000 3.557 3.557 dbcsr_copy 4666 12.8 0.220 0.220 3.303 3.303 pw_copy_from_array 1521 15.1 3.192 3.192 3.192 3.192 dbcsr_copy_into_existing 22 7.9 3.063 3.063 3.063 3.063 qs_init_subsys 1 2.0 0.001 0.001 2.699 2.699 qs_env_setup 1 3.0 0.000 0.000 2.687 2.687 qs_env_rebuild_pw_env 23 5.3 0.000 0.000 2.686 2.686 pw_env_rebuild 1 5.0 0.000 0.000 2.686 2.686 pw_grid_setup 4 6.0 0.000 0.000 2.685 2.685 pw_grid_setup_internal 4 7.0 0.036 0.036 2.685 2.685 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.682 2.682 dbcsr_make_dense_low 5108 15.7 0.027 0.027 2.528 2.528 make_dense_data 5108 16.7 1.977 1.977 2.490 2.490 make_full_single_inverse 13 9.8 0.002 0.002 2.464 2.464 pw_grid_sort 4 8.0 1.779 1.779 2.315 2.315 calculate_rho_core 11 7.9 0.667 0.667 2.284 2.284 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 2.254 2.254 dbcsr_complete_redistribute 312 11.8 1.251 1.251 2.228 2.228 transfer_rs2pw 947 10.9 0.011 0.011 2.049 2.049 grid_create_task_list 11 9.9 2.028 2.028 2.028 2.028 transfer_pw2rs 947 13.5 0.006 0.006 1.998 1.998 dbcsr_make_images_dense 3508 14.9 0.014 0.014 1.889 1.889 copy_dbcsr_to_fm 138 10.8 0.003 0.003 1.775 1.775 transfer_rs2pw_400 245 11.8 1.656 1.656 1.656 1.656 qs_ot_get_p 119 10.6 0.001 0.001 1.631 1.631 transfer_pw2rs_400 245 14.3 1.623 1.623 1.623 1.623 arnoldi_generalized_ev 13 10.8 0.000 0.000 1.572 1.572 fft_wrap_pw1pw2_50 468 13.2 0.117 0.117 1.548 1.548 dbcsr_sym_matrix_vector_mult 1206 12.5 0.026 0.026 1.528 1.528 pw_zero 702 12.6 1.477 1.477 1.477 1.477 dbcsr_dot 1134 12.2 1.434 1.434 1.441 1.441 gev_build_subspace 22 11.5 0.009 0.009 1.408 1.408 cp_dbcsr_sm_fm_multiply 45 9.4 0.001 0.001 1.359 1.359 calculate_dm_sparse 117 9.7 0.000 0.000 1.352 1.352 cp_fm_cholesky_invert 13 10.8 1.350 1.350 1.350 1.350 dbcsr_sym_matrix_vector_mult_l 1206 13.5 1.238 1.238 1.254 1.254 dbcsr_finalize 4788 14.0 0.081 0.081 1.238 1.238 transfer_dbcsr_to_fm 13 10.8 0.001 0.001 1.219 1.219 copy_fm_to_dbcsr 174 10.8 0.001 0.001 1.200 1.200 qs_ot_get_orbitals 106 10.8 0.001 0.001 1.175 1.175 build_core_ppl_forces 11 5.9 1.124 1.124 1.124 1.124 build_overlap_matrix_low 22 6.9 0.888 0.888 0.980 0.980 build_kinetic_matrix_low 22 6.9 0.891 0.891 0.971 0.971 cp_fm_cholesky_decompose 26 10.6 0.947 0.947 0.947 0.947 dbcsr_merge_all 4261 15.2 0.351 0.351 0.927 0.927 evaluate_core_matrix_traces 117 8.5 0.001 0.001 0.923 0.923 calculate_ptrace_kp 234 9.5 0.001 0.001 0.922 0.922 cp_dbcsr_sm_fm_multiply_core 45 10.4 0.000 0.000 0.804 0.804 pw_scale 585 11.9 0.753 0.753 0.753 0.753 dbcsr_make_undense 1600 14.2 0.604 0.604 0.735 0.735 dbcsr_sort_indices 6981 16.6 0.681 0.681 0.681 0.681 dbcsr_make_dense 1600 14.2 0.005 0.005 0.680 0.680 qs_ot_p2m_diag 19 11.0 0.054 0.054 0.631 0.631 dbcsr_iterator_start 35388 15.4 0.568 0.568 0.597 0.597 transfer_fm_to_dbcsr 13 9.8 0.005 0.005 0.586 0.586 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.566 0.566 quick_finalize 4507 16.5 0.045 0.045 0.552 0.552 dbcsr_special_finalize 4200 15.6 0.009 0.009 0.549 0.549 sort_shells 4 9.0 0.537 0.537 0.537 0.537 pw_multiply_with 117 11.5 0.504 0.504 0.504 0.504 cp_dbcsr_syevd 19 12.0 0.001 0.001 0.503 0.503 dbcsr_add_d 1902 13.1 0.002 0.002 0.492 0.492 dbcsr_add_anytype 1902 14.1 0.173 0.173 0.490 0.490 cp_fm_diag_elpa 19 13.0 0.000 0.000 0.467 0.467 cp_fm_diag_elpa_base 19 14.0 0.452 0.452 0.467 0.467 ------------------------------------------------------------------------------- From /workspace/artifacts/w64SCAN_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.048 0.061 269.127 269.127 qs_mol_dyn_low 1 2.0 0.003 0.006 268.751 268.755 qs_forces 11 3.9 0.002 0.002 268.713 268.713 qs_energies 11 4.9 0.001 0.001 246.155 246.158 scf_env_do_scf 11 5.9 0.001 0.005 235.650 235.654 scf_env_do_scf_inner_loop 106 6.8 0.003 0.018 214.746 214.746 velocity_verlet 10 3.0 0.001 0.003 212.071 212.072 rebuild_ks_matrix 117 8.5 0.000 0.000 174.928 174.942 qs_ks_build_kohn_sham_matrix 117 9.5 0.019 0.020 174.927 174.942 qs_ks_update_qs_env 119 7.8 0.001 0.001 153.536 153.551 fft_wrap_pw1pw2 3053 12.6 0.058 0.064 88.099 91.923 fft_wrap_pw1pw2_400 1649 13.9 3.151 3.563 83.135 86.159 sum_up_and_integrate 117 10.5 0.004 0.004 84.106 84.195 integrate_v_rspace 234 11.5 0.011 0.012 83.522 83.634 qs_rho_update_rho_low 117 7.9 0.001 0.001 81.332 81.336 calculate_rho_elec 234 8.9 0.361 0.383 81.331 81.335 qs_vxc_create 117 10.5 0.006 0.006 75.740 76.230 xc_vxc_pw_create 117 11.5 0.775 0.885 75.734 76.224 fft3d_ps 3053 14.6 30.369 32.530 58.765 63.804 grid_integrate_task_list 234 12.5 46.608 47.880 46.608 47.880 xc_rho_set_and_dset_create 117 12.5 0.950 1.079 45.165 46.589 xc_pw_derive 702 13.5 0.012 0.013 42.152 45.785 density_rs2pw 234 9.9 0.014 0.015 41.881 43.288 grid_collocate_task_list 234 9.9 37.986 39.111 37.986 39.111 potential_pw2rs 234 12.5 0.033 0.036 33.428 33.556 xc_pw_divergence 117 12.5 0.005 0.006 28.760 32.082 mp_alltoall_z22v 3053 16.6 20.463 30.511 20.463 30.511 xc_functional_eval 234 13.5 0.003 0.004 23.014 24.233 libxc_lda_eval 234 14.5 23.006 24.225 23.011 24.230 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 21.504 21.506 yz_to_x 1532 15.1 3.804 4.296 14.675 21.046 init_scf_loop 13 6.8 0.000 0.000 20.887 20.887 transfer_pw2rs 947 13.5 0.012 0.014 19.018 19.113 mp_waitany 35424 14.1 16.389 18.604 16.389 18.604 transfer_rs2pw 947 10.9 0.015 0.017 15.586 16.719 x_to_yz 1521 16.1 4.079 4.710 13.669 15.766 transfer_pw2rs_400 245 14.3 5.041 5.668 12.090 12.794 transfer_rs2pw_400 245 11.8 4.361 4.812 10.764 11.861 pw_gather_p 1532 14.1 10.438 11.028 10.438 11.028 pw_scatter_p 1521 15.1 10.385 10.685 10.385 10.685 dbcsr_multiply_generic 2100 12.6 0.072 0.074 8.439 8.532 init_scf_run 11 5.9 0.000 0.000 8.462 8.465 scf_env_initial_rho_setup 11 6.9 0.000 0.003 8.462 8.462 pw_derive 1053 13.8 6.949 7.830 6.949 7.830 wfi_extrapolate 11 7.9 0.001 0.001 7.607 7.607 pw_poisson_solve 117 10.5 0.003 0.004 5.997 6.454 qs_scf_new_mos 106 7.8 0.001 0.001 6.417 6.432 qs_scf_loop_do_ot 106 8.8 0.001 0.001 6.416 6.432 multiply_cannon 2100 13.6 0.119 0.130 5.779 6.035 ot_scf_mini 106 9.8 0.002 0.002 5.961 5.972 transfer_pw2rs_140 234 14.5 1.319 1.534 5.049 5.942 mp_waitall_1 152250 16.6 5.275 5.700 5.275 5.700 multiply_cannon_loop 2100 14.6 0.068 0.069 5.369 5.474 pw_copy 2223 13.1 4.868 5.470 4.868 5.470 fft_wrap_pw1pw2_140 468 13.2 0.080 0.084 3.960 4.950 mp_alltoall_d11v 2347 13.5 3.639 4.906 3.639 4.906 rs_gather_matrices 234 12.5 0.169 0.188 3.430 4.676 pw_poisson_set 118 11.5 0.006 0.006 3.794 4.251 ot_mini 106 10.8 0.001 0.001 3.581 3.596 transfer_rs2pw_140 234 11.9 1.391 1.630 3.384 3.485 pw_copy_to_array 1532 14.1 2.982 3.414 2.982 3.414 pw_axpy 1638 11.7 3.093 3.410 3.093 3.410 mp_sum_d 3895 11.5 1.858 3.073 1.858 3.073 multiply_cannon_metrocomm3 16800 15.6 0.029 0.030 2.847 2.987 rs_grid_zero 490 15.3 2.366 2.503 2.366 2.503 pw_copy_from_array 1521 15.1 2.210 2.381 2.210 2.381 multiply_cannon_multrec 16800 15.6 2.287 2.362 2.294 2.369 qs_ot_get_derivative 106 11.8 0.001 0.001 2.031 2.042 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 1.836 1.836 make_m2s 4200 13.6 0.038 0.039 1.731 1.772 transfer_pw2rs_50 234 14.5 0.335 0.362 1.490 1.753 prepare_preconditioner 13 7.8 0.000 0.000 1.693 1.697 make_preconditioner 13 8.8 0.000 0.000 1.693 1.697 apply_preconditioner_dbcsr 119 12.8 0.000 0.000 1.511 1.542 apply_single 119 13.8 0.000 0.000 1.511 1.542 ot_diis_step 106 11.8 0.003 0.003 1.538 1.538 make_images 4200 14.6 0.106 0.110 1.476 1.516 qs_env_update_s_mstruct 11 6.9 0.000 0.000 1.326 1.356 calculate_rho_core 11 7.9 0.042 0.060 1.157 1.202 pw_zero 702 12.6 1.028 1.199 1.028 1.199 transfer_rs2pw_50 234 11.9 0.270 0.292 1.113 1.132 mp_sendrecv_dv 14508 12.9 1.059 1.087 1.059 1.087 pw_integral_ab_c1d_c1d_gs 117 11.5 0.632 0.672 0.897 1.058 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.972 1.031 qs_ot_get_derivative_taylor 89 12.9 0.002 0.002 0.965 0.972 make_full_single_inverse 13 9.8 0.001 0.001 0.877 0.881 pw_scale 585 11.9 0.723 0.865 0.723 0.865 qs_ot_get_p 119 10.6 0.001 0.001 0.794 0.839 fft_wrap_pw1pw2_50 468 13.2 0.021 0.022 0.743 0.837 make_images_data 4200 15.6 0.029 0.033 0.700 0.781 make_full_inverse_cholesky 13 9.8 0.000 0.000 0.753 0.775 hybrid_alltoall_any 4338 16.5 0.039 0.086 0.613 0.705 mp_sum_l 9876 13.7 0.550 0.663 0.550 0.663 cp_dbcsr_sm_fm_multiply 45 9.4 0.001 0.001 0.594 0.597 integrate_v_core_rspace 11 7.9 0.027 0.036 0.540 0.580 build_core_ppl_forces 11 5.9 0.470 0.539 0.470 0.539 qs_ot_get_orbitals 106 10.8 0.000 0.000 0.511 0.520 calculate_dm_sparse 117 9.7 0.000 0.000 0.512 0.519 cp_dbcsr_sm_fm_multiply_core 45 10.4 0.000 0.000 0.493 0.502 pw_multiply_with 117 11.5 0.465 0.500 0.465 0.500 rs_scatter_matrices 256 9.9 0.126 0.142 0.424 0.481 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 0.432 0.472 transfer_pw2rs_20 234 14.5 0.111 0.116 0.377 0.452 cp_fm_cholesky_invert 13 10.8 0.442 0.452 0.442 0.452 arnoldi_generalized_ev 13 10.8 0.000 0.000 0.398 0.399 dbcsr_sym_matrix_vector_mult 1206 12.5 0.012 0.015 0.381 0.392 gev_build_subspace 22 11.5 0.003 0.004 0.360 0.360 qs_ot_p2m_diag 19 11.0 0.002 0.002 0.346 0.353 make_images_pack 4200 15.6 0.306 0.326 0.313 0.333 make_images_sizes 4200 15.6 0.004 0.004 0.252 0.330 mp_alltoall_i44 4200 16.6 0.249 0.326 0.249 0.326 mp_allgather_i34 2100 14.6 0.146 0.321 0.146 0.321 transfer_rs2pw_20 234 11.9 0.093 0.099 0.309 0.321 dbcsr_dot 1134 12.2 0.123 0.128 0.254 0.300 build_overlap_matrix_low 22 6.9 0.278 0.293 0.284 0.299 build_kinetic_matrix_low 22 6.9 0.276 0.292 0.279 0.296 dbcsr_complete_redistribute 312 11.8 0.063 0.070 0.254 0.281 qs_ot_get_derivative_diag 17 12.0 0.000 0.000 0.274 0.280 parallel_gemm_fm_cosma 95 8.9 0.267 0.271 0.267 0.271 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="w64SCAN", label="w64SCAN", y=463.236, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="w64SCAN", label="w64SCAN", y=269.127, yerr=0.0 Plot: name="w64SCAN_timings_32omp", title="Timings of w64SCAN with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="w64SCAN_timings_32omp", name="rest", label="rest", y=119.195, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=83.685, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft3d_s", label="fft3d_s", y=71.886, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft_wrap_pw1pw2_400", label="fft_wrap_pw1pw2_400", y=60.38, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=55.622, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.395, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="libxc_lda_eval", label="libxc_lda_eval", y=24.073, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="w64SCAN_timings_32mpi", title="Timings of w64SCAN with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="w64SCAN_timings_32mpi", name="rest", label="rest", y=107.54400000000001, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=37.986, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft_wrap_pw1pw2_400", label="fft_wrap_pw1pw2_400", y=3.151, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.608, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="libxc_lda_eval", label="libxc_lda_eval", y=23.006, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=30.369, yerr=0.0 PlotPoint: plot="w64SCAN_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=20.463, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.208 0.208 93.730 93.730 qs_energies 1 2.0 0.000 0.000 92.923 92.923 scf_env_do_scf 1 3.0 0.000 0.000 91.729 91.729 qs_ks_update_qs_env 8 5.0 0.000 0.000 86.431 86.431 rebuild_ks_matrix 7 6.0 0.000 0.000 86.350 86.350 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 86.350 86.350 hfx_ks_matrix 7 8.0 0.000 0.000 77.667 77.667 integrate_four_center 7 9.0 0.742 0.742 77.528 77.528 integrate_four_center_main 7 10.0 0.622 0.622 70.731 70.731 integrate_four_center_bin 447 11.0 70.109 70.109 70.109 70.109 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 48.290 48.290 init_scf_loop 1 4.0 0.000 0.000 43.430 43.430 integrate_four_center_load 7 10.0 0.001 0.001 5.826 5.826 hfx_load_balance 1 11.0 0.001 0.001 5.825 5.825 qs_vxc_create 14 8.0 0.000 0.000 3.295 3.295 xc_vxc_pw_create 14 9.0 0.117 0.117 3.295 3.295 hfx_load_balance_bin 1 12.0 2.914 2.914 2.914 2.914 hfx_load_balance_count 1 12.0 2.895 2.895 2.895 2.895 calculate_rho_elec 15 7.4 0.119 0.119 2.525 2.525 xc_rho_set_and_dset_create 14 10.0 0.109 0.109 2.278 2.278 fft_wrap_pw1pw2 237 10.7 0.003 0.003 2.188 2.188 dbcsr_multiply_generic 165 10.0 0.014 0.014 2.133 2.133 fft_wrap_pw1pw2_140 150 12.1 0.551 0.551 2.067 2.067 prepare_preconditioner 1 5.0 0.000 0.000 2.050 2.050 make_preconditioner 1 6.0 0.000 0.000 2.050 2.050 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 1.996 1.996 grid_collocate_task_list 15 8.4 1.908 1.908 1.908 1.908 qs_scf_new_mos 7 5.0 0.000 0.000 1.886 1.886 qs_scf_loop_do_ot 7 6.0 0.000 0.000 1.886 1.886 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.209 0.215 85.968 85.970 qs_energies 1 2.0 0.000 0.000 85.577 85.578 scf_env_do_scf 1 3.0 0.000 0.000 85.250 85.250 qs_ks_update_qs_env 8 5.0 0.000 0.000 83.332 83.332 rebuild_ks_matrix 7 6.0 0.000 0.000 83.322 83.323 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 83.322 83.323 hfx_ks_matrix 7 8.0 0.000 0.000 78.157 78.158 integrate_four_center 7 9.0 0.050 0.248 78.147 78.148 integrate_four_center_main 7 10.0 0.003 0.003 69.563 70.776 integrate_four_center_bin 448 11.0 69.561 70.774 69.561 70.774 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 44.395 44.395 init_scf_loop 1 4.0 0.000 0.000 40.853 40.853 integrate_four_center_load 7 10.0 0.000 0.000 5.376 5.377 hfx_load_balance 1 11.0 0.001 0.001 5.376 5.377 mp_sync 56 11.2 2.596 3.419 2.596 3.419 hfx_load_balance_bin 1 12.0 2.636 2.701 2.636 2.701 hfx_load_balance_count 1 12.0 2.609 2.672 2.609 2.672 qs_vxc_create 14 8.0 0.001 0.001 2.133 2.133 xc_vxc_pw_create 14 9.0 0.007 0.008 2.132 2.132 xc_rho_set_and_dset_create 14 10.0 0.011 0.013 1.644 1.732 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="H2O-hyb", label="H2O-hyb", y=93.73, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="H2O-hyb", label="H2O-hyb", y=85.968, yerr=0.0 Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=14.954000000000008, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=70.109, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.914, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.895, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=1.908, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=0.742, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.208, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=8.307000000000002, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=69.561, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.636, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.609, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.05, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.209, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=2.596, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.011 86.845 86.845 qs_energies 1 2.0 0.000 0.000 86.506 86.506 mp2_main 1 3.0 0.000 0.000 83.534 83.534 mp2_gpw_main 1 4.0 0.000 0.000 83.433 83.433 rpa_ri_compute_en 1 5.0 0.000 0.000 80.194 80.194 rpa_num_int 1 6.0 0.001 0.001 80.185 80.185 compute_mat_P_omega 1 7.0 0.003 0.003 69.858 69.858 compute_mat_P_omega_contract 10 8.0 8.274 8.274 69.622 69.622 dbt_total 2336 9.6 0.015 0.015 58.946 58.946 dbt_contract 787 11.0 0.045 0.045 51.696 51.696 dbt_tas_total 1149 12.2 0.334 0.334 50.073 50.073 dbt_tas_multiply 807 12.1 0.003 0.003 48.753 48.753 dbt_tas_dbm 807 14.1 0.004 0.004 41.389 41.389 dbm_multiply 807 16.1 41.376 41.376 41.376 41.376 dbt_tas_mm_1N 524 15.1 0.002 0.002 28.446 28.446 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 24.398 24.398 compute_mat_P_omega_calc_M_occ 250 9.0 8.287 8.287 19.481 19.481 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.116 12.116 dbt_tas_mm_2 251 15.0 0.002 0.002 11.108 11.108 compute_QP_energies 1 7.0 0.000 0.000 6.196 6.196 compute_self_energy_cubic_gw 1 8.0 0.144 0.144 6.195 6.195 dbt_copy 1103 10.7 0.137 0.137 5.909 5.909 contract_cubic_gw 21 9.0 0.000 0.000 4.783 4.783 dbm_reserve_blocks 3628 15.3 4.528 4.528 4.528 4.528 dbt_tas_reserve_blocks_index 3261 14.3 0.102 0.102 4.378 4.378 dbt_reserve_blocks_index 2280 13.1 0.061 0.061 3.417 3.417 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 3.359 3.359 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.229 3.229 dbt_crop 1042 12.0 1.423 1.423 2.804 2.804 scf_env_do_scf 1 3.0 0.000 0.000 2.713 2.713 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.712 2.712 dbt_tas_reshape 367 15.0 0.019 0.019 2.445 2.445 convert_to_new_pgrid 2421 14.1 0.147 0.147 2.230 2.230 dbt_tas_copy 574 11.4 1.331 1.331 2.113 2.113 dbm_copy 1614 15.1 2.083 2.083 2.083 2.083 dbt_reshape 278 11.9 1.039 1.039 1.825 1.825 reshape_mm_small 367 14.1 0.045 0.045 1.818 1.818 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.013 35.832 35.834 qs_energies 1 2.0 0.000 0.000 35.695 35.696 mp2_main 1 3.0 0.000 0.000 34.693 34.694 mp2_gpw_main 1 4.0 0.000 0.000 34.653 34.654 rpa_ri_compute_en 1 5.0 0.000 0.000 33.567 33.568 rpa_num_int 1 6.0 0.000 0.001 33.566 33.566 dbt_total 2336 9.6 0.019 0.024 29.697 29.719 compute_mat_P_omega 1 7.0 0.001 0.004 28.124 28.152 compute_mat_P_omega_contract 10 8.0 0.439 0.460 27.965 27.972 dbt_contract 787 11.0 0.039 0.042 21.902 21.906 dbt_tas_total 1149 12.2 0.093 0.097 19.414 19.418 dbt_tas_multiply 807 12.1 0.002 0.003 19.373 19.379 dbt_tas_dbm 807 14.1 0.003 0.004 13.423 13.442 dbm_multiply 807 16.1 10.246 10.822 10.246 10.822 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.404 8.415 compute_mat_P_omega_calc_M_occ 250 9.0 0.412 0.437 8.387 8.392 mp_sync 8688 11.6 6.019 7.280 6.019 7.280 dbt_copy 1149 10.8 0.016 0.017 6.476 6.789 dbt_reshape 1136 11.8 2.511 2.694 6.023 6.333 dbt_tas_mm_2 251 15.0 0.002 0.002 6.182 6.185 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.445 5.455 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.949 5.380 compute_QP_energies 1 7.0 0.000 0.000 3.503 3.503 compute_self_energy_cubic_gw 1 8.0 0.006 0.006 3.503 3.503 mp_waitall_2 3812 15.3 3.027 3.186 3.027 3.186 dbt_communicate_buffer 1136 12.8 0.053 0.058 2.746 2.902 contract_cubic_gw 21 9.0 0.000 0.000 2.671 2.671 dbt_reserve_blocks_index 2887 13.1 0.081 0.089 1.809 2.094 dbt_reserve_blocks_index_array 2829 12.2 0.009 0.010 1.798 2.082 dbm_reserve_blocks 3752 15.4 1.802 2.074 1.802 2.074 dbt_tas_reserve_blocks_index 3347 14.5 0.067 0.072 1.766 2.048 dbt_crop 1042 12.0 0.848 0.942 1.414 1.551 convert_to_new_pgrid 2421 14.1 0.020 0.023 1.311 1.449 dbm_copy 1608 15.1 1.285 1.423 1.285 1.423 dbt_tas_replicate 405 14.1 0.559 0.704 1.278 1.349 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.124 1.128 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.094 1.098 mp2_ri_gpw_compute_in 1 5.0 0.003 0.003 1.084 1.084 parallel_gemm_fm_cosma 105 8.4 1.024 1.039 1.024 1.039 scf_env_do_scf 1 3.0 0.000 0.000 0.965 0.965 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.965 0.965 compute_W_cubic_GW 10 7.0 0.001 0.001 0.874 0.884 dbm_add 807 14.1 0.688 0.768 0.688 0.768 mp_sum_l 5765 13.7 0.628 0.726 0.628 0.726 mp_max_i 2072 9.5 0.570 0.726 0.570 0.726 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=86.845, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="GW_PBE_4benzene", label="GW_PBE_4benzene", y=35.832, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=21.257999999999996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=41.376, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.287, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.274, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=4.528, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.083, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.039, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.091000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.246, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.412, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.439, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.802, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.285, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.511, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.019, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.027, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 249.661 249.661 qs_forces 1 2.0 0.000 0.000 249.107 249.107 rebuild_ks_matrix 7 6.6 0.000 0.000 247.803 247.803 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 247.803 247.803 hfx_ks_matrix 7 8.6 0.000 0.000 245.635 245.635 dbt_total 849 11.0 0.007 0.007 196.078 196.078 hfx_ri_update_ks 7 9.6 0.000 0.000 195.669 195.669 hfx_ri_update_ks_Pmat 7 10.6 29.356 29.356 195.662 195.662 dbt_tas_total 369 13.4 0.567 0.567 176.837 176.837 dbt_contract 207 12.4 0.394 0.394 176.462 176.462 qs_energies 1 3.0 0.000 0.000 173.267 173.267 scf_env_do_scf 1 4.0 0.000 0.000 172.967 172.967 qs_ks_update_qs_env 8 6.0 0.000 0.000 172.011 172.011 dbt_tas_multiply 216 13.5 0.001 0.001 171.927 171.927 dbt_tas_dbm 216 15.5 0.001 0.001 159.256 159.256 dbm_multiply 216 17.5 159.252 159.252 159.252 159.252 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 141.366 141.366 dbt_tas_mm_2 91 16.5 0.001 0.001 135.252 135.252 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 108.046 108.046 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 75.802 75.802 init_scf_loop 2 5.0 0.000 0.000 64.920 64.920 hfx_ri_update_forces 1 7.0 1.591 1.591 49.963 49.963 hfx_ri_forces_Pmat_3c 1 8.0 4.319 4.319 33.438 33.438 dbt_copy 423 11.8 0.079 0.079 14.696 14.696 dbt_tas_mm_3T 77 17.1 0.000 0.000 13.887 13.887 precalc_derivatives 1 8.0 2.348 2.348 11.910 11.910 dbt_reshape 132 13.2 7.039 7.039 9.994 9.994 dbt_tas_mm_3N 37 15.4 0.000 0.000 9.387 9.387 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.149 9.149 hfx_ri_pre_scf_Pmat 1 12.0 0.004 0.004 8.497 8.497 dbm_reserve_blocks 1491 16.2 7.004 7.004 7.004 7.004 dbt_tas_reserve_blocks_index 1323 15.4 0.191 0.191 6.874 6.874 dbt_tas_reshape 168 14.5 0.009 0.009 6.058 6.058 build_3c_derivatives 3 9.0 1.731 1.731 6.023 6.023 dbt_reserve_blocks_index 846 14.4 0.106 0.106 5.430 5.430 dbt_reserve_blocks_index_array 816 13.5 0.008 0.008 5.310 5.310 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.016 47.213 47.213 qs_forces 1 2.0 0.000 0.000 46.956 46.956 rebuild_ks_matrix 7 6.6 0.000 0.000 46.053 46.053 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 46.053 46.053 hfx_ks_matrix 7 8.6 0.000 0.000 45.001 45.002 dbt_total 849 11.0 0.008 0.008 40.138 40.142 dbt_contract 207 12.4 0.027 0.028 30.544 30.550 dbt_tas_total 369 13.4 0.047 0.049 27.122 27.125 dbt_tas_multiply 216 13.5 0.001 0.001 26.739 26.741 hfx_ri_update_ks 7 9.6 0.000 0.000 25.090 25.090 hfx_ri_update_ks_Pmat 7 10.6 1.312 1.392 25.088 25.089 qs_energies 1 3.0 0.000 0.000 23.833 23.833 scf_env_do_scf 1 4.0 0.000 0.001 23.696 23.696 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.115 23.115 qs_ks_update_qs_env 8 6.0 0.000 0.000 22.939 22.939 dbt_tas_dbm 216 15.5 0.001 0.001 20.199 20.206 hfx_ri_update_forces 1 7.0 0.067 0.071 19.910 19.911 dbm_multiply 216 17.5 17.840 18.852 17.840 18.852 hfx_ri_forces_Pmat_3c 1 8.0 0.185 0.194 14.721 14.737 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.338 13.339 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 10.729 10.730 init_scf_loop 2 5.0 0.000 0.000 10.356 10.356 dbt_copy 539 12.5 0.011 0.014 8.523 8.788 dbt_tas_mm_2 91 16.5 0.001 0.001 8.734 8.736 dbt_reshape 393 13.9 3.166 3.290 6.329 6.527 mp_sync 2901 12.8 4.737 6.277 4.737 6.277 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.316 5.691 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.936 4.937 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.357 4.713 precalc_derivatives 1 8.0 0.094 0.100 3.865 3.865 dbm_reserve_blocks 1641 16.6 3.049 3.369 3.049 3.369 dbt_tas_reserve_blocks_index 1471 15.8 0.133 0.136 2.969 3.288 mp_waitall_2 1318 16.2 3.139 3.208 3.139 3.208 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.136 3.136 dbt_reserve_blocks_index 1107 14.8 0.124 0.129 2.420 2.709 dbt_reserve_blocks_index_array 1077 13.9 0.005 0.006 2.391 2.673 dbt_crop 372 13.7 1.728 1.778 2.456 2.575 dbt_communicate_buffer 393 14.9 0.012 0.013 2.202 2.287 convert_to_new_pgrid 648 15.5 0.039 0.082 2.030 2.183 build_3c_derivatives 3 9.0 0.137 0.147 2.134 2.139 dbm_copy 452 16.3 1.824 1.981 1.824 1.981 dbt_tas_replicate 170 15.1 0.738 0.776 1.740 1.798 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.673 1.677 dbt_tas_copy 146 12.6 0.777 0.809 1.410 1.520 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.001 1.507 1.507 dbt_tas_communicate_buffer 370 16.3 0.012 0.012 1.001 1.075 dbm_add 216 15.5 0.919 0.986 0.919 0.986 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=249.661, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-HFX_H2O-32", label="RI-HFX_H2O-32", y=47.213, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=42.691, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=159.252, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=29.356, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.039, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.319, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=13.785000000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.84, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.312, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.166, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.049, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.185, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.737, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.139, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.012 183.357 183.357 qs_energies 1 2.0 0.000 0.000 183.181 183.181 mp2_main 1 3.0 0.000 0.000 179.246 179.246 mp2_gpw_main 1 4.0 0.001 0.001 178.787 178.787 mp2_ri_gpw_compute_in 1 5.0 0.597 0.597 130.478 130.478 mp2_ri_gpw_compute_in_loop 1 6.0 0.012 0.012 121.876 121.876 mp2_eri_3c_integrate_gpw 2656 7.0 0.016 0.016 83.453 83.453 integrate_v_rspace 2666 8.0 0.762 0.762 70.696 70.696 grid_integrate_task_list 2666 9.0 67.836 67.836 67.836 67.836 mp2_ri_gpw_compute_en 1 5.0 0.122 0.122 48.255 48.255 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.503 11.503 45.547 45.547 dbcsr_multiply_generic 5322 8.0 0.261 0.261 31.992 31.992 ao_to_mo_and_store_B_mult_1 2656 7.0 0.015 0.015 31.981 31.981 mp2_ri_gpw_compute_en_expansio 2080 7.0 3.310 3.310 24.506 24.506 local_gemm 2080 8.0 21.196 21.196 21.196 21.196 make_m2s 10644 9.0 0.057 0.057 18.012 18.012 make_images 10644 10.0 3.771 3.771 17.577 17.577 hybrid_alltoall_any 13323 11.6 12.398 12.398 12.742 12.742 make_images_data 10644 11.0 0.087 0.087 12.685 12.685 multiply_cannon 5322 9.0 0.613 0.613 11.885 11.885 fft_wrap_pw1pw2 53228 10.4 0.113 0.113 11.316 11.316 multiply_cannon_loop 5322 10.0 0.163 0.163 10.467 10.467 collocate_function 2656 8.0 6.070 6.070 9.695 9.695 multiply_cannon_multrec 5322 11.0 8.379 8.379 8.415 8.415 fft_wrap_pw1pw2_20 21271 11.4 0.771 0.771 8.125 8.125 get_2c_integrals 1 6.0 0.000 0.000 8.003 8.003 compute_2c_integrals 1 7.0 0.005 0.005 7.565 7.565 compute_2c_integrals_loop_lm 1 8.0 0.010 0.010 7.556 7.556 mp2_eri_2c_integrate_gpw 1 9.0 0.741 0.741 7.546 7.546 mp2_ri_gpw_compute_en_ener 2080 7.0 7.437 7.437 7.437 7.437 fft3d_s 53229 12.4 6.643 6.643 6.667 6.667 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.933 2.933 6.339 6.339 potential_pw2rs 5322 10.0 0.151 0.151 4.202 4.202 copy_dbcsr_to_fm 2679 8.0 0.046 0.046 3.893 3.893 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.015 34.331 34.331 qs_energies 1 2.0 0.000 0.001 34.143 34.144 mp2_main 1 3.0 0.000 0.001 32.202 32.202 mp2_gpw_main 1 4.0 0.001 0.001 32.088 32.088 mp2_ri_gpw_compute_en 1 5.0 0.218 0.226 16.875 17.119 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.756 2.116 15.782 15.788 mp2_ri_gpw_compute_in 1 5.0 0.042 0.045 15.149 15.453 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 13.958 14.265 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 11.667 11.950 integrate_v_rspace 93 8.1 0.121 0.139 11.606 11.892 grid_integrate_task_list 93 9.1 11.311 11.582 11.311 11.582 mp2_ri_gpw_compute_en_expansio 65 7.0 0.118 0.142 11.195 11.446 local_gemm 65 8.0 11.077 11.319 11.077 11.319 mp2_ri_gpw_compute_en_comm 17 7.0 0.062 0.071 2.380 3.190 mp_sendrecv_dm3 1054 8.0 1.940 2.812 1.940 2.812 dbcsr_multiply_generic 176 8.0 0.009 0.010 1.965 2.174 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.948 2.157 scf_env_do_scf 1 3.0 0.000 0.000 1.832 1.834 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.832 1.834 multiply_cannon 176 9.0 0.020 0.022 1.116 1.194 get_2c_integrals 1 6.0 0.000 0.001 1.136 1.153 multiply_cannon_loop 176 10.0 0.002 0.003 1.055 1.132 qs_scf_new_mos 10 5.0 0.000 0.000 0.984 1.000 make_m2s 352 9.0 0.003 0.004 0.809 0.971 make_images 352 10.0 0.039 0.040 0.799 0.960 multiply_cannon_multrec 246 11.0 0.894 0.927 0.899 0.932 eigensolver 11 5.8 0.001 0.001 0.928 0.930 compute_2c_integrals 1 7.0 0.002 0.003 0.787 0.796 compute_2c_integrals_loop_lm 1 8.0 0.001 0.001 0.687 0.710 mp2_eri_2c_integrate_gpw 1 9.0 0.171 0.181 0.687 0.709 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.703 0.704 cp_fm_diag_elpa_base 11 7.8 0.691 0.696 0.701 0.702 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=183.357, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="RI-MP2_ammonia", label="RI-MP2_ammonia", y=34.331, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=62.045, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=67.836, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=21.196, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=12.398, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.503, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.379, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.3530000000000015, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.311, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.077, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.756, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.894, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.94, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.094 0.094 141.560 141.560 qs_energies 1 2.0 0.000 0.000 140.208 140.208 scf_env_do_scf 1 3.0 0.000 0.000 134.421 134.421 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 134.421 134.421 qs_ks_update_qs_env 15 5.0 0.000 0.000 58.448 58.448 rebuild_ks_matrix 15 6.0 0.000 0.000 58.192 58.192 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 58.192 58.192 qs_scf_new_mos 15 5.0 0.000 0.000 57.508 57.508 eigensolver 15 6.0 0.002 0.002 46.915 46.915 qs_vxc_create 15 8.0 0.021 0.021 44.301 44.301 cp_fm_diag_elpa 15 7.0 0.000 0.000 39.922 39.922 cp_fm_diag_elpa_base 15 8.0 38.721 38.721 39.922 39.922 calculate_dispersion_nonloc 15 9.0 6.840 6.840 38.570 38.570 fft_wrap_pw1pw2 1086 10.0 0.020 0.020 32.617 32.617 fft_wrap_pw1pw2_150 765 11.0 9.489 9.489 24.248 24.248 qs_rho_update_rho_low 16 5.0 0.000 0.000 14.597 14.597 calculate_rho_elec 16 6.0 0.240 0.240 14.597 14.597 grid_collocate_task_list 16 7.0 12.836 12.836 12.836 12.836 sum_up_and_integrate 15 8.0 0.000 0.000 12.698 12.698 integrate_v_rspace 15 9.0 0.024 0.024 12.684 12.684 grid_integrate_task_list 15 10.0 11.882 11.882 11.882 11.882 fft3d_s 1087 12.0 10.047 10.047 10.075 10.075 pw_scatter_s 585 12.1 9.017 9.017 9.017 9.017 fft_wrap_pw1pw2_200 197 11.3 2.066 2.066 8.138 8.138 copy_dbcsr_to_fm 16 5.9 0.001 0.001 8.085 8.085 dbcsr_complete_redistribute 46 8.3 2.254 2.254 7.272 7.272 vdW_energy 15 10.0 5.879 5.879 5.879 5.879 cp_fm_cholesky_restore 45 7.0 5.767 5.767 5.767 5.767 xc_vxc_pw_create 15 9.0 0.216 0.216 5.711 5.711 gspace_mixing 14 5.0 0.171 0.171 4.550 4.550 xc_pw_derive 90 11.0 0.001 0.001 4.073 4.073 broyden_mixing 14 6.0 3.871 3.871 3.871 3.871 dbcsr_finalize 159 9.9 0.016 0.016 3.572 3.572 dbcsr_merge_all 91 11.1 0.124 0.124 3.410 3.410 calculate_dm_sparse 15 6.0 0.018 0.018 3.047 3.047 cp_dbcsr_plus_fm_fm_t 15 7.0 0.001 0.001 2.942 2.942 xc_pw_divergence 15 10.0 0.000 0.000 2.911 2.911 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.882 2.882 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.013 52.734 52.734 qs_energies 1 2.0 0.000 0.000 52.448 52.450 scf_env_do_scf 1 3.0 0.000 0.000 48.821 48.821 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 48.821 48.821 qs_ks_update_qs_env 15 5.0 0.000 0.000 22.291 22.306 rebuild_ks_matrix 15 6.0 0.000 0.000 22.247 22.262 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 22.247 22.262 qs_scf_new_mos 15 5.0 0.000 0.001 13.946 13.974 eigensolver 15 6.0 0.002 0.002 12.774 12.828 qs_rho_update_rho_low 16 5.0 0.000 0.000 12.668 12.673 calculate_rho_elec 16 6.0 0.007 0.007 12.668 12.673 sum_up_and_integrate 15 8.0 0.000 0.001 12.343 12.390 integrate_v_rspace 15 9.0 0.001 0.001 12.333 12.382 grid_collocate_task_list 16 7.0 11.492 11.812 11.492 11.812 grid_integrate_task_list 15 10.0 11.499 11.626 11.499 11.626 qs_vxc_create 15 8.0 0.001 0.002 9.463 9.474 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.017 9.022 cp_fm_diag_elpa_base 15 8.0 8.871 8.902 9.013 9.014 calculate_dispersion_nonloc 15 9.0 0.783 0.826 7.486 7.515 fft_wrap_pw1pw2 1086 10.0 0.024 0.026 7.104 7.217 fft3d_ps 1086 12.0 2.341 2.591 5.332 5.570 fft_wrap_pw1pw2_150 765 11.0 0.155 0.180 4.519 4.562 cp_fm_cholesky_restore 45 7.0 3.598 3.684 3.598 3.684 mp_alltoall_z22v 1086 14.0 2.489 2.999 2.489 2.999 fft_wrap_pw1pw2_200 197 11.3 0.121 0.145 2.438 2.523 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.460 2.460 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.181 2.360 xc_vxc_pw_create 15 9.0 0.017 0.024 1.977 2.008 x_to_yz 585 13.1 0.286 0.329 1.555 1.733 yz_to_x 501 12.9 0.194 0.222 1.413 1.721 build_core_ppnl 1 5.0 1.351 1.486 1.351 1.486 xc_pw_derive 90 11.0 0.001 0.001 1.349 1.439 vdW_energy 15 10.0 1.363 1.418 1.363 1.418 density_rs2pw 16 7.0 0.001 0.001 1.027 1.164 transfer_rs2pw 82 8.0 0.001 0.001 0.746 1.086 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="diag_cu144_broy", label="diag_cu144_broy", y=141.56, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="diag_cu144_broy", label="diag_cu144_broy", y=52.734, yerr=0.0 Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.818, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=38.721, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=12.836, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.882, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.047, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=9.489, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=5.767, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=14.630000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.871, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=11.492, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.499, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.155, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.598, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=2.489, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 1.271 1.271 154.624 154.624 qs_energies 1 2.0 0.000 0.000 153.290 153.290 ls_scf 1 3.0 0.000 0.000 143.660 143.660 ls_scf_main 1 4.0 0.001 0.001 134.520 134.520 density_matrix_trs4 5 5.0 0.003 0.003 77.171 77.171 ls_scf_dm_to_ks 5 5.0 0.000 0.000 55.395 55.395 dbcsr_multiply_generic 95 6.2 0.366 0.366 39.961 39.961 matrix_ls_to_qs 5 6.0 0.000 0.000 34.033 34.033 arnoldi_extremal 6 6.2 0.000 0.000 30.117 30.117 arnoldi_normal_ev 6 7.2 0.008 0.008 30.117 30.117 build_subspace 12 8.2 0.033 0.033 29.517 29.517 dbcsr_matrix_vector_mult 310 9.0 0.094 0.094 28.993 28.993 dbcsr_matrix_vector_mult_local 310 10.0 28.331 28.331 28.337 28.337 qs_ks_update_qs_env 6 6.2 0.000 0.000 25.607 25.607 rebuild_ks_matrix 6 7.2 0.000 0.000 25.294 25.294 build_dftb_ks_matrix 6 8.2 0.000 0.000 25.294 25.294 build_dftb_coulomb 6 9.2 1.063 1.063 24.960 24.960 tb_ewald_overlap 6 10.2 23.823 23.823 23.823 23.823 multiply_cannon 95 7.2 0.118 0.118 23.031 23.031 multiply_cannon_loop 95 8.2 0.154 0.154 18.560 18.560 dbcsr_complete_redistribute 11 7.5 13.814 13.814 18.272 18.272 dbcsr_copy 443 8.0 0.684 0.684 17.985 17.985 dbcsr_copy_into_existing 5 8.0 17.291 17.291 17.292 17.292 matrix_decluster 5 7.0 0.000 0.000 16.741 16.741 make_m2s 190 7.2 0.009 0.009 14.486 14.486 multiply_cannon_multrec 95 9.2 13.608 13.608 13.631 13.631 make_images 190 8.2 3.191 3.191 13.594 13.594 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 9.550 9.550 build_qs_neighbor_lists 1 4.0 0.000 0.000 8.751 8.751 build_neighbor_lists_sab_tbe 1 5.0 8.477 8.477 8.477 8.477 ls_scf_init_scf 1 4.0 0.000 0.000 8.343 8.343 dbcsr_finalize 277 7.6 0.190 0.190 7.583 7.583 dbcsr_merge_all 247 8.6 2.273 2.273 6.906 6.906 make_images_data 190 9.2 0.004 0.004 6.031 6.031 hybrid_alltoall_any 201 10.0 5.546 5.546 5.702 5.702 dbcsr_sort_indices 443 10.1 5.439 5.439 5.439 5.439 calculate_norms 190 9.2 4.775 4.775 4.775 4.775 quick_finalize 203 10.1 0.092 0.092 4.558 4.558 dbcsr_dot 66 6.3 4.382 4.382 4.383 4.383 setup_rec_index_2d 190 8.2 4.307 4.307 4.307 4.307 ls_scf_initial_guess 1 5.0 0.000 0.000 4.286 4.286 ls_scf_qs_atomic_guess 1 6.0 0.000 0.000 4.286 4.286 dbcsr_special_finalize 190 9.2 0.001 0.001 4.207 4.207 ls_scf_init_matrix_S 1 5.0 0.000 0.000 4.056 4.056 tree_to_linear_d 11 10.5 3.844 3.844 3.844 3.844 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 3.461 3.461 dbcsr_add_d 130 6.0 0.000 0.000 3.317 3.317 dbcsr_add_anytype 130 7.0 0.712 0.712 3.317 3.317 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.063 0.082 43.651 43.653 qs_energies 1 2.0 0.000 0.000 43.427 43.427 ls_scf 1 3.0 0.000 0.000 43.095 43.096 ls_scf_main 1 4.0 0.000 0.003 39.915 39.915 density_matrix_trs4 5 5.0 0.003 0.003 37.895 37.964 dbcsr_multiply_generic 95 6.2 0.030 0.051 37.535 37.686 multiply_cannon 95 7.2 0.020 0.023 31.289 31.683 multiply_cannon_loop 95 8.2 0.078 0.087 29.877 30.416 multiply_cannon_multrec 760 9.2 17.162 18.264 17.311 18.420 mp_waitall_1 6128 10.4 11.728 12.706 11.728 12.706 multiply_cannon_metrocomm3 760 9.2 0.007 0.007 9.124 10.573 make_m2s 190 7.2 0.018 0.020 3.988 4.046 make_images 190 8.2 0.250 0.267 3.931 3.992 calculate_norms 1520 9.2 2.466 2.630 2.466 2.630 ls_scf_init_scf 1 4.0 0.000 0.000 2.299 2.300 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.158 2.165 make_images_data 190 9.2 0.004 0.005 1.857 2.117 mp_sum_l 421 7.0 1.798 2.115 1.798 2.115 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.000 1.971 1.973 ls_scf_dm_to_ks 5 5.0 0.000 0.000 1.865 1.946 multiply_cannon_metrocomm1 760 9.2 0.003 0.003 0.879 1.919 hybrid_alltoall_any 201 10.0 0.112 0.616 1.590 1.802 dbcsr_multiply_generic_mpsum_f 71 7.2 0.000 0.000 1.319 1.583 arnoldi_extremal 6 6.2 0.000 0.000 1.285 1.291 arnoldi_normal_ev 6 7.2 0.001 0.002 1.284 1.291 build_subspace 12 8.2 0.013 0.015 1.236 1.237 matrix_ls_to_qs 5 6.0 0.000 0.003 1.091 1.156 dbcsr_complete_redistribute 11 7.5 0.599 0.627 1.087 1.152 make_images_pack 190 9.2 0.965 1.119 0.967 1.120 matrix_decluster 5 7.0 0.000 0.000 0.992 1.059 dbcsr_matrix_vector_mult 310 9.0 0.007 0.025 0.985 1.048 buffer_matrices_ensure_size 190 8.2 0.914 1.034 0.914 1.034 qs_ks_update_qs_env 6 6.2 0.000 0.000 0.903 0.977 ls_scf_post 1 4.0 0.000 0.000 0.882 0.882 ls_scf_store_result 1 5.0 0.000 0.000 0.856 0.879 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="bench_dftb", label="bench_dftb", y=154.624, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="bench_dftb", label="bench_dftb", y=43.651, yerr=0.0 Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=52.982, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=28.331, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="tb_ewald_overlap", label="tb_ewald_overlap", y=23.823, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=17.291, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=13.814, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.608, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=4.775, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=8.933000000000007, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="tb_ewald_overlap", label="tb_ewald_overlap", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=0.599, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.162, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=2.466, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=11.728, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=0.965, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.798, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.004 44.507 44.507 lib_test 1 2.0 0.000 0.000 44.502 44.502 dbcsr_run_tests 3 3.0 0.000 0.000 44.502 44.502 test_multiplies_multiproc 3 4.0 0.001 0.001 30.629 30.629 dbcsr_multiply_generic 9 5.0 0.008 0.008 22.167 22.167 dbcsr_make_random_matrix 9 4.0 9.917 9.917 13.704 13.704 multiply_cannon 9 6.0 0.019 0.019 11.962 11.962 multiply_cannon_loop 9 7.0 0.007 0.007 11.693 11.693 multiply_cannon_multrec 9 8.0 11.685 11.685 11.686 11.686 dbcsr_finalize 27 5.7 0.016 0.016 7.611 7.611 dbcsr_redistribute 9 5.0 4.632 4.632 6.846 6.846 dbcsr_merge_all 18 6.5 3.094 3.094 6.691 6.691 make_m2s 18 6.0 0.000 0.000 5.638 5.638 make_images 18 7.0 0.453 0.453 5.575 5.575 make_images_data 18 8.0 0.000 0.000 4.597 4.597 hybrid_alltoall_any 18 9.0 4.569 4.569 4.569 4.569 dbcsr_data_release 975 7.6 3.604 3.604 3.604 3.604 tree_to_linear_d 9 7.0 2.367 2.367 2.367 2.367 dbcsr_destroy 93 5.8 0.000 0.000 1.890 1.890 mp_alltoall_d11v 27 6.0 1.772 1.772 1.772 1.772 dbcsr_data_copy_aa2 9 7.0 1.202 1.202 1.202 1.202 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.008 19.536 19.539 lib_test 1 2.0 0.000 0.000 19.510 19.526 dbcsr_run_tests 3 3.0 0.000 0.001 19.509 19.526 test_multiplies_multiproc 3 4.0 0.000 0.002 18.820 18.873 dbcsr_multiply_generic 9 5.0 0.001 0.001 17.541 17.612 multiply_cannon 9 6.0 0.002 0.002 15.477 15.960 multiply_cannon_loop 9 7.0 0.002 0.002 15.175 15.669 multiply_cannon_multrec 72 8.0 11.367 11.814 11.368 11.814 mp_waitall_1 576 9.2 4.188 4.592 4.188 4.592 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.656 4.207 mp_sum_l 70 5.1 0.702 1.128 0.702 1.128 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.698 1.123 make_m2s 18 6.0 0.001 0.001 0.749 0.803 make_images 18 7.0 0.016 0.018 0.746 0.801 dbcsr_finalize 27 5.7 0.000 0.000 0.668 0.777 dbcsr_make_random_matrix 9 4.0 0.486 0.525 0.660 0.708 dbcsr_data_release 444 7.6 0.564 0.672 0.564 0.672 dbcsr_merge_all 18 6.5 0.098 0.113 0.547 0.643 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.147 0.592 dbcsr_destroy 111 5.9 0.000 0.001 0.446 0.575 dbcsr_redistribute 9 5.0 0.222 0.271 0.506 0.534 make_images_data 18 8.0 0.000 0.001 0.420 0.486 hybrid_alltoall_any 18 9.0 0.032 0.151 0.364 0.424 dbcsr_data_copy_aa2 18 7.5 0.318 0.394 0.318 0.394 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="dbcsr", label="dbcsr", y=44.507, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="dbcsr", label="dbcsr", y=19.536, yerr=0.0 Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=10.100000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.685, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=9.917, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=4.632, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=4.569, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.604, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=1.9750000000000014, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.367, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.486, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.222, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="hybrid_alltoall_any", label="hybrid_alltoall_any", y=0.032, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.564, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.188, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.702, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.082 0.082 160.599 160.599 qs_mol_dyn_low 1 2.0 0.004 0.004 159.229 159.229 qs_forces 6 3.8 0.001 0.001 103.399 103.399 qs_energies 6 4.8 0.000 0.000 99.027 99.027 scf_env_do_scf 6 5.8 0.000 0.000 96.391 96.391 scf_env_do_scf_inner_loop 113 6.2 0.012 0.012 92.040 92.040 velocity_verlet 5 3.0 0.003 0.003 75.093 75.093 rebuild_ks_matrix 119 8.1 0.000 0.000 74.516 74.516 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.015 74.515 74.515 qs_ks_update_qs_env 119 7.3 0.001 0.001 70.522 70.522 fft_wrap_pw1pw2 2059 12.4 0.031 0.031 64.770 64.770 fft_wrap_pw1pw2_150 1321 13.9 19.847 19.847 63.664 63.664 qs_vxc_create 119 10.1 0.003 0.003 48.480 48.480 xc_vxc_pw_create 119 11.1 1.740 1.740 48.478 48.478 qmmm_el_coupling 6 3.8 0.000 0.000 48.367 48.367 qmmm_elec_with_gaussian 6 4.8 0.014 0.014 48.363 48.363 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 47.766 47.766 qmmm_elec_gaussian_low_G 6 6.8 46.850 46.850 46.850 46.850 xc_pw_derive 714 13.1 0.005 0.005 36.887 36.887 xc_pw_divergence 119 12.1 0.002 0.002 25.095 25.095 qs_rho_update_rho_low 119 7.3 0.001 0.001 22.766 22.766 calculate_rho_elec 119 8.3 1.494 1.494 22.766 22.766 xc_rho_set_and_dset_create 119 12.1 1.561 1.561 21.457 21.457 fft3d_s 2060 14.4 20.721 20.721 20.750 20.750 pw_scatter_s 1095 14.8 19.856 19.856 19.856 19.856 density_rs2pw 119 9.3 0.005 0.005 12.521 12.521 sum_up_and_integrate 119 10.1 0.001 0.001 10.473 10.473 integrate_v_rspace 119 11.1 0.024 0.024 10.377 10.377 qs_ks_ddapc 119 10.1 0.002 0.002 9.322 9.322 grid_collocate_task_list 119 9.3 8.751 8.751 8.751 8.751 potential_pw2rs 119 12.1 0.042 0.042 6.874 6.874 qmmm_forces 6 3.8 0.001 0.001 5.241 5.241 qmmm_forces_with_gaussian 6 4.8 0.018 0.018 4.848 4.848 pw_integral_ab_c1d_c1d_gs 280 11.0 4.398 4.398 4.398 4.398 init_scf_loop 6 6.8 0.000 0.000 4.348 4.348 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.165 4.165 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.032 4.032 pw_poisson_solve 125 9.9 0.002 0.002 3.939 3.939 cp_ddapc_apply_CD 119 11.1 0.020 0.020 3.906 3.906 qs_scf_new_mos 113 7.2 0.001 0.001 3.705 3.705 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.705 3.705 ot_scf_mini 113 9.2 0.001 0.001 3.553 3.553 grid_integrate_task_list 119 12.1 3.479 3.479 3.479 3.479 qmmm_forces_gaussian_low_G 6 6.8 3.442 3.442 3.442 3.442 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.039 53.033 53.034 qs_mol_dyn_low 1 2.0 0.004 0.006 51.910 51.958 qs_forces 6 3.8 0.001 0.001 38.727 38.727 qs_energies 6 4.8 0.000 0.000 36.964 36.965 scf_env_do_scf 6 5.8 0.000 0.000 36.087 36.087 scf_env_do_scf_inner_loop 113 6.2 0.003 0.019 34.486 34.489 rebuild_ks_matrix 119 8.1 0.000 0.000 25.737 25.744 qs_ks_build_kohn_sham_matrix 119 9.1 0.019 0.020 25.737 25.744 qs_ks_update_qs_env 119 7.3 0.001 0.001 24.210 24.217 velocity_verlet 5 3.0 0.002 0.004 21.594 21.596 fft_wrap_pw1pw2 2059 12.4 0.037 0.042 16.111 16.595 fft_wrap_pw1pw2_150 1321 13.9 0.617 0.682 15.335 15.777 fft3d_ps 2059 14.4 6.397 6.987 12.484 13.240 qs_vxc_create 119 10.1 0.004 0.012 13.217 13.219 xc_vxc_pw_create 119 11.1 0.153 0.204 13.213 13.216 xc_pw_derive 714 13.1 0.010 0.012 9.901 10.246 qs_rho_update_rho_low 119 7.3 0.001 0.001 9.275 9.277 calculate_rho_elec 119 8.3 0.049 0.056 9.274 9.276 sum_up_and_integrate 119 10.1 0.002 0.002 8.493 8.507 integrate_v_rspace 119 11.1 0.004 0.005 8.432 8.457 qmmm_forces 6 3.8 0.002 0.003 7.374 7.374 qmmm_forces_with_gaussian 6 4.8 0.007 0.010 7.104 7.292 xc_pw_divergence 119 12.1 0.005 0.005 6.717 6.948 xc_rho_set_and_dset_create 119 12.1 0.349 0.428 6.134 6.294 density_rs2pw 119 9.3 0.006 0.007 5.614 5.937 mp_alltoall_z22v 2059 16.4 4.887 5.737 4.887 5.737 qmmm_el_coupling 6 3.8 0.000 0.000 5.171 5.394 qmmm_elec_with_gaussian 6 4.8 0.017 0.021 5.170 5.393 potential_pw2rs 119 12.1 0.007 0.007 5.184 5.210 grid_collocate_task_list 119 9.3 3.493 3.869 3.493 3.869 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.751 3.795 transfer_pw2rs 500 12.8 0.004 0.005 3.530 3.561 yz_to_x 964 15.0 0.482 0.557 2.914 3.469 x_to_yz 1095 15.8 0.677 0.797 3.132 3.400 grid_integrate_task_list 119 12.1 2.936 3.157 2.936 3.157 qmmm_forces_gaussian_low_G 6 6.8 3.053 3.094 3.053 3.094 mp_waitany 4028 12.8 2.669 3.016 2.669 3.016 transfer_rs2pw 488 10.2 0.006 0.008 2.699 2.992 pw_restrict_s3 18 5.8 1.320 1.365 2.672 2.799 qs_scf_new_mos 113 7.2 0.001 0.001 2.634 2.643 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.634 2.643 ot_scf_mini 113 9.2 0.001 0.001 2.542 2.547 transfer_pw2rs_150 125 13.9 0.726 0.824 2.493 2.536 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 2.188 2.291 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.060 2.283 pw_prolongate_s3 18 6.8 1.050 1.113 2.060 2.283 transfer_rs2pw_150 125 11.2 0.501 0.598 1.865 2.138 dbcsr_multiply_generic 2598 12.3 0.064 0.065 1.982 2.025 qs_ks_ddapc 119 10.1 0.003 0.003 1.844 1.914 qmmm_elec_gaussian_low_G 6 6.8 1.666 1.773 1.666 1.773 init_scf_loop 6 6.8 0.000 0.000 1.595 1.596 pw_gather_p 964 14.0 1.225 1.554 1.225 1.554 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.541 1.541 mp_sum_dm3 33 5.7 1.380 1.433 1.380 1.433 pw_copy 2027 12.4 1.316 1.426 1.316 1.426 pw_poisson_solve 125 9.9 0.003 0.004 1.339 1.412 pw_derive 1089 13.4 1.289 1.412 1.289 1.412 ot_mini 113 10.2 0.001 0.001 1.270 1.275 mp_waitall_1 178435 16.4 1.172 1.268 1.172 1.268 mp_sum_d 5830 12.2 0.903 1.169 0.903 1.169 pw_integral_ab_r3d_r3d_rs 2481 7.4 0.920 1.012 1.066 1.164 pw_scatter_p 1095 14.8 1.033 1.111 1.033 1.111 pw_zero 2653 8.4 1.004 1.072 1.004 1.072 ------------------------------------------------------------------------------- PlotPoint: plot="total_timings_32omp", name="MQAE_single_node", label="MQAE_single_node", y=160.599, yerr=0.0 PlotPoint: plot="total_timings_32mpi", name="MQAE_single_node", label="MQAE_single_node", y=53.033, yerr=0.0 Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=37.65299999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=46.85, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=20.721, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=19.856, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=19.847, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.751, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.479, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.442, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=29.984, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=1.666, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.617, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.493, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=2.936, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.053, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.397, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.887, yerr=0.0 Summary: Performance test took 46 minutes. Status: OK ---> Removed intermediate container 60097fa32564 ---> 0027c42fda63 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in f5d84c29d63f ---> Removed intermediate container f5d84c29d63f ---> 1d1af5903a8f Step 42/42 : ENTRYPOINT [] ---> Running in ae18b94a4d19 ---> Removed intermediate container ae18b94a4d19 ---> 1aa330ebcb8a [Warning] One or more build-args [GIT_COMMIT_SHA SPACK_CACHE] were not consumed Successfully built 1aa330ebcb8a Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2026-06-18 07:01:55+00:00