StartDate: 2022-06-27 11:05:53+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: 32ccd554ea8c1f56db3f63484438b0549ee2810f CommitTime: 2022-06-27 10:51:48 +0200 CommitAuthor: Ole Schütt CommitSubject: Fix name collision due to dbm_tests Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=32ccd554ea8c1f56db3f63484438b0549ee2810f Sending build context to Docker daemon 363.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 405f018f9d1d: Already exists Digest: sha256:b6b83d3c331794420340093eb706a6f152d9c1fa51b262d9bf34594887c2c7ac Status: Downloaded newer image for ubuntu:22.04 ---> 27941809078c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 92ee757f28a3 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> a398cc4ae5b3 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> e20712e9c254 Step 5/42 : RUN mkdir scripts ---> Using cache ---> c7b9413ca6be Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5f4bcd2de9f5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> c39d97839810 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> d0b21d05b338 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e616c7670ff Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5ba44cd61a38 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 1aa896c19a24 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c7ccbf5e1b85 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9cb6a1bd2cd3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1eed70bdd06a Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ac04ff4ae473 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cd0e2369620a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 1b4ef27dc823 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 8c2ce1cbdb23 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> c08a0bc6f0ef Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 377bc99f74ae Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 415f340401e7 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 46d9c769ebc6 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 6aafee7e9835 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 2e0fc7bba01b Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> dd31a4351cf1 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> a67db58bafbb Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> e7d1480e5889 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> f822acadf693 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 0b0fdcc56471 Step 30/42 : COPY ./Makefile . ---> Using cache ---> a38c6462aef8 Step 31/42 : COPY ./src ./src ---> 446fa7425c6b Step 32/42 : COPY ./exts ./exts ---> 4c73f9c8394f Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> 4179ff500c7c Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 2a6c9f5e2dbf './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 2a6c9f5e2dbf ---> ec130b2efd64 Step 35/42 : COPY ./data ./data ---> 20c24f29ceaa Step 36/42 : COPY ./tests ./tests ---> 45c5ac2872e3 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 32375aee7e98 Step 38/42 : COPY ./benchmarks ./benchmarks ---> b5ad3e04e339 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 33b198e545fc Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 448f38e69682 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 191.591 191.591 qs_mol_dyn_low 1 2.0 0.003 0.003 190.981 190.981 qs_forces 11 3.9 0.001 0.001 190.942 190.942 qs_energies 11 4.9 0.001 0.001 184.754 184.754 scf_env_do_scf 11 5.9 0.001 0.001 172.500 172.500 velocity_verlet 10 3.0 0.002 0.002 133.170 133.170 init_scf_loop 11 6.9 0.000 0.000 95.680 95.680 prepare_preconditioner 11 7.9 0.000 0.000 93.188 93.188 make_preconditioner 11 8.9 0.000 0.000 93.188 93.188 make_full_inverse_cholesky 11 9.9 0.000 0.000 92.022 92.022 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 76.694 76.694 cp_fm_cholesky_invert 11 10.9 64.920 64.920 64.920 64.920 qs_scf_new_mos 108 7.5 0.001 0.001 41.938 41.938 qs_scf_loop_do_ot 108 8.5 0.001 0.001 41.937 41.937 ot_scf_mini 108 9.5 0.002 0.002 40.480 40.480 cp_fm_cholesky_decompose 22 10.9 23.242 23.242 23.242 23.242 qs_ot_get_p 119 10.4 0.001 0.001 22.786 22.786 qs_ot_p2m_diag 50 11.0 0.155 0.155 21.421 21.421 cp_dbcsr_syevd 50 12.0 0.003 0.003 21.018 21.018 rebuild_ks_matrix 119 8.3 0.001 0.001 20.743 20.743 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.742 20.742 dbcsr_multiply_generic 2286 12.5 0.165 0.165 20.064 20.064 cp_fm_diag_elpa 50 13.0 0.000 0.000 19.810 19.810 cp_fm_diag_elpa_base 50 14.0 19.748 19.748 19.809 19.809 qs_rho_update_rho 119 7.7 0.001 0.001 19.577 19.577 calculate_rho_elec 119 8.7 0.951 0.951 19.577 19.577 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.011 19.011 grid_collocate_task_list 119 9.7 15.245 15.245 15.245 15.245 ot_mini 108 10.5 0.001 0.001 14.168 14.168 sum_up_and_integrate 119 10.3 0.196 0.196 13.014 13.014 integrate_v_rspace 119 11.3 0.093 0.093 12.818 12.818 grid_integrate_task_list 119 12.3 10.861 10.861 10.861 10.861 make_m2s 4572 13.5 0.047 0.047 10.843 10.843 qs_ot_get_derivative 108 11.5 0.001 0.001 8.792 8.792 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.585 6.585 dbcsr_make_dense_low 5837 15.5 0.072 0.072 5.938 5.938 pw_transfer 1439 11.6 0.060 0.060 5.897 5.897 make_dense_data 5837 16.5 5.183 5.183 5.852 5.852 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.683 5.683 ot_diis_step 108 11.5 0.004 0.004 5.372 5.372 make_images 4572 14.5 2.058 2.058 5.336 5.336 multiply_cannon 2286 13.5 0.185 0.185 5.201 5.201 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.158 5.158 fft_wrap_pw1pw2_140 487 13.2 0.408 0.408 4.836 4.836 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.834 4.834 apply_single 119 13.6 0.000 0.000 4.834 4.834 init_scf_run 11 5.9 0.002 0.002 4.737 4.737 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.735 4.735 multiply_cannon_loop 2286 14.5 0.073 0.073 4.693 4.693 multiply_cannon_multrec 2286 15.5 4.558 4.558 4.619 4.619 wfi_extrapolate 11 7.9 0.001 0.001 4.209 4.209 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.024 50.007 50.017 qs_mol_dyn_low 1 2.0 0.004 0.005 49.897 49.901 qs_forces 11 3.9 0.001 0.001 49.857 49.858 qs_energies 11 4.9 0.001 0.001 46.433 46.434 scf_env_do_scf 11 5.9 0.001 0.002 42.616 42.617 scf_env_do_scf_inner_loop 108 6.5 0.003 0.021 39.444 39.444 velocity_verlet 10 3.0 0.001 0.003 29.802 29.803 rebuild_ks_matrix 119 8.3 0.000 0.001 19.035 19.097 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.020 19.034 19.096 qs_ks_update_qs_env 119 7.6 0.001 0.001 16.995 17.054 dbcsr_multiply_generic 2286 12.5 0.072 0.086 13.567 15.596 qs_rho_update_rho 119 7.7 0.001 0.001 14.742 14.750 calculate_rho_elec 119 8.7 0.029 0.030 14.741 14.749 sum_up_and_integrate 119 10.3 0.018 0.020 14.642 14.734 integrate_v_rspace 119 11.3 0.004 0.005 14.624 14.718 grid_collocate_task_list 119 9.7 9.372 11.392 9.372 11.392 grid_integrate_task_list 119 12.3 8.416 11.108 8.416 11.108 qs_scf_new_mos 108 7.5 0.001 0.001 10.793 10.871 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.793 10.870 ot_scf_mini 108 9.5 0.002 0.002 10.151 10.213 multiply_cannon 2286 13.5 0.118 0.134 9.908 10.073 multiply_cannon_loop 2286 14.5 0.089 0.113 9.373 9.594 mp_waitall_1 169478 16.3 8.428 8.871 8.428 8.871 rs_pw_transfer 974 11.9 0.011 0.013 6.140 6.554 multiply_cannon_metrocomm3 18288 15.5 0.037 0.048 5.487 5.952 ot_mini 108 10.5 0.001 0.001 5.856 5.925 density_rs2pw 119 9.7 0.005 0.006 5.017 5.428 potential_pw2rs 119 12.3 0.006 0.007 3.428 3.453 mp_alltoall_d11v 2130 13.8 2.965 3.367 2.965 3.367 multiply_cannon_multrec 18288 15.5 3.026 3.345 3.035 3.357 pw_transfer 1439 11.6 0.074 0.080 3.292 3.330 mp_waitany 9880 13.7 2.919 3.310 2.919 3.310 fft_wrap_pw1pw2 1201 12.6 0.009 0.010 3.151 3.184 init_scf_loop 11 6.9 0.000 0.000 3.159 3.159 rs_gather_matrices 119 12.3 0.084 0.102 2.736 3.140 mp_sum_l 11218 13.2 0.865 3.074 0.865 3.074 qs_ot_get_derivative 108 11.5 0.001 0.001 3.005 3.061 rs_pw_transfer_RS2PW_140 130 11.5 0.266 0.313 2.590 3.007 ot_diis_step 108 11.5 0.004 0.012 2.810 2.810 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.712 2.779 apply_single 119 13.6 0.000 0.000 2.712 2.779 fft_wrap_pw1pw2_140 487 13.2 0.255 0.286 2.633 2.699 init_scf_run 11 5.9 0.000 0.004 2.622 2.622 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.621 2.622 make_m2s 4572 13.5 0.045 0.055 2.353 2.428 fft3d_ps 1201 14.6 1.154 1.253 2.275 2.346 wfi_extrapolate 11 7.9 0.001 0.001 2.345 2.345 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.197 2.206 make_images 4572 14.5 0.118 0.141 2.033 2.113 rs_pw_transfer_PW2RS_140 130 13.9 0.533 0.610 1.437 1.476 qs_ot_get_p 119 10.4 0.001 0.001 1.395 1.448 mp_sum_d 4129 12.0 0.991 1.303 0.991 1.303 make_images_data 4572 15.5 0.034 0.042 1.088 1.205 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.884 1.197 prepare_preconditioner 11 7.9 0.000 0.000 1.109 1.130 make_preconditioner 11 8.9 0.000 0.000 1.108 1.129 hybrid_alltoall_any 4725 16.4 0.059 0.175 0.932 1.028 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.006 1.020 mp_alltoall_z22v 1201 16.6 0.882 1.014 0.882 1.014 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 1.005 1.006 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.963 1.001 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=53.016999999999996, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=64.92, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=23.242, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.748, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.245, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.861, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.558, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.799999999999997, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.372, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.416, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.026, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=2.965, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.428, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 211.671 211.671 qs_mol_dyn_low 1 2.0 0.002 0.002 211.003 211.003 qs_forces 11 3.9 0.001 0.001 210.966 210.966 qs_energies 11 4.9 0.001 0.001 203.010 203.010 scf_env_do_scf 11 5.9 0.001 0.001 187.877 187.877 velocity_verlet 10 3.0 0.002 0.002 147.148 147.148 init_scf_loop 11 6.9 0.000 0.000 95.933 95.933 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 91.793 91.793 prepare_preconditioner 11 7.9 0.000 0.000 91.767 91.767 make_preconditioner 11 8.9 0.000 0.000 91.767 91.767 make_full_inverse_cholesky 11 9.9 0.000 0.000 90.650 90.650 cp_fm_cholesky_invert 11 10.9 62.802 62.802 62.802 62.802 qs_scf_new_mos 96 7.5 0.001 0.001 35.921 35.921 qs_scf_loop_do_ot 96 8.5 0.001 0.001 35.921 35.921 ot_scf_mini 96 9.5 0.002 0.002 34.689 34.689 rebuild_ks_matrix 107 8.3 0.001 0.001 34.219 34.219 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 34.218 34.218 qs_rho_update_rho 107 7.7 0.001 0.001 32.081 32.081 calculate_rho_elec 107 8.7 0.872 0.872 32.080 32.080 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.740 30.740 grid_collocate_task_list 107 9.7 28.059 28.059 28.059 28.059 sum_up_and_integrate 107 10.3 0.176 0.176 26.980 26.980 integrate_v_rspace 107 11.3 0.103 0.103 26.804 26.804 grid_integrate_task_list 107 12.3 24.908 24.908 24.908 24.908 cp_fm_cholesky_decompose 22 10.9 24.041 24.041 24.041 24.041 qs_ot_get_p 107 10.4 0.001 0.001 19.025 19.025 dbcsr_multiply_generic 1966 12.4 0.137 0.137 18.064 18.064 qs_ot_p2m_diag 44 11.0 0.137 0.137 17.904 17.904 cp_dbcsr_syevd 44 12.0 0.002 0.002 17.513 17.513 cp_fm_diag_elpa 44 13.0 0.000 0.000 16.518 16.518 cp_fm_diag_elpa_base 44 14.0 16.464 16.464 16.518 16.518 ot_mini 96 10.5 0.001 0.001 12.427 12.427 make_m2s 3932 13.4 0.041 0.041 9.747 9.747 qs_ot_get_derivative 96 11.5 0.001 0.001 7.633 7.633 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.361 7.361 init_scf_run 11 5.9 0.002 0.002 6.543 6.543 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.542 6.542 wfi_extrapolate 11 7.9 0.001 0.001 5.896 5.896 pw_transfer 1295 11.6 0.055 0.055 5.614 5.614 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.412 5.412 dbcsr_make_dense_low 4961 15.5 0.059 0.059 5.157 5.157 make_dense_data 4961 16.5 4.589 4.589 5.086 5.086 make_images 3932 14.4 1.893 1.893 4.923 4.923 ot_diis_step 96 11.5 0.003 0.003 4.791 4.791 multiply_cannon 1966 13.4 0.152 0.152 4.658 4.658 fft_wrap_pw1pw2_140 439 13.2 0.470 0.470 4.650 4.650 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.532 4.532 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.423 4.423 apply_single 107 13.6 0.000 0.000 4.423 4.423 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.349 4.349 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.311 4.311 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.025 85.144 85.154 qs_mol_dyn_low 1 2.0 0.003 0.004 85.044 85.048 qs_forces 11 3.9 0.001 0.002 85.005 85.005 qs_energies 11 4.9 0.001 0.001 79.384 79.388 scf_env_do_scf 11 5.9 0.000 0.002 73.651 73.657 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 68.234 68.234 velocity_verlet 10 3.0 0.001 0.003 50.498 50.499 rebuild_ks_matrix 107 8.3 0.000 0.001 37.705 37.750 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.019 37.705 37.749 sum_up_and_integrate 107 10.3 0.017 0.019 33.653 33.735 integrate_v_rspace 107 11.3 0.004 0.005 33.635 33.721 qs_ks_update_qs_env 107 7.6 0.001 0.001 33.342 33.381 qs_rho_update_rho 107 7.7 0.001 0.001 32.518 32.528 calculate_rho_elec 107 8.7 0.026 0.027 32.517 32.527 grid_integrate_task_list 107 12.3 23.312 30.400 23.312 30.400 grid_collocate_task_list 107 9.7 22.828 29.494 22.828 29.494 dbcsr_multiply_generic 1966 12.4 0.064 0.075 12.196 13.299 rs_pw_transfer 878 11.9 0.010 0.012 10.385 11.380 density_rs2pw 107 9.7 0.004 0.006 9.340 10.333 qs_scf_new_mos 96 7.5 0.001 0.001 9.531 9.566 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.531 9.566 multiply_cannon 1966 13.4 0.108 0.122 8.842 9.019 ot_scf_mini 96 9.5 0.002 0.002 8.953 8.981 multiply_cannon_loop 1966 14.4 0.082 0.100 8.351 8.544 mp_waitany 8968 13.7 7.543 8.505 7.543 8.505 mp_alltoall_d11v 1998 13.7 7.417 8.344 7.417 8.344 rs_pw_transfer_RS2PW_140 118 11.5 0.218 0.237 7.207 8.205 rs_gather_matrices 107 12.3 0.077 0.084 7.190 8.109 mp_waitall_1 146670 16.2 7.405 7.677 7.405 7.677 init_scf_loop 11 6.9 0.000 0.001 5.404 5.404 ot_mini 96 10.5 0.001 0.001 5.267 5.297 multiply_cannon_metrocomm3 15728 15.4 0.033 0.044 4.809 5.119 init_scf_run 11 5.9 0.000 0.004 4.555 4.555 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.555 4.555 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.489 4.496 wfi_extrapolate 11 7.9 0.001 0.001 4.138 4.138 potential_pw2rs 107 12.3 0.006 0.007 3.096 3.119 multiply_cannon_multrec 15728 15.4 2.761 2.983 2.770 2.995 pw_transfer 1295 11.6 0.068 0.075 2.946 2.987 fft_wrap_pw1pw2 1081 12.6 0.008 0.009 2.818 2.868 qs_ot_get_derivative 96 11.5 0.001 0.001 2.777 2.805 ot_diis_step 96 11.5 0.003 0.004 2.469 2.469 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.410 2.464 apply_single 107 13.6 0.000 0.000 2.410 2.464 fft_wrap_pw1pw2_140 439 13.2 0.229 0.263 2.372 2.420 make_m2s 3932 13.4 0.040 0.048 2.139 2.205 mp_sum_l 9666 13.1 0.819 2.096 0.819 2.096 fft3d_ps 1081 14.6 1.042 1.106 2.016 2.092 make_images 3932 14.4 0.105 0.123 1.851 1.910 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=55.39699999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=62.802, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=28.059, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.908, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=24.041, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.464, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.638999999999996, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.828, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=23.312, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.417, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.405, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.543, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.181 0.181 151.175 151.175 qs_energies 1 2.0 0.000 0.000 150.345 150.345 scf_env_do_scf 1 3.0 0.000 0.000 148.795 148.795 qs_ks_update_qs_env 8 5.0 0.000 0.000 114.029 114.029 rebuild_ks_matrix 7 6.0 0.000 0.000 113.974 113.974 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 113.974 113.974 hfx_ks_matrix 7 8.0 0.000 0.000 96.622 96.622 integrate_four_center 7 9.0 1.423 1.423 96.603 96.603 integrate_four_center_main 7 10.0 0.698 0.698 83.797 83.797 init_scf_loop 1 4.0 0.000 0.000 83.505 83.505 integrate_four_center_bin 455 11.0 83.100 83.100 83.100 83.100 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 65.282 65.282 prepare_preconditioner 1 5.0 0.000 0.000 29.361 29.361 make_preconditioner 1 6.0 0.000 0.000 29.361 29.361 arnoldi_normal_ev 11 9.3 0.001 0.001 17.000 17.000 estimate_cond_num 1 7.0 0.000 0.000 16.957 16.957 build_subspace 28 9.5 0.009 0.009 16.616 16.616 integrate_four_center_load 7 10.0 0.001 0.001 11.130 11.130 hfx_load_balance 1 11.0 0.001 0.001 11.129 11.129 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 9.790 9.790 admm_fit_mo_coeffs 7 9.0 0.000 0.000 8.592 8.592 make_full_inverse_cholesky 1 7.0 0.000 0.000 8.017 8.017 cp_fm_cholesky_invert 2 9.5 8.001 8.001 8.001 8.001 dbcsr_sym_m_v_mult 562 10.0 0.015 0.015 7.660 7.660 DGKS_ortho_d 673 10.6 7.182 7.182 7.184 7.184 Gram_Schmidt_ortho_d 673 10.6 5.730 5.730 5.731 5.731 hfx_load_balance_bin 1 12.0 5.558 5.558 5.558 5.558 hfx_load_balance_count 1 12.0 5.555 5.555 5.555 5.555 purify_mo_diag 7 10.0 0.000 0.000 4.746 4.746 make_full_single_inverse 1 7.0 0.000 0.000 4.261 4.261 cp_fm_syevd 7 11.0 0.000 0.000 4.215 4.215 cp_fm_syevd_base 7 12.0 4.215 4.215 4.215 4.215 arnoldi_generalized_ev 1 8.0 0.000 0.000 4.183 4.183 qs_scf_new_mos 7 5.0 0.000 0.000 4.037 4.037 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.036 4.036 gev_build_subspace 4 9.0 0.005 0.005 3.937 3.937 ot_scf_mini 7 7.0 0.000 0.000 3.934 3.934 qs_vxc_create 14 8.0 0.000 0.000 3.895 3.895 xc_vxc_pw_create 14 9.0 0.163 0.163 3.895 3.895 fit_mo_coeffs 7 10.0 0.000 0.000 3.846 3.846 dbcsr_copy 1318 10.8 0.903 0.903 3.636 3.636 cp_fm_cholesky_decompose 3 8.7 3.106 3.106 3.106 3.106 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.196 0.221 137.117 137.132 qs_energies 1 2.0 0.000 0.000 136.805 136.812 scf_env_do_scf 1 3.0 0.000 0.001 136.389 136.390 qs_ks_update_qs_env 8 5.0 0.000 0.000 134.225 134.229 rebuild_ks_matrix 7 6.0 0.000 0.000 134.214 134.214 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 134.214 134.214 hfx_ks_matrix 7 8.0 0.000 0.000 127.902 127.907 integrate_four_center 7 9.0 0.053 0.342 127.893 127.899 integrate_four_center_main 7 10.0 0.003 0.004 81.947 115.666 integrate_four_center_bin 448 11.0 81.944 115.662 81.944 115.662 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 76.976 76.976 init_scf_loop 1 4.0 0.000 0.000 59.413 59.413 mp_sync 70 11.3 33.745 36.359 33.745 36.359 integrate_four_center_load 7 10.0 0.000 0.000 11.528 11.533 hfx_load_balance 1 11.0 0.001 0.001 11.528 11.533 mp_sum_l 1135 8.3 5.847 6.207 5.847 6.207 hfx_load_balance_dist 1 12.0 0.000 0.000 5.703 5.986 hfx_load_balance_bin 1 12.0 2.880 5.786 2.880 5.786 hfx_load_balance_count 1 12.0 2.865 5.661 2.865 5.661 qs_vxc_create 14 8.0 0.001 0.003 3.030 3.031 xc_vxc_pw_create 14 9.0 0.008 0.009 3.030 3.030 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=36.04900000000001, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=83.1, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.001, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.182, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=5.73, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.558, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.555, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.836000000000013, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=81.944, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.88, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.865, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.847, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=33.745, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 149.745 149.745 qs_energies 1 2.0 0.000 0.000 149.356 149.356 mp2_main 1 3.0 0.000 0.000 125.752 125.752 mp2_gpw_main 1 4.0 0.000 0.000 124.392 124.392 rpa_ri_compute_en 1 5.0 0.000 0.000 116.762 116.762 rpa_num_int 1 6.0 0.001 0.001 116.757 116.757 compute_mat_P_omega 1 7.0 0.003 0.003 67.177 67.177 compute_mat_P_omega_contract 10 8.0 8.496 8.496 66.982 66.982 dbt_total 2336 9.6 0.012 0.012 54.664 54.664 dbt_contract 787 11.0 0.034 0.034 48.237 48.237 dbt_tas_total 1149 12.2 0.231 0.231 47.003 47.003 dbt_tas_multiply 807 12.1 0.002 0.002 45.729 45.729 dbt_tas_dbm 807 14.1 0.003 0.003 39.029 39.029 dbm_multiply 807 16.1 39.019 39.019 39.019 39.019 GW_matrix_operations 10 7.0 0.005 0.005 30.780 30.780 cp_fm_cholesky_invert 10 8.0 29.996 29.996 29.996 29.996 dbt_tas_mm_1N 524 15.1 0.002 0.002 27.999 27.999 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 24.344 24.344 scf_env_do_scf 1 3.0 0.000 0.000 23.422 23.422 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 23.421 23.421 qs_scf_new_mos 17 5.0 0.000 0.000 21.816 21.816 eigensolver 18 5.9 0.001 0.001 20.292 20.292 compute_mat_P_omega_calc_M_occ 250 9.0 8.508 8.508 18.756 18.756 cp_fm_cholesky_decompose 14 8.1 13.845 13.845 13.845 13.845 cp_fm_diag_elpa 18 6.9 0.000 0.000 13.448 13.448 cp_fm_diag_elpa_base 18 7.9 13.403 13.403 13.448 13.448 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 12.205 12.205 RPA_postprocessing_nokp 10 8.0 0.001 0.001 11.354 11.354 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.160 10.160 dbt_tas_mm_2 251 15.0 0.001 0.001 9.083 9.083 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 7.623 7.623 cp_fm_cholesky_restore 51 7.0 6.798 6.798 6.798 6.798 get_2c_integrals 1 6.0 0.000 0.000 5.983 5.983 compute_QP_energies 1 7.0 0.000 0.000 5.612 5.612 compute_self_energy_cubic_gw 1 8.0 0.048 0.048 5.611 5.611 dbt_copy 1103 10.7 0.059 0.059 5.136 5.136 contract_cubic_gw 21 9.0 0.000 0.000 4.622 4.622 dbt_tas_reserve_blocks_index 3261 14.3 0.147 0.147 3.086 3.086 dbm_reserve_blocks 3628 15.3 3.007 3.007 3.007 3.007 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 33.681 33.692 qs_energies 1 2.0 0.000 0.004 33.600 33.601 mp2_main 1 3.0 0.000 0.000 32.610 32.611 mp2_gpw_main 1 4.0 0.000 0.001 32.572 32.573 rpa_ri_compute_en 1 5.0 0.000 0.000 31.288 31.289 rpa_num_int 1 6.0 0.000 0.002 31.287 31.288 dbt_total 2336 9.6 0.012 0.013 27.767 27.776 compute_mat_P_omega 1 7.0 0.001 0.005 26.435 26.456 compute_mat_P_omega_contract 10 8.0 0.397 0.427 26.240 26.245 dbt_contract 787 11.0 0.026 0.030 20.952 20.964 dbt_tas_total 1149 12.2 0.053 0.063 18.679 18.679 dbt_tas_multiply 807 12.1 0.002 0.002 18.623 18.625 dbt_tas_dbm 807 14.1 0.003 0.004 13.847 13.858 dbm_multiply 807 16.1 10.402 11.246 10.402 11.246 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.735 7.736 compute_mat_P_omega_calc_M_occ 250 9.0 0.383 0.417 7.679 7.679 mp_sync 8706 11.6 5.656 6.941 5.656 6.941 dbt_tas_mm_2 251 15.0 0.001 0.002 6.373 6.378 dbt_copy 1111 10.7 0.012 0.013 5.801 6.156 dbt_reshape 1098 11.7 2.132 2.792 5.525 5.838 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.737 5.738 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.798 5.496 compute_QP_energies 1 7.0 0.000 0.000 3.140 3.141 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.137 3.140 dbt_communicate_buffer 1098 12.7 0.054 0.070 2.713 2.857 mp_waitall_2 3776 15.3 2.672 2.853 2.672 2.853 contract_cubic_gw 21 9.0 0.000 0.000 2.490 2.490 dbt_crop 1042 12.0 0.900 1.366 1.394 1.864 dbt_reserve_blocks_index 2849 13.1 0.066 0.085 1.537 1.753 dbt_reserve_blocks_index_array 2791 12.2 0.008 0.009 1.537 1.752 dbt_tas_reserve_blocks_index 3300 14.5 0.114 0.163 1.504 1.720 dbm_reserve_blocks 3696 15.4 1.484 1.700 1.484 1.700 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.280 1.282 dbt_tas_replicate 396 14.1 0.544 0.712 1.147 1.267 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.051 1.054 cp_gemm 105 8.4 0.000 0.000 1.032 1.040 cp_gemm_cosma 105 9.4 1.031 1.040 1.031 1.040 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.000 1.005 mp_max_i 1992 9.8 0.799 1.003 0.799 1.003 convert_to_new_pgrid 2421 14.1 0.024 0.028 0.792 0.955 scf_env_do_scf 1 3.0 0.000 0.000 0.951 0.951 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.951 0.951 dbm_copy 1608 15.1 0.761 0.922 0.761 0.922 GW_matrix_operations 10 7.0 0.001 0.001 0.782 0.789 dbm_add 807 14.1 0.638 0.727 0.638 0.727 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=41.967, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=39.019, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=29.996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=13.845, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=13.403, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.508, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.007, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.951999999999998, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.402, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.383, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.484, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.672, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.132, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.656, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 311.014 311.014 qs_forces 1 2.0 0.000 0.000 310.437 310.437 rebuild_ks_matrix 7 6.6 0.000 0.000 288.301 288.301 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 288.301 288.301 hfx_ks_matrix 7 8.6 0.000 0.000 286.388 286.388 hfx_ri_update_ks 7 9.6 0.000 0.000 238.169 238.169 hfx_ri_update_ks_Pmat 7 10.6 32.098 32.098 238.165 238.165 qs_energies 1 3.0 0.000 0.000 231.627 231.627 dbt_total 4939 11.6 0.030 0.030 231.564 231.564 scf_env_do_scf 1 4.0 0.000 0.000 231.211 231.211 qs_ks_update_qs_env 8 6.0 0.000 0.000 209.540 209.540 dbt_tas_total 2391 14.1 0.867 0.867 204.187 204.187 dbt_contract 1473 13.0 0.161 0.161 186.921 186.921 dbt_tas_multiply 1482 14.0 0.004 0.004 176.786 176.786 dbt_tas_dbm 1482 16.0 0.006 0.006 158.373 158.373 dbm_multiply 1482 18.0 158.353 158.353 158.353 158.353 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 157.830 157.830 dbt_tas_mm_2 649 17.1 0.005 0.005 130.164 130.164 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 126.925 126.925 init_scf_loop 2 5.0 0.000 0.000 104.284 104.284 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 78.763 78.763 hfx_ri_update_forces 1 7.0 0.000 0.000 48.215 48.215 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 32.947 32.947 dbt_tas_mm_3T 659 17.1 0.002 0.002 21.417 21.417 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 20.278 20.278 dbt_tas_reshape 906 14.4 0.010 0.010 19.714 19.714 prepare_preconditioner 2 6.0 0.000 0.000 19.181 19.181 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 19.072 19.072 make_preconditioner 2 7.0 0.000 0.000 18.704 18.704 make_full_all 2 8.0 0.000 0.000 18.216 18.216 cp_fm_syevd 12 10.7 0.000 0.000 18.191 18.191 cp_fm_syevd_base 12 11.7 18.190 18.190 18.190 18.190 dbt_copy 2411 12.3 0.227 0.227 17.198 17.198 dbt_tas_merge 649 14.1 11.808 11.808 12.770 12.770 precalc_derivatives 1 8.0 0.005 0.005 12.486 12.486 dbt_tas_reshape_buffer_fill 906 15.4 11.354 11.354 11.354 11.354 dbm_reserve_blocks 8383 16.8 10.599 10.599 10.599 10.599 dbt_tas_reserve_blocks_index 7477 16.0 0.345 0.345 10.173 10.173 dbt_crop 2763 14.2 6.738 6.738 9.829 9.829 dbt_reshape 856 13.9 5.159 5.159 9.124 9.124 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 8.666 8.666 dbt_reserve_blocks_index 4998 15.2 0.127 0.127 7.712 7.712 dbt_reserve_blocks_index_array 4963 14.3 0.019 0.019 7.661 7.661 build_3c_derivatives 9 9.0 2.147 2.147 7.028 7.028 dbt_tas_reshape_buffer_obtain 906 15.4 6.189 6.189 6.879 6.879 dbcsr_cholesky_invert 3 12.0 6.117 6.117 6.415 6.415 dbt_tas_mm_3N 163 16.5 0.001 0.001 6.254 6.254 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.024 61.749 61.759 qs_forces 1 2.0 0.000 0.000 61.551 61.551 rebuild_ks_matrix 7 6.6 0.000 0.000 60.822 60.823 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 60.822 60.823 hfx_ks_matrix 7 8.6 0.000 0.000 59.792 59.802 dbt_total 4939 11.6 0.028 0.040 53.601 53.639 hfx_ri_update_ks 7 9.6 0.000 0.000 41.779 41.779 hfx_ri_update_ks_Pmat 7 10.6 1.459 2.940 41.778 41.779 dbt_contract 1473 13.0 0.097 0.108 41.451 41.463 dbt_tas_total 2391 14.1 0.121 0.147 38.834 38.849 qs_energies 1 3.0 0.000 0.002 38.211 38.211 scf_env_do_scf 1 4.0 0.000 0.001 38.072 38.072 qs_ks_update_qs_env 8 6.0 0.000 0.000 37.498 37.499 dbt_tas_multiply 1482 14.0 0.005 0.006 34.512 34.514 dbt_tas_dbm 1482 16.0 0.005 0.007 26.643 26.670 dbm_multiply 1482 18.0 17.689 24.974 17.689 24.974 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.326 23.326 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 23.219 23.220 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 22.534 22.534 hfx_ri_update_forces 1 7.0 0.000 0.001 18.012 18.022 mp_sync 17669 13.5 14.030 17.007 14.030 17.007 dbt_tas_mm_2 649 17.1 0.004 0.005 16.114 16.131 init_scf_loop 2 5.0 0.000 0.000 15.538 15.538 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.003 12.675 12.688 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 8.287 8.287 dbt_copy 2429 12.3 0.032 0.035 6.547 7.354 dbt_crop 2763 14.2 3.021 5.026 3.743 5.887 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.315 5.104 dbt_reshape 1257 13.5 2.143 2.771 4.556 5.007 dbt_tas_mm_3N 163 16.5 0.000 0.001 4.070 4.156 precalc_derivatives 1 8.0 0.001 0.002 4.116 4.116 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.873 3.873 dbt_tas_merge 649 14.1 1.610 2.714 2.929 3.463 mp_waitall_2 5988 16.5 3.083 3.383 3.083 3.383 mp_max_i 3372 12.5 2.461 2.861 2.461 2.861 dbm_reserve_blocks 8417 16.9 2.169 2.466 2.169 2.466 dbt_tas_reserve_blocks_index 7508 16.1 0.255 0.411 2.087 2.417 dbt_tas_communicate_buffer 1825 16.3 0.060 0.077 2.135 2.345 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.227 2.238 dbt_tas_replicate 909 15.6 0.593 0.758 2.181 2.221 dbt_reserve_blocks_index 5399 15.2 0.122 0.173 1.776 2.036 dbt_reserve_blocks_index_array 5364 14.2 0.013 0.015 1.774 2.035 build_3c_derivatives 9 9.0 0.229 0.346 1.940 1.944 mp_alltoall_i 4341 15.3 1.650 1.788 1.650 1.788 dbt_tas_reshape 916 14.4 0.008 0.010 1.683 1.777 dbt_communicate_buffer 1257 14.5 0.043 0.061 1.621 1.734 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 1.539 1.551 convert_to_new_pgrid 4446 16.0 0.039 0.043 1.272 1.470 dbm_copy 3043 16.9 1.233 1.432 1.233 1.432 mp_sum_l 38201 15.3 1.178 1.414 1.178 1.414 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=72.47300000000001, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=158.353, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=32.098, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=18.19, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=11.808, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=11.354, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=6.738, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_max_i", label="mp_max_i", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=18.396, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.689, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.459, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.61, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.021, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_max_i", label="mp_max_i", y=2.461, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.083, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=14.03, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 232.962 232.962 qs_energies 1 2.0 0.000 0.000 232.776 232.776 mp2_main 1 3.0 0.000 0.000 194.933 194.933 mp2_gpw_main 1 4.0 0.001 0.001 190.616 190.616 mp2_ri_gpw_compute_in 1 5.0 0.385 0.385 141.527 141.527 mp2_ri_gpw_compute_in_loop 1 6.0 0.020 0.020 116.555 116.555 mp2_eri_3c_integrate_gpw 2656 7.0 0.024 0.024 84.758 84.758 integrate_v_rspace 2666 8.0 0.821 0.821 68.376 68.376 grid_integrate_task_list 2666 9.0 65.368 65.368 65.368 65.368 mp2_ri_gpw_compute_en 1 5.0 0.076 0.076 49.066 49.066 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.702 9.702 47.270 47.270 scf_env_do_scf 1 3.0 0.000 0.000 36.930 36.930 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 36.929 36.929 qs_scf_new_mos 10 5.0 0.000 0.000 35.650 35.650 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.481 2.481 29.786 29.786 eigensolver 11 5.8 0.001 0.001 27.715 27.715 offload_gemm 2080 8.0 27.305 27.305 27.305 27.305 calculate_wavefunction 5312 9.0 18.744 18.744 26.681 26.681 cp_fm_diag_elpa 11 6.8 0.000 0.000 25.171 25.171 cp_fm_diag_elpa_base 11 7.8 25.016 25.016 25.170 25.170 get_2c_integrals 1 6.0 0.000 0.000 24.565 24.565 dbcsr_multiply_generic 5322 8.0 0.278 0.278 23.723 23.723 ao_to_mo_and_store_B_mult_1 2656 7.0 0.018 0.018 23.698 23.698 compute_2c_integrals 1 7.0 0.006 0.006 18.354 18.354 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 18.335 18.335 mp2_eri_2c_integrate_gpw 1 9.0 3.209 3.209 18.324 18.324 pw_transfer 63872 10.6 1.081 1.081 12.053 12.053 multiply_cannon 5322 9.0 0.577 0.577 11.852 11.852 fft_wrap_pw1pw2 53228 11.4 0.134 0.134 10.741 10.741 multiply_cannon_loop 5322 10.0 0.247 0.247 10.087 10.087 qs_diis_b_step 9 6.0 0.001 0.001 10.024 10.024 make_m2s 10644 9.0 0.086 0.086 9.033 9.033 cp_fm_symm 18 7.0 8.710 8.710 8.710 8.710 make_images 10644 10.0 3.162 3.162 8.665 8.665 multiply_cannon_multrec 5322 11.0 8.216 8.216 8.273 8.273 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.860 2.860 7.914 7.914 fft_wrap_pw1pw2_20 21271 12.4 0.596 0.596 7.408 7.408 fft3d_s 53229 13.4 6.891 6.891 6.932 6.932 copy_dbcsr_to_fm 2679 8.0 0.039 0.039 6.112 6.112 mp2_ri_gpw_compute_en_ener 2080 7.0 5.779 5.779 5.779 5.779 cp_fm_triangular_invert 2 6.0 5.181 5.181 5.181 5.181 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 42.761 42.772 qs_energies 1 2.0 0.000 0.001 42.665 42.666 mp2_main 1 3.0 0.000 0.001 40.185 40.186 mp2_gpw_main 1 4.0 0.001 0.002 40.060 40.060 mp2_ri_gpw_compute_in 1 5.0 0.041 0.042 18.328 24.016 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.578 22.269 mp2_ri_gpw_compute_en 1 5.0 0.159 0.168 21.650 22.055 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.462 20.162 integrate_v_rspace 93 8.1 0.106 0.115 14.394 19.984 grid_integrate_task_list 93 9.1 14.004 19.661 14.004 19.661 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.802 0.989 14.923 14.939 mp2_ri_gpw_compute_en_expansio 65 7.0 0.076 0.089 11.172 11.391 offload_gemm 65 8.0 11.097 11.313 11.097 11.313 mp_min_d 2 7.0 5.746 6.269 5.746 6.269 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.690 6.095 mp2_ri_gpw_compute_en_comm 17 7.0 0.106 0.186 2.568 2.805 scf_env_do_scf 1 3.0 0.000 0.000 2.345 2.346 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.345 2.346 mp_sendrecv_dm3 510 8.0 1.955 2.295 1.955 2.295 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.821 2.073 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.803 2.053 get_2c_integrals 1 6.0 0.000 0.000 1.685 1.712 compute_2c_integrals 1 7.0 0.002 0.003 1.394 1.410 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 1.010 1.328 mp2_eri_2c_integrate_gpw 1 9.0 0.209 0.330 1.009 1.328 qs_scf_new_mos 10 5.0 0.000 0.000 1.187 1.255 eigensolver 11 5.8 0.001 0.001 1.161 1.162 multiply_cannon 176 9.0 0.016 0.018 1.073 1.153 calculate_wavefunction 166 9.0 0.503 0.711 0.875 1.129 multiply_cannon_loop 176 10.0 0.002 0.002 1.014 1.093 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.997 0.999 cp_fm_redistribute_end 11 7.8 0.378 0.989 0.389 0.991 cp_fm_diag_elpa_base 11 7.8 0.586 0.943 0.597 0.958 multiply_cannon_multrec 246 11.0 0.868 0.911 0.873 0.917 pw_transfer 2120 10.5 0.038 0.049 0.801 0.890 make_m2s 352 9.0 0.003 0.003 0.709 0.879 make_images 352 10.0 0.053 0.062 0.697 0.867 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=78.61099999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=65.368, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=27.305, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=25.016, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=18.744, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.702, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.216, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.200000000000003, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.004, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.097, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.586, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.503, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.802, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.868, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.955, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.746, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.077 0.077 221.606 221.606 qs_energies 1 2.0 0.000 0.000 220.250 220.250 scf_env_do_scf 1 3.0 0.000 0.000 211.114 211.114 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 211.114 211.114 qs_scf_new_mos 15 5.0 0.000 0.000 134.268 134.268 eigensolver 15 6.0 0.002 0.002 126.510 126.510 cp_fm_diag_elpa 15 7.0 0.000 0.000 113.432 113.432 cp_fm_diag_elpa_base 15 8.0 110.982 110.982 113.432 113.432 qs_ks_update_qs_env 15 5.0 0.000 0.000 50.842 50.842 rebuild_ks_matrix 15 6.0 0.000 0.000 50.628 50.628 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 50.628 50.628 qs_vxc_create 15 8.0 0.018 0.018 34.601 34.601 calculate_dispersion_nonloc 15 9.0 7.277 7.277 30.220 30.220 pw_transfer 1191 10.0 0.062 0.062 23.433 23.433 fft_wrap_pw1pw2 1086 11.0 0.010 0.010 23.242 23.242 qs_rho_update_rho 16 5.0 0.000 0.000 22.651 22.651 calculate_rho_elec 16 6.0 0.232 0.232 22.651 22.651 grid_collocate_task_list 16 7.0 21.288 21.288 21.288 21.288 fft_wrap_pw1pw2_150 765 12.0 3.310 3.310 16.743 16.743 sum_up_and_integrate 15 8.0 0.040 0.040 14.601 14.601 integrate_v_rspace 15 9.0 0.018 0.018 14.561 14.561 grid_integrate_task_list 15 10.0 14.045 14.045 14.045 14.045 cp_fm_cholesky_restore 45 7.0 10.881 10.881 10.881 10.881 fft3d_s 1087 13.0 10.648 10.648 10.659 10.659 pw_scatter_s 585 13.1 7.230 7.230 7.230 7.230 fft_wrap_pw1pw2_200 197 12.3 0.728 0.728 6.308 6.308 dbcsr_complete_redistribute 46 8.3 2.209 2.209 5.581 5.581 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.555 5.555 init_scf_run 1 3.0 0.000 0.000 5.552 5.552 cp_fm_upper_to_full 30 8.0 4.645 4.645 4.645 4.645 gspace_mixing 14 5.0 0.171 0.171 4.586 4.586 vdW_energy 15 10.0 4.445 4.445 4.445 4.445 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.031 64.152 64.162 qs_energies 1 2.0 0.000 0.001 63.866 63.872 scf_env_do_scf 1 3.0 0.000 0.000 59.654 59.655 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 59.654 59.655 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.475 25.485 rebuild_ks_matrix 15 6.0 0.000 0.000 25.440 25.450 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.009 25.440 25.450 qs_rho_update_rho 16 5.0 0.000 0.000 21.431 21.436 calculate_rho_elec 16 6.0 0.007 0.007 21.431 21.436 grid_collocate_task_list 16 7.0 19.992 20.597 19.992 20.597 sum_up_and_integrate 15 8.0 0.007 0.012 14.703 14.748 integrate_v_rspace 15 9.0 0.001 0.001 14.696 14.744 grid_integrate_task_list 15 10.0 13.490 14.087 13.490 14.087 qs_scf_new_mos 15 5.0 0.000 0.001 13.481 13.503 eigensolver 15 6.0 0.001 0.002 12.474 12.501 qs_vxc_create 15 8.0 0.001 0.001 10.434 10.447 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.172 9.176 cp_fm_diag_elpa_base 15 8.0 9.021 9.059 9.168 9.171 calculate_dispersion_nonloc 15 9.0 0.977 1.821 8.613 8.633 pw_transfer 1191 10.0 0.074 0.082 7.983 8.099 fft_wrap_pw1pw2 1086 11.0 0.011 0.013 7.823 7.960 fft3d_ps 1086 13.0 2.397 2.596 6.201 6.418 fft_wrap_pw1pw2_150 765 12.0 0.252 0.283 5.429 5.502 mp_alltoall_z22v 1086 15.0 3.256 3.839 3.256 3.839 cp_fm_cholesky_restore 45 7.0 3.138 3.184 3.138 3.184 yz_to_x 501 13.9 0.201 0.248 2.371 2.663 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.523 2.523 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.211 2.410 fft_wrap_pw1pw2_200 197 12.3 0.175 0.203 2.276 2.327 xc_vxc_pw_create 15 9.0 0.015 0.018 1.820 1.843 rs_pw_transfer 158 9.4 0.001 0.002 1.393 1.671 x_to_yz 585 14.1 0.323 0.338 1.408 1.641 density_rs2pw 16 7.0 0.001 0.001 1.306 1.480 init_scf_run 1 3.0 0.000 0.001 1.459 1.460 vdW_energy 15 10.0 1.382 1.447 1.382 1.447 build_core_ppnl 1 5.0 1.301 1.440 1.301 1.440 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.373 1.374 mp_waitany 520 11.3 0.989 1.305 0.989 1.305 xc_pw_derive 90 11.0 0.001 0.001 1.222 1.292 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=53.762, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=110.982, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.288, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.045, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.881, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.648, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.255000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.021, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.992, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.49, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.138, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.256, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.095 0.095 299.390 299.390 qs_energies 1 2.0 0.000 0.000 299.236 299.236 ls_scf 1 3.0 0.000 0.000 298.002 298.002 ls_scf_main 1 4.0 0.002 0.002 285.296 285.296 density_matrix_trs4 11 5.0 0.013 0.013 189.937 189.937 arnoldi_extremal 12 6.1 0.000 0.000 105.381 105.381 arnoldi_normal_ev 12 7.1 0.016 0.016 105.381 105.381 build_subspace 23 8.1 0.084 0.084 103.680 103.680 ls_scf_dm_to_ks 11 5.0 0.000 0.000 90.025 90.025 matrix_ls_to_qs 11 6.0 0.000 0.000 86.755 86.755 dbcsr_matrix_vector_mult 652 9.0 0.188 0.188 78.278 78.278 dbcsr_multiply_generic 185 6.1 0.831 0.831 76.960 76.960 dbcsr_matrix_vector_mult_local 652 10.0 66.665 66.665 66.671 66.671 dbcsr_copy_into_existing 11 7.0 48.471 48.471 48.471 48.471 multiply_cannon 185 7.1 0.305 0.305 46.959 46.959 dbcsr_complete_redistribute 23 7.5 30.734 30.734 42.061 42.061 matrix_decluster 11 7.0 0.000 0.000 38.282 38.282 multiply_cannon_loop 185 8.1 0.260 0.260 34.478 34.478 make_m2s 370 7.1 0.038 0.038 25.437 25.437 multiply_cannon_multrec 185 9.1 24.271 24.271 24.299 24.299 make_images 370 8.1 10.576 10.576 23.769 23.769 dbcsr_finalize 646 7.5 0.176 0.176 14.982 14.982 dbcsr_merge_all 597 8.5 2.215 2.215 13.796 13.796 DGKS_ortho_d 702 9.1 13.308 13.308 13.313 13.313 setup_rec_index_2d 370 8.1 12.084 12.084 12.084 12.084 ls_scf_init_scf 1 4.0 0.000 0.000 11.998 11.998 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.664 11.664 Gram_Schmidt_ortho_d 702 9.1 11.238 11.238 11.240 11.240 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 11.011 11.011 dbcsr_sort_indices 1103 9.9 10.882 10.882 10.882 10.882 tree_to_linear_d 110 9.4 10.349 10.349 10.349 10.349 calculate_norms 370 9.1 9.919 9.919 9.919 9.919 quick_finalize 395 10.0 0.417 0.417 9.447 9.447 dbcsr_special_finalize 370 9.1 0.002 0.002 8.727 8.727 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.026 69.010 69.021 qs_energies 1 2.0 0.000 0.000 68.911 68.911 ls_scf 1 3.0 0.000 0.000 68.846 68.847 ls_scf_main 1 4.0 0.000 0.007 66.210 66.211 density_matrix_trs4 11 5.0 0.006 0.018 63.480 63.522 dbcsr_multiply_generic 185 6.1 0.058 0.071 59.317 59.524 multiply_cannon 185 7.1 0.032 0.034 49.599 50.333 multiply_cannon_loop 185 8.1 0.115 0.129 47.124 48.195 multiply_cannon_multrec 1480 9.1 28.885 30.554 29.163 30.833 mp_waitall_1 11936 10.3 15.325 17.410 15.325 17.410 multiply_cannon_metrocomm3 1480 9.1 0.013 0.019 8.977 11.843 calculate_norms 2960 9.1 5.251 7.368 5.251 7.368 make_m2s 370 7.1 0.034 0.036 6.880 6.967 make_images 370 8.1 0.634 0.725 6.752 6.830 multiply_cannon_metrocomm1 1480 9.1 0.008 0.009 3.558 5.357 arnoldi_extremal 12 6.1 0.000 0.000 3.368 3.392 arnoldi_normal_ev 12 7.1 0.001 0.004 3.368 3.392 build_subspace 23 8.1 0.020 0.025 3.261 3.263 make_images_data 370 9.1 0.009 0.010 3.018 3.222 dbcsr_matrix_vector_mult 652 9.0 0.010 0.046 2.329 3.012 hybrid_alltoall_any 393 9.9 0.176 0.948 2.593 2.747 dbcsr_matrix_vector_mult_local 652 10.0 1.754 2.676 1.756 2.679 mp_sum_l 1119 5.6 1.873 2.605 1.873 2.605 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.389 2.454 dbcsr_complete_redistribute 23 7.5 1.209 1.603 1.958 2.178 matrix_ls_to_qs 11 6.0 0.000 0.000 1.902 2.149 ls_scf_init_scf 1 4.0 0.000 0.000 2.033 2.035 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.006 2.012 matrix_decluster 11 7.0 0.000 0.000 1.761 1.968 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 1.830 1.832 make_images_pack 370 9.1 1.643 1.757 1.646 1.760 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.035 1.599 dbcsr_finalize 646 7.5 0.007 0.008 1.328 1.509 buffer_matrices_ensure_size 370 8.1 1.334 1.507 1.334 1.507 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=106.02199999999996, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=66.665, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.471, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=30.734, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.271, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=13.308, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.919, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.713000000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.754, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.209, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=28.885, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.251, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=15.325, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.873, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 72.627 72.627 lib_test 1 2.0 0.000 0.000 72.619 72.619 dbcsr_run_tests 3 3.0 0.002 0.002 72.619 72.619 test_multiplies_multiproc 3 4.0 0.001 0.001 56.959 56.959 dbcsr_redistribute 9 5.0 36.980 36.980 38.524 38.524 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.761 16.761 dbcsr_make_random_matrix 9 4.0 12.571 12.571 15.557 15.557 multiply_cannon 9 6.0 0.001 0.001 12.044 12.044 multiply_cannon_loop 9 7.0 0.026 0.026 11.682 11.682 multiply_cannon_multrec 9 8.0 11.656 11.656 11.657 11.657 dbcsr_finalize 27 5.7 0.033 0.033 5.602 5.602 dbcsr_merge_all 18 6.5 2.104 2.104 4.902 4.902 dbcsr_data_release 975 7.6 2.529 2.529 2.529 2.529 tree_to_linear_d 9 7.0 1.930 1.930 1.930 1.930 make_m2s 18 6.0 0.001 0.001 1.557 1.557 make_images 18 7.0 0.539 0.539 1.508 1.508 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.015 18.394 18.399 lib_test 1 2.0 0.000 0.000 18.352 18.369 dbcsr_run_tests 3 3.0 0.000 0.001 18.351 18.368 test_multiplies_multiproc 3 4.0 0.001 0.003 17.523 17.610 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.698 15.788 multiply_cannon 9 6.0 0.002 0.002 13.977 14.336 multiply_cannon_loop 9 7.0 0.002 0.002 13.676 13.964 multiply_cannon_multrec 72 8.0 11.353 11.778 11.354 11.779 mp_waitall_1 576 9.2 2.650 3.104 2.650 3.104 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 2.008 2.485 dbcsr_make_random_matrix 9 4.0 0.664 0.919 0.799 1.018 mp_sum_l 390 2.5 0.442 0.916 0.442 0.916 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.437 0.911 make_m2s 18 6.0 0.001 0.001 0.692 0.731 make_images 18 7.0 0.021 0.025 0.688 0.727 dbcsr_data_release 444 7.6 0.613 0.702 0.613 0.702 dbcsr_finalize 27 5.7 0.000 0.000 0.610 0.680 dbcsr_destroy 111 5.9 0.000 0.000 0.509 0.629 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.308 0.617 dbcsr_merge_all 18 6.5 0.096 0.119 0.504 0.574 dbcsr_checksum 6 5.0 0.161 0.534 0.546 0.546 make_images_data 18 8.0 0.000 0.001 0.367 0.442 dbcsr_redistribute 9 5.0 0.232 0.275 0.402 0.441 mp_sum_d 191 1.2 0.387 0.408 0.387 0.408 hybrid_alltoall_any 18 9.0 0.029 0.130 0.323 0.378 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.787000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=36.98, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.571, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.656, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.529, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.104, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.3439999999999976, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.232, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.664, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.353, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.613, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.096, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.65, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.442, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 144.365 144.365 qs_mol_dyn_low 1 2.0 0.003 0.003 142.715 142.715 velocity_verlet 5 3.0 0.003 0.003 116.386 116.386 qmmm_el_coupling 6 3.8 0.000 0.000 90.598 90.598 qmmm_elec_with_gaussian 6 4.8 0.092 0.092 90.594 90.594 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 89.882 89.882 qmmm_elec_gaussian_low_G 6 6.8 88.952 88.952 88.952 88.952 qs_forces 6 3.8 0.001 0.001 43.404 43.404 qs_energies 6 4.8 0.000 0.000 39.028 39.028 scf_env_do_scf 6 5.8 0.001 0.001 35.794 35.794 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.750 25.750 rebuild_ks_matrix 45 8.4 0.000 0.000 25.009 25.009 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 25.008 25.008 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.060 21.060 pw_transfer 966 12.3 0.049 0.049 17.020 17.020 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.785 16.785 fft_wrap_pw1pw2_150 507 15.2 2.113 2.113 16.342 16.342 qs_vxc_create 45 10.4 0.001 0.001 13.452 13.452 xc_vxc_pw_create 45 11.4 0.666 0.666 13.451 13.451 init_scf_loop 6 6.8 0.000 0.000 10.029 10.029 xc_pw_derive 270 13.4 0.002 0.002 9.239 9.239 fft3d_s 802 15.6 7.649 7.649 7.657 7.657 qs_rho_update_rho 45 7.9 0.000 0.000 7.199 7.199 calculate_rho_elec 45 8.9 0.563 0.563 7.199 7.199 xc_rho_set_and_dset_create 45 12.4 0.696 0.696 6.949 6.949 prepare_preconditioner 6 7.8 0.000 0.000 6.643 6.643 make_preconditioner 6 8.8 0.000 0.000 6.192 6.192 make_full_all 6 9.8 0.001 0.001 5.917 5.917 xc_pw_divergence 45 12.4 0.001 0.001 5.781 5.781 pw_scatter_s 429 15.8 5.601 5.601 5.601 5.601 qmmm_forces 6 3.8 0.001 0.001 5.487 5.487 qmmm_forces_with_gaussian 6 4.8 0.113 0.113 5.168 5.168 pw_integral_ab 2539 7.4 4.397 4.397 4.397 4.397 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.294 4.294 qs_ks_ddapc 45 10.4 0.001 0.001 4.285 4.285 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.956 3.956 qmmm_forces_gaussian_low_G 6 6.8 3.590 3.590 3.590 3.590 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.567 3.567 cp_fm_diag_elpa_base 18 12.2 3.559 3.559 3.567 3.567 grid_collocate_task_list 45 9.9 3.331 3.331 3.331 3.331 density_rs2pw 45 9.9 0.002 0.002 3.305 3.305 sum_up_and_integrate 45 10.4 0.126 0.126 3.158 3.158 integrate_v_rspace 45 11.4 0.007 0.007 3.032 3.032 pw_poisson_solve 51 9.9 1.269 1.269 3.023 3.023 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.054 58.307 58.321 qs_mol_dyn_low 1 2.0 0.003 0.004 57.126 57.185 qs_forces 6 3.8 0.001 0.001 40.862 40.862 qs_energies 6 4.8 0.000 0.000 38.925 38.925 scf_env_do_scf 6 5.8 0.000 0.001 37.941 37.941 scf_env_do_scf_inner_loop 113 6.2 0.002 0.018 36.418 36.418 rebuild_ks_matrix 119 8.1 0.000 0.000 26.823 26.833 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.020 26.823 26.833 qs_ks_update_qs_env 119 7.3 0.001 0.001 25.224 25.233 velocity_verlet 5 3.0 0.002 0.004 24.309 24.312 pw_transfer 2446 12.3 0.157 0.169 17.092 17.536 fft_wrap_pw1pw2 2059 13.4 0.021 0.023 16.728 17.198 fft_wrap_pw1pw2_150 1321 14.9 1.205 1.368 16.034 16.487 qs_vxc_create 119 10.1 0.003 0.010 13.656 13.660 xc_vxc_pw_create 119 11.1 0.145 0.199 13.653 13.658 fft3d_ps 2059 15.4 6.543 7.378 12.716 13.422 qs_rho_update_rho 119 7.3 0.001 0.001 10.995 10.996 calculate_rho_elec 119 8.3 0.049 0.054 10.994 10.995 xc_pw_derive 714 13.1 0.008 0.010 10.292 10.565 sum_up_and_integrate 119 10.1 0.054 0.067 9.555 9.833 integrate_v_rspace 119 11.1 0.003 0.004 9.500 9.781 qmmm_forces 6 3.8 0.002 0.002 8.128 8.129 qmmm_forces_with_gaussian 6 4.8 0.286 0.339 7.423 7.958 rs_pw_transfer 988 11.5 0.011 0.014 7.454 7.693 xc_rho_set_and_dset_create 119 12.1 0.357 0.717 6.540 7.507 qmmm_el_coupling 6 3.8 0.000 0.000 7.094 7.213 qmmm_elec_with_gaussian 6 4.8 0.297 0.337 7.092 7.212 xc_pw_divergence 119 12.1 0.004 0.005 6.738 6.983 density_rs2pw 119 9.3 0.005 0.007 6.324 6.569 mp_alltoall_z22v 2059 17.4 4.845 5.832 4.845 5.832 potential_pw2rs 119 12.1 0.006 0.007 5.704 5.725 grid_collocate_task_list 119 9.3 4.487 5.082 4.487 5.082 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.029 4.261 grid_integrate_task_list 119 12.1 3.523 4.039 3.523 4.039 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.542 3.798 x_to_yz 1095 16.8 0.777 0.860 3.207 3.677 qmmm_forces_gaussian_low_G 6 6.8 3.319 3.555 3.319 3.555 yz_to_x 964 16.0 0.501 0.603 2.916 3.529 qmmm_elec_gaussian_low_G 6 6.8 2.952 3.194 2.952 3.194 mp_waitany 4028 12.8 2.766 3.117 2.766 3.117 rs_pw_transfer_PW2RS_150 125 13.9 1.134 1.278 2.902 2.943 pw_restrict_s3 18 5.8 1.355 1.655 2.421 2.738 rs_pw_transfer_RS2PW_150 125 11.2 0.857 0.992 2.391 2.652 dbcsr_multiply_generic 2588 12.3 0.058 0.070 2.151 2.202 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.012 2.157 pw_prolongate_s3 18 6.8 1.125 1.326 2.012 2.157 mp_waitall_1 188862 16.2 1.952 2.122 1.952 2.122 qs_scf_new_mos 113 7.2 0.000 0.001 2.087 2.093 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.086 2.092 ot_scf_mini 113 9.2 0.001 0.001 2.003 2.010 mp_sum_dm3 33 5.7 1.825 1.945 1.825 1.945 qs_ks_ddapc 119 10.1 0.002 0.002 1.828 1.904 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.610 1.610 pw_scatter_p 1095 15.8 1.557 1.603 1.557 1.603 init_scf_loop 6 6.8 0.000 0.000 1.521 1.522 pw_integral_ab 2761 7.7 1.010 1.070 1.379 1.483 mp_sum_d 5820 12.2 0.996 1.403 0.996 1.403 xc_functional_eval 238 13.1 0.003 0.004 0.727 1.366 pw_gather_p 964 15.0 1.199 1.364 1.199 1.364 ot_mini 113 10.2 0.000 0.001 1.246 1.252 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=30.845, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=88.952, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.649, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.601, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.397, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.59, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.331, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.628000000000004, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.952, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.01, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.319, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.487, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.845, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.523, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.543, yerr=0.0 Summary: Performance test took 43 minutes. Status: OK Removing intermediate container 448f38e69682 ---> b71c67c6801e Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 92778838f1c6 Removing intermediate container 92778838f1c6 ---> 846fcbad0b0a Step 42/42 : ENTRYPOINT [] ---> Running in 75da98b05415 Removing intermediate container 75da98b05415 ---> 1d69a801355d [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 1d69a801355d Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-06-27 12:00:26+00:00