StartDate: 2023-09-27 08:04:46+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: c94756ddc75c22fb829516d4d1ee9958f089b1c9 CommitTime: 2023-09-26 15:05:21 +0200 CommitAuthor: abussy CommitSubject: KP-RI-HFX| Minor performance optimization #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=c94756ddc75c22fb829516d4d1ee9958f089b1c9 Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 386MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 445a6a12be2b: Pulling fs layer 445a6a12be2b: Verifying Checksum 445a6a12be2b: Download complete 445a6a12be2b: Pull complete Digest: sha256:aabed3296a3d45cede1dc866a24476c4d7e093aa806263c27ddaadbdce3c1054 Status: Downloaded newer image for ubuntu:22.04 ---> c6b84b685f35 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 85169f36a7b8 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f6efc98fa5d5 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> af1b1219ae9c Step 5/42 : RUN mkdir scripts ---> Using cache ---> a788b6950640 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> b47010e64427 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 0a96a120b57d Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 30c7c31c349d Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 4c91d9093ba8 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 1ec959bcec7c Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 9e68144aabca Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> dbcd47fb50c4 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 98f9bce9c324 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> d02dda868049 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> a9982f4b2894 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> b7bd2cc7a4df Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 454b503b2eb2 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 43287a5fa08a Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> a8d1a5140925 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 5a814a4e9c2d Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> ac60c28d9b2a Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 28e8659cc4b7 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 4728d23e0aee Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 6e4c1e22653e Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 0ca353410762 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 10d78b90ab74 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 684067f61a05 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 07bc93120bd3 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 49d3fd067007 Step 30/42 : COPY ./Makefile . ---> Using cache ---> 7da1d6d6209c Step 31/42 : COPY ./src ./src ---> Using cache ---> 7a744f7a761b Step 32/42 : COPY ./exts ./exts ---> Using cache ---> e1956d60bd92 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> f33848da47f6 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/" ---> Running in 692acf59d535 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Removing intermediate container 692acf59d535 ---> 0a10eb01224a Step 35/42 : COPY ./data ./data ---> 052dbb48a07c Step 36/42 : COPY ./tests ./tests ---> 1f0c357826ad Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 556cb96f3e2a Step 38/42 : COPY ./benchmarks ./benchmarks ---> 450b601871e8 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> d1d86a15ad2c Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in abac21d02533 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 75 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 127.569 127.569 qs_mol_dyn_low 1 2.0 0.003 0.003 126.612 126.612 qs_forces 11 3.9 0.002 0.002 126.565 126.565 qs_energies 11 4.9 0.001 0.001 118.716 118.716 scf_env_do_scf 11 5.9 0.002 0.002 103.013 103.013 velocity_verlet 10 3.0 0.002 0.002 80.208 80.208 scf_env_do_scf_inner_loop 108 6.5 0.016 0.016 79.614 79.614 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.953 30.953 rebuild_ks_matrix 119 8.3 0.001 0.001 29.949 29.949 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.019 29.948 29.948 qs_scf_new_mos 108 7.5 0.001 0.001 29.801 29.801 qs_scf_loop_do_ot 108 8.5 0.001 0.001 29.800 29.800 dbcsr_multiply_generic 2286 12.5 0.224 0.224 29.031 29.031 ot_scf_mini 108 9.5 0.004 0.004 27.446 27.446 qs_rho_update_rho_low 119 7.7 0.001 0.001 24.184 24.184 calculate_rho_elec 119 8.7 1.292 1.292 24.183 24.183 init_scf_loop 11 6.9 0.000 0.000 23.181 23.181 prepare_preconditioner 11 7.9 0.000 0.000 19.031 19.031 make_preconditioner 11 8.9 0.000 0.000 19.031 19.031 grid_collocate_task_list 119 9.7 18.294 18.294 18.294 18.294 ot_mini 108 10.5 0.001 0.001 18.226 18.226 make_full_inverse_cholesky 11 9.9 0.029 0.029 17.116 17.116 sum_up_and_integrate 119 10.3 0.002 0.002 14.935 14.935 integrate_v_rspace 119 11.3 0.184 0.184 14.783 14.783 make_m2s 4572 13.5 0.059 0.059 14.314 14.314 grid_integrate_task_list 119 12.3 11.766 11.766 11.766 11.766 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 9.310 9.310 ot_diis_step 108 11.5 0.005 0.005 9.138 9.138 qs_ot_get_derivative 108 11.5 0.002 0.002 9.084 9.084 multiply_cannon 2286 13.5 0.319 0.319 9.042 9.042 multiply_cannon_loop 2286 14.5 0.094 0.094 8.315 8.315 pw_transfer 1439 11.6 0.092 0.092 8.235 8.235 multiply_cannon_multrec 2286 15.5 8.141 8.141 8.220 8.220 make_images 4572 14.5 3.377 3.377 8.125 8.125 fft_wrap_pw1pw2 1201 12.6 0.009 0.009 7.767 7.767 dbcsr_dot_sd 1205 11.9 7.030 7.030 7.034 7.034 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.869 6.869 apply_single 119 13.6 0.001 0.001 6.869 6.869 fft_wrap_pw1pw2_140 487 13.2 0.560 0.560 6.675 6.675 dbcsr_make_dense_low 5837 15.5 0.093 0.093 6.578 6.578 cp_fm_cholesky_decompose 22 10.9 6.548 6.548 6.548 6.548 make_dense_data 5837 16.5 5.798 5.798 6.469 6.469 dbcsr_complete_redistribute 329 12.2 3.443 3.443 6.153 6.153 pw_poisson_solve 119 10.3 1.689 1.689 6.069 6.069 dbcsr_make_images_dense 3978 14.8 0.020 0.020 5.619 5.619 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.610 5.610 init_scf_run 11 5.9 0.003 0.003 5.315 5.315 scf_env_initial_rho_setup 11 6.9 0.002 0.002 5.312 5.312 qs_create_task_list 11 7.9 0.000 0.000 5.046 5.046 generate_qs_task_list 11 8.9 2.442 2.442 5.045 5.045 cp_fm_cholesky_invert 11 10.9 4.863 4.863 4.863 4.863 copy_dbcsr_to_fm 153 11.3 0.004 0.004 4.859 4.859 evaluate_core_matrix_traces 119 8.3 0.001 0.001 4.667 4.667 calculate_ptrace_kp 238 9.3 0.001 0.001 4.666 4.666 wfi_extrapolate 11 7.9 0.001 0.001 4.636 4.636 density_rs2pw 119 9.7 0.007 0.007 4.597 4.597 dbcsr_copy 2102 12.0 0.403 0.403 4.493 4.493 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.181 4.181 qs_ot_get_p 119 10.4 0.001 0.001 4.015 4.015 dbcsr_copy_into_existing 22 7.9 4.010 4.010 4.011 4.011 pw_integral_ab 119 11.3 3.972 3.972 3.972 3.972 fft3d_s 1202 14.6 3.775 3.775 3.782 3.782 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.695 3.695 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.664 3.664 dbcsr_data_release 279532 16.0 3.339 3.339 3.339 3.339 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.229 3.229 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.128 3.128 qs_ot_get_derivative_diag 49 12.0 0.002 0.002 3.036 3.036 potential_pw2rs 119 12.3 0.150 0.150 2.833 2.833 calculate_dm_sparse 119 9.5 0.001 0.001 2.734 2.734 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.675 2.675 qs_vxc_create 119 10.3 0.002 0.002 2.665 2.665 xc_vxc_pw_create 119 11.3 1.165 1.165 2.663 2.663 dbcsr_finalize 5186 13.8 0.209 0.209 2.602 2.602 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.038 62.868 62.887 qs_mol_dyn_low 1 2.0 0.005 0.009 62.685 62.690 qs_forces 11 3.9 0.003 0.003 62.630 62.631 qs_energies 11 4.9 0.001 0.001 58.845 58.851 scf_env_do_scf 11 5.9 0.001 0.003 53.988 53.989 scf_env_do_scf_inner_loop 108 6.5 0.004 0.031 49.669 49.670 velocity_verlet 10 3.0 0.001 0.004 36.661 36.662 rebuild_ks_matrix 119 8.3 0.001 0.001 22.411 22.461 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.021 22.410 22.461 qs_ks_update_qs_env 119 7.6 0.001 0.002 20.093 20.135 dbcsr_multiply_generic 2286 12.5 0.097 0.101 18.771 18.927 qs_rho_update_rho_low 119 7.7 0.001 0.001 17.593 17.620 calculate_rho_elec 119 8.7 0.036 0.039 17.592 17.619 sum_up_and_integrate 119 10.3 0.002 0.002 15.960 16.007 integrate_v_rspace 119 11.3 0.006 0.006 15.886 15.937 qs_scf_new_mos 108 7.5 0.001 0.001 15.574 15.670 qs_scf_loop_do_ot 108 8.5 0.001 0.001 15.573 15.669 ot_scf_mini 108 9.5 0.003 0.003 14.635 14.742 multiply_cannon 2286 13.5 0.158 0.178 12.787 13.260 multiply_cannon_loop 2286 14.5 0.095 0.100 11.852 12.119 grid_collocate_task_list 119 9.7 10.094 10.520 10.094 10.520 mp_waitall_1 158411 16.6 9.658 10.157 9.658 10.157 grid_integrate_task_list 119 12.3 9.285 9.660 9.285 9.660 ot_mini 108 10.5 0.001 0.001 8.722 8.834 multiply_cannon_metrocomm3 18288 15.5 0.040 0.041 7.072 7.491 density_rs2pw 119 9.7 0.006 0.006 6.826 7.336 pw_transfer 1439 11.6 0.074 0.086 6.879 6.980 fft_wrap_pw1pw2 1201 12.6 0.010 0.011 6.693 6.782 fft_wrap_pw1pw2_140 487 13.2 1.065 1.234 5.636 5.781 potential_pw2rs 119 12.3 0.007 0.008 5.213 5.238 multiply_cannon_multrec 18288 15.5 4.263 4.518 4.278 4.533 ot_diis_step 108 11.5 0.004 0.004 4.412 4.412 qs_ot_get_derivative 108 11.5 0.001 0.001 4.261 4.364 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.204 4.327 apply_single 119 13.6 0.000 0.001 4.204 4.327 init_scf_loop 11 6.9 0.000 0.000 4.291 4.292 fft3d_ps 1201 14.6 1.798 2.008 3.946 4.144 transfer_rs2pw 487 10.6 0.008 0.020 3.514 4.007 make_m2s 4572 13.5 0.052 0.056 3.737 3.862 make_images 4572 14.5 0.137 0.147 3.295 3.404 init_scf_run 11 5.9 0.000 0.008 3.389 3.389 scf_env_initial_rho_setup 11 6.9 0.000 0.006 3.389 3.389 transfer_pw2rs 487 13.2 0.006 0.007 3.340 3.349 mp_waitany 9880 13.7 2.705 3.140 2.705 3.140 wfi_extrapolate 11 7.9 0.001 0.001 3.029 3.029 transfer_rs2pw_140 130 11.5 0.493 0.557 2.273 2.783 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.623 2.639 qs_ot_get_p 119 10.4 0.001 0.001 2.159 2.330 mp_alltoall_d11v 2130 13.8 1.753 2.082 1.753 2.082 mp_sum_d 4135 12.0 1.365 1.961 1.365 1.961 mp_alltoall_z22v 1201 16.6 1.597 1.960 1.597 1.960 transfer_pw2rs_140 130 13.9 0.621 0.711 1.898 1.940 mp_sum_l 11298 13.2 1.566 1.918 1.566 1.918 make_images_data 4572 15.5 0.038 0.043 1.682 1.839 prepare_preconditioner 11 7.9 0.000 0.000 1.790 1.812 make_preconditioner 11 8.9 0.000 0.000 1.790 1.812 rs_gather_matrices 119 12.3 0.155 0.168 1.330 1.700 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.594 1.628 hybrid_alltoall_any 4725 16.4 0.082 0.310 1.444 1.592 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.514 1.578 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.002 1.338 1.343 yz_to_x 487 15.3 0.235 0.255 1.055 1.281 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=75.78999999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.294, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.766, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.141, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="dbcsr_dot_sd", label="dbcsr_dot_sd", y=7.03, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.548, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.863000000000007, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.094, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.285, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.263, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="dbcsr_dot_sd", label="dbcsr_dot_sd", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.705, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.658, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.056 0.056 153.173 153.173 qs_mol_dyn_low 1 2.0 0.003 0.003 151.980 151.980 qs_forces 11 3.9 0.003 0.003 151.927 151.927 qs_energies 11 4.9 0.001 0.001 142.014 142.014 scf_env_do_scf 11 5.9 0.002 0.002 123.677 123.677 scf_env_do_scf_inner_loop 96 6.5 0.016 0.016 98.307 98.307 velocity_verlet 10 3.0 0.002 0.002 96.748 96.748 rebuild_ks_matrix 107 8.3 0.001 0.001 43.397 43.397 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 43.397 43.397 qs_ks_update_qs_env 107 7.6 0.001 0.001 41.747 41.747 qs_rho_update_rho_low 107 7.7 0.001 0.001 38.487 38.487 calculate_rho_elec 107 8.7 1.179 1.179 38.486 38.486 grid_collocate_task_list 107 9.7 32.582 32.582 32.582 32.582 sum_up_and_integrate 107 10.3 0.002 0.002 30.025 30.025 integrate_v_rspace 107 11.3 0.175 0.175 29.893 29.893 grid_integrate_task_list 107 12.3 27.136 27.136 27.136 27.136 dbcsr_multiply_generic 1966 12.4 0.238 0.238 26.990 26.990 qs_scf_new_mos 96 7.5 0.001 0.001 26.960 26.960 qs_scf_loop_do_ot 96 8.5 0.001 0.001 26.959 26.959 init_scf_loop 11 6.9 0.000 0.000 25.101 25.101 ot_scf_mini 96 9.5 0.003 0.003 24.829 24.829 prepare_preconditioner 11 7.9 0.000 0.000 19.493 19.493 make_preconditioner 11 8.9 0.000 0.000 19.493 19.493 make_full_inverse_cholesky 11 9.9 0.039 0.039 17.511 17.511 ot_mini 96 10.5 0.001 0.001 16.334 16.334 make_m2s 3932 13.4 0.052 0.052 13.309 13.309 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 9.914 9.914 ot_diis_step 96 11.5 0.004 0.004 8.227 8.227 multiply_cannon 1966 13.4 0.276 0.276 8.181 8.181 qs_ot_get_derivative 96 11.5 0.001 0.001 8.103 8.103 pw_transfer 1295 11.6 0.085 0.085 7.881 7.881 make_images 3932 14.4 3.150 3.150 7.644 7.644 fft_wrap_pw1pw2 1081 12.6 0.008 0.008 7.543 7.543 multiply_cannon_loop 1966 14.4 0.088 0.088 7.511 7.511 multiply_cannon_multrec 1966 15.4 7.348 7.348 7.422 7.422 init_scf_run 11 5.9 0.003 0.003 7.307 7.307 scf_env_initial_rho_setup 11 6.9 0.002 0.002 7.303 7.303 cp_fm_cholesky_decompose 22 10.9 6.702 6.702 6.702 6.702 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.519 6.519 apply_single 107 13.6 0.001 0.001 6.519 6.519 wfi_extrapolate 11 7.9 0.001 0.001 6.471 6.471 fft_wrap_pw1pw2_140 439 13.2 0.679 0.679 6.417 6.417 dbcsr_complete_redistribute 317 12.2 3.308 3.308 6.104 6.104 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.012 6.012 dbcsr_dot_sd 1051 11.9 6.007 6.007 6.010 6.010 dbcsr_make_dense_low 4961 15.5 0.087 0.087 5.931 5.931 make_dense_data 4961 16.5 5.208 5.208 5.830 5.830 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.687 5.687 qs_create_task_list 11 7.9 0.000 0.000 5.390 5.390 generate_qs_task_list 11 8.9 2.838 2.838 5.390 5.390 pw_poisson_solve 107 10.3 1.425 1.425 5.213 5.213 dbcsr_make_images_dense 3386 14.7 0.017 0.017 5.163 5.163 cp_fm_cholesky_invert 11 10.9 5.067 5.067 5.067 5.067 copy_dbcsr_to_fm 147 11.2 0.003 0.003 4.853 4.853 density_rs2pw 107 9.7 0.006 0.006 4.725 4.725 dbcsr_copy 1855 11.9 0.426 0.426 4.558 4.558 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.221 4.221 evaluate_core_matrix_traces 107 8.3 0.001 0.001 4.035 4.035 calculate_ptrace_kp 214 9.3 0.001 0.001 4.034 4.034 dbcsr_copy_into_existing 22 7.9 3.939 3.939 3.940 3.940 qs_ot_get_p 107 10.4 0.001 0.001 3.812 3.812 fft3d_s 1082 14.6 3.638 3.638 3.645 3.645 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.629 3.629 pw_integral_ab 107 11.3 3.499 3.499 3.499 3.499 dbcsr_data_release 238673 15.9 3.318 3.318 3.318 3.318 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.197 3.197 copy_fm_to_dbcsr 170 11.1 0.002 0.002 3.163 3.163 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.036 101.322 101.341 qs_mol_dyn_low 1 2.0 0.004 0.010 101.163 101.168 qs_forces 11 3.9 0.003 0.003 101.108 101.109 qs_energies 11 4.9 0.001 0.001 95.044 95.048 scf_env_do_scf 11 5.9 0.001 0.002 88.363 88.373 scf_env_do_scf_inner_loop 96 6.5 0.004 0.030 81.929 81.933 velocity_verlet 10 3.0 0.001 0.004 59.931 59.933 rebuild_ks_matrix 107 8.3 0.001 0.001 41.411 41.500 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.024 41.410 41.500 qs_ks_update_qs_env 107 7.6 0.001 0.002 36.791 36.859 sum_up_and_integrate 107 10.3 0.002 0.002 35.030 35.090 integrate_v_rspace 107 11.3 0.006 0.007 34.976 35.044 qs_rho_update_rho_low 107 7.7 0.001 0.001 34.366 34.400 calculate_rho_elec 107 8.7 0.030 0.032 34.365 34.399 grid_integrate_task_list 107 12.3 26.123 27.485 26.123 27.485 grid_collocate_task_list 107 9.7 25.230 26.383 25.230 26.383 dbcsr_multiply_generic 1966 12.4 0.094 0.101 21.760 22.033 qs_scf_new_mos 96 7.5 0.001 0.001 18.268 18.343 qs_scf_loop_do_ot 96 8.5 0.001 0.001 18.267 18.342 ot_scf_mini 96 9.5 0.003 0.003 17.383 17.471 multiply_cannon 1966 13.4 0.155 0.175 13.710 13.947 multiply_cannon_loop 1966 14.4 0.092 0.099 12.481 12.753 mp_waitall_1 136719 16.5 10.975 11.554 10.975 11.554 ot_mini 96 10.5 0.001 0.002 10.835 10.938 density_rs2pw 107 9.7 0.006 0.006 8.439 9.469 multiply_cannon_metrocomm3 15728 15.4 0.038 0.039 7.986 8.408 transfer_rs2pw 439 10.6 0.008 0.009 5.575 6.686 init_scf_loop 11 6.9 0.000 0.000 6.396 6.398 qs_ot_get_derivative 96 11.5 0.001 0.001 6.189 6.283 mp_waitany 8968 13.7 4.973 6.279 4.973 6.279 pw_transfer 1295 11.6 0.076 0.082 6.122 6.248 fft_wrap_pw1pw2 1081 12.6 0.010 0.011 5.946 6.055 transfer_rs2pw_140 118 11.5 0.359 0.382 4.272 5.390 potential_pw2rs 107 12.3 0.008 0.017 5.231 5.270 mp_alltoall_d11v 1998 13.7 4.022 5.262 4.022 5.262 init_scf_run 11 5.9 0.000 0.006 5.200 5.200 scf_env_initial_rho_setup 11 6.9 0.000 0.005 5.199 5.200 fft_wrap_pw1pw2_140 439 13.2 0.954 1.127 4.895 5.075 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.871 4.894 rs_gather_matrices 107 12.3 0.145 0.161 3.562 4.766 wfi_extrapolate 11 7.9 0.001 0.001 4.675 4.675 ot_diis_step 96 11.5 0.004 0.004 4.584 4.596 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.374 4.469 apply_single 107 13.6 0.000 0.001 4.373 4.468 make_m2s 3932 13.4 0.048 0.052 4.272 4.437 multiply_cannon_multrec 15728 15.4 3.884 4.170 3.898 4.184 make_images 3932 14.4 0.122 0.129 3.895 4.054 fft3d_ps 1081 14.6 1.660 1.854 3.785 3.993 transfer_pw2rs 439 13.2 0.006 0.007 3.548 3.565 mp_sum_l 9746 13.0 2.809 3.202 2.809 3.202 qs_ot_get_p 107 10.4 0.001 0.001 2.492 2.699 qs_ot_get_derivative_taylor 53 13.0 0.001 0.001 2.404 2.455 mp_alltoall_z22v 1081 16.6 1.698 2.089 1.698 2.089 transfer_pw2rs_140 118 13.9 0.561 0.671 1.985 2.036 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=73.398, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=32.582, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.136, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.348, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.702, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_dot_sd", label="dbcsr_dot_sd", y=6.007, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=26.11500000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=25.23, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.123, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.884, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_dot_sd", label="dbcsr_dot_sd", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=4.022, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.975, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=4.973, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.225 0.225 139.369 139.369 qs_energies 1 2.0 0.000 0.000 138.555 138.555 scf_env_do_scf 1 3.0 0.000 0.000 137.329 137.329 qs_ks_update_qs_env 8 5.0 0.000 0.000 131.936 131.936 rebuild_ks_matrix 7 6.0 0.000 0.000 131.878 131.878 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 131.878 131.878 hfx_ks_matrix 7 8.0 0.000 0.000 122.993 122.993 integrate_four_center 7 9.0 2.055 2.055 122.942 122.942 integrate_four_center_main 7 10.0 0.380 0.380 108.139 108.139 integrate_four_center_bin 457 11.0 107.760 107.760 107.760 107.760 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 77.608 77.608 init_scf_loop 1 4.0 0.000 0.000 59.712 59.712 integrate_four_center_load 7 10.0 0.001 0.001 12.528 12.528 hfx_load_balance 1 11.0 0.001 0.001 12.528 12.528 hfx_load_balance_count 1 12.0 6.264 6.264 6.264 6.264 hfx_load_balance_bin 1 12.0 6.246 6.246 6.246 6.246 qs_vxc_create 14 8.0 0.000 0.000 3.119 3.119 xc_vxc_pw_create 14 9.0 0.118 0.118 3.119 3.119 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.214 0.247 145.280 145.300 qs_energies 1 2.0 0.000 0.000 144.864 144.876 scf_env_do_scf 1 3.0 0.000 0.000 144.364 144.365 qs_ks_update_qs_env 8 5.0 0.000 0.000 141.555 141.555 rebuild_ks_matrix 7 6.0 0.000 0.000 141.541 141.542 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 141.541 141.542 hfx_ks_matrix 7 8.0 0.000 0.000 134.589 134.591 integrate_four_center 7 9.0 0.077 0.382 134.576 134.578 integrate_four_center_main 7 10.0 0.004 0.004 113.086 120.022 integrate_four_center_bin 448 11.0 113.082 120.018 113.082 120.018 scf_env_do_scf_inner_loop 7 4.0 0.001 0.002 84.479 84.479 init_scf_loop 1 4.0 0.000 0.000 59.884 59.884 integrate_four_center_load 7 10.0 0.000 0.000 13.542 13.552 hfx_load_balance 1 11.0 0.001 0.001 13.542 13.551 mp_sync 56 11.2 7.144 13.506 7.144 13.506 hfx_load_balance_count 1 12.0 6.554 6.768 6.554 6.768 hfx_load_balance_bin 1 12.0 6.569 6.766 6.569 6.766 qs_vxc_create 14 8.0 0.000 0.001 3.010 3.013 xc_vxc_pw_create 14 9.0 0.021 0.024 3.010 3.013 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.439000000000007, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=107.76, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=6.264, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=6.246, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.055, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.38, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.225, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.635999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=113.082, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=6.554, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=6.569, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.077, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.214, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=7.144, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 83.246 83.246 qs_energies 1 2.0 0.000 0.000 82.861 82.861 mp2_main 1 3.0 0.000 0.000 79.997 79.997 mp2_gpw_main 1 4.0 0.000 0.000 79.886 79.886 rpa_ri_compute_en 1 5.0 0.000 0.000 75.977 75.977 rpa_num_int 1 6.0 0.002 0.002 75.971 75.971 compute_mat_P_omega 1 7.0 0.004 0.004 65.191 65.191 compute_mat_P_omega_contract 10 8.0 9.125 9.125 64.977 64.977 dbt_total 2336 9.6 0.015 0.015 51.491 51.491 dbt_contract 787 11.0 0.045 0.045 43.640 43.640 dbt_tas_total 1149 12.2 0.280 0.280 42.158 42.158 dbt_tas_multiply 807 12.1 0.002 0.002 40.893 40.893 dbt_tas_dbm 807 14.1 0.004 0.004 33.853 33.853 dbm_multiply 807 16.1 33.844 33.844 33.844 33.844 dbt_tas_mm_1N 524 15.1 0.001 0.001 25.952 25.952 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 24.444 24.444 compute_mat_P_omega_calc_M_occ 250 9.0 9.138 9.138 18.038 18.038 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.225 7.225 dbt_copy 1103 10.7 0.135 0.135 6.567 6.567 dbt_tas_mm_2 251 15.0 0.001 0.001 6.036 6.036 compute_QP_energies 1 7.0 0.000 0.000 5.752 5.752 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 5.751 5.751 contract_cubic_gw 21 9.0 0.000 0.000 4.581 4.581 dbt_tas_reserve_blocks_index 3261 14.3 0.486 0.486 4.237 4.237 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.902 3.902 dbm_reserve_blocks 3628 15.3 3.818 3.818 3.818 3.818 dbt_reserve_blocks_index 2280 13.1 0.064 0.064 3.257 3.257 dbt_reserve_blocks_index_array 2222 12.2 0.012 0.012 3.197 3.197 scf_env_do_scf 1 3.0 0.000 0.000 2.748 2.748 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.748 2.748 convert_to_new_pgrid 2421 14.1 0.156 0.156 2.635 2.635 dbt_crop 1042 12.0 1.552 1.552 2.624 2.624 dbt_tas_copy 574 11.4 1.485 1.485 2.514 2.514 dbm_copy 1614 15.1 2.479 2.479 2.479 2.479 dbt_reshape 278 11.9 1.156 1.156 2.116 2.116 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.108 2.108 compute_W_cubic_GW 10 7.0 0.004 0.004 2.029 2.029 dbt_tas_reshape 367 15.0 0.007 0.007 2.010 2.010 dbt_tas_mm_3N 22 15.1 0.000 0.000 1.741 1.741 get_2c_integrals 1 6.0 0.000 0.000 1.716 1.716 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.027 39.673 39.686 qs_energies 1 2.0 0.000 0.000 39.542 39.544 mp2_main 1 3.0 0.001 0.012 38.470 38.472 mp2_gpw_main 1 4.0 0.000 0.000 38.351 38.353 rpa_ri_compute_en 1 5.0 0.000 0.000 36.980 36.982 rpa_num_int 1 6.0 0.000 0.003 36.980 36.981 dbt_total 2336 9.6 0.017 0.019 32.849 32.853 compute_mat_P_omega 1 7.0 0.001 0.007 31.190 31.226 compute_mat_P_omega_contract 10 8.0 0.460 0.485 31.017 31.023 dbt_contract 787 11.0 0.043 0.045 24.388 24.401 dbt_tas_total 1149 12.2 0.085 0.093 21.675 21.675 dbt_tas_multiply 807 12.1 0.002 0.003 21.570 21.574 dbt_tas_dbm 807 14.1 0.003 0.004 15.283 15.294 dbm_multiply 807 16.1 11.916 12.857 11.916 12.857 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.779 9.781 compute_mat_P_omega_calc_M_occ 250 9.0 0.439 0.460 8.834 8.840 mp_sync 8688 11.6 5.747 7.855 5.747 7.855 dbt_copy 1149 10.8 0.015 0.018 7.259 7.807 dbt_reshape 1136 11.8 2.900 3.143 6.906 7.449 dbt_tas_mm_2 251 15.0 0.001 0.002 7.205 7.217 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.181 6.181 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.372 5.919 compute_QP_energies 1 7.0 0.000 0.000 3.741 3.742 compute_self_energy_cubic_gw 1 8.0 0.003 0.004 3.740 3.740 mp_waitall_2 3812 15.3 2.951 3.346 2.951 3.346 dbt_communicate_buffer 1136 12.8 0.053 0.065 2.935 3.284 contract_cubic_gw 21 9.0 0.000 0.000 2.955 2.956 dbt_reserve_blocks_index 2887 13.1 0.074 0.080 2.098 2.454 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.009 2.087 2.443 dbt_tas_reserve_blocks_index 3347 14.5 0.440 0.511 2.080 2.436 dbm_reserve_blocks 3752 15.4 1.745 2.097 1.745 2.097 dbt_crop 1042 12.0 1.003 1.107 1.671 1.930 mp_sum_l 6165 12.9 1.111 1.338 1.111 1.338 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.326 1.331 dbt_tas_replicate 405 14.1 0.563 0.755 1.201 1.320 parallel_gemm_fm 105 8.4 0.000 0.000 1.252 1.261 parallel_gemm_fm_cosma 105 9.4 1.252 1.261 1.252 1.261 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.255 1.260 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.184 1.188 convert_to_new_pgrid 2421 14.1 0.028 0.031 0.942 1.173 dbm_copy 1608 15.1 0.908 1.139 0.908 1.139 mp_max_i 2005 9.8 0.764 1.053 0.764 1.053 scf_env_do_scf 1 3.0 0.000 0.000 1.021 1.021 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.021 1.021 compute_W_cubic_GW 10 7.0 0.001 0.001 0.954 0.960 dbm_add 807 14.1 0.715 0.831 0.715 0.831 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=23.686, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=33.844, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.138, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.125, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.818, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.479, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.156, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=12.607, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.916, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.439, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.46, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.745, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.908, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.9, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.747, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.951, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.033 289.059 289.059 qs_forces 1 2.0 0.000 0.000 288.512 288.512 rebuild_ks_matrix 7 6.6 0.000 0.000 287.034 287.034 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 287.034 287.034 hfx_ks_matrix 7 8.6 0.000 0.000 285.076 285.076 hfx_ri_update_ks 7 9.6 0.000 0.000 243.239 243.239 hfx_ri_update_ks_Pmat 7 10.6 37.923 37.923 243.234 243.234 dbt_total 809 11.0 0.006 0.006 223.128 223.128 qs_energies 1 3.0 0.000 0.000 214.988 214.988 scf_env_do_scf 1 4.0 0.001 0.001 214.679 214.679 qs_ks_update_qs_env 8 6.0 0.000 0.000 213.561 213.561 dbt_contract 207 12.4 0.053 0.053 202.714 202.714 dbt_tas_total 343 13.7 1.928 1.928 199.995 199.995 dbt_tas_multiply 216 13.5 0.001 0.001 196.729 196.729 dbt_tas_dbm 216 15.5 0.001 0.001 183.569 183.569 dbm_multiply 216 17.5 183.566 183.566 183.566 183.566 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 177.020 177.020 dbt_tas_mm_2 91 16.5 0.001 0.001 167.689 167.689 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 134.598 134.598 init_scf_loop 2 5.0 0.000 0.000 80.078 80.078 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 73.478 73.478 hfx_ri_update_forces 1 7.0 1.642 1.642 41.834 41.834 hfx_ri_forces_Pmat_3c 1 8.0 4.810 4.810 24.107 24.107 dbt_copy 409 11.7 0.079 0.079 17.140 17.140 precalc_derivatives 1 8.0 2.285 2.285 13.839 13.839 dbt_reshape 132 13.2 7.294 7.294 11.675 11.675 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 10.852 10.852 dbt_tas_mm_3T 77 17.1 0.000 0.000 10.589 10.589 dbt_tas_reserve_blocks_index 1255 15.4 1.077 1.077 8.787 8.787 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 8.187 8.187 dbm_reserve_blocks 1397 16.3 7.948 7.948 7.948 7.948 build_3c_derivatives 3 9.0 3.114 3.114 7.683 7.683 dbt_reserve_blocks_index 818 14.4 0.118 0.118 6.814 6.814 dbt_reserve_blocks_index_array 795 13.4 0.008 0.008 6.692 6.692 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.037 56.705 56.723 qs_forces 1 2.0 0.000 0.000 56.524 56.524 rebuild_ks_matrix 7 6.6 0.000 0.000 55.668 55.669 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 55.668 55.669 hfx_ks_matrix 7 8.6 0.000 0.000 54.446 54.453 dbt_total 809 11.0 0.007 0.007 48.678 48.691 dbt_contract 207 12.4 0.028 0.029 37.474 37.500 dbt_tas_total 343 13.7 0.085 0.206 33.202 33.203 dbt_tas_multiply 216 13.5 0.001 0.001 32.906 32.909 hfx_ri_update_ks 7 9.6 0.000 0.000 31.243 31.244 hfx_ri_update_ks_Pmat 7 10.6 1.397 1.486 31.238 31.240 qs_energies 1 3.0 0.000 0.000 29.386 29.386 scf_env_do_scf 1 4.0 0.000 0.001 29.215 29.215 qs_ks_update_qs_env 8 6.0 0.000 0.000 28.542 28.543 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 27.128 27.128 dbt_tas_dbm 216 15.5 0.001 0.001 25.696 25.699 dbm_multiply 216 17.5 23.140 24.383 23.140 24.383 hfx_ri_update_forces 1 7.0 0.066 0.072 23.202 23.209 hfx_ri_forces_Pmat_3c 1 8.0 0.191 0.212 17.227 17.227 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 16.507 16.508 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 13.744 13.744 init_scf_loop 2 5.0 0.000 0.000 12.706 12.706 dbt_tas_mm_2 91 16.5 0.001 0.001 11.478 11.480 dbt_copy 497 12.3 0.013 0.015 10.144 10.805 mp_sync 2769 12.9 5.732 8.614 5.732 8.614 dbt_reshape 365 13.6 4.001 4.146 7.880 8.371 dbt_tas_mm_3T 77 17.1 0.000 0.000 6.884 7.715 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.911 5.912 dbt_tas_mm_3N 37 15.4 0.000 0.000 5.201 5.399 dbt_tas_reserve_blocks_index 1380 15.8 0.898 0.922 3.664 4.300 precalc_derivatives 1 8.0 0.094 0.104 4.289 4.289 mp_waitall_2 1234 16.4 3.672 3.983 3.672 3.983 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.743 3.744 dbm_reserve_blocks 1529 16.7 2.997 3.650 2.997 3.650 dbt_reserve_blocks_index 1051 14.7 0.122 0.132 3.028 3.549 dbt_reserve_blocks_index_array 1028 13.8 0.005 0.006 2.998 3.515 dbt_crop 372 13.7 1.991 2.040 2.900 3.078 dbt_communicate_buffer 365 14.6 0.012 0.013 2.661 2.935 build_3c_derivatives 3 9.0 0.231 0.249 2.428 2.435 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.053 2.053 dbt_tas_replicate 149 15.4 0.727 0.776 1.918 2.026 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.987 1.991 convert_to_new_pgrid 648 15.5 0.048 0.104 1.661 1.967 dbm_copy 452 16.3 1.448 1.782 1.448 1.782 dbt_tas_copy 132 12.5 0.740 0.798 1.437 1.642 dbt_tas_communicate_buffer 328 16.8 0.010 0.011 1.144 1.218 mp_sum_l 6385 13.7 0.888 1.213 0.888 1.213 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=47.518, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=183.566, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=37.923, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.948, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.294, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.81, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=15.575000000000003, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=23.14, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.397, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=4.001, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.191, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.732, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.672, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 195.931 195.931 qs_energies 1 2.0 0.000 0.000 195.744 195.744 mp2_main 1 3.0 0.000 0.000 190.748 190.748 mp2_gpw_main 1 4.0 0.001 0.001 190.335 190.335 mp2_ri_gpw_compute_in 1 5.0 0.398 0.398 139.959 139.959 mp2_ri_gpw_compute_in_loop 1 6.0 0.011 0.011 130.676 130.676 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 101.616 101.616 integrate_v_rspace 2666 8.0 0.668 0.668 88.136 88.136 grid_integrate_task_list 2666 9.0 85.509 85.509 85.509 85.509 mp2_ri_gpw_compute_en 1 5.0 0.087 0.087 50.350 50.350 mp2_ri_gpw_compute_en_RI_loop 1 6.0 10.149 10.149 48.430 48.430 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.058 2.058 30.735 30.735 local_gemm 2080 8.0 28.677 28.677 28.677 28.677 dbcsr_multiply_generic 5322 8.0 0.191 0.191 22.195 22.195 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 22.169 22.169 pw_transfer 63872 10.6 1.014 1.014 12.468 12.468 calculate_wavefunction 2656 8.0 7.913 7.913 12.023 12.023 multiply_cannon 5322 9.0 0.458 0.458 11.304 11.304 fft_wrap_pw1pw2 53228 11.4 0.111 0.111 11.197 11.197 multiply_cannon_loop 5322 10.0 0.154 0.154 9.918 9.918 get_2c_integrals 1 6.0 0.000 0.000 8.884 8.884 make_m2s 10644 9.0 0.064 0.064 8.601 8.601 multiply_cannon_multrec 5322 11.0 8.221 8.221 8.264 8.264 make_images 10644 10.0 3.339 3.339 8.257 8.257 compute_2c_integrals 1 7.0 0.006 0.006 8.005 8.005 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 7.991 7.991 mp2_eri_2c_integrate_gpw 1 9.0 0.901 0.901 7.979 7.979 fft_wrap_pw1pw2_20 21271 12.4 0.551 0.551 7.892 7.892 fft3d_s 53229 13.4 6.798 6.798 6.839 6.839 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.493 2.493 6.772 6.772 mp2_ri_gpw_compute_en_ener 2080 7.0 5.556 5.556 5.556 5.556 copy_dbcsr_to_fm 2679 8.0 0.028 0.028 4.795 4.795 scf_env_do_scf 1 3.0 0.000 0.000 4.574 4.574 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.574 4.574 potential_pw2rs 5322 10.0 0.152 0.152 4.029 4.029 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.031 42.226 42.239 qs_energies 1 2.0 0.000 0.000 42.089 42.090 mp2_main 1 3.0 0.000 0.001 40.043 40.044 mp2_gpw_main 1 4.0 0.001 0.002 39.939 39.939 mp2_ri_gpw_compute_en 1 5.0 0.234 0.249 21.748 22.231 mp2_ri_gpw_compute_en_RI_loop 1 6.0 4.026 4.745 20.523 20.531 mp2_ri_gpw_compute_in 1 5.0 0.047 0.049 18.103 18.484 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.727 17.110 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.420 14.790 integrate_v_rspace 93 8.1 0.114 0.124 14.283 14.635 grid_integrate_task_list 93 9.1 13.954 14.309 13.954 14.309 mp2_ri_gpw_compute_en_expansio 65 7.0 0.173 0.220 11.571 11.856 local_gemm 65 8.0 11.398 11.636 11.398 11.636 mp2_ri_gpw_compute_en_comm 30 7.0 0.093 0.126 4.441 5.478 mp_sendrecv_dm3 1860 8.0 3.634 4.898 3.634 4.898 dbcsr_multiply_generic 176 8.0 0.009 0.010 1.960 2.220 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.940 2.198 scf_env_do_scf 1 3.0 0.000 0.000 1.904 1.905 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.904 1.904 get_2c_integrals 1 6.0 0.003 0.009 1.302 1.340 multiply_cannon 176 9.0 0.017 0.020 1.126 1.238 multiply_cannon_loop 176 10.0 0.002 0.002 1.066 1.177 fill_local_i_aL 1920 8.0 0.763 1.036 0.763 1.036 multiply_cannon_multrec 246 11.0 0.907 0.976 0.913 0.982 make_m2s 352 9.0 0.003 0.003 0.794 0.958 compute_2c_integrals 1 7.0 0.003 0.005 0.936 0.951 make_images 352 10.0 0.053 0.054 0.781 0.944 mp_min_d 2 7.0 0.414 0.927 0.414 0.927 qs_scf_new_mos 10 5.0 0.000 0.000 0.870 0.874 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 0.384 0.868 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.802 0.849 mp2_eri_2c_integrate_gpw 1 9.0 0.204 0.210 0.800 0.848 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=55.46199999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=85.509, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=28.677, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=10.149, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.221, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=7.913, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.307000000000002, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.954, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.398, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=4.026, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.907, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.634, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.111 0.111 132.793 132.793 qs_energies 1 2.0 0.000 0.000 131.392 131.392 scf_env_do_scf 1 3.0 0.000 0.000 122.738 122.738 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 122.738 122.738 qs_ks_update_qs_env 15 5.0 0.000 0.000 51.631 51.631 rebuild_ks_matrix 15 6.0 0.000 0.000 51.414 51.414 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 51.414 51.414 qs_scf_new_mos 15 5.0 0.000 0.000 45.473 45.473 eigensolver 15 6.0 0.002 0.002 36.966 36.966 qs_vxc_create 15 8.0 0.056 0.056 35.166 35.166 calculate_dispersion_nonloc 15 9.0 6.970 6.970 30.550 30.550 pw_transfer 1191 10.0 0.066 0.066 23.941 23.941 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 23.721 23.721 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.364 23.364 cp_fm_diag_elpa_base 15 8.0 20.889 20.889 23.364 23.364 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.672 21.672 calculate_rho_elec 16 6.0 0.240 0.240 21.672 21.672 grid_collocate_task_list 16 7.0 20.273 20.273 20.273 20.273 fft_wrap_pw1pw2_150 765 12.0 3.771 3.771 17.392 17.392 sum_up_and_integrate 15 8.0 0.000 0.000 15.070 15.070 integrate_v_rspace 15 9.0 0.058 0.058 15.055 15.055 grid_integrate_task_list 15 10.0 14.453 14.453 14.453 14.453 cp_fm_cholesky_restore 45 7.0 11.339 11.339 11.339 11.339 fft3d_s 1087 13.0 10.911 10.911 10.919 10.919 pw_scatter_s 585 13.1 7.175 7.175 7.175 7.175 fft_wrap_pw1pw2_200 197 12.3 0.757 0.757 6.137 6.137 copy_dbcsr_to_fm 16 5.9 0.001 0.001 6.122 6.122 dbcsr_complete_redistribute 46 8.3 2.442 2.442 5.854 5.854 gspace_mixing 14 5.0 0.171 0.171 5.126 5.126 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.868 4.868 cp_fm_upper_to_full 30 8.0 4.735 4.735 4.735 4.735 vdW_energy 15 10.0 4.649 4.649 4.649 4.649 xc_vxc_pw_create 15 9.0 0.217 0.217 4.561 4.561 broyden_mixing 14 6.0 4.500 4.500 4.501 4.501 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.657 3.657 init_scf_run 1 3.0 0.001 0.001 3.295 3.295 xc_pw_derive 90 11.0 0.001 0.001 2.938 2.938 calculate_dm_sparse 15 6.0 0.018 0.018 2.755 2.755 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 2.659 2.659 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.034 65.552 65.572 qs_energies 1 2.0 0.000 0.000 65.206 65.214 scf_env_do_scf 1 3.0 0.000 0.000 60.814 60.814 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 60.814 60.814 qs_ks_update_qs_env 15 5.0 0.000 0.000 27.580 27.601 rebuild_ks_matrix 15 6.0 0.000 0.000 27.541 27.562 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 27.541 27.562 qs_rho_update_rho_low 16 5.0 0.000 0.000 19.836 19.845 calculate_rho_elec 16 6.0 0.007 0.007 19.836 19.845 grid_collocate_task_list 16 7.0 18.604 18.800 18.604 18.800 sum_up_and_integrate 15 8.0 0.000 0.001 14.882 14.924 integrate_v_rspace 15 9.0 0.001 0.001 14.871 14.916 grid_integrate_task_list 15 10.0 13.829 13.972 13.829 13.972 qs_scf_new_mos 15 5.0 0.000 0.000 13.786 13.818 eigensolver 15 6.0 0.002 0.002 12.562 12.627 qs_vxc_create 15 8.0 0.001 0.001 12.252 12.272 pw_transfer 1191 10.0 0.087 0.097 10.067 10.169 fft_wrap_pw1pw2 1086 11.0 0.014 0.015 9.871 10.015 calculate_dispersion_nonloc 15 9.0 0.950 0.976 9.783 9.811 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.702 8.706 cp_fm_diag_elpa_base 15 8.0 8.540 8.557 8.699 8.701 fft3d_ps 1086 13.0 3.241 3.547 6.651 7.566 fft_wrap_pw1pw2_150 765 12.0 0.844 1.049 6.418 6.465 mp_alltoall_z22v 1086 15.0 2.739 4.302 2.739 4.302 cp_fm_cholesky_restore 45 7.0 3.667 3.770 3.667 3.770 fft_wrap_pw1pw2_200 197 12.3 0.619 0.762 3.320 3.393 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.785 2.786 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.465 2.680 xc_vxc_pw_create 15 9.0 0.019 0.027 2.469 2.520 x_to_yz 585 14.1 0.379 0.416 1.814 2.503 yz_to_x 501 13.9 0.263 0.304 1.568 2.406 xc_pw_derive 90 11.0 0.001 0.001 1.738 1.836 build_core_ppnl 1 5.0 1.530 1.659 1.530 1.659 vdW_energy 15 10.0 1.436 1.514 1.436 1.514 init_scf_run 1 3.0 0.000 0.000 1.369 1.370 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=54.92800000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.889, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.273, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.453, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.339, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.911, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.671000000000006, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.54, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.604, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.829, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.667, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=3.241, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.087 0.087 323.337 323.337 qs_energies 1 2.0 0.000 0.000 323.183 323.183 ls_scf 1 3.0 0.000 0.000 321.909 321.909 ls_scf_main 1 4.0 0.003 0.003 312.513 312.513 density_matrix_trs4 11 5.0 0.016 0.016 212.769 212.769 arnoldi_extremal 12 6.1 0.000 0.000 107.220 107.220 arnoldi_normal_ev 12 7.1 0.053 0.053 107.219 107.219 dbcsr_matrix_vector_mult 652 9.0 0.270 0.270 105.383 105.383 build_subspace 23 8.1 0.088 0.088 105.121 105.121 dbcsr_matrix_vector_mult_local 652 10.0 103.529 103.529 103.538 103.538 ls_scf_dm_to_ks 11 5.0 0.000 0.000 93.537 93.537 matrix_ls_to_qs 11 6.0 0.000 0.000 89.934 89.934 dbcsr_multiply_generic 185 6.1 1.014 1.014 88.116 88.116 multiply_cannon 185 7.1 0.432 0.432 51.056 51.056 dbcsr_complete_redistribute 23 7.5 33.520 33.520 47.592 47.592 dbcsr_copy_into_existing 11 7.0 46.645 46.645 46.645 46.645 matrix_decluster 11 7.0 0.000 0.000 43.288 43.288 multiply_cannon_loop 185 8.1 0.244 0.244 37.997 37.997 make_m2s 370 7.1 0.040 0.040 31.256 31.256 make_images 370 8.1 13.433 13.433 28.468 28.468 multiply_cannon_multrec 185 9.1 28.078 28.078 28.230 28.230 dbcsr_finalize 646 7.5 0.272 0.272 18.754 18.754 dbcsr_merge_all 597 8.5 2.911 2.911 17.290 17.290 tree_to_linear_d 110 9.4 12.679 12.679 12.679 12.679 setup_rec_index_2d 370 8.1 12.475 12.475 12.475 12.475 dbcsr_sort_indices 1103 9.9 12.136 12.136 12.136 12.136 quick_finalize 395 10.0 0.494 0.494 10.508 10.508 dbcsr_special_finalize 370 9.1 0.002 0.002 9.740 9.740 calculate_norms 370 9.1 9.522 9.522 9.522 9.522 ls_scf_init_scf 1 4.0 0.000 0.000 8.615 8.615 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.274 8.274 dbcsr_dot_sd 144 6.3 7.777 7.777 7.778 7.778 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.602 7.602 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.031 82.409 82.427 qs_energies 1 2.0 0.000 0.000 82.290 82.290 ls_scf 1 3.0 0.000 0.000 82.233 82.234 ls_scf_main 1 4.0 0.001 0.010 79.031 79.031 density_matrix_trs4 11 5.0 0.007 0.024 76.178 76.245 dbcsr_multiply_generic 185 6.1 0.063 0.071 72.887 73.242 multiply_cannon 185 7.1 0.037 0.046 60.579 61.358 multiply_cannon_loop 185 8.1 0.135 0.162 57.765 58.445 multiply_cannon_multrec 1480 9.1 34.253 37.346 34.547 37.650 mp_waitall_1 11936 10.3 20.898 25.296 20.898 25.296 multiply_cannon_metrocomm3 1480 9.1 0.017 0.020 15.795 20.438 make_m2s 370 7.1 0.037 0.039 8.446 8.526 make_images 370 8.1 0.646 0.710 8.307 8.389 calculate_norms 2960 9.1 5.854 6.328 5.854 6.328 make_images_data 370 9.1 0.011 0.013 4.055 4.417 hybrid_alltoall_any 393 9.9 0.291 1.429 3.529 3.887 mp_sum_l 1199 5.3 2.964 3.778 2.964 3.778 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.121 2.790 arnoldi_extremal 12 6.1 0.000 0.001 2.680 2.705 arnoldi_normal_ev 12 7.1 0.002 0.008 2.679 2.704 multiply_cannon_metrocomm1 1480 9.1 0.007 0.008 1.392 2.671 build_subspace 23 8.1 0.023 0.029 2.576 2.578 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.460 2.530 ls_scf_init_scf 1 4.0 0.000 0.000 2.445 2.445 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.418 2.423 dbcsr_complete_redistribute 23 7.5 1.296 1.380 2.262 2.351 make_images_pack 370 9.1 1.950 2.292 1.953 2.296 matrix_ls_to_qs 11 6.0 0.000 0.000 2.161 2.255 dbcsr_matrix_vector_mult 652 9.0 0.015 0.053 2.147 2.214 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.196 2.198 matrix_decluster 11 7.0 0.000 0.000 2.034 2.127 buffer_matrices_ensure_size 370 8.1 1.695 1.990 1.695 1.990 dbcsr_matrix_vector_mult_local 652 10.0 1.744 1.816 1.746 1.818 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=88.60999999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=103.529, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.645, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=33.52, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=28.078, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=13.433, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.522, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=12.804000000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.744, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.296, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=34.253, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.646, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.854, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.964, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.95, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=20.898, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.010 76.235 76.235 lib_test 1 2.0 0.000 0.000 76.224 76.224 dbcsr_run_tests 3 3.0 0.002 0.002 76.224 76.224 test_multiplies_multiproc 3 4.0 0.001 0.001 59.794 59.794 dbcsr_redistribute 9 5.0 38.237 38.237 39.899 39.899 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.489 18.489 dbcsr_make_random_matrix 9 4.0 13.122 13.122 16.319 16.319 multiply_cannon 9 6.0 0.002 0.002 13.283 13.283 multiply_cannon_loop 9 7.0 0.038 0.038 12.916 12.916 multiply_cannon_multrec 9 8.0 12.877 12.877 12.878 12.878 dbcsr_finalize 27 5.7 0.041 0.041 6.014 6.014 dbcsr_merge_all 18 6.5 2.232 2.232 5.245 5.245 dbcsr_data_release 975 7.6 2.840 2.840 2.840 2.840 tree_to_linear_d 9 7.0 2.052 2.052 2.052 2.052 make_m2s 18 6.0 0.001 0.001 1.816 1.816 make_images 18 7.0 0.630 0.630 1.760 1.760 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.020 21.943 21.950 lib_test 1 2.0 0.000 0.000 21.906 21.926 dbcsr_run_tests 3 3.0 0.000 0.001 21.900 21.920 test_multiplies_multiproc 3 4.0 0.000 0.003 20.990 21.054 dbcsr_multiply_generic 9 5.0 0.001 0.001 19.377 19.483 multiply_cannon 9 6.0 0.002 0.002 17.034 17.575 multiply_cannon_loop 9 7.0 0.002 0.003 16.707 17.222 multiply_cannon_multrec 72 8.0 12.585 13.183 12.585 13.183 mp_waitall_1 576 9.2 4.545 5.673 4.545 5.673 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 4.014 4.963 mp_sum_l 470 2.5 0.925 1.529 0.925 1.529 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.916 1.520 make_m2s 18 6.0 0.001 0.001 0.871 0.921 make_images 18 7.0 0.022 0.025 0.868 0.918 dbcsr_make_random_matrix 9 4.0 0.707 0.729 0.870 0.903 dbcsr_finalize 27 5.7 0.000 0.000 0.579 0.769 dbcsr_redistribute 9 5.0 0.268 0.319 0.656 0.710 dbcsr_merge_all 18 6.5 0.091 0.128 0.516 0.697 dbcsr_data_release 444 7.6 0.613 0.684 0.613 0.684 dbcsr_destroy 111 5.9 0.000 0.000 0.551 0.626 make_images_data 18 8.0 0.001 0.001 0.476 0.537 hybrid_alltoall_any 18 9.0 0.044 0.210 0.411 0.492 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.102 0.456 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.9269999999999925, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=38.237, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.122, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.877, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.84, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.232, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.209000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.268, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.707, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.585, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.613, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.091, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.545, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.925, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.063 0.063 143.604 143.604 qs_mol_dyn_low 1 2.0 0.004 0.004 141.752 141.752 velocity_verlet 5 3.0 0.004 0.004 114.849 114.849 qmmm_el_coupling 6 3.8 0.000 0.000 88.548 88.548 qmmm_elec_with_gaussian 6 4.8 0.036 0.036 88.538 88.538 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.530 87.530 qmmm_elec_gaussian_low_G 6 6.8 86.626 86.626 86.626 86.626 qs_forces 6 3.8 0.001 0.001 43.496 43.496 qs_energies 6 4.8 0.000 0.000 38.487 38.487 scf_env_do_scf 6 5.8 0.001 0.001 35.880 35.880 rebuild_ks_matrix 45 8.4 0.000 0.000 31.560 31.560 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 31.560 31.560 scf_env_do_scf_inner_loop 39 6.8 0.005 0.005 31.321 31.321 qs_ks_update_qs_env 45 7.8 0.000 0.000 26.997 26.997 pw_transfer 966 12.3 0.057 0.057 18.558 18.558 fft_wrap_pw1pw2 801 13.6 0.007 0.007 18.308 18.308 fft_wrap_pw1pw2_150 507 15.2 2.945 2.945 17.923 17.923 qs_vxc_create 45 10.4 0.001 0.001 16.252 16.252 xc_vxc_pw_create 45 11.4 0.870 0.870 16.251 16.251 xc_pw_derive 270 13.4 0.002 0.002 10.344 10.344 xc_rho_set_and_dset_create 45 12.4 1.417 1.417 9.032 9.032 pw_integral_ab 2539 7.4 8.550 8.550 8.550 8.550 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.042 8.042 calculate_rho_elec 45 8.9 0.681 0.681 8.042 8.042 fft3d_s 802 15.6 8.028 8.028 8.037 8.037 qmmm_forces 6 3.8 0.003 0.003 6.241 6.241 xc_pw_divergence 45 12.4 0.001 0.001 6.234 6.234 pw_scatter_s 429 15.8 6.133 6.133 6.133 6.133 qs_ks_ddapc 45 10.4 0.001 0.001 5.991 5.991 qmmm_forces_with_gaussian 6 4.8 0.045 0.045 5.804 5.804 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.590 4.590 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.583 4.583 init_scf_loop 6 6.8 0.000 0.000 4.554 4.554 pw_poisson_solve 51 9.9 1.114 1.114 4.384 4.384 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.137 4.137 qmmm_forces_gaussian_low_G 6 6.8 3.799 3.799 3.799 3.799 grid_collocate_task_list 45 9.9 3.737 3.737 3.737 3.737 density_rs2pw 45 9.9 0.002 0.002 3.624 3.624 sum_up_and_integrate 45 10.4 0.001 0.001 3.286 3.286 integrate_v_rspace 45 11.4 0.007 0.007 3.239 3.239 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.067 73.428 73.446 qs_mol_dyn_low 1 2.0 0.003 0.004 72.266 72.335 qs_forces 6 3.8 0.001 0.001 52.483 52.483 qs_energies 6 4.8 0.000 0.001 50.087 50.087 scf_env_do_scf 6 5.8 0.000 0.001 48.923 48.923 scf_env_do_scf_inner_loop 113 6.2 0.003 0.023 46.852 46.853 rebuild_ks_matrix 119 8.1 0.000 0.000 35.529 35.541 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.017 35.529 35.541 qs_ks_update_qs_env 119 7.3 0.001 0.001 33.445 33.457 velocity_verlet 5 3.0 0.002 0.004 31.072 31.076 pw_transfer 2446 12.3 0.144 0.157 25.390 26.175 fft_wrap_pw1pw2 2059 13.4 0.020 0.022 24.965 25.811 fft_wrap_pw1pw2_150 1321 14.9 4.244 4.726 24.156 24.891 qs_vxc_create 119 10.1 0.002 0.002 20.141 20.150 xc_vxc_pw_create 119 11.1 0.281 0.341 20.139 20.148 fft3d_ps 2059 15.4 8.288 9.015 15.474 17.187 xc_pw_derive 714 13.1 0.007 0.007 14.972 15.614 qs_rho_update_rho_low 119 7.3 0.001 0.001 13.107 13.115 calculate_rho_elec 119 8.3 0.050 0.057 13.107 13.115 sum_up_and_integrate 119 10.1 0.002 0.002 10.930 10.954 integrate_v_rspace 119 11.1 0.004 0.004 10.833 10.860 xc_pw_divergence 119 12.1 0.004 0.004 9.936 10.535 qmmm_forces 6 3.8 0.004 0.004 10.366 10.367 qmmm_forces_with_gaussian 6 4.8 0.008 0.011 9.808 10.156 xc_rho_set_and_dset_create 119 12.1 0.566 0.686 9.630 9.973 qmmm_el_coupling 6 3.8 0.000 0.000 8.287 8.531 qmmm_elec_with_gaussian 6 4.8 0.005 0.006 8.286 8.529 density_rs2pw 119 9.3 0.006 0.007 8.094 8.384 mp_alltoall_z22v 2059 17.4 5.448 7.720 5.448 7.720 potential_pw2rs 119 12.1 0.007 0.008 6.837 6.867 grid_collocate_task_list 119 9.3 4.773 5.171 4.773 5.171 pw_restrict_s3 18 5.8 2.613 2.756 4.713 5.002 yz_to_x 964 16.0 0.748 0.938 3.473 4.962 x_to_yz 1095 16.8 0.939 1.054 3.663 4.557 transfer_pw2rs 500 12.8 0.005 0.006 4.382 4.407 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.271 4.338 grid_integrate_task_list 119 12.1 3.649 3.915 3.649 3.915 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.621 3.864 pw_prolongate_s3 18 6.8 1.990 2.104 3.621 3.864 mp_waitany 4028 12.8 3.066 3.835 3.066 3.835 transfer_rs2pw 488 10.2 0.007 0.008 3.403 3.820 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.568 3.732 qmmm_forces_gaussian_low_G 6 6.8 3.522 3.594 3.522 3.594 qmmm_elec_gaussian_low_G 6 6.8 2.959 3.112 2.959 3.112 transfer_pw2rs_150 125 13.9 0.999 1.169 2.968 3.037 pw_gather_p 964 15.0 2.563 2.815 2.563 2.815 transfer_rs2pw_150 125 11.2 0.855 1.044 2.399 2.802 qs_ks_ddapc 119 10.1 0.002 0.003 2.676 2.769 qs_scf_new_mos 113 7.2 0.000 0.001 2.748 2.763 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.747 2.762 ot_scf_mini 113 9.2 0.001 0.002 2.658 2.670 pw_scatter_p 1095 15.8 2.602 2.659 2.602 2.659 pw_integral_ab 2761 7.7 2.083 2.268 2.452 2.606 dbcsr_multiply_generic 2588 12.3 0.063 0.065 2.119 2.162 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.108 2.108 init_scf_loop 6 6.8 0.000 0.000 2.067 2.067 mp_sum_dm3 33 5.7 1.786 1.842 1.786 1.842 pw_axpy 2529 9.3 1.524 1.647 1.524 1.647 xc_functional_eval 238 13.1 0.002 0.003 1.279 1.545 mp_sum_d 5822 12.2 1.006 1.539 1.006 1.539 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=23.786000000000016, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=86.626, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.55, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.028, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.133, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.799, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.737, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=2.945, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=38.461999999999996, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.959, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.083, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.522, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.773, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=4.244, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.649, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.448, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=8.288, yerr=0.0 Summary: Performance test took 39 minutes. Status: OK Removing intermediate container abac21d02533 ---> aee2920d1d10 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 527ba6609f6f Removing intermediate container 527ba6609f6f ---> 320f167bf82e Step 42/42 : ENTRYPOINT [] ---> Running in 68fd9f8fe83c Removing intermediate container 68fd9f8fe83c ---> fc190fa2fd0d [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built fc190fa2fd0d Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-09-27 08:59:10+00:00