▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在其下的表现

● 代码,文件较多,放在一起了

 ! main.f90
! Driver program: calls initialize(), runs the time loop (physics() followed by
! write_output(ntstep) for each step up to nstop) bracketed by
! start_timer/end_timer on timer itimloop, then print_timers() and cleanup().
! NOTE(review): this listing lost its line breaks and some numeric literals
! during extraction: the value of itimloop and the DO lower bound are missing,
! and any statement that follows a "!" on the same line is currently part of
! that comment. Restore them from the original main.f90 before compiling.
PROGRAM main
USE m_config, ONLY: nstop
USE m_physics, ONLY: physics
USE m_io, ONLY: write_output
USE m_setup, ONLY: initialize, cleanup
USE m_timing, ONLY: start_timer, end_timer, print_timers IMPLICIT NONE INTEGER :: ntstep
INTEGER, parameter :: itimloop = CALL initialize() ! initialize timers and device WRITE(*,"(A)") "Start of time loop"
CALL start_timer(itimloop, "Time loop") DO ntstep = , nstop ! compute
CALL physics()
CALL write_output( ntstep )
END DO CALL end_timer( itimloop )
WRITE(*,"(A)") "End of time loop" CALL print_timers()
CALL cleanup() END PROGRAM main ! m_config.f90: run parameters
! Compile-time run configuration (grid extents, number of time steps, output
! interval) used by the other modules through USE m_config, ONLY: ...
! NOTE(review): the numeric value of every parameter below was lost during
! extraction, and "END MODULE m_config" ended up inside the last comment.
! Restore the values from the original m_config.f90 before compiling.
MODULE m_config
INTEGER, parameter :: nx = ! number of grid points in longitude
INTEGER, parameter :: ny = ! number of grid points in latitude
INTEGER, parameter :: nz = ! number of grid points in altitude
INTEGER, parameter :: nstop = ! number of time steps
INTEGER, parameter :: nout = ! output interval END MODULE m_config ! m_fields.f90: field variables
MODULE m_fields
  ! Global 3-D model fields shared between the setup, physics and I/O modules.
  ! They are allocated in m_setup (initialize) and released in m_setup (cleanup).
  IMPLICIT NONE

  REAL*8, ALLOCATABLE :: qv(:,:,:)  ! water vapour content
  REAL*8, ALLOCATABLE :: t(:,:,:)   ! temperature

END MODULE m_fields

! m_io.f90: input/output routines
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain-mean water vapour content.
  !   ntstep : current time step; output is produced only when ntstep is a
  !            multiple of nout, otherwise the routine returns immediately.
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep
    INTEGER :: i, j, k
    REAL*8 :: qv_mean   ! mean water vapour content (scalar)

    ! Not an output step: nothing to do.
    IF (MOD(ntstep, nout) /= 0) RETURN

    ! Accumulate the sum over the whole grid (innermost loop over the first
    ! index for contiguous access), then normalise.
    qv_mean = 0.0D0
    DO k = 1, nz
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO

    ! Form the point count in floating point so that very large grids cannot
    ! overflow a default INTEGER in nx*ny*nz.
    qv_mean = qv_mean / ( REAL(nx, KIND(qv_mean)) * REAL(ny, KIND(qv_mean)) * REAL(nz, KIND(qv_mean)) )

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90: parametrization schemes
! Two physical parametrizations that update the 3-D fields t, qc and qv in
! place, looping over k (outer), j and i (inner).
! NOTE(review): extraction stripped digits from this listing (the kind in
! "REAL*", parts of the cs*/cm* constants, the DO lower bounds, the offset in
! "k-") and merged lines, so some statements now follow a "!" comment.
! Restore them from the original m_parametrizations.f90 before compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! saturation_adjustment: at every grid point adds
! cs1*EXP(cs2*(t-t0)/(t-cs3)) to qv and then sets qc = cs4*qv.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! parametrization scheme one
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev ! input dimensions
REAL*, INTENT(IN) :: t(:,:,:) ! temperature
REAL*, INTENT(OUT) :: qc(:,:,:) ! cloud water content
REAL*, INTENT(INOUT) :: qv(:,:,:) ! water vapour content
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
! microphysics: qv at level k is rebuilt from qv at a lower k index (the offset
! after "k-" was lost) plus cm1*(t-cm2)**cm3, then t is rescaled using qc and
! the new qv. Each level reads a previously updated level, so the k loop
! carries a dependency; the lost lower bound of k must keep that index valid.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! parametrization scheme two
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k DO k = , nlev
DO j = , npy
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90: execution of the parametrization schemes
MODULE m_physics
  USE m_config, ONLY: nx, ny, nz
  USE m_fields, ONLY: qv, t
  USE m_parametrizations, ONLY: saturation_adjustment, microphysics
  IMPLICIT NONE

CONTAINS

  ! Run one physics step on the global fields t and qv: first the saturation
  ! adjustment (which fills qc), then the microphysics (which consumes qc).
  SUBROUTINE physics()
    IMPLICIT NONE
    REAL*8 :: qc(nx,ny,nz)   ! cloud water content, scratch array for this step

    CALL saturation_adjustment(nx, ny, nz, t, qc, qv)   ! first parametrization
    CALL microphysics(nx, ny, nz, t, qc, qv)            ! second parametrization
  END SUBROUTINE physics

END MODULE m_physics

! m_timing.f90: timers
! Simple wall-clock timers built on SYSTEM_CLOCK.
!   init_timers  : zeroes rtimer, blanks timertag, queries count rate/max.
!   start_timer  : stops the program if id is out of range, if timertag(id) is
!                  already set, or if tag is blank; otherwise stores the tag
!                  and the current clock count.
!   end_timer    : stops the program if id is out of range or the timer was
!                  never started; otherwise stores (new count - start count)
!                  divided by the count rate in rtimer(id) (overwriting it).
!   print_timers : prints every timer whose rtimer is positive, labelled "ms".
! The "!$acc wait" text precedes each SYSTEM_CLOCK call in start_timer and
! end_timer.
! NOTE(review): extraction stripped digits (ntimer, the CHARACTER length, the
! kind in "REAL*", comparison constants, the ms scale factor) and merged
! lines, so several statements now follow a "!" on the same line. Restore
! them from the original m_timing.f90 before compiling.
MODULE m_timing
IMPLICIT NONE INTEGER, PARAMETER :: ntimer= ! number of timers
REAL* :: rtimer(ntimer) ! timer values
CHARACTER() :: timertag(ntimer) ! timer tags
INTEGER :: icountold(ntimer), & ! tick (start of timer section)
icountrate, & ! countrate of SYSTEM_CLOCK()
icountmax ! maximum counter value of SYSTEM_CLOCK() CONTAINS
SUBROUTINE init_timers() ! initialize the timers
IMPLICIT NONE rtimer(:) = .0D0
timertag(:) = ""
icountold(:) = CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
END SUBROUTINE init_timers SUBROUTINE start_timer(id, tag) ! start timing
IMPLICIT NONE INTEGER, INTENT(IN) :: id
CHARACTER(*), INTENT(IN) :: tag IF (id < .OR. id > ntimer) THEN ! check the timer id range
WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer
STOP
END IF IF (LEN_TRIM(timertag(id)) /= ) THEN ! check whether the timer was already started
WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
STOP
END IF IF (LEN_TRIM(tag) == ) THEN ! check that the tag is not empty
WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
STOP
END IF timertag(id) = TRIM(tag) ! save the tag
! Next line: record the start count for this timer.
!$acc wait CALL SYSTEM_CLOCK( COUNT=icountold(id) ) ! 开始计时
END SUBROUTINE start_timer SUBROUTINE end_timer(id) ! stop timing
IMPLICIT NONE INTEGER, INTENT(IN) :: id
INTEGER :: icountnew IF (id < .OR. id > ntimer) THEN ! check the timer id range
WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer
STOP
END IF IF (LEN_TRIM(timertag(id)) == ) THEN ! check whether the timer was started
WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
STOP
END IF
! Next lines: read the current count and convert the difference to elapsed time.
!$acc wait CALL SYSTEM_CLOCK( COUNT=icountnew ) ! 获取当前时间,计算耗时
rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id)))
END SUBROUTINE end_timer SUBROUTINE print_timers() ! print the timers
IMPLICIT NONE INTEGER :: id WRITE(*,"(A)") "----------------------------"
WRITE(*,"(A)") "Timers:"
WRITE(*,"(A)") "----------------------------"
DO id = , ntimer
IF ( rtimer(id) > .0D0 ) THEN
WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*.0D3, " ms"
END IF
END DO
WRITE(*,"(A)") "----------------------------"
END SUBROUTINE print_timers END MODULE m_timing ! m_setup.f90: initialization and cleanup
! Program setup and teardown (serial version).
!   initialize     : prints whether the build has _OPENACC defined, initializes
!                    the timers, allocates t and qv with shape (nx,ny,nz),
!                    fills them with cosine-based values of i+j+k, and calls
!                    initialize_gpu() when _OPENACC is defined; the work is
!                    timed under timer itiminit ("Initialization").
!   initialize_gpu : fills a small array temp in a loop marked
!                    "acc parallel loop" and checks SUM(temp); prints
!                    "GPU initialized" or stops with an error.
!   cleanup        : deallocates t and qv.
! NOTE(review): extraction stripped digits (itiminit, the initial-condition
! constants, DO bounds, the size and expected sum of temp) and merged lines,
! including the #ifdef/#endif preprocessor lines, which must start a line of
! their own. Restore them from the original m_setup.f90 before compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer id
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
END SUBROUTINE initialize SUBROUTINE initialize_gpu()! run a small kernel on the GPU to initialize it
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()! release the memory of t and qv
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup

● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90

 ! m_io.f90
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain-mean water vapour content (OpenMP version).
  !   ntstep : current time step; output is produced only when ntstep is a
  !            multiple of nout, otherwise the routine returns immediately.
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep
    INTEGER :: i, j, k
    REAL*8 :: qv_mean   ! mean water vapour content (scalar)

    ! Not an output step: nothing to do.
    IF (MOD(ntstep, nout) /= 0) RETURN

    qv_mean = 0.0D0
    DO k = 1, nz
      ! For each level the j loop is shared among threads; the partial sums
      ! are combined through the reduction on qv_mean.
      !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean)
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO

    ! Form the point count in floating point so that very large grids cannot
    ! overflow a default INTEGER in nx*ny*nz.
    qv_mean = qv_mean / ( REAL(nx, KIND(qv_mean)) * REAL(ny, KIND(qv_mean)) * REAL(nz, KIND(qv_mean)) )

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90
! OpenMP version of the two parametrizations. In both routines an
! "OMP PARALLEL" region encloses the k loop and an "OMP DO PRIVATE(i,j)"
! worksharing loop distributes the j loop for each k; the arithmetic is the
! same as in the serial listing.
! NOTE(review): extraction stripped digits (the kind in "REAL*", parts of the
! cs*/cm* constants, DO lower bounds, the offset in "k-") and merged lines;
! the opening "!$OMP PARALLEL" of each routine is fused onto the end of the
! declaration line. Restore from the original file before compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! saturation_adjustment: adds cs1*EXP(cs2*(t-t0)/(t-cs3)) to qv, then sets
! qc = cs4*qv, at every grid point.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$OMP END PARALLEL
! microphysics: qv at level k is rebuilt from qv at a lower k index (offset
! lost) plus cm1*(t-cm2)**cm3, then t is rescaled using qc and the new qv.
! Each level reads a previously updated level, so k iterations must stay in
! order; only the j loop is workshared.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$OMP PARALLEL
DO k = , nlev
!$OMP DO PRIVATE(i,j)
DO j = , npy
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$OMP END PARALLEL
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_setup.f90
! Program setup and teardown (OpenMP version). Same as the serial listing
! except that, when _OPENACC is not defined and _OPENMP is, initialize()
! opens a parallel region in which the thread whose OMP_GET_THREAD_NUM()
! matches the (lost) comparison value prints OMP_GET_NUM_THREADS().
! OMP_GET_NUM_THREADS and OMP_GET_THREAD_NUM are declared here as INTEGER
! functions rather than imported from a module.
! NOTE(review): extraction stripped digits (itiminit, the thread number in the
! comparison, initial-condition constants, DO bounds, the size and expected
! sum of temp) and merged lines, including #ifdef/#endif lines, which must
! start a line of their own. Restore from the original file before compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit = ! timer ID
INTEGER :: i, j, k ! loop indices
INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#ifdef _OPENMP
!$OMP PARALLEL
IF (OMP_GET_THREAD_NUM()==) THEN
WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
END IF
!$OMP END PARALLEL
#endif
#endif
WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO #ifdef _OPENACC
CALL initialize_gpu()
#endif CALL end_timer( itiminit )
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE DEALLOCATE( t, qv )
END SUBROUTINE cleanup END MODULE m_setup

● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。书上的优化 04 涉及算法改动,没有参与比较

 ! m_io.f90
MODULE m_io
  USE m_config, ONLY: nout, nx, ny, nz
  USE m_fields, ONLY: qv
  IMPLICIT NONE

CONTAINS

  ! Print the domain-mean water vapour content (OpenACC version).
  !   ntstep : current time step; output is produced only when ntstep is a
  !            multiple of nout, otherwise the routine returns immediately.
  ! qv is expected to be present on the device already (data present clause).
  SUBROUTINE write_output(ntstep)
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ntstep
    INTEGER :: i, j, k
    REAL*8 :: qv_mean   ! mean water vapour content (scalar)

    ! Not an output step: nothing to do.
    IF (MOD(ntstep, nout) /= 0) RETURN

    !$acc data present(qv)
    qv_mean = 0.0D0
    ! Collapse the three tightly nested loops into one iteration space and
    ! sum on the device; the reduction result is available on the host after
    ! the parallel region.
    !$acc parallel
    !$acc loop gang vector collapse(3) reduction(+:qv_mean)
    DO k = 1, nz
      DO j = 1, ny
        DO i = 1, nx
          qv_mean = qv_mean + qv(i,j,k)
        END DO
      END DO
    END DO
    !$acc end parallel
    !$acc end data

    ! Form the point count in floating point so that very large grids cannot
    ! overflow a default INTEGER in nx*ny*nz.
    qv_mean = qv_mean / ( REAL(nx, KIND(qv_mean)) * REAL(ny, KIND(qv_mean)) * REAL(nz, KIND(qv_mean)) )

    WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
  END SUBROUTINE write_output

END MODULE m_io

! m_parametrizations.f90
! OpenACC version of the two parametrizations. Both routines wrap their loops
! in "acc data present(t,qv,qc)" and a single "acc parallel" region.
! saturation_adjustment uses "loop gang vector collapse()" on the loop nest;
! microphysics marks the k loop "loop seq", the j loop "loop gang" and the
! i loop "loop vector". The arithmetic is the same as in the serial listing.
! NOTE(review): extraction stripped digits (the kind in "REAL*", parts of the
! cs*/cm* constants, DO lower bounds, the collapse count, the offset in "k-")
! and merged lines; the "!$acc data" directive of saturation_adjustment is
! fused onto the end of the declaration line. Restore from the original file
! before compiling.
MODULE m_parametrizations
IMPLICIT NONE REAL*, parameter :: cs1 = 1.0D-, cs2 = .02D0, cs3 = .2D0, cs4=.1D0, t0=.0D0
REAL*, parameter :: cm1 = 1.0D-, cm2=.0D0, cm3=.2D0, cm4=.0D0 CONTAINS
! saturation_adjustment: adds cs1*EXP(cs2*(t-t0)/(t-cs3)) to qv, then sets
! qc = cs4*qv, at every grid point.
SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(IN) :: t(:,:,:)
REAL*, INTENT(OUT) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k !$acc data present(t,qv,qc)
!$acc parallel
!$acc loop gang vector collapse()
DO k = , nlev
DO j = , npy
DO i = , npx
qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
qc(i,j,k) = cs4 * qv(i,j,k)
END DO
END DO
END DO
!$acc end parallel
!$acc end data
! microphysics: qv at level k is rebuilt from qv at a lower k index (offset
! lost) plus cm1*(t-cm2)**cm3, then t is rescaled using qc and the new qv.
! Each level reads a previously updated level, which is why the k loop is
! marked "loop seq" while j and i are parallelized.
END SUBROUTINE saturation_adjustment SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
IMPLICIT NONE INTEGER, INTENT(IN) :: npx, npy, nlev
REAL*, INTENT(INOUT) :: t(:,:,:)
REAL*, INTENT(IN) :: qc(:,:,:)
REAL*, INTENT(INOUT) :: qv(:,:,:)
INTEGER :: i, j, k
!$acc data present(t,qv,qc)
!$acc parallel
!$acc loop seq
DO k = , nlev
!$acc loop gang
DO j = , npy
!$acc loop vector
DO i = , npx
qv(i, j, k) = qv(i,j,k-) + cm1*(t(i,j,k)-cm2)**cm3
t(i, j, k) = t(i, j, k)*( .0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
END DO
END DO
END DO
!$acc end parallel
!$acc end data
END SUBROUTINE microphysics END MODULE m_parametrizations ! m_physics.f90
MODULE m_physics
  USE m_config, ONLY: nx, ny, nz
  USE m_fields, ONLY: qv, t
  USE m_parametrizations, ONLY: saturation_adjustment, microphysics
  IMPLICIT NONE

  ! Cloud water content scratch field. It is a module variable so that it can
  ! be allocated once on host and device by init_physics and released by
  ! finalize_physics, instead of being recreated on every physics() call.
  REAL*8, ALLOCATABLE :: qc(:,:,:)

CONTAINS

  ! Run one physics step on the global fields t and qv: first the saturation
  ! adjustment (which fills qc), then the microphysics (which consumes qc).
  ! init_physics must have been called beforehand.
  SUBROUTINE physics()
    IMPLICIT NONE
    CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
    CALL microphysics(nx, ny, nz, t, qc, qv)
  END SUBROUTINE physics

  ! Allocate qc on the host and create its device copy.
  ! A repeated call is a no-op rather than an allocation error.
  SUBROUTINE init_physics()
    IMPLICIT NONE
    IF (.NOT. ALLOCATED(qc)) THEN
      ALLOCATE( qc(nx,ny,nz) )
      !$acc enter data create(qc)
    END IF
  END SUBROUTINE init_physics

  ! Delete the device copy of qc, then free the host array.
  ! Safe to call when qc was never allocated.
  SUBROUTINE finalize_physics()
    IMPLICIT NONE
    IF (ALLOCATED(qc)) THEN
      !$acc exit data delete(qc)
      DEALLOCATE(qc)
    END IF
  END SUBROUTINE finalize_physics

END MODULE m_physics

! m_setup.f90
! Program setup and teardown (OpenACC version with explicit data management).
!   initialize     : prints whether _OPENACC is defined, initializes timers,
!                    allocates t and qv, creates their device copies
!                    ("enter data create"), fills them on the host, pushes the
!                    values with "update device", calls initialize_gpu() when
!                    _OPENACC is defined, then calls init_physics().
!   initialize_gpu : fills a small array temp in a loop marked
!                    "acc parallel loop" and checks SUM(temp).
!   cleanup        : deletes the device copies of t and qv, deallocates them,
!                    then calls finalize_physics().
! NOTE(review): extraction stripped digits (itiminit, initial-condition
! constants, DO bounds, the size and expected sum of temp) and merged lines:
! several "!$acc" directives and #ifdef/#endif lines are fused onto other
! lines and must be put back on lines of their own. Restore from the original
! file before compiling.
MODULE m_setup
USE m_config, ONLY: nstop, nout, nx, ny, nz
USE m_fields, ONLY: t,qv
USE m_timing, ONLY: init_timers, start_timer, end_timer
USE m_physics, ONLY: init_physics, finalize_physics
IMPLICIT NONE CONTAINS
SUBROUTINE initialize() ! initialize timers and device
IMPLICIT NONE INTEGER, PARAMETER :: itiminit =
INTEGER :: i, j, k #ifdef _OPENACC
WRITE(*,"(A)") "Running with OpenACC"
#else
WRITE(*,"(A)") "Running without OpenACC"
#endif WRITE(*,"(A)") "Initialize" CALL init_timers()
CALL start_timer( itiminit, "Initialization" )
ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) !$acc enter data create(t,qv)
DO k =, nz
DO j = , ny
DO i = , nx
t(i,j,k) = .0D0 * (.2D0 + .07D0 * COS(.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
qv(i,j,k) = 1.0D- * (.1D0 + .13D0 * COS(.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
END DO
END DO
END DO
!$acc update device(t,qv) #ifdef _OPENACC
CALL initialize_gpu()
#endif
CALL init_physics()
CALL end_timer( itiminit )
END SUBROUTINE initialize SUBROUTINE initialize_gpu()
IMPLICIT NONE INTEGER :: temp()
INTEGER :: i !$acc parallel loop
DO i = ,
temp(i) =
END DO IF (SUM(temp) == ) THEN
WRITE(*,"(A)") "GPU initialized"
ELSE
WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
STOP
END IF
END SUBROUTINE initialize_gpu SUBROUTINE cleanup()
IMPLICIT NONE !$acc exit data delete(t,qv)
DEALLOCATE( t, qv )
CALL finalize_physics()
END SUBROUTINE cleanup END MODULE m_setup

● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是让预处理器处理 Fortran 源文件,否则 m_setup.f90 中的 # 预处理指令会被当成错误

cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
compiling m_config.f90
compiling m_fields.f90
compiling m_io.f90
compiling m_parametrizations.f90
compiling m_physics.f90
compiling m_timing.f90
compiling m_setup.f90
compiling main.f90
make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
make[]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp' ... ! 类似上面的过程 make[]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
Running without OpenACC
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.28 ms
Time loop : 978.08 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
Running without OpenACC
Running with OpenMP with threads
Initialize
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 17.96 ms
Time loop : 898.92 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 191.11 ms
Time loop : 1044.35 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 176.72 ms
Time loop : 142.11 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 162.15 ms
Time loop : 121.77 ms
----------------------------
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
Running with OpenACC
Initialize
GPU initialized
Start of time loop
Step: , mean(qv) = 1.14302104E-04
Step: , mean(qv) = 1.34041461E-04
Step: , mean(qv) = 1.53710207E-04
Step: , mean(qv) = 1.73309068E-04
Step: , mean(qv) = 1.92838848E-04
End of time loop
----------------------------
Timers:
----------------------------
Initialization : 152.47 ms
Time loop : 166.53 ms
----------------------------

● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”

OpenACC 云水参数化方案的更多相关文章

  1. Linux云主机 监控方案浅析

    1.为何需要监控 监控是运维工程师的眼睛,它可帮助运维工程师第一时间发现系统的问题. 对于服务器的整个生命周期,都要和监控打交道: 当有服务器上架,都需要加入比如CPU负载.内存.网络.磁盘等基础监控 ...

  2. 开源网站云查杀方案,搭建自己的云杀毒-搭建ClamAV服务器

    开源网站云查杀方案,搭建自己的云杀毒 搭建ClamAV服务器 1        前言: 在上一篇我们已经演示了整个方案,传送门<开源网站云查杀方案,搭建自己的云杀毒>:https://ww ...

  3. Kube-OVN:大型银行技术团队推荐的金融级云原生网络方案

    近日,由TWT社区主办的2021容器云职业技能大赛团队赛的冠军作品:<适用于大中型银行的云原生技术体系建设方案>中,Kube-OVN成为银行技术团队推荐的金融级云原生网络最佳实践.本文部分 ...

  4. xx云网络实施方案案例

    由于xx云在我公司进行试用,对接我方存储,于是乎就负责网络实施方案,下面是具体方案介绍 ip分配 具体网络拓扑如下: 下面是两台交换机配置 IPMI交换机配置如下: [Quidway]di cu !S ...

  5. 理解 OpenStack 高可用(HA)(1):OpenStack 高可用和灾备方案 [OpenStack HA and DR]

    本系列会分析OpenStack 的高可用性(HA)概念和解决方案: (1)OpenStack 高可用方案概述 (2)Neutron L3 Agent HA - VRRP (虚拟路由冗余协议) (3)N ...

  6. 实现跨云应用——基于DNS的负载均衡

    “公有云可以作为传统IT资源的延展,能帮助客户应对不断变化的需求”——这是我们在向客户介绍公有云产品时经常说的一句话.我们来看一个具体的需求: 某客户有一个web站点,部署在自有的数据中心(on-pr ...

  7. HP PCS 云监控大数据解决方案

    ——把数据从分散统一集中到数据中心 基于HP分布式并行计算/存储技术构建的云监控系统即是通过“云高清摄像机”及IaaS和PaaS监控系统平台,根据用户所需(SaaS)将多路监控数据流传送给“云端”,除 ...

  8. OpenStack 虚拟机监控方案确定

    Contents [hide] 1 监控方案调研过程 1.1 1. 虚拟机里内置监控模块 1.2 2. 通过libvirt获取虚拟机数据监控. 2 a.测试openstack的自待组件ceilomet ...

  9. 免费ERP之云实施

    近日,普实渠道在AIO5软件免费一周年之际,推出了重磅的动作:启动AIO5云实施,推广小微企业免费ERP落地应用. 这无疑是推动客户免费应用ERP的重大里程碑. 当前,在中国小微企业信息化方面,应用情 ...

随机推荐

  1. Heap Operations 优先队列

    Petya has recently learned data structure named "Binary heap". The heap he is now operatin ...

  2. bulid-tool

    Build tool 中文构建工具.构建工具能够帮助你创建一个可重复的.可靠的.携带的且不需要手动干预的构建.构建工具是一个可编程的工具,它能够让你以可执行和有序的任务来表达自动化需求.假设你想要编译 ...

  3. hasura graphql 集成pipelinedb测试

    实际上因为pipelinedb 是原生支持pg的,所以应该不存在太大的问题,以下为测试 使用doker-compose 运行 配置 docker-compose 文件 version: '3.6' s ...

  4. JS 中 this 的用法

    this是JavaScript语言中的一个关键字 他是函数运行时,在函数体内部自动生成的一个对象, 只能在函数体内部使用. 在不同function中, this有不同的值. 1. 纯粹的函数调用. f ...

  5. nyoj 表达式求值

    35-表达式求值 内存限制:64MB 时间限制:3000ms Special Judge: Noaccepted:19 submit:26 题目描述: ACM队的mdd想做一个计算器,但是,他要做的不 ...

  6. ORTP&&RTSP

    ortp为了提高实时性使用UDP发送 rtsp建立了一个TCPserver,等待客户端连接,此时打开VLC播放器-->打开网络串流-->输入rtsp地址,会请求RTSP Server建立一 ...

  7. Elasticsearch 知识点

    Elasticsearch 知识点 table th:first-of-type { width: 200px; } table th:nth-of-type(2) { } 功能 curl命令 运行 ...

  8. spring 概念之:IoC(控制反转)

    IoC(控制反转,Inverse of Control) IoC 的字面意思是控制反转,它包括两方面的内容: 控制 反转 那到底是什么东西的"控制"被"反转"了 ...

  9. ML(4.3): R Random Forest

    随机森林模型是一种数据挖掘模型,常用于进行分类预测.随机森林模型包含多个树形分类器,预测结果由多个分类器投票得出. 决策树相当于一个大师,通过自己在数据集中学到的知识对于新的数据进行分类.俗话说得好, ...

  10. redis通过dump.db文件 进行数据替换 复制

    进行数据替换无非就是三步, 杀掉redis进程 ------------> 复制 dump.db文件 ------------------>启动redis   pkill redis-se ...