!----------------------------------------------------------------------- !BOP ! !ROUTINE: trac2d --- Remap Lagrangian to fixed coordinates ! ! !INTERFACE: subroutine trac2d( grid, dp1, tracer, cx, cy, & mfx, mfy, iord, jord, fill, & nlo, nhi, va, flx ) ! !USES: use shr_kind_mod, only : r8 => shr_kind_r8, r4 => shr_kind_r4 use tp_core, only : tp2c use fill_module, only : fillxy use dynamics_vars, only : T_FVDYCORE_GRID use FVperf_module, only : FVstartclock, FVstopclock, FVbarrierclock #if defined( SPMD ) use parutilitiesmodule, only: maxop, parcollective use mod_comm, only : mp_send4d_ns, mp_recv4d_ns, & mp_send4d_ns_r4, mp_recv4d_ns_r4, & mp_send3d_2, mp_recv3d_2 #endif implicit none ! !INPUT PARAMETERS: type (T_FVDYCORE_GRID), intent(inout) :: grid integer, intent(in):: iord, jord logical, intent(in):: fill integer, intent(in):: nlo, nhi ! Tracer index range ! !INPUT/OUTPUT PARAMETERS: real(r8), intent(inout):: dp1(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast) real(r8), intent(inout):: cx(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast) real(r8), intent(inout):: cy(grid%im,grid%jfirst:grid%jlast+1,grid%kfirst:grid%klast) real(r8), intent(inout):: mfx(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast) real(r8), intent(inout):: mfy(grid%im,grid%jfirst:grid%jlast+1,grid%kfirst:grid%klast) real(r8), intent(inout):: tracer(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast,grid%ntotq) ! !OUTPUT PARAMETERS: real(r8), intent(out):: va(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast) real(r8), intent(out):: flx(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast) ! !DESCRIPTION: ! ! Perform large-time-step tracer transport using accumulated Courant ! numbers (cx, cy) and the mass fluxes (mfx, mfy) within the Lagrangian ! layers. This routine is 100\% parallel in the vertical direction ! (with SMP). Merdional Courant number will be further split, if ! necessary, to ensure stability. Cy <= 1 away from poles; Cy $\le$ ! 1/2 at the latitudes closest to the poles. ! ! !REVISION HISTORY: ! ! SJL 99.04.13: Delivery ! WS 99.05.26: Added jfirst:jlast concept; im, jm, km as parameters ! replaced IMR, JMR, JNP, NL with IM, JM-1, JM and KM ! WS 99.09.27: Documentation; indentation; jfirst:jlast ! WS 99.09.30: Ghosting; loop limits; full parallelization; tested ! SJL 99.10.15: nsplt migrated to outermost loop to remove bug ! SJL 99.12.19: Local 2D arrays trimmed! ! WS 00.05.14: Renamed ghost indices as per Kevin's definitions ! WS 00.07.13: Changed PILGRIM API ! AAM 00.08.29: Added kfirst, klast ! AAM 01.06.27: Added y communicators ! SJL 30.07.01: MPI optimization/simplification ! WS 02.04.24: New mod_comm interfaces ! WS 02.07.04: Fixed 2D decomposition bug dest/src for mp_send3d ! WS 03.11.19: Merged in CAM changes by Mirin ! WS 03.12.03: Added GRID as argument, dynamics_vars removed ! WS 04.08.25: Simplification of interface with GRID ! WS 04.10.07: Removed dependency on spmd_dyn; info now in GRID ! WS 05.04.04: Transitioned to type T_TRACERS (supports r4 and r8) ! WS 05.04.09: Each tracer now ghosted individually (save buffers) ! WS 05.04.12: Full support for either r4 or r8 tracers ! WS 05.05.25: Merged CAM and GEOS5, e.g. nsplt(k) opt. in CAM ! PW 05.10.12: Changes for Cray X1(E), alternative implementation ! of double buffering logic ! WS 06.09.08: Magic numbers are now F90 parameters ! !EOP !--------------------------------------------------------------------- !BOC real(r8), parameter :: D1EM10 = 1.0e-10_r8 real(r8), parameter :: D1_0 = 1.0_r8 real(r8), parameter :: D0_0 = 0.0_r8 ! Local variables: ! 2d arrays real(r8) a2(grid%im,grid%jfirst:grid%jlast) real(r8) fx(grid%im,grid%jfirst:grid%jlast) real(r8) fy(grid%im,grid%jfirst:grid%jlast+1) real(r8) cymax(grid%kfirst:grid%klast) ! Temporary r8 array for Q real(r8) :: & q_r8(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast,1:2) real(r8) dp2(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast) logical ffsl(grid%jm,grid%kfirst:grid%klast) integer :: nsplt(grid%kfirst:grid%klast) integer :: im, jm, km ! Dimensions integer :: ng ! Max number of ghost latitudes integer :: jfirst, jlast, kfirst, klast ! YZ decomposition limits integer :: cur, nxt ! current and next q_r8 buffer indices integer i, j, k integer it, iq, kq, max_nsplt integer :: k_courant, kend integer ktot integer js1gd, js2g0, js2gd, jn2g0,jn2gd,jn1g1,jn1gd #if defined( SPMD ) integer :: dest, src #endif real(r8) cy_global real(r8) frac real(r8) cmax real(r8) sum1, sum2 cur = 1 nxt = 2 im = grid%im jm = grid%jm km = grid%km ng = grid%ng_d jfirst = grid%jfirst jlast = grid%jlast kfirst = grid%kfirst klast = grid%klast ktot = klast - kfirst + 1 js2g0 = max(2,jfirst) jn2g0 = min(jm-1,jlast) jn1g1 = min(jm,jlast+1) js1gd = max(1,jfirst-ng) ! NG latitudes on S (starting at 1) js2gd = max(2,jfirst-ng) ! NG latitudes on S (starting at 2) jn2gd = min(jm-1,jlast+ng) ! NG latitudes on S (ending at jm-1) jn1gd = min(jm,jlast+ng) ! NG latitudes on N (ending at jm) #if defined( SPMD ) call FVstartclock(grid,'---TRAC2D_COMM') call mp_send4d_ns( grid%commyz, im, jm, km, & 1, jfirst, jlast, kfirst, klast, & ng, ng, cx ) ! Send one latitude of both cy and mfy to the south dest = grid%iam-1 src = grid%iam+1 if ( mod(grid%iam,grid%npr_y) == 0 ) dest = -1 if ( mod(grid%iam+1,grid%npr_y) == 0 ) src = -1 call mp_send3d_2( grid%commyz, dest, src, im, jm, km, & 1, im, jfirst, jlast+1, kfirst, klast, & 1, im, jfirst, jfirst, kfirst, klast, cy, mfy) call FVstopclock(grid,'---TRAC2D_COMM') #endif !$omp parallel do default(shared) private(i,j,k,cmax) do k=kfirst,klast cymax(k) = D0_0 do j=js2g0,jlast cmax = D0_0 do i=1,im cmax = max( abs(cy(i,j,k)), cmax) enddo cymax(k) = max(cymax(k), cmax*(D1_0 + grid%sine(j)**16) ) enddo enddo #if defined( SPMD ) call FVstartclock(grid,'---TRAC2D_COMM') call mp_recv4d_ns( grid%commyz, im, jm, km, & 1, jfirst, jlast, kfirst, klast, & ng, ng, cx ) call mp_recv3d_2( grid%commyz, src, im, jm, km, & 1, im, jfirst, jlast+1, kfirst, klast, & 1, im, jlast+1, jlast+1, kfirst, klast, cy, mfy) call parcollective( grid%comm_y, MAXOP, ktot, cymax ) call FVstopclock(grid,'---TRAC2D_COMM') #endif !--------------------------------------------------------------------- ! Determine the required value of nsplt for each level !--------------------------------------------------------------------- nsplt(:) = int( D1_0 + cymax(:) ) max_nsplt = maxval( nsplt(:) ) #if defined( SPMD ) call FVstartclock(grid,'---TRAC2D_COMM') call parcollective( grid%comm_z, MAXOP, max_nsplt ) ! Find global max call FVstopclock(grid,'---TRAC2D_COMM') #endif #ifndef WACCM_MOZART nsplt(:) = max_nsplt #endif do k_courant = klast,kfirst,-1 if( nsplt(k_courant) > 1 ) then exit end if end do k_courant = max( kfirst,k_courant ) !!! if (max_nsplt /= 1) write(iulog,*) 'trac2d: max_nsplt,k_courant = ', max_nsplt,k_courant !!! write(iulog,*) "max_nsplt", max_nsplt, "k_cour", k_courant, "nsplt", nsplt(:) !$omp parallel do default(shared) private(i,j,k,frac) schedule(dynamic,1) #if !defined(USE_OMP) !CSD$ PARALLEL DO PRIVATE (I, J, K, FRAC) #endif do 4000 k=kfirst,klast if( nsplt(k) .ne. 1 ) then frac = D1_0 / nsplt(k) do j=js2gd,jn2gd do i=1,im cx(i,j,k) = cx(i,j,k) * frac ! cx ghosted on N*ng S*ng enddo enddo do j=js2g0,jn2g0 do i=1,im mfx(i,j,k) = mfx(i,j,k) * frac enddo enddo do j=js2g0,jn1g1 do i=1,im cy(i,j,k) = cy(i,j,k) * frac ! cy ghosted on N mfy(i,j,k) = mfy(i,j,k) * frac ! mfy ghosted on N enddo enddo endif do j=js2g0,jn2g0 do i=1,im if(cy(i,j,k)*cy(i,j+1,k) > D0_0) then if( cy(i,j,k) > D0_0) then va(i,j,k) = cy(i,j,k) else va(i,j,k) = cy(i,j+1,k) ! cy ghosted on N endif else va(i,j,k) = D0_0 endif enddo enddo ! Check if FFSL extension is needed. do j=js2gd,jn2gd ! flux needed on N*ng S*ng ffsl(j,k) = .false. do i=1,im if( abs(cx(i,j,k)) > D1_0 ) then ! cx ghosted on N*ng S*ng ffsl(j,k) = .true. exit endif enddo enddo ! Scale E-W mass fluxes by CX if FFSL do j=js2g0,jn2g0 if( ffsl(j,k) ) then do i=1,im flx(i,j,k) = mfx(i,j,k) / sign( max(abs(cx(i,j,k)), D1EM10), & cx(i,j,k) ) enddo else do i=1,im flx(i,j,k) = mfx(i,j,k) enddo endif enddo 4000 continue #if !defined(USE_OMP) !CSD$ END PARALLEL DO #endif call FVbarrierclock(grid,'sync_trac2d_tracer',grid%commyz) call FVstartclock(grid,'---TRAC2D_TRACER') do 6000 it=1, max_nsplt if ( it == 1 ) then kend = klast ! The entire vertical slab needs to be considered else kend = k_courant ! Only the subset including courant # > 1 considered endif ! WS 05.04.06: send only the first tracer the rest at end of do iq loop ! NOTE: there is per definition at least one tracer q_r8(1:im,jfirst:jlast,kfirst:kend,1) = & tracer(1:im,jfirst:jlast,kfirst:kend,nlo) #if defined( SPMD ) call FVstartclock(grid,'---TRAC2D_TRACER_COMM') call mp_send4d_ns( grid%commyz, im, jm, km, & 1, jfirst, jlast, kfirst, kend, & ng, ng, q_r8(1,jfirst-ng,kfirst,1) ) call FVstopclock(grid,'---TRAC2D_TRACER_COMM') #endif !$omp parallel do default(shared) private(i,j,k,sum1,sum2) do 3000 k=kfirst,kend if (it <= nsplt(k)) then do j=js2g0,jn2g0 do i=1,im-1 dp2(i,j,k) = dp1(i,j,k) + mfx(i,j,k) - mfx(i+1,j,k) + & (mfy(i,j,k) - mfy(i,j+1,k)) * grid%acosp(j) enddo dp2(im,j,k) = dp1(im,j,k) + mfx(im,j,k) - mfx(1,j,k) + & (mfy(im,j,k) - mfy(im,j+1,k)) * grid%acosp(j) enddo if ( jfirst == 1 ) then sum1 = D0_0 do i=1,im sum1 = sum1 + mfy(i,2,k) end do sum1 = - sum1 * grid%rcap do i=1,im dp2(i,1,k) = dp1(i,1,k) + sum1 enddo endif if ( jlast == jm ) then sum2 = D0_0 do i=1,im sum2 = sum2 + mfy(i,jm,k) end do sum2 = sum2 * grid%rcap do i=1,im dp2(i,jm,k) = dp1(i,jm,k) + sum2 enddo endif endif 3000 continue do iq = nlo, nhi #if defined( SPMD ) call FVstartclock(grid,'---TRAC2D_TRACER_COMM') ! ! The buffer indices are exchanged, so that cur points to the current buffer, ! while nxt points to the one which will be used next. ! if ( mod(iq-nlo+1,2) == 0 ) then cur = 2 nxt = 1 else cur = 1 nxt = 2 endif call mp_recv4d_ns( grid%commyz, im, jm, km, & 1, jfirst, jlast, kfirst, kend, & ng, ng, q_r8(1,jfirst-ng,kfirst,cur) ) ! ! Pre-send the next tracer ! if ( iq < nhi ) then q_r8(1:im,jfirst:jlast,kfirst:kend,nxt) = & tracer(1:im,jfirst:jlast,kfirst:kend,iq+1) call mp_send4d_ns( grid%commyz, im, jm, km, & 1, jfirst, jlast, kfirst, kend, & ng, ng, q_r8(1,jfirst-ng,kfirst,nxt) ) endif call FVstopclock(grid,'---TRAC2D_TRACER_COMM') #else ! ! No message passing -- simply copy the tracer into q_r8 ! q_r8(1:im,jfirst:jlast,kfirst:kend,cur) = & tracer(1:im,jfirst:jlast,kfirst:kend,iq) #endif #if !defined(INNER_OMP) !$omp parallel do default(shared) & !$omp private(i, j, k, kq, fx, fy, a2) #endif #if (!defined USE_OMP) !CSD$ PARALLEL DO PRIVATE (I, J, K, KQ, FX, FY, A2) #endif do 5000 k=kfirst,kend if ( it <= nsplt(k) ) then call tp2c(a2, va(1,jfirst,k), q_r8(1:,jfirst-ng:,k,cur), & cx(1,jfirst-ng,k) , cy(1,jfirst,k), & im, jm, iord, jord, ng, & fx, fy, ffsl(1,k), grid%rcap, grid%acosp, & flx(1,jfirst,k), mfy(1,jfirst,k), & grid%cosp, 1, jfirst, jlast ) do j=jfirst,jlast do i=1,im q_r8(i,j,k,cur) = q_r8(i,j,k,cur)*dp1(i,j,k) + a2(i,j) enddo enddo if (fill) call fillxy (q_r8(1:,jfirst:,k,cur), im, jm, jfirst, & jlast, grid%acap, grid%cosp, grid%acosp) do j=jfirst,jlast do i=1,im tracer(i,j,k,iq) = q_r8(i,j,k,cur) / dp2(i,j,k) enddo enddo endif 5000 continue #if (!defined USE_OMP) !CSD$ END PARALLEL DO #endif enddo ! End of do iq=nlo, nhi !$omp parallel do private(i, j, k) schedule( dynamic,1 ) do k=kfirst,kend if ( it < nsplt(k) ) then do j=jfirst,jlast do i=1,im dp1(i,j,k) = dp2(i,j,k) enddo enddo endif enddo 6000 continue call FVstopclock(grid,'---TRAC2D_TRACER') return !EOC end subroutine trac2d !-----------------------------------------------------------------------