#define G 1.0 #ifdef PGI #define USE_F1 #endif #define DISTMACRO #ifdef DISTMACRO #define dist(x,y,z,i,j) ((x(i)-x(j))**2. + (y(i)-y(j))**2. + (z(i)-z(j))**2.) #else pure real function dist(x, y, z, i, j) implicit none real, dimension(:), intent(in) :: x, y, z integer, intent(in) :: i, j dist = (x(i)-x(j))**2. + (y(i)-y(j))**2. + (z(i)-z(j))**2. end function dist #endif #ifdef PGI #ifdef DISTMACRO #define dist_p(x,y,z,i,j) ((x(i)-x(j))**2. + (y(i)-y(j))**2. + (z(i)-z(j))**2.) #else pure function dist_p(x, y, z, i, j) implicit none real, dimension(:), intent(in) :: x, y, z integer, dimension(:), intent(in) :: i, j real, dimension(size(i)) :: dist_p dist_p = (x(i)-x(j))**2. + (y(i)-y(j))**2. + (z(i)-z(j))**2. end function dist_p #endif #endif subroutine force0(n, x, y, z, mass, cutoff) implicit none integer :: n, i, j real, dimension(n) :: x, y, z, mass real, dimension(n) :: Fx, Fy, Fz real, dimension(n,n) :: d real :: k, F, cutoff #ifndef DISTMACRO interface pure real function dist(x, y, z, i, j) real, dimension(:), intent(in) :: x, y, z integer, intent(in) :: i, j end function dist end interface #endif Fx = 0. Fy = 0. Fz = 0. do i = 1, n do j = i+1, n d(i,j) = dist(x,y,z,i,j) enddo enddo do i = 1, n do j = i+1, n if (d(i,j) <= cutoff) then k = G * mass(i) * mass(j) /(d(i,j)**1.5) F = k * (x(i) - x(j)) Fx(i) = Fx(i) + F Fx(j) = Fx(j) - F F = k * (y(i) - y(j)) Fy(i) = Fy(i) + F Fy(j) = Fy(j) - F F = k * (z(i) - z(j)) Fz(i) = Fz(i) + F Fz(j) = Fz(j) - F end if end do end do print *, Fx(1), Fy(1), Fz(1) print *, Fx(n), Fy(n), Fz(n) end subroutine force0 #ifdef USE_F1 subroutine force1(n, x, y, z, mass, cutoff) implicit none integer :: n, i, j real :: cutoff real, dimension(n) :: x, y, z, mass real, dimension(n) :: Fx, Fy, Fz !xhpf$ distribute(block) :: Fx, Fy, Fz real, dimension(n,n) :: k, Fxx, Fyy, Fzz logical, dimension(n,n) :: mask !hpf$ distribute(*, cyclic) :: k, Fxx, Fyy, Fzz, mask real, dimension(n) :: xcol, xrow, ycol, yrow, zcol, zrow, mcol, mrow !hpf$ align xrow(*) with mask(*,1) !hpf$ align xcol(*) with mask(1,*) #ifndef DISTMACRO interface pure real function dist(x, y, z, i, j) real, dimension(:), intent(in) :: x, y, z integer, intent(in) :: i, j end function dist end interface #endif xcol = x xrow = x ycol = y yrow = y zcol = z zrow = z mcol = mass mrow = mass Fx = 0. Fy = 0. Fz = 0. mask = .false. forall (i = 1:n) forall (j = i+1:n, dist(x,y,z,i,j) <= cutoff) mask(i,j) = .true. ! mask(j,i) = .true. k(i,j) = G * mrow(i) * mcol(j)/(dist(x,y,z,i,j)**1.5) Fxx(i,j) = k(i,j) * (xcol(i) - xrow(j)) ! Fxx(j,i) = k(i,j) * (xrow(j) - xcol(i)) Fyy(i,j) = k(i,j) * (ycol(i) - yrow(j)) ! Fyy(j,i) = k(i,j) * (yrow(j) - ycol(i)) Fzz(i,j) = k(i,j) * (zcol(i) - zrow(j)) ! Fzz(j,i) = k(i,j) * (zrow(j) - zcol(i)) end forall end forall Fx = sum(Fxx, dim=2, mask=mask) - sum(Fxx, dim=1, mask=mask) Fy = sum(Fyy, dim=2, mask=mask) - sum(Fyy, dim=1, mask=mask) Fz = sum(Fzz, dim=2, mask=mask) - sum(Fzz, dim=1, mask=mask) print *, Fx(1), Fy(1), Fz(1) print *, Fx(n), Fy(n), Fz(n) end subroutine force1 #endif subroutine force1b(n, x, y, z, mass, cutoff) implicit none integer :: n, i, j real :: cutoff real, dimension(n) :: x, y, z, mass real, dimension(n) :: Fx, Fy, Fz !xhpf$ distribute(block) :: Fx, Fy, Fz real, dimension(n,n) :: k, F, d real, dimension(n,n) :: Fxx, Fyy, Fzz logical, dimension(n,n) :: mask !xhpf$ distribute(cyclic, cyclic) :: k, F !xhpf$ distribute(cyclic, cyclic) :: mask #ifndef DISTMACRO interface pure real function dist(x, y, z, i, j) real, dimension(:), intent(in) :: x, y, z integer, intent(in) :: i, j end function dist end interface #endif Fx = 0. Fy = 0. Fz = 0. mask = .false. forall (i = 1:n) forall (j = i+1:n) d(i,j) = dist(x,y,z,i,j) end forall end forall forall (i = 1:n) forall (j = i+1:n, d(i,j) <= cutoff) mask(i,j) = .true. mask(j,i) = .true. k(i,j) = G * mass(i) * mass(j)/(dist(x,y,z,i,j)**1.5) Fxx(i,j) = k(i,j) * (x(i) - x(j)) Fxx(j,i) = -k(i,j) * (x(i) - x(j)) Fyy(i,j) = k(i,j) * (y(i) - y(j)) Fyy(j,i) = -k(i,j) * (y(i) - y(j)) Fzz(i,j) = k(i,j) * (z(i) - z(j)) Fzz(j,i) = -k(i,j) * (z(i) - z(j)) end forall end forall Fx = sum(Fxx, dim=2, mask=mask) Fy = sum(Fyy, dim=2, mask=mask) Fz = sum(Fzz, dim=2, mask=mask) print *, Fx(1), Fy(1), Fz(1) print *, Fx(n), Fy(n), Fz(n) end subroutine force1b #ifdef PGI subroutine force2(n, x, y, z, mass, cutoff) use hpf_library implicit none integer :: n, nz, i, j real :: cutoff real, dimension(n) :: x, y, z, mass integer, dimension(n,n) :: f_seg, f_ind real, dimension(n, n) :: d logical, dimension(n,n) :: mask !xhpf$ distribute(cyclic, cyclic) :: f_seg, f_ind, mask integer, dimension(:), allocatable :: idx1, idx2 real, dimension(:), allocatable :: F, k real, dimension(n) :: zero real, dimension(n) :: Fx, Fy, Fz !xhpf$ distribute(block, *) :: zero, Fx, Fy, Fz, F, k #ifndef DISTMACRO interface pure real function dist(x, y, z, i, j) real, dimension(:), intent(in) :: x, y, z integer, intent(in) :: i, j end function dist end interface interface pure function dist_p(x, y, z, i, j) real, dimension(:), intent(in) :: x, y, z integer, dimension(:), intent(in) :: i, j real, dimension(size(i)) :: dist_p end function dist_p end interface #endif mask = .false. zero = 0. forall (i = 1:n) forall (j = i+1:n) d(i,j) = dist(x,y,z,i,j) end forall end forall forall (i = 1:n) forall (j = i+1:n, d(i,j) <= cutoff) f_seg(i,j) = i f_ind(i,j) = j mask(i,j) = .true. end forall end forall nz = count(mask) allocate(idx1(nz)) allocate(idx2(nz)) allocate(F(nz)) allocate(k(nz)) idx1 = pack(f_seg, mask) idx2 = pack(f_ind, mask) forall (i = 1:nz) k(i) = G*mass(idx1(i))*mass(idx2(i))/(d(idx1(i),idx2(i))**1.5) end forall ! k = G*mass(idx1)*mass(idx2)/(dist_p(x,y,z,idx1,idx2) ** 1.5) F = k * (x(idx1) - x(idx2)) Fx = sum_scatter(F, zero, idx1) - sum_scatter(F, zero, idx2) F = k * (y(idx1) - y(idx2)) Fy = sum_scatter(F, zero, idx1) - sum_scatter(F, zero, idx2) F = k * (z(idx1) - z(idx2)) Fz = sum_scatter(F, zero, idx1) - sum_scatter(F, zero, idx2) print *, Fx(1), Fy(1), Fz(1) print *, Fx(n), Fy(n), Fz(n) end subroutine force2 #endif program main implicit none integer :: i, j, k, n, max real, dimension(:), allocatable :: x, y, z, mass !xhpf$ distribute(block) :: x, y, z, mass integer :: t0, t1, t2, t3, rate real :: cutoff cutoff = 4. cutoff = cutoff * cutoff print *, "Enter N" read *, n allocate(x(n)) allocate(y(n)) allocate(z(n)) allocate(mass(n)) print *, "P = ", number_of_processors() print *, "N = ", n max = n ** (1./3.) + .5 forall (i = 1:max, j = 1:max, k = 1:max) x((i-1)*max*max+(j-1)*max+k) = i y((i-1)*max*max+(j-1)*max+k) = j z((i-1)*max*max+(j-1)*max+k) = k end forall mass = 1. call system_clock(count=t0, count_rate=rate) call force0(n, x, y, z, mass, cutoff) call system_clock(count=t1) print *, "Count_rate is ", rate print *, "Version 0 took ", (t1-t0)/(rate+0.), " sec" #ifdef USE_F1 call force1(n, x, y, z, mass, cutoff) call system_clock(count=t2) print *, "Version 1 took ", (t2-t1)/(rate+0.), " sec" #endif call system_clock(count=t1) call force1b(n, x, y, z, mass, cutoff) call system_clock(count=t2) print *, "Version 1b took ", (t2-t1)/(rate+0.), " sec" #ifdef PGI call force2(n, x, y, z, mass, cutoff) call system_clock(count=t3) print *, "Version 2 took ", (t3-t2)/(rate+0.), " sec" #endif stop end program main