程序如下:
PROGRAM ffttest
USE clips_library
IMPLICIT NONE
INTEGER, PARAMETER :: maxn=10000,mfax=13,nvol=1000000
! real and imag parts
REAL(8), ALLOCATABLE, DIMENSION(:,:) :: a,b
REAL(8), DIMENSION(maxn) :: trigs
INTEGER, DIMENSION(mfax) :: ifax
INTEGER :: irep,nrep,n,isign,i,j
INTEGER :: nloop,nend,nnode,ier,istart,nthread
REAL(8) :: cputm,t1,t2,t,ops,ainv
INTEGER, EXTERNAL :: omp_get_num_procs,omp_get_num_threads
nnode=omp_get_num_procs()
WRITE(6,'('' ffttest with '',i2,'' processors '')')
WRITE(6,'('' how many threads to use ? '')')
READ(5,*)nthread
CALL omp_set_num_threads(nthread)
OPEN(5,FILE='gpfa.con',STATUS='unknown')
WRITE(6,300)
300 FORMAT(' n nrep nops time Mflop')
! single sequence
WRITE(6,'('' single sequence results '')')
DO WHILE(.TRUE.)
READ(5,*,END=998) n,nrep
nrep=nvol/n
ALLOCATE(a(n,nrep),STAT=ier)
ALLOCATE(b(n,nrep),STAT=ier)
a=1.0d0; b=0.0d0
ainv=1.0d0/REAL(n)
! initialise Fourier transform routines
! New Temperton GPFA
CALL setgpfa(trigs,n)
t1=clips_time()
! $OMP parallel default(shared) private(irep,i,istart)
! nthread=omp_get_num_threads()
! WRITE(6,'('' nthreads = '',i2)')nthread
! $OMP do
DO irep=1,nrep
! New Temperton GPFA
CALL gpfa(a(:,irep),b(:,irep),trigs,1,n,n,1,1)
CALL gpfa(a(:,irep),b(:,irep),trigs,1,n,n,1,-1)
! need to normalise
a(:,irep)=a(:,irep)*ainv
b(:,irep)=b(:,irep)*ainv
END DO
! $OMP end parallel
t2=clips_time()
! assume 5nLOG2(n) ops - really only for radix 2
! and its done it twice!
ops=log(1.0d0*n)/log(2.0d0)*n*nrep*10
DO i=1,n
IF(abs(a(i,1)-1.0d0)+abs(b(i,1)-0.0d0)>1.0d-4)THEN
WRITE(6,'('' error in result'',i4,2g12.5)')i,a(i,1)
STOP
END IF
END DO
t=t2-t1
WRITE(6,200) n,nrep,ops,t,ops/(t*1.0D6)
200 FORMAT(i5,i6,3(1pe12.3))
DEALLOCATE(a,b)
END DO
998 WRITE(6,*) 'end of data reached'
END PROGRAM ffttest
|