程序如下:

  PROGRAM ffttest
  USE clips_library
  IMPLICIT NONE
  INTEGER, PARAMETER :: maxn=10000,mfax=13,nvol=1000000
 ! real and imag parts
  REAL(8), ALLOCATABLE, DIMENSION(:,:) :: a,b
  REAL(8), DIMENSION(maxn) :: trigs
  INTEGER, DIMENSION(mfax) :: ifax
  INTEGER :: irep,nrep,n,isign,i,j
  INTEGER :: nloop,nend,nnode,ier,istart,nthread
  REAL(8) :: cputm,t1,t2,t,ops,ainv
  INTEGER, EXTERNAL :: omp_get_num_procs,omp_get_num_threads
  nnode=omp_get_num_procs()
  WRITE(6,'('' ffttest with '',i2,'' processors '')')
  WRITE(6,'('' how many threads to use ? '')')
  READ(5,*)nthread
  CALL omp_set_num_threads(nthread)
  OPEN(5,FILE='gpfa.con',STATUS='unknown')
  WRITE(6,300)
  300 FORMAT(' n nrep nops time Mflop')
 ! single sequence
  WRITE(6,'('' single sequence results '')')
  DO WHILE(.TRUE.)
  READ(5,*,END=998) n,nrep
  nrep=nvol/n
  ALLOCATE(a(n,nrep),STAT=ier)
  ALLOCATE(b(n,nrep),STAT=ier)
  a=1.0d0; b=0.0d0
  ainv=1.0d0/REAL(n)
 ! initialise Fourier transform routines
 ! New Temperton GPFA
  CALL setgpfa(trigs,n)
  t1=clips_time()
 ! $OMP parallel default(shared) private(irep,i,istart)
 ! nthread=omp_get_num_threads()
 ! WRITE(6,'('' nthreads = '',i2)')nthread
 ! $OMP do
  DO irep=1,nrep
 ! New Temperton GPFA
  CALL gpfa(a(:,irep),b(:,irep),trigs,1,n,n,1,1)
  CALL gpfa(a(:,irep),b(:,irep),trigs,1,n,n,1,-1)
 ! need to normalise
   a(:,irep)=a(:,irep)*ainv
   b(:,irep)=b(:,irep)*ainv
  END DO
 ! $OMP end parallel
  t2=clips_time()
 ! assume 5nLOG2(n) ops - really only for radix 2
 ! and its done it twice!
  ops=log(1.0d0*n)/log(2.0d0)*n*nrep*10
  DO i=1,n
   IF(abs(a(i,1)-1.0d0)+abs(b(i,1)-0.0d0)>1.0d-4)THEN
    WRITE(6,'('' error in result'',i4,2g12.5)')i,a(i,1)
    STOP
   END IF
  END DO
  t=t2-t1
  WRITE(6,200) n,nrep,ops,t,ops/(t*1.0D6)
200 FORMAT(i5,i6,3(1pe12.3))
  DEALLOCATE(a,b)
  END DO
998 WRITE(6,*) 'end of data reached'

  END PROGRAM ffttest