Files
CGH0S7 cb252f5ea2 Optimize numerical algorithms with Intel oneMKL
- FFT.f90: Replace hand-written Cooley-Tukey FFT with oneMKL DFTI
   - ilucg.f90: Replace manual dot product loop with BLAS DDOT
   - gaussj.C: Replace Gauss-Jordan elimination with LAPACK dgesv/dgetri
   - makefile.inc: Add MKL include paths and library linking

   All optimizations maintain mathematical equivalence and numerical precision.
2026-01-16 10:58:11 +08:00

522 lines
16 KiB
Fortran

! adopted from J. THORNBURG's code dilucg.f
subroutine ILUCG(N,IA,JA,A,B,X,ITEMP,RTEMP,EPS,MAXITER,ISTATUS)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION IA(*),JA(*),A(*),B(*),X(*),ITEMP(*),RTEMP(*)
!
! INCOMPLETE LU DECOMPOSITION-CONJUGATE GRADIENT
! - -- - -
! WHERE:
! |N| IS THE NUMBER OF EQUATIONS. IF N < 0, ITEMP AND
! RTEMP CONTAIN THE ILU FROM A PREVIOUS CALL AND
! B AND X ARE THE NEW RHS AND INITIAL GUESS.
! IA IS AN INTEGER ARRAY DIMENSIONED |N|+1. IA(I) IS THE
! INDEX INTO ARRAYS JA AND A OF THE FIRST NON-ZERO
! ELEMENT IN ROW I. LET MAX=IA(|N|+1)-IA(1).
! JA IS AN INTEGER ARRAY DIMENSIONED MAX. JA(K) GIVES
! THE COLUMN NUMBER OF A(K).
! A IS A DOUBLE PRECISION ARRAY DIMENSIONED MAX. IT CONTAINS THE
! NONZERO ELEMENTS OF THE MATRIX STORED BY ROW.
! B CONTAINS THE RHS VECTOR.
! X IS A DOUBLE PRECISION ARRAY DIMENSIONED |N|. ON ENTRY, IT CONTAINS
! AN INITIAL ESTIMATE; ON EXIT, THE SOLUTION.
! ITEMP IS AN INTEGER SCRATCH ARRAY DIMENSIONED 3*(|N|+MAX)+2.
! RTEMP IS A DOUBLE PRECISION SCRATCH ARRAY DIMENSIONED 4*|N|+MAX.
! EPS IS THE CONVERGENCE CRITERIA. IT SPECIFIES THE RELATIVE
! ERROR ALLOWED IN THE SOLUTION. TO BE PRECISE, CONVERGENCE
! IS DEEMED TO HAVE OCCURED WHEN THE INFINITY-NORM OF THE
! CHANGE IN THE SOLUTION IN ONE ITERATION IS .LE. EPS * THE
! INFINITY-NORM OF THE CURRENT SOLUTION. HOWEVER, IF EPS
! .LT. 0.0D0, IT IS INTERNALLY SCALED BY THE MACHINE PRECISION,
! SO THAT, FOR EXAMPLE, EPS = -256.0D0 WILL ALLOW THE LAST 8 BITS
! OF THE SOLUTION TO BE IN ERROR.
! MAXITER GIVES THE REQUESTED NUMBER OF ITERATIONS,
! OR IS 0 FOR "NO LIMIT".
! ISTATUS IS AN INTEGER VARIABLE, WHICH IS SET TO:
! -I IF THERE IS AN ERROR IN THE MATRIX STRUCTURE IN ROW I
! (SUCH AS A ZERO ELEMENT ON THE DIAGONAL).
! 0 IF THE ITERATION FAILED TO REACH THE CONVERGENCE CRITERION
! IN ITER ITERATIONS.
! +I IF THE ITERATION CONVERGED IN I ITERATIONS.
! REFERENCE:
! D.S.KERSHAW,"THE INCOMPLETE CHOLESKY-CONJUGATE GRADIENT
! METHOD FOR INTERATIVE SOLUTION OF LINEAR EQUATIONS",
! J.COMPUT.PHYS. JAN 1978 PP 43-65
!
LOGICAL DLU0
NP=IABS(N)
ISTATUS=0
IF (NP.EQ.0) GO TO 20
! CALCULATE INDICES FOR BREAKING UP TEMPORARY ARRAYS.
N1=NP+1
MAX=IA(N1)-IA(1)
ILU=1
JLU=ILU+N1
ID=JLU+MAX
IC=ID+NP
JC=IC+N1
JCI=JC+MAX
IR=1
IP=IR+NP
IS1=IP+NP
IS2=IS1+NP
IALU=IS2+NP
IF (N.LT.0) GO TO 10
! DO INCOMPLETE LU DECOMPOSITION
IF (DLU0(NP,IA,JA,A,ITEMP(IC),ITEMP(JC),ITEMP(JCI),RTEMP(IALU), &
ITEMP(ILU),ITEMP(JLU),ITEMP(ID),RTEMP(IR),IERROR)) GOTO 20
! AND DO CONJUGATE GRADIENT ITERATIONS
10 CALL DNCG0(NP,IA,JA,A,B,X,ITEMP(ILU),ITEMP(JLU),ITEMP(ID), &
RTEMP(IALU),RTEMP(IR),RTEMP(IP),RTEMP(IS1),RTEMP(IS2), &
EPS,MAXITER,ITER)
! ITER IS ACTUAL NUMBER OF ITERATIONS (NEGATIVE IF NO CONVERGENCE)
ISTATUS = ITER
IF (ITER .LT. 0) ISTATUS = 0
RETURN
! ERROR RETURN FROM INCOMPLETE LU DECOMPOSITION
20 ISTATUS = -IERROR
RETURN
END
!------------------------------------------------------------------------------
LOGICAL FUNCTION DLU0(N,IA,JA,A,IC,JC,JCI,ALU,ILU,JLU,ID,V,IE)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION IA(*),JA(*),A(*),IC(*),JC(*),JCI(*),ALU(*),ILU(*),JLU(*),ID(N),V(N)
LOGICAL NODIAG
COMMON /ICBD00/ ICBAD
! INCOMPLETE LU DECOMPOSITION
! WHERE:
! N,IA,JA, AND A ARE DESCRIBED IN SUBROUTINE ILUCG
! IC IS AN INTEGER ARRAY DIMENSIONED N+1, IC(J) GIVES THE
! INDEX OF THE FIRST NONZERO ELEMENT IN COLMN J IN
! ARRAY JC.
! JC IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
! JC(K) GIVES THE ROW NUMBER OF THE K'TH ELEMENT IN
! THE COLUMN STRUCTURE.
! JCI IS AN INTEGER ARRAY WITH THE SAME DIMENSION AS A.
! JCI(K) GIVES THE INDEX INTO ARRAY A OF THE K'TH ELEMENT
! OF THE COLUMN STRUCTURE.
! ALU HAS THE SAME DIMENSION AS A. ON EXIT, IT WILL
! CONTAIN THE INCOMPLETE LU DECOMPOSITION OF A WITH THE
! RECIPROCALS OF THE DIAGONAL ELEMENTS OF U.
! ILU AND JLU CORRESPONDS TO IA AND JA BUT FOR ALU.
! ID IS AN INTEGER ARRAY DIMENSIONED N. IT CONTAINS
! INDICES TO THE DIAGONAL ELEMENTS OF U.
! V IS A REAL SCRATCH VECTOR OF LENGTH N.
! IE GIVES THE ROW NUMBER IN ERROR IF AN ERROR OCCURED
! (RETURN VALUE .TRUE.), OR IS UNUSED IF ALL IS WELL
! (RETURN VALUE .FALSE.).
!
! RETURN VALUE = .FALSE. IF ALL IS WELL, .TRUE. IF ERROR.
!
! NOTE: DLU0 SETS ARGUMENTS IC THROUGH V.
!
ICBAD=0
! ZERO COUNT OF ZERO DIAGONAL ELEMENTS IN U.
!
! FIRST CHECK STRUCTURE OF A AND BUILD COLUMN STRUCTURE
DO 10 I=1,N
IC(I)=0
10 CONTINUE
DO 30 I=1,N
KS=IA(I)
KE=IA(I+1)-1
NODIAG=.TRUE.
DO 20 K=KS,KE
J=JA(K)
IF (J.LT.1.OR.J.GT.N) GO TO 210
IC(J)=IC(J)+1
IF (J.EQ.I) NODIAG=.FALSE.
20 CONTINUE
IF (NODIAG) GO TO 210
30 CONTINUE
! MAKE IC INTO INDICES
KOLD=IC(1)
IC(1)=1
DO 40 I=1,N
KNEW=IC(I+1)
IF (KOLD.EQ.0) GO TO 210
IC(I+1)=IC(I)+KOLD
KOLD=KNEW
40 CONTINUE
! SET JC AND JCI FOR COLUMN STRUCTURE
DO 60 I=1,N
KS=IA(I)
KE=IA(I+1)-1
DO 50 K=KS,KE
J=JA(K)
L=IC(J)
IC(J)=L+1
JC(L)=I
JCI(L)=K
50 CONTINUE
60 CONTINUE
! FIX UP IC
KOLD=IC(1)
IC(1)=1
DO 70 I=1,N
KNEW=IC(I+1)
IC(I+1)=KOLD
KOLD=KNEW
70 CONTINUE
! FIND SORTED ROW STRUCTURE FROM SORTED COLUMN STRUCTURE
NP=N+1
DO 80 I=1,NP
ILU(I)=IA(I)
80 CONTINUE
! MOVE ELEMENTS, SET JLU AND ID
DO 100 J=1,N
KS=IC(J)
KE=IC(J+1)-1
DO 90 K=KS,KE
I=JC(K)
L=ILU(I)
ILU(I)=L+1
JLU(L)=J
KK=JCI(K)
ALU(L)=A(KK)
IF (I.EQ.J) ID(J)=L
90 CONTINUE
100 CONTINUE
! RESET ILU (COULD JUST USE IA)
DO 110 I=1,NP
ILU(I)=IA(I)
110 CONTINUE
! FINISHED WITH SORTED COLUMN AND ROW STRUCTURE
!
! DO LU DECOMPOSITION USING GAUSSIAN ELIMINATION
DO 120 I=1,N
V(I)=0.0D0
120 CONTINUE
DO 200 IROW=1,N
I=ID(IROW)
PIVOT=ALU(I)
IF (PIVOT.NE.0.0D0) GO TO 140
! THIS CASE MAKES THE ILU LESS ACCURATE
ICBAD=ICBAD+1
KS=ILU(IROW)
KE=ILU(IROW+1)-1
DO 130 K=KS,KE
PIVOT=PIVOT+DABS(ALU(K))
130 CONTINUE
IF (PIVOT.EQ.0.0D0) GO TO 220
140 PIVOT=1.0D0/PIVOT
ALU(I)=PIVOT
KKS=I+1
KKE=ILU(IROW+1)-1
IF (KKS.GT.KKE) GO TO 160
DO 150 K=KKS,KKE
J=JLU(K)
V(J)=ALU(K)
150 CONTINUE
! FIX L IN COLUMN IROW AND DO PARTIAL LU IN SUBMATRIX
160 KS=IC(IROW)
KE=IC(IROW+1)-1
DO 190 K=KS,KE
I=JC(K)
IF (I.LE.IROW) GO TO 190
LS=ILU(I)
LE=ILU(I+1)-1
DO 180 L=LS,LE
J=JLU(L)
IF (J.LT.IROW) GO TO 180
IF (J.GT.IROW) GO TO 170
AMULT=ALU(L)*PIVOT
ALU(L)=AMULT
IF (AMULT.EQ.0.0) GO TO 190
GO TO 180
170 IF (V(J).EQ.0.0D0) GO TO 180
ALU(L)=ALU(L)-AMULT*V(J)
180 CONTINUE
190 CONTINUE
! RESET V
IF (KKS.GT.KKE) GO TO 200
DO 195 K=KKS,KKE
J=JLU(K)
V(J)=0.0D0
195 CONTINUE
200 CONTINUE
! NORMAL RETURN
DLU0 = .FALSE.
RETURN
! ERROR RETURNS
210 IE=I
DLU0 = .TRUE.
RETURN
220 IE=IROW
DLU0 = .TRUE.
RETURN
END
!-------------------------------------------------------------------------------------
SUBROUTINE DNCG0(N,IA,JA,A,B,X,ILU,JLU,ID,ALU,R,P,S1,S2,EPS,ITER,IE)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION IA(*),JA(*),A(*),B(N),X(N),ILU(*),JLU(*),ALU(*),ID(N),R(N),P(N),S1(N),S2(N)
! NONSYMMETRIC CONJUGATE GRADIENT
! WHERE:
! N,IA,JA,A,B, AND X ARE DESCRIBED IN SUBROUTINE DILUCG.
! ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU.
! JLU GIVES COLUMN NUMBER.
! ID GIVES INDEX OF DIAGONAL ELEMENT OF U.
! ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
! WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U.
! R,P,S1, AND S2 ARE VECTORS OF LENGTH N USED IN THE
! ITERATIONS.
! EPS IS CONVERGENCE CRITERIA. (DESCRIBED IN SUBROUTINE
! DILUCG).
! ITER IS MAX NUMBER OF ITERATIONS, OR 0 FOR "NO LIMIT".
! IE GIVES ACTUAL NUMBER OF ITERATIONS, NEGATIVE IF
! NO CONVERGENCE.
!
! R0=B-A*X0
CALL DMUL10(N,IA,JA,A,X,R)
DO 10 I=1,N
R(I)=B(I)-R(I)
10 CONTINUE
! P0=(UT*U)(-1)*AT*(L*LT)(-1)*R0
! FIRST SOLVE L*LT*S1=R0
CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
! TIMES TRANSPOSE OF A
CALL DMUL20(N,IA,JA,A,S1,S2)
! THEN SOLVE UT*U*P=S2
CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,P)
IE=0
RDOT = DGVV(R,S1,N)
! LOOP BEGINS HERE
20 CALL DMUL30(N,ILU,JLU,ID,ALU,P,S2)
PDOT = DGVV(P,S2,N)
IF (PDOT.EQ.0.0D0) RETURN
ALPHA=RDOT/PDOT
XMAX=0.0D0
XDIF=0.0D0
DO 30 I=1,N
AP=ALPHA*P(I)
X(I)=X(I)+AP
AP=DABS(AP)
XX=DABS(X(I))
IF (AP.GT.XDIF) XDIF=AP
IF (XX.GT.XMAX) XMAX=XX
30 CONTINUE
IE=IE+1
IF ((EPS .GT. 0.0D0) .AND. (XDIF .LE. EPS * XMAX)) RETURN
IF ((EPS .LT. 0.0D0) .AND. (XMAX + XDIF/DABS(EPS) .EQ. XMAX)) RETURN
!
! EXCEEDED ITERATION LIMIT?
!
IF ((ITER .NE. 0) .AND. (IE .GE. ITER)) GO TO 60
CALL DMUL10(N,IA,JA,A,P,S2)
DO 40 I=1,N
R(I)=R(I)-ALPHA*S2(I)
40 CONTINUE
CALL DSUBL0(N,ILU,JLU,ID,ALU,R,S1)
RRDOT = DGVV(R,S1,N)
BETA=RRDOT/RDOT
RDOT=RRDOT
CALL DMUL20(N,IA,JA,A,S1,S2)
CALL DSUBU0(N,ILU,JLU,ID,ALU,S2,S1)
DO 50 I=1,N
P(I)=S1(I)+BETA*P(I)
50 CONTINUE
GO TO 20
60 IE=-IE
RETURN
END
!------------------------------------------------------------------------------------------------------
SUBROUTINE DMUL10(N,IA,JA,A,B,X)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION IA(*),JA(*),A(*),B(N),X(N)
! MULTIPLY A TIMES B TO GET X
! WHERE:
! N IS THE ORDER OF THE MATRIX
! IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
! JA GIVES COLUMN NUMBER
! A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
! MATRIX STORED BY ROW
! B IS THE VECTOR
! X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
DO 20 I=1,N
KS=IA(I)
KE=IA(I+1)-1
SUM=0.0D0
DO 10 K=KS,KE
J=JA(K)
SUM=SUM+A(K)*B(J)
10 CONTINUE
X(I)=SUM
20 CONTINUE
RETURN
END
!--------------------------------------------------------------------------------------------------------
SUBROUTINE DMUL20(N,IA,JA,A,B,X)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION IA(*),JA(*),A(*),B(N),X(N)
! MULTIPLY TRANSPOSE OF A TIMES B TO GET X
! WHERE:
! N IS THE ORDER OF THE MATRIX
! IA GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW
! JA GIVES COLUMN NUMBER
! A CONTAINS THE NONZERO ELEMENTS OF THE NONSYMMETRIC
! MATRIX STORED BY ROW
! B IS THE VECTOR
! X IS THE PRODUCT (MUST BE DIFFERENT FROM B)
DO 10 I=1,N
X(I)=0.0D0
10 CONTINUE
DO 30 I=1,N
KS=IA(I)
KE=IA(I+1)-1
BB=B(I)
DO 20 K=KS,KE
J=JA(K)
X(J)=X(J)+A(K)*BB
20 CONTINUE
30 CONTINUE
RETURN
END
!---------------------------------------------------------------------------------------------------------
SUBROUTINE DMUL30(N,ILU,JLU,ID,ALU,B,X)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
! MULTIPLY TRANSPOSE OF U TIMES U TIMES B TO GET X
! WHERE:
! N IS THE ORDER OF THE MATRIX
! ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
! JLU GIVES COLUMN NUMBER
! ID GIVES INDEX OF DIAGONAL ELEMENT OF U
! ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
! WITH RECIPROCALS OF DIAGONAL ELEMENTS
! B IS THE VECTOR
! X IS THE PRODUCT UT*U*B (X MUST BE DIFFERENT FROM B)
DO 10 I=1,N
X(I)=0.0D0
10 CONTINUE
DO 50 I=1,N
KS=ID(I)+1
KE=ILU(I+1)-1
DIAG=1.0D0/ALU(KS-1)
XX=DIAG*B(I)
IF (KS.GT.KE) GO TO 30
DO 20 K=KS,KE
J=JLU(K)
XX=XX+ALU(K)*B(J)
20 CONTINUE
30 X(I)=X(I)+DIAG*XX
IF (KS.GT.KE) GO TO 50
DO 40 K=KS,KE
J=JLU(K)
X(J)=X(J)+ALU(K)*XX
40 CONTINUE
50 CONTINUE
RETURN
END
!----------------------------------------------------------------------------------------------------------
SUBROUTINE DSUBU0(N,ILU,JLU,ID,ALU,B,X)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
! DO FORWARD AND BACK SUBSTITUTION TO SOLVE UT*U*X=B
! WHERE:
! N IS THE ORDER OF THE MATRIX
! ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW OF LU
! JLU GIVES COLUMN NUMBER
! ID GIVES INDEX OF DIAGONAL ELEMENT OF U
! ALU HAS NONZERO ELMENTS OF LU MATRIX STORED BY ROW
! WITH RECIPROCALS OF DIAGONAL ELEMENTS OF U
! B IS THE RHS VECTOR
! X IS THE SOLUTION VECTOR
NP=N+1
DO 10 I=1,N
X(I)=B(I)
10 CONTINUE
! FORWARD SUBSTITUTION
DO 30 I=1,N
KS=ID(I)+1
KE=ILU(I+1)-1
XX=X(I)*ALU(KS-1)
X(I)=XX
IF (KS.GT.KE) GO TO 30
DO 20 K=KS,KE
J=JLU(K)
X(J)=X(J)-ALU(K)*XX
20 CONTINUE
30 CONTINUE
! BACK SUBSTITUTION
DO 60 II=1,N
I=NP-II
KS=ID(I)+1
KE=ILU(I+1)-1
SUM=0.0D0
IF (KS.GT.KE) GO TO 50
DO 40 K=KS,KE
J=JLU(K)
SUM=SUM+ALU(K)*X(J)
40 CONTINUE
50 X(I)=(X(I)-SUM)*ALU(KS-1)
60 CONTINUE
RETURN
END
!--------------------------------------------------------------------------------------------------------------
SUBROUTINE DSUBL0(N,ILU,JLU,ID,ALU,B,X)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION ILU(*),JLU(*),ID(*),ALU(*),B(N),X(N)
! DO FORWARD AND BACK SUBSTITUTION TO SOLVE L*LT*X=B
! WHERE:
! N IS THE ORDER OF THE MATRIX
! ILU GIVES INDEX OF FIRST NONZERO ELEMENT IN ROW LU
! JLU GIVES THE COLUMN NUMBER
! ID GIVES INDEX OF DIAGONAL ELEMENT OF U
! ALU HAS NONZERO ELEMENTS OF LU MATRIX STORED BY ROW
! DIAGONAL ELEMENTS OF L ARE 1.0 AND NOT STORED
! B IS THE RHS VECTOR
! X IS THE SOLUTION VECTOR
NP=N+1
DO 10 I=1,N
X(I)=B(I)
10 CONTINUE
! FORWARD SUBSTITUTION
DO 30 I=1,N
KS=ILU(I)
KE=ID(I)-1
IF (KS.GT.KE) GO TO 30
SUM=0.0D0
DO 20 K=KS,KE
J=JLU(K)
SUM=SUM+ALU(K)*X(J)
20 CONTINUE
X(I)=X(I)-SUM
30 CONTINUE
! BACK SUBSTITUTION
DO 50 II=1,N
I=NP-II
KS=ILU(I)
KE=ID(I)-1
IF (KS.GT.KE) GO TO 50
XX=X(I)
IF (XX.EQ.0.0) GO TO 50
DO 40 K=KS,KE
J=JLU(K)
X(J)=X(J)-ALU(K)*XX
40 CONTINUE
50 CONTINUE
RETURN
END
!------------------------------------------------------------------------------------------------------------------
DOUBLE PRECISION FUNCTION DGVV(V,W,N)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION V(N),W(N)
! SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT.
! Optimized using Intel oneMKL BLAS ddot
! Mathematical equivalence: DGVV = sum_{i=1}^{N} V(i)*W(i)
DOUBLE PRECISION, EXTERNAL :: DDOT
DGVV = DDOT(N, V, 1, W, 1)
RETURN
END