************************************************************************
*                 copyright 2001, Amoco Production Company             *
*                             All Rights Reserved                      *
*                     an affiliate of BP America Inc.                  *
************************************************************************
#include <localsys.h>
#ifdef CRAYSYSTEM
       IDENT     MFOLD
*********************************************************************
*
*      MFOLD is equivalent to the following FORTRAN subroutine:
*
*          SUBROUTINE MFOLD(LA,A,LB,B,LC,C,MXNT,NTRACE,OPS)
*          DIMENSION A(MXNT,NTRACE),C(MXNT,NTRACE)
*          DIMENSION B(LB)
*
*          DO 5000 ITRACE = 1,NTRACE
*            LC = LA+LB-1
*            DO 1 I = 1,LC
*              C(I,ITRACE) = 0.
* 1          CONTINUE
*
*            DO 2 I = 1,LA
*              IM1 = I-1
*              DO 2 J = 1,LB
*                K = J+IM1
*                C(K,ITRACE) = C(K,ITRACE) + A(I,ITRACE)*B(J)
* 2          CONTINUE
* 5000    CONTINUE
* 
*         OPS = OPS + 2*LA*LB*NTRACE
*         RETURN
*         END
*
*        Warning: NTRACE should be at least 2
*                 LA+LB should be less than 4096
*
*        WRITTEN BY OLIVIER LHEMANN, CRAY RESEARCH
**********************************************************************
WCOL1  DEF$SCR   4096    ;work array 1 for columns of C
WCOL2  DEF$SCR   4096    ;work array 2 for columns of C
PCOL1  DEF$SCR   1       ;pointer for column 1
PCOL2  DEF$SCR   1       ;pointer for column 2
BA     DEF$SCR   1       ;address of A in common memory
LLB    DEF$SCR   1       ;LB
BB     DEF$SCR   1       ;address of B in comon memory
CC     DEF$SCR   1       ;address of C in common memory
LTRACE DEF$SCR   1       ;NTRACE
FVLB   DEF$SCR   1       ;first VL for loop from J = 1,LB
RSLB   DEF$SCR   1       ;#of remaining segments for loop from J = 1,LB
FVLAB  DEF$SCR   1       ;first VL for loop from I = 1,LC=LA+LB-1
RSLAB  DEF$SCR   1       ;of remaining segments for loop from I = 1,LC
       ENTRY     MFOLD
MFOLD  ENTER (LA,A,LB,B,LC,C,MXNT,NTRACE),MODE=BASELVL
       LOAD      S7,LB
       LOAD      S6,LA
       LOAD      S4,MXNT
       ADDRESS   A3,A
       ADDRESS   A4,B
       ADDRESS   A5,C
       LOAD      S3,NTRACE
       A7        64
       VL        A7
       V0        V5-V5    ;set V0 to 0
       S0        1
       [BA]      A3       ;save base address of A
       [BB]      A4       ;save base address of B 
       S5        S7+S6    ;LB+LA
       A7        S7       ;LB
       [LLB]     S7       ;save LB
       S7        S7-S0    ;LB-1
       [CC]      A5       ;address of C
       VL        A7       ;first vector length for LB
       A7        VL
       S5        S5-S0    ;LA+LB-1
       $IF       A7,EQ,0
       A7        64
       $ENDIF
       [FVLB]   A7       ;first vector length for LB
       S7       S7>6     ;number of remaining segments for LB
       A7       S5
       [LTRACE] S3       ;save NTRACE
       [RSLB]   S7       ;save # of remaining segents for LB
       VL       A7       ;first vector length for LA+LB-1
       S5       S5-S0  
       A7       VL
       $IF      A7,EQ,0
       A7       64
       $ENDIF
       S5       S5>6      ;# of remaing segments fo LA+LB-1
       [FVLAB]  A7        ;save first VL of LA+LB-1
       [RSLAB]  S5        ;save # of remaining segments for LA+LB-1
*
*   we compute two columns at a time
*
L5000  =        *
       A7       [FVLAB]
       S5       [RSLAB]
       VL       A7
       A1       WCOL1     ;address of work array 1 for C
       A2       WCOL2     ;address of work array 2 for C
*
*   we set the work array 1 and 2 for C to 0
*
L1     =        *
       [A1]     V0        ;store 0 
       A1       A1+A7     ;increment address of work array 1 
       S0       1
       S5       S5-S0     ;decrement loop counter
       [A2]     V0        ;store 0
       A2       A2+A7     ;increment address of work array 2 
       A7       64        ;new VL
       VL       A7
       JP       S5,L1     ;jump to L1 if more segments
       A7       [FVLB]    ;first VL for LB
       S7       [RSLB]    ;# of remaining segments for LB
       VL       A7
       A4       [BB]      ;base address of B
       A1       WCOL1
       A2       WCOL2
       [PCOL1]  A1        ;initialize pointer for column 2
       [PCOL2]  A2        ;initialize pointer for column 2
*
*  loop over the segments in LB
*
       J      LSB
LSB    ALIGN 
       A3       [BA]     ;restore base address of A
       A0       S4       ;MXNT
       S1       (A3)     ;load A(I,ITRACE)
       S2       (A3,A0)  ;load A(I,ITRACE+1)
       A0       1
       V1       (A4,A0)  ;load segment of B
       A3       A3+A0    ;increment address of A
       A4       A4+A7    ;increment address of B
       S5       0        ;I=0       
*
*  loop over I
*
LI    =        *
      A0       1
      V3       S1*FV1   ;A(I,ITRACE)*B(J)
      V2       [A1]     ;load WCOL1
      A0       S4       ;MXNT
      V5       S2*FV1   ;A(I,ITRACE+1)*B(J)
      V4       [A2]     ;load WCOL2
      V6       V3+FV2   ;C(K,ITRACE) + A(I,ITRACE)*B(J)
      S0       1
      S1       (A3)     ;load next A(I,ITRACE)
      S2       (A3,A0)  ;load next A(I,ITRACE+1)
      A0       1
      S5       S5+S0    ;increment I
      V7       V5+FV4   ;C(K,ITRACE+1) + A(I,ITRACE+1)*B(J)
      [A1]     V6       ;store C(K,ITRACE)
      S0       S6-S5    ;LA-I
      A3       A3+A0    ;increment address of A
      A1       A1+A0    ;increment address of C(K,ITRACE)
      [A2]     V7       ;Store C(K,ITRACE+1)
      A2       A2+A0    ;increment address of C(K,ITRACE+1)
      JN       S0,LI    ;if I <> LA goto next iteration
*
      A1       [PCOL1]
      A2       [PCOL2]
      S0       1
      S7       S7-S0    ;decrement segment counter
      A1       A1+A7    ;increment pointer by VL
      A2       A2+A7    ;increment pointer by VL
      A7       64       ;new VL is 64
      VL       A7
      [PCOL1]  A1       ;save new pointer for column 1
      [PCOL2]  A2       ;save new pointer for column 2
      JP       S7,LSB   ;if more segments goto LSB
*
      A7       [FVLAB]  ;first VL
      S5       [RSLAB]  ;# of remaining segments
      A1       WCOL1
      A2       WCOL2
      VL       A7       ;first VL
      A5       [CC]     ;restore address of C
      A6       S4       ;MXNT
      A6       A5+A6    ;address of second column
*
*    store 2 columns of C back to main memory
*
LS   =        *
     V1       [A1]
     A1       A1+A7     ;increment address of A1
     A0       1
     V2       [A2]
     A2       A2+A7
     (A5,A0)  V1
     A5       A5+A7
     S0       1
     S5       S5-S0     ;decrement segment counter
     (A6,A0)  V2
     A6       A6+A7
     A7       64        ;next segments are 64 long
     VL       A7
     JP       S5,LS     ;if more segments goto LS
*
     S0       S4        ;MXNT
     S0       S0<1      ;MXNT*2
     S1       [BA]
     S2       [CC]
     S1       S1+S0     ;add MXNT*2 to address of A
     S2       S2+S0     ;add MXNT*2 to address of C
     S0       2
     [BA]     S1        ;update address of A
     [CC]     S2        ;update address of C
     S3       S3-S0     ;decrement NTRACE by 2
     $IF      S3,EQ,0   ;if NTRACE = 0 end
     $ELSEIF  S3,NE,1
     J        L5000
     $ENDIF
*
*   compute last column of C
*
L6000 =      *
       A7       [FVLAB]
       S5       [RSLAB]
       VL       A7
       A1       WCOL1     ;address of work array 1 for C
*
*   we set the work array for C to 0
*
L12    =        *
       [A1]     V0        ;store 0 
       A1       A1+A7     ;increment address of work array 1 
       S0       1
       S5       S5-S0     ;decrement loop counter
       A7       64        ;new VL
       VL       A7
       JP       S5,L12    ;jump to L12 if more segments
*
       A7       [FVLB]    ;first VL for LB
       S7       [RSLB]    ;# of remaining segments for LB
       VL       A7
       A4       [BB]      ;base address of B
       A1       WCOL1
       [PCOL1]  A1        ;initialize pointer
*
*  loop over the segments in LB
*
LSB2   =        *
       A0       1
       V1       (A4,A0)  ;load segment of B
       A3       [BA]     ;restore base address of A
       A4       A4+A7    ;increment address of B
       S5       0        ;I=0       
*
*  loop over I
*
LI2    =        *
      S1       (A3)     ;load A(I,ITRACE)
      A0       1
      A3       A3+A0    ;increment address of A
      V2       [A1]     ;load WCOL1
      V3       S1*FV1   ;A(I,ITRACE)*B(J)
      S0       1
      S5       S5+S0    ;increment I
      S0       S6-S5    ;LA-I
      V6       V3+FV2   ;C(K,ITRACE) + A(I,ITRACE)*B(J)
      [A1]     V6       ;store C(K,ITRACE)
      A1       A1+A0    ;increment address of C(K,ITRACE)
      JN       S0,LI2    ;if I <> LA goto next iteration
*
      A1       [PCOL1]
      S0       1
      S7       S7-S0    ;decrement segment counter
      A1       A1+A7    ;increment pointer for column 1
      A7       64       ;new VL is 64
      VL       A7
      [PCOL1]  A1       ;save pointer
      JP       S7,LSB2  ;if more segments goto LSB2
*
      A7       [FVLAB]  ;first VL
      S5       [RSLAB]  ;# of remaining segments
      A1       WCOL1
      VL       A7       ;first VL
      A5       [CC]     ;restore address of C
*
*   store last columns of C back to main memory
*
LS2   =        *
      V1       [A1]
      A1       A1+A7
      A0       1
      S0       1
      S5       S5-S0
      (A5,A0)  V1
      A5       A5+A7    ;increment address in C
      A7       64       ;next segments are 64 long
      VL       A7
      JP       S5,LS2   ;if more segments goto LS
*
      RETURN
      ENDSUB
      END 
#endif
