;       gelim32.asm - Gaussian elimination for linear equations.
;       (C)1997, 1998, 1999. C. Bond. All rights reserved.
        .486
        .model flat, C
        .code
        locals

        public gelims,gelimd,geliml
;
;       gelims - Highly optimized solution of linear system using
;                Gaussian elimination with simple pivoting.
;                Finds vector 'x' of length 'n' which solves:
;
;                       A * x = b
;
;                given NxN matrix 'A' and vector 'b' of length 'n'. 
;
;                Optimization is for speed. For nearly singular
;                matrices where high accuracy is important, use
;                a more robust solver. However, if the matrix
;                is stable and well conditioned, this method is
;                fast and reasonably accurate.
;
;       'C' calling conventions:
;
;               int gelims(float **A, float *b, float *x, int n);
;
gelims  proc
        arg a:ptr,b:ptr,x:ptr,n:dword
        local nx4:dword
        uses esi,edi,ebx

        dec n
        mov ecx,n
        shl ecx,2
        mov nx4,ecx
        xor ecx,ecx                 ;ecx = 4*i, (initially i=0)
@@olp:
        mov esi,a
        add esi,ecx
        mov esi,[esi]               ;Point to a[i][0]
        mov ebx,ecx
        fld dword ptr [esi+ebx]     ;a[i][i]
        ftst                        ;Test for zero
        fstsw ax
        sahf
        jnz @@pvtok

        mov edx,ecx
@@pvtlp:
        fstp st(0)
        add edx,4
        cmp edx,nx4
        ja @@singmat
        mov esi,a
        add esi,edx
        mov esi,[esi]               ;a[j][0]
        fld dword ptr [esi+ebx]     ;a[j][i]
        ftst
        fstsw ax
        sahf
        jz @@pvtlp
        mov esi,a
        add ebx,esi
        add esi,edx
        push dword ptr [esi]    ;swap a[i] with a[j] - 'a' matrix row swap.
        push dword ptr [ebx]
        pop dword ptr [esi]
        pop dword ptr [ebx]
        mov ebx,ecx
        mov esi,b
        add ebx,esi
        add esi,edx
        fld dword ptr [esi]         ;swap 'b' vector entries.
        fld dword ptr [ebx]
        fstp dword ptr [esi]
        fstp dword ptr [ebx]
        mov esi,a
        add esi,ecx
        mov esi,[esi]
        jmp @@pvtok
@@singmat:
        mov eax,1                   ;Singular matrix!
        jmp @@gx 
@@pvtok:
        mov eax,b
        add ebx,eax
        fld dword ptr [ebx]         ;b[i]
        mov edx,n
        shl edx,2                   ;dx = 4*k
@@mlp:
        mov edi,a
        add edi,edx
        mov edi,[edi]               ;Point to a[k][0]
        mov ebx,ecx
        fld dword ptr [edi+ebx]     ;a[k][i]
        fdiv st(0),st(2)
        mov ebx,n
        shl ebx,2                   ;bx = 4*j
@@ilp:
        cmp ebx,ecx
        je @@nxt
        fld dword ptr [esi+ebx]     ;a[i][j]
        fmul st(0),st(1)
        fld dword ptr [edi+ebx]
        fsubr
        fstp dword ptr [edi+ebx]
        sub ebx,4
        jmp @@ilp
@@nxt:
        fmul st(0),st(1)
        mov ebx,edx
        add ebx,eax
        fld dword ptr [ebx]         ;b[k]
        fsubr
        fstp dword ptr [ebx]
        sub edx,4
        cmp edx,ecx
        jne @@mlp

        fstp st
        fstp st
        add ecx,4
        cmp ecx,nx4
        jne @@olp
;
;	backsubstitution
;
        mov edi,x
        mov ecx,n
        shl ecx,2                   ;cx = 4*i
@@bsolp:
        mov esi,b
        add esi,ecx       
        fld dword ptr [esi]         ;get b[i]
        mov esi,a
        add esi,ecx
        mov esi,[esi]               ;a[i][0]
        mov ebx,n
        shl ebx,2
@@bsilp:
        cmp ebx,ecx
        je @@bsnxt

        fld dword ptr [edi+ebx]     ;x[j]
        fmul dword ptr [esi+ebx]    ;a[i][j] * x[j]
        fsub
        sub ebx,4
        jmp @@bsilp
@@bsnxt:
        fld dword ptr [esi+ebx]     ;a[i][i]
        ftst                        ;check for divide-by-zero
        fstsw ax
        sahf
        jnz @@nosing
        mov eax,1
        jmp @@gx
@@nosing:
        fdiv
        fstp dword ptr [edi+ebx]    ;x[i]
        sub ecx,4
        jns @@bsolp
        mov eax,0
@@gx:
        ret
gelims	endp

;
;       gelimd - Highly optimized solution of linear system of
;                equations using Gaussian elimination with simple
;                pivoting. Finds vector 'x' of length 'n' which solves:
;
;                       A * x = b
;
;                given NxN matrix 'A' and vector 'b'of length 'n'.
;
;                Optimization is for speed. For nearly singular
;                matrices where high accuracy is important, use
;                a more robust method. If the matrix is stable
;                and well conditioned, this method is fast and
;                reasonably accurate.
;
;       'C' calling conventions:
;               int gelimd(double **A, double *b, double *x, int n);
;
gelimd  proc
        arg a:ptr,b:ptr,x:ptr,n:dword
        local nx4:dword
        uses esi,edi,ebx

        dec n
        mov ecx,n
        shl ecx,2
        mov nx4,ecx                 ;Upper bound for row indexing.
        xor ecx,ecx                 ;ecx = 4*i, (initially i=0)
@@olp:
        mov esi,a
        add esi,ecx
        mov esi,[esi]               ;Point to a[i][0]
        mov ebx,ecx
        shl ebx,1                   ;i*8 for double precision
        fld qword ptr [esi+ebx]     ;a[i][i]
        ftst                        ;Test for zero
        fstsw ax
        sahf
        jnz @@pvtok

        mov edx,ecx
@@pvtlp:
        fstp st(0)                  ;get rid of old a[i][i] ( = 0.0)
        add edx,4
        cmp edx,nx4
        ja @@singmat
        mov esi,a
        add esi,edx
        mov esi,[esi]               ;Point to a[j][0]
        fld qword ptr [esi+ebx]
        ftst
        fstsw ax
        sahf
        jz @@pvtlp                  ;Continue scanning for non-zero pivot.

        mov esi,a                   ;Found pivot in row j.
        mov ebx,ecx
        add ebx,esi
        add esi,edx
        push dword ptr [esi]        ;Swap 'a' matrix row pointers.
        push dword ptr [ebx]
        pop dword ptr [esi]
        pop dword ptr [ebx]
        mov ebx,ecx
        shl ebx,1
        mov esi,b
        add ebx,esi
        add esi,edx
        add esi,edx
        fld qword ptr [esi]         ;Swap 'b' vector entries. 
        fld qword ptr [ebx]
        fstp qword ptr [esi]
        fstp qword ptr [ebx]
        mov esi,a                   ;Restore pointers for next iteration.
        add esi,ecx
        mov esi,[esi]
        mov ebx,ecx
        shl ebx,1
        jmp @@pvtok
@@singmat:
        mov eax,1
        jmp @@gx 
@@pvtok:
        mov eax,b
        add ebx,eax
        fld qword ptr [ebx]         ;b[i]
        mov edx,n
        shl edx,2                   ;dx = 4*k
@@mlp:
        mov edi,a
        add edi,edx
        mov edi,[edi]               ;Point to a[k][0]
        mov ebx,ecx
        shl bx,1
        fld qword ptr [edi+ebx]     ;a[k][i]
        fdiv st(0),st(2)
        mov ebx,n
        shl ebx,2                   ;bx = 4*j
@@ilp:
        cmp ebx,ecx
        je @@nxt
        shl ebx,1
        fld qword ptr [esi+ebx]     ;a[i][j]
        fmul st(0),st(1)
        fld qword ptr [edi+ebx]
        fsubr
        fstp qword ptr [edi+ebx]
        shr ebx,1
        sub ebx,4
        jmp @@ilp
@@nxt:
        fmul st(0),st(1)
        mov ebx,edx
        shl ebx,1
        add ebx,eax
        fld qword ptr [ebx]         ;b[k]
        fsubr
        fstp qword ptr [ebx]
        sub edx,4
        cmp edx,ecx
        jne @@mlp

        fstp st
        fstp st
        add ecx,4
        cmp ecx,nx4
        jne @@olp
;
;	backsubstitution
;
        mov edi,x
        mov ecx,n
        shl ecx,2                   ;cx = 4*i
@@bsolp:
        mov esi,b
        add esi,ecx
        add esi,ecx       
        fld qword ptr [esi]         ;get b[i]
        mov esi,a
        add esi,ecx
        mov esi,[esi]  ;a[i][0]
        mov ebx,n
        shl ebx,2
@@bsilp:
        cmp ebx,ecx
        je @@bsnxt
        shl ebx,1
        fld qword ptr [edi+ebx]     ;x[j]
        fmul qword ptr [esi+ebx]    ;a[i][j] * x[j]
        fsub
        shr ebx,1
        sub ebx,4
        jmp @@bsilp
@@bsnxt:
        shl ebx,1
        fld qword ptr [esi+ebx]     ;a[i][i]
        ftst                        ;check for divide-by-zero
        fstsw ax
        sahf
        jnz @@nosing
        mov eax,1
        jmp @@gx
@@nosing:
        fdiv
        fstp qword ptr [edi+ebx]    ;x[i]
        sub ecx,4
        jns @@bsolp
        mov eax,0
@@gx:
	ret
gelimd	endp
;
;       geliml - Highly optimized solution of linear system of
;                equations using Gaussian elimination with simple
;                pivoting. Finds vector 'x' of length 'n' which solves:
;
;                       A * x = b
;
;                given NxN matrix 'A' and vector 'b' of length 'n'.
;
;                Optimization is for speed. For nearly singular
;                matrices where high accuracy is important, use
;                a more robust method. If the matrix is stable
;                and well conditioned, this method is fast and
;                reasonably accurate.
;
;       'C' calling conventions:
;
;               int geliml(long double **A, long double *b,
;                       long double *x, int n);
;
geliml  proc
        arg a:ptr,b:ptr,x:ptr,n:dword
        local nx4:dword
        uses esi,edi,ebx

	dec n
        mov ecx,n
        shl ecx,2
        mov nx4,ecx                 ;Upper bound for row indexing.
        xor ecx,ecx                 ;cx = 4*i, (initially i=0)
@@olp:
        mov esi,a
        add esi,ecx
        mov esi,[esi]               ;Point to a[i][0]
        mov ebx,ecx
        mov eax,ecx
        shr eax,1
        shl ebx,1
        add ebx,eax                 ;i*10 for long double precision
        fld tbyte ptr [esi+ebx]     ;a[i][i]
        ftst                        ;Test for zero
        fstsw ax
        sahf
        jnz @@pvtok

        mov edx,ecx
@@pvtlp:
        fstp st(0)                  ;get rid of old a[i][i] ( = 0.0)
        add edx,4
        cmp edx,nx4
        ja @@singmat
        mov esi,a
        add esi,edx
        mov esi,[esi]               ;Point to a[j][0]
        fld tbyte ptr [esi+ebx]
        ftst
        fstsw ax
        jz @@pvtlp                  ;Continue scanning for non-zero pivot.

        mov esi,a                   ;Found pivot in row j.
        mov ebx,ecx
        add ebx,esi
        add esi,edx
        push dword ptr [esi]        ;Swap 'a' matrix row pointers.
        push dword ptr [ebx]
        pop dword ptr [esi]
        pop dword ptr [ebx]
        mov ebx,ecx
        mov eax,ecx
        shl ebx,1
        shr eax,1
        add ebx,eax
        mov esi,b
        add ebx,esi
        mov eax,edx
        shr eax,1
        add esi,eax
        add esi,edx
        add esi,edx
        fld tbyte ptr [esi]         ;Swap 'b' vector entries. 
        fld tbyte ptr [ebx]
        fstp tbyte ptr [esi]
        fstp tbyte ptr [ebx]
        mov esi,a                   ;Restore pointers for next iteration.
        add esi,ecx
        mov esi,[esi]
        mov ebx,ecx
        mov eax,ecx
        shr eax,1
        shl ebx,1
        add ebx,eax
        jmp @@pvtok
@@singmat:
        mov eax,1
        jmp @@gx 
@@pvtok:
        mov eax,b
        add ebx,eax
        fld tbyte ptr [ebx]         ;b[i]
        mov edx,n
        shl edx,2                   ;dx = 4*k
@@mlp:
        mov edi,a
        add edi,edx
        mov edi,[edi]               ;Point to a[k][0]
        mov ebx,ecx
        mov eax,ecx
        shr eax,1
        shl ebx,1
        add ebx,eax
        fld tbyte ptr [edi+ebx]     ;a[k][i]
        fdiv st(0),st(2)
        mov ebx,n
        shl ebx,2                   ;bx = 4*j
@@ilp:
        cmp ebx,ecx
        je @@nxt
        mov eax,ebx
        shr eax,1
        shl ebx,1
        add ebx,eax
        fld tbyte ptr [esi+ebx]     ;a[i][j]
        fmul st(0),st(1)
        fld tbyte ptr [edi+ebx]
        fsubr
        fstp tbyte ptr [edi+ebx]
        sub ebx,eax
        shr ebx,1
        sub ebx,4
        jmp @@ilp
@@nxt:
        fmul st(0),st(1)
        mov ebx,edx
        mov eax,edx
        shr eax,1
        shl ebx,1
        add ebx,eax
        mov eax,b
        add ebx,eax
        fld tbyte ptr [ebx]         ;b[k]
        fsubr
        fstp tbyte ptr [ebx]
        sub edx,4
        cmp edx,ecx
        jne @@mlp

        fstp st
        fstp st
        add ecx,4
        cmp ecx,nx4
        jne @@olp
;
;	backsubstitution
;
        mov edi,x
        mov ecx,n
        shl ecx,2                   ;cx = 4*i
@@bsolp:
        mov esi,b
        mov eax,ecx
        shr eax,1
        add esi,ecx
        add esi,ecx
        add esi,eax       
        fld tbyte ptr [esi]         ;get b[i]
        mov esi,a
        add esi,ecx
        mov esi,[esi]  ;a[i][0]
        mov ebx,n
        shl ebx,2
@@bsilp:
        cmp ebx,ecx
        je @@bsnxt
        mov eax,ebx
        shr eax,1
        shl ebx,1
        add ebx,eax
        fld tbyte ptr [edi+ebx]     ;x[j]
        fld tbyte ptr [esi+ebx]
        fmul                        ;a[i][j] * x[j]
        fsub
        sub ebx,eax
        shr ebx,1
        sub ebx,4
        jmp @@bsilp
@@bsnxt:
        mov eax,ebx
        shr eax,1
        shl ebx,1
        add ebx,eax
        fld tbyte ptr [esi+ebx]
        ftst
        fstsw ax
        sahf
        jnz @@nosing
        mov eax,1
        jmp @@gx
@@nosing:
        fdiv                        ;a[i][i]
        fstp tbyte ptr [edi+ebx]    ;x[i]
        sub ecx,4
        jns @@bsolp
        mov eax,0
@@gx:
        ret
geliml	endp
        end

