; gelim32.asm - Gaussian elimination for linear equations. ; (C)1997, 1998, 1999. C. Bond. All rights reserved. .486 .model flat, C .code locals public gelims,gelimd,geliml ; ; gelims - Highly optimized solution of linear system using ; Gaussian elimination with simple pivoting. ; Finds vector 'x' of length 'n' which solves: ; ; A * x = b ; ; given NxN matrix 'A' and vector 'b' of length 'n'. ; ; Optimization is for speed. For nearly singular ; matrices where high accuracy is important, use ; a more robust solver. However, if the matrix ; is stable and well conditioned, this method is ; fast and reasonably accurate. ; ; 'C' calling conventions: ; ; int gelims(float **A, float *b, float *x, int n); ; gelims proc arg a:ptr,b:ptr,x:ptr,n:dword local nx4:dword uses esi,edi,ebx dec n mov ecx,n shl ecx,2 mov nx4,ecx xor ecx,ecx ;ecx = 4*i, (initially i=0) @@olp: mov esi,a add esi,ecx mov esi,[esi] ;Point to a[i][0] mov ebx,ecx fld dword ptr [esi+ebx] ;a[i][i] ftst ;Test for zero fstsw ax sahf jnz @@pvtok mov edx,ecx @@pvtlp: fstp st(0) add edx,4 cmp edx,nx4 ja @@singmat mov esi,a add esi,edx mov esi,[esi] ;a[j][0] fld dword ptr [esi+ebx] ;a[j][i] ftst fstsw ax sahf jz @@pvtlp mov esi,a add ebx,esi add esi,edx push dword ptr [esi] ;swap a[i] with a[j] - 'a' matrix row swap. push dword ptr [ebx] pop dword ptr [esi] pop dword ptr [ebx] mov ebx,ecx mov esi,b add ebx,esi add esi,edx fld dword ptr [esi] ;swap 'b' vector entries. fld dword ptr [ebx] fstp dword ptr [esi] fstp dword ptr [ebx] mov esi,a add esi,ecx mov esi,[esi] jmp @@pvtok @@singmat: mov eax,1 ;Singular matrix! jmp @@gx @@pvtok: mov eax,b add ebx,eax fld dword ptr [ebx] ;b[i] mov edx,n shl edx,2 ;dx = 4*k @@mlp: mov edi,a add edi,edx mov edi,[edi] ;Point to a[k][0] mov ebx,ecx fld dword ptr [edi+ebx] ;a[k][i] fdiv st(0),st(2) mov ebx,n shl ebx,2 ;bx = 4*j @@ilp: cmp ebx,ecx je @@nxt fld dword ptr [esi+ebx] ;a[i][j] fmul st(0),st(1) fld dword ptr [edi+ebx] fsubr fstp dword ptr [edi+ebx] sub ebx,4 jmp @@ilp @@nxt: fmul st(0),st(1) mov ebx,edx add ebx,eax fld dword ptr [ebx] ;b[k] fsubr fstp dword ptr [ebx] sub edx,4 cmp edx,ecx jne @@mlp fstp st fstp st add ecx,4 cmp ecx,nx4 jne @@olp ; ; backsubstitution ; mov edi,x mov ecx,n shl ecx,2 ;cx = 4*i @@bsolp: mov esi,b add esi,ecx fld dword ptr [esi] ;get b[i] mov esi,a add esi,ecx mov esi,[esi] ;a[i][0] mov ebx,n shl ebx,2 @@bsilp: cmp ebx,ecx je @@bsnxt fld dword ptr [edi+ebx] ;x[j] fmul dword ptr [esi+ebx] ;a[i][j] * x[j] fsub sub ebx,4 jmp @@bsilp @@bsnxt: fld dword ptr [esi+ebx] ;a[i][i] ftst ;check for divide-by-zero fstsw ax sahf jnz @@nosing mov eax,1 jmp @@gx @@nosing: fdiv fstp dword ptr [edi+ebx] ;x[i] sub ecx,4 jns @@bsolp mov eax,0 @@gx: ret gelims endp ; ; gelimd - Highly optimized solution of linear system of ; equations using Gaussian elimination with simple ; pivoting. Finds vector 'x' of length 'n' which solves: ; ; A * x = b ; ; given NxN matrix 'A' and vector 'b'of length 'n'. ; ; Optimization is for speed. For nearly singular ; matrices where high accuracy is important, use ; a more robust method. If the matrix is stable ; and well conditioned, this method is fast and ; reasonably accurate. ; ; 'C' calling conventions: ; int gelimd(double **A, double *b, double *x, int n); ; gelimd proc arg a:ptr,b:ptr,x:ptr,n:dword local nx4:dword uses esi,edi,ebx dec n mov ecx,n shl ecx,2 mov nx4,ecx ;Upper bound for row indexing. xor ecx,ecx ;ecx = 4*i, (initially i=0) @@olp: mov esi,a add esi,ecx mov esi,[esi] ;Point to a[i][0] mov ebx,ecx shl ebx,1 ;i*8 for double precision fld qword ptr [esi+ebx] ;a[i][i] ftst ;Test for zero fstsw ax sahf jnz @@pvtok mov edx,ecx @@pvtlp: fstp st(0) ;get rid of old a[i][i] ( = 0.0) add edx,4 cmp edx,nx4 ja @@singmat mov esi,a add esi,edx mov esi,[esi] ;Point to a[j][0] fld qword ptr [esi+ebx] ftst fstsw ax sahf jz @@pvtlp ;Continue scanning for non-zero pivot. mov esi,a ;Found pivot in row j. mov ebx,ecx add ebx,esi add esi,edx push dword ptr [esi] ;Swap 'a' matrix row pointers. push dword ptr [ebx] pop dword ptr [esi] pop dword ptr [ebx] mov ebx,ecx shl ebx,1 mov esi,b add ebx,esi add esi,edx add esi,edx fld qword ptr [esi] ;Swap 'b' vector entries. fld qword ptr [ebx] fstp qword ptr [esi] fstp qword ptr [ebx] mov esi,a ;Restore pointers for next iteration. add esi,ecx mov esi,[esi] mov ebx,ecx shl ebx,1 jmp @@pvtok @@singmat: mov eax,1 jmp @@gx @@pvtok: mov eax,b add ebx,eax fld qword ptr [ebx] ;b[i] mov edx,n shl edx,2 ;dx = 4*k @@mlp: mov edi,a add edi,edx mov edi,[edi] ;Point to a[k][0] mov ebx,ecx shl bx,1 fld qword ptr [edi+ebx] ;a[k][i] fdiv st(0),st(2) mov ebx,n shl ebx,2 ;bx = 4*j @@ilp: cmp ebx,ecx je @@nxt shl ebx,1 fld qword ptr [esi+ebx] ;a[i][j] fmul st(0),st(1) fld qword ptr [edi+ebx] fsubr fstp qword ptr [edi+ebx] shr ebx,1 sub ebx,4 jmp @@ilp @@nxt: fmul st(0),st(1) mov ebx,edx shl ebx,1 add ebx,eax fld qword ptr [ebx] ;b[k] fsubr fstp qword ptr [ebx] sub edx,4 cmp edx,ecx jne @@mlp fstp st fstp st add ecx,4 cmp ecx,nx4 jne @@olp ; ; backsubstitution ; mov edi,x mov ecx,n shl ecx,2 ;cx = 4*i @@bsolp: mov esi,b add esi,ecx add esi,ecx fld qword ptr [esi] ;get b[i] mov esi,a add esi,ecx mov esi,[esi] ;a[i][0] mov ebx,n shl ebx,2 @@bsilp: cmp ebx,ecx je @@bsnxt shl ebx,1 fld qword ptr [edi+ebx] ;x[j] fmul qword ptr [esi+ebx] ;a[i][j] * x[j] fsub shr ebx,1 sub ebx,4 jmp @@bsilp @@bsnxt: shl ebx,1 fld qword ptr [esi+ebx] ;a[i][i] ftst ;check for divide-by-zero fstsw ax sahf jnz @@nosing mov eax,1 jmp @@gx @@nosing: fdiv fstp qword ptr [edi+ebx] ;x[i] sub ecx,4 jns @@bsolp mov eax,0 @@gx: ret gelimd endp ; ; geliml - Highly optimized solution of linear system of ; equations using Gaussian elimination with simple ; pivoting. Finds vector 'x' of length 'n' which solves: ; ; A * x = b ; ; given NxN matrix 'A' and vector 'b' of length 'n'. ; ; Optimization is for speed. For nearly singular ; matrices where high accuracy is important, use ; a more robust method. If the matrix is stable ; and well conditioned, this method is fast and ; reasonably accurate. ; ; 'C' calling conventions: ; ; int geliml(long double **A, long double *b, ; long double *x, int n); ; geliml proc arg a:ptr,b:ptr,x:ptr,n:dword local nx4:dword uses esi,edi,ebx dec n mov ecx,n shl ecx,2 mov nx4,ecx ;Upper bound for row indexing. xor ecx,ecx ;cx = 4*i, (initially i=0) @@olp: mov esi,a add esi,ecx mov esi,[esi] ;Point to a[i][0] mov ebx,ecx mov eax,ecx shr eax,1 shl ebx,1 add ebx,eax ;i*10 for long double precision fld tbyte ptr [esi+ebx] ;a[i][i] ftst ;Test for zero fstsw ax sahf jnz @@pvtok mov edx,ecx @@pvtlp: fstp st(0) ;get rid of old a[i][i] ( = 0.0) add edx,4 cmp edx,nx4 ja @@singmat mov esi,a add esi,edx mov esi,[esi] ;Point to a[j][0] fld tbyte ptr [esi+ebx] ftst fstsw ax jz @@pvtlp ;Continue scanning for non-zero pivot. mov esi,a ;Found pivot in row j. mov ebx,ecx add ebx,esi add esi,edx push dword ptr [esi] ;Swap 'a' matrix row pointers. push dword ptr [ebx] pop dword ptr [esi] pop dword ptr [ebx] mov ebx,ecx mov eax,ecx shl ebx,1 shr eax,1 add ebx,eax mov esi,b add ebx,esi mov eax,edx shr eax,1 add esi,eax add esi,edx add esi,edx fld tbyte ptr [esi] ;Swap 'b' vector entries. fld tbyte ptr [ebx] fstp tbyte ptr [esi] fstp tbyte ptr [ebx] mov esi,a ;Restore pointers for next iteration. add esi,ecx mov esi,[esi] mov ebx,ecx mov eax,ecx shr eax,1 shl ebx,1 add ebx,eax jmp @@pvtok @@singmat: mov eax,1 jmp @@gx @@pvtok: mov eax,b add ebx,eax fld tbyte ptr [ebx] ;b[i] mov edx,n shl edx,2 ;dx = 4*k @@mlp: mov edi,a add edi,edx mov edi,[edi] ;Point to a[k][0] mov ebx,ecx mov eax,ecx shr eax,1 shl ebx,1 add ebx,eax fld tbyte ptr [edi+ebx] ;a[k][i] fdiv st(0),st(2) mov ebx,n shl ebx,2 ;bx = 4*j @@ilp: cmp ebx,ecx je @@nxt mov eax,ebx shr eax,1 shl ebx,1 add ebx,eax fld tbyte ptr [esi+ebx] ;a[i][j] fmul st(0),st(1) fld tbyte ptr [edi+ebx] fsubr fstp tbyte ptr [edi+ebx] sub ebx,eax shr ebx,1 sub ebx,4 jmp @@ilp @@nxt: fmul st(0),st(1) mov ebx,edx mov eax,edx shr eax,1 shl ebx,1 add ebx,eax mov eax,b add ebx,eax fld tbyte ptr [ebx] ;b[k] fsubr fstp tbyte ptr [ebx] sub edx,4 cmp edx,ecx jne @@mlp fstp st fstp st add ecx,4 cmp ecx,nx4 jne @@olp ; ; backsubstitution ; mov edi,x mov ecx,n shl ecx,2 ;cx = 4*i @@bsolp: mov esi,b mov eax,ecx shr eax,1 add esi,ecx add esi,ecx add esi,eax fld tbyte ptr [esi] ;get b[i] mov esi,a add esi,ecx mov esi,[esi] ;a[i][0] mov ebx,n shl ebx,2 @@bsilp: cmp ebx,ecx je @@bsnxt mov eax,ebx shr eax,1 shl ebx,1 add ebx,eax fld tbyte ptr [edi+ebx] ;x[j] fld tbyte ptr [esi+ebx] fmul ;a[i][j] * x[j] fsub sub ebx,eax shr ebx,1 sub ebx,4 jmp @@bsilp @@bsnxt: mov eax,ebx shr eax,1 shl ebx,1 add ebx,eax fld tbyte ptr [esi+ebx] ftst fstsw ax sahf jnz @@nosing mov eax,1 jmp @@gx @@nosing: fdiv ;a[i][i] fstp tbyte ptr [edi+ebx] ;x[i] sub ecx,4 jns @@bsolp mov eax,0 @@gx: ret geliml endp end