AMD x86 Typewriter User Manual

88 Efficient 64-Bit Integer Arithmetic
AMD Athlon Processor x86 Code Optimization
22007E/0 — November 1999
Example 7 (Division):
;_ulldiv divides two unsigned 64-bit integers, and returns
; the quotient.
;INPUT: [ESP+8]:[ESP+4] dividend
; [ESP+16]:[ESP+12] divisor
;OUTPUT: EDX:EAX quotient of division
_ulldiv PROC
PUSH EBX ;save EBX as per calling convention
MOV ECX, [ESP+20] ;divisor_hi
MOV EBX, [ESP+16] ;divisor_lo
MOV EDX, [ESP+12] ;dividend_hi
MOV EAX, [ESP+8] ;dividend_lo
TEST ECX, ECX ;divisor > 2^32–1?
JNZ $big_divisor ;yes, divisor > 2^32–1
CMP EDX, EBX ;only one division needed? (ECX = 0)
JAE $two_divs ;need two divisions
DIV EBX ;EAX = quotient_lo
MOV EDX, ECX ;EDX = quotient_hi = 0 (quotient in EDX:EAX)
POP EBX ;restore EBX as per calling convention
RET ;done, return to caller
MOV ECX, EAX ;save dividend_lo in ECX
MOV EAX, EDX ;get dividend_hi
XOR EDX, EDX ;zero extend it into EDX:EAX
DIV EBX ;quotient_hi in EAX
XCHG EAX, ECX ;ECX = quotient_hi, EAX = dividend_lo
DIV EBX ;EAX = quotient_lo
MOV EDX, ECX ;EDX = quotient_hi (quotient in EDX:EAX)
POP EBX ;restore EBX as per calling convention
RET ;done, return to caller
PUSH EDI ;save EDI as per calling convention
MOV EDI, ECX ;save divisor_hi
SHR EDX, 1 ;shift both divisor and dividend right
RCR EAX, 1 ; by 1 bit
BSR ECX, ECX ;ECX = number of remaining shifts
SHRD EBX, EDI, CL ;scale down divisor and dividend
SHRD EAX, EDX, CL ; such that divisor is
SHR EDX, CL ; less than 2^32 (i.e. fits in EBX)
ROL EDI, 1 ;restore original divisor_hi
DIV EBX ;compute quotient
MOV EBX, [ESP+12] ;dividend_lo