Chapter 8 Integer Optimizations 175
Software Optimization Guide for AMD64 Processors
25112 Rev. 3.06 September 2005
;
; Destroys: EAX, ECX, EDX, EFlags
_ullrem PROC
push ebx ; Save EBX as per calling convention.
mov ecx, [esp+20] ; divisor_hi
mov ebx, [esp+16] ; divisor_lo
mov edx, [esp+12] ; dividend_hi
mov eax, [esp+8] ; dividend_lo
test ecx, ecx ; divisor > 2^32 - 1?
jnz r_big_divisor ; Yes, divisor > 2^32 - 1.
cmp edx, ebx ; Only one division needed (ECX = 0)?
jae r_two_divs ; Need two divisions.
div ebx ; EAX = quotient_lo
mov eax, edx ; EAX = remainder_lo
mov edx, ecx ; EDX = remainder_hi = 0
pop ebx ; Restore EBX per calling convention.
ret ; Done, return to caller.
r_two_divs:
mov ecx, eax ; Save dividend_lo in ECX.
mov eax, edx ; Get dividend_hi.
xor edx, edx ; Zero-extend it into EDX:EAX.
div ebx ; EAX = quotient_hi, EDX = intermediate remainder
mov eax, ecx ; EAX = dividend_lo
div ebx ; EAX = quotient_lo
mov eax, edx ; EAX = remainder_lo
xor edx, edx ; EDX = remainder_hi = 0
pop ebx ; Restore EBX as per calling convention.
ret ; Done, return to caller.
r_big_divisor:
push edi ; Save EDI as per calling convention.
mov edi, ecx ; Save divisor_hi.
shr edx, 1 ; Shift both divisor and dividend right
rcr eax, 1 ; by 1 bit.
ror edi, 1
rcr ebx, 1
bsr ecx, ecx ; ECX = number of remaining shifts
shrd ebx, edi, cl ; Scale down divisor and dividend such
shrd eax, edx, cl ; that divisor is less than 2^32
shr edx, cl ; (that is, it fits in EBX).
rol edi, 1 ; Undo the earlier ROR: EDI = original divisor_hi.
div ebx ; Compute quotient.
mov ebx, [esp+12] ; dividend low word
mov ecx, eax ; Save quotient.
imul edi, eax ; quotient * divisor high word (low only)
mul DWORD PTR [esp+20] ; quotient * divisor low word
add edx, edi ; EDX:EAX = quotient * divisor
sub ebx, eax ; dividend_lo - (quot.*divisor)_lo
mov ecx, [esp+16] ; dividend_hi