Chapter 8 Integer Optimizations 177
Software Optimization Guide for AMD64 Processors
25112 Rev. 3.06 September 2005
sub eax, esi ; If (remainder < 0),
sbb edx, esi ; compute 2's complement of result.
pop edi ; Restore EDI as per calling convention.
pop esi ; Restore ESI as per calling convention.
pop ebx ; Restore EBX as per calling convention.
ret ; Done, return to caller.
sr_two_divs:
; 64-bit by 32-bit remainder using two chained DIVs (long division in
; base 2^32).
; In:       EDX:EAX = dividend, EBX = 32-bit divisor.
; Out:      EDX:EAX = remainder (EDX is always 0); exits via sr_makesign.
; Clobbers: ECX, flags. Both quotient halves are discarded.
        mov     ecx, eax                ; Park dividend_lo while EAX is busy.
        mov     eax, edx                ; Step 1 divides 0:dividend_hi ...
        xor     edx, edx
        div     ebx                     ; ... leaving EDX = dividend_hi mod divisor.
        mov     eax, ecx                ; Step 2 divides EDX:dividend_lo. EDX < EBX
        div     ebx                     ; here, so this DIV cannot overflow.
        mov     eax, edx                ; The remainder is below 2^32:
        xor     edx, edx                ; return it zero-extended in EDX:EAX.
        jmp     sr_makesign             ; Give the remainder its sign.
sr_big_divisor:
; Remainder for a divisor that does not fit in 32 bits.
;
; In:       EDX:EAX = dividend, ECX:EBX = divisor (see the stores below).
;           NOTE(review): BSR is undefined for a zero source, so ECX is
;           presumably guaranteed nonzero by the branch that reaches this
;           label (outside this excerpt) - confirm.
; Out:      EDX:EAX = remainder, ESP back at its entry value. Execution
;           continues with whatever follows this block.
; Clobbers: EBX, ECX, EDI, flags. ESI is not touched.
;
; Method: shift dividend and divisor right by bsr(divisor_hi)+1 bits so the
; divisor fits in EBX, take one DIV as a quotient estimate, multiply the
; estimate by the full divisor and subtract that from the dividend. If the
; difference is negative, the divisor is added back once.
        sub     esp, 16                 ; Reserve four dword locals.
        mov     [esp], eax              ; [esp]    = dividend_lo
        mov     [esp+4], ebx            ; [esp+4]  = divisor_lo
        mov     [esp+8], edx            ; [esp+8]  = dividend_hi
        mov     [esp+12], ecx           ; [esp+12] = divisor_hi
        mov     edi, ecx                ; EDI = divisor_hi (ECX becomes the shift count).
        shr     edx, 1                  ; Dividend EDX:EAX >>= 1.
        rcr     eax, 1
        ror     edi, 1                  ; Divisor EDI:EBX >>= 1. ROR rather than SHR keeps
        rcr     ebx, 1                  ; bit 0 of EDI so the ROL below can restore it.
        bsr     ecx, ecx                ; ECX = index of highest set bit of divisor_hi
                                        ;     = number of remaining shifts.
        shrd    ebx, edi, cl            ; Scale the divisor down until it fits in EBX,
        shrd    eax, edx, cl            ; and scale the dividend
        shr     edx, cl                 ; by the same amount.
        rol     edi, 1                  ; EDI = original divisor_hi again.
        div     ebx                     ; EAX = quotient estimate. It is only needed for
                                        ; the multiply below, so it is not saved.
        mov     ebx, [esp]              ; EBX = dividend_lo
        imul    edi, eax                ; EDI = low 32 bits of quotient * divisor_hi
        mul     DWORD PTR [esp+4]       ; EDX:EAX = quotient * divisor_lo
        add     edx, edi                ; EDX:EAX = low 64 bits of quotient * divisor
        sub     ebx, eax                ; ECX:EBX = dividend - quotient * divisor.
        mov     ecx, [esp+8]            ; (MOV leaves the borrow from SUB intact.)
        sbb     ecx, edx                ; CF set if that difference is negative.
        sbb     eax, eax                ; EAX = difference < 0 ? 0FFFFFFFFh : 0
        mov     edx, [esp+12]           ; divisor_hi
        and     edx, eax                ; EDX = difference < 0 ? divisor_hi : 0
        and     eax, [esp+4]            ; EAX = difference < 0 ? divisor_lo : 0
        add     eax, ebx                ; remainder_lo; CF = carry out of the low dword.
        adc     edx, ecx                ; remainder_hi. The carry must be included: e.g.
                                        ; 3FFFFFFFDh mod 1FFFFFFFFh = 1FFFFFFFEh, but a
                                        ; plain ADD here would produce 0FFFFFFFEh.
        add     esp, 16                 ; Release the locals.