Chapter 2 C and C++ Source-Level Optimizations 23
Software Optimization Guide for AMD64 Processors
25112 Rev. 3.06 September 2005
/*
 * Add two unsigned 16.16 fixed-point values.
 * The operands are combined through their `whole` members, so the sum is
 * formed with a single unsigned addition and returned by value.
 */
__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y) {
    FIXED_U_16_16 sum;

    sum.whole = x.whole + y.whole;
    return sum;
}
/*
 * Return the integer portion of an unsigned 16.16 fixed-point value.
 * The value is read from the `parts.intg` field and widened to unsigned int.
 * Reading this narrower field right after `whole` has been stored is the
 * access pattern behind the mismatched loads in the object-code variants.
 */
__inline unsigned int fixed_int(FIXED_U_16_16 x) {
    unsigned int integer_part = (unsigned int)x.parts.intg;

    return integer_part;
}
...
/* Fixed-point operands; y is both an input and the destination of the sum. */
FIXED_U_16_16 y, z;
/* Receives the integer part of the updated y. */
unsigned int q;
...
label1:
/* y += z; the object code for this statement stores the sum as one DWORD at [y]. */
y = fixed_add (y, z);
/* Reads the integer field of y back immediately after that store. */
q = fixed_int (y);
label2:
...
The object code generated for the source code between label1 and label2 typically follows one of
these two variants:
; Variant 1
; The sum is stored to [y] as a DWORD and then reloaded as a DWORD from
; [y+2]. The load has the same size as the store but a different start
; address, so the stored data is not forwarded to the load in the LSU.
mov edx, DWORD PTR [z] ; edx = z
mov eax, DWORD PTR [y] ; -+
add eax, edx ; |
mov DWORD PTR [y], eax ; |
mov EAX, DWORD PTR [y+2] ; <+ Address mismatch--no forwarding in LSU
and EAX, 0FFFFh ; keep only the low 16 bits of the DWORD loaded from [y+2]
mov DWORD PTR [q], eax ; q = masked value
; Variant 2
; The sum is stored to [y] as a DWORD and then reloaded with a zero-extending
; WORD load from [y+2]. The load differs from the store in both size and
; start address, so the stored data is not forwarded to the load in the LSU.
mov edx, DWORD PTR [z] ; edx = z
mov eax, DWORD PTR [y] ; -+
add eax, edx ; |
mov DWORD PTR [y], eax ; |
movzx eax, WORD PTR [y+2] ; <+ Size and address mismatch--no forwarding in LSU
mov DWORD PTR [q], eax ; q = zero-extended WORD
Listing 6. Preferred
/*
 * Unsigned 16.16 fixed-point value.
 * `whole` and `parts` overlay the same storage, so the value can be accessed
 * either in one piece or as separate fraction and integer fields.
 * NOTE(review): the field comments imply a 32-bit unsigned int, a 16-bit
 * unsigned short and a little-endian layout -- confirm for other targets.
 */
typedef union {
unsigned int whole; /* Entire value: integer and fraction together. */
struct {
unsigned short frac; /* Lower 16 bits are fraction. */
unsigned short intg; /* Upper 16 bits are integer. */
} parts;
} FIXED_U_16_16;
__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y) {