Support User Manuals

AMD 250 Computer Hardware User Manual

Open as PDF

of 384

Chapter 2 C and C++ Source-Level Optimizations 23

Software Optimization Guide for AMD64 Processors

25112 Rev. 3.06 September 2005

/* Returns the sum of x and y. The addition is done once, on the `whole`
   members, and the result is written to z.whole and returned by value. */
__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y) {

FIXED_U_16_16 z;

z.whole = x.whole + y.whole;

return (z);

}

/* Returns the `parts.intg` member of x, converted to unsigned int.
   Note: this reads x through `parts.intg`, whereas fixed_add writes its
   result through `whole`. */
__inline unsigned int fixed_int(FIXED_U_16_16 x) {

return((unsigned int)(x.parts.intg));

}

...

FIXED_U_16_16 y, z;

unsigned int q;

...

label1:

y = fixed_add (y, z); /* y is overwritten with the sum of y and z */

q = fixed_int (y); /* q = y.parts.intg, read immediately after y was updated */

label2:

...

The object code generated for the source code between label1 and label2 typically follows one of these two variants:

; Variant 1
; First four instructions: y = fixed_add(y, z) -- 32-bit load of [y], add of
; [z], 32-bit store back to [y].
; Last three instructions: q = fixed_int(y) -- 32-bit load starting at [y+2],
; masked to its low 16 bits, then stored to [q].

mov edx, DWORD PTR [z]

mov eax, DWORD PTR [y] ; -+

add eax, edx ; |

mov DWORD PTR [y], eax ; |

mov EAX, DWORD PTR [y+2] ; <+ Address mismatch--no forwarding in LSU

and EAX, 0FFFFh

mov DWORD PTR [q], eax

; Variant 2
; First four instructions: y = fixed_add(y, z) -- 32-bit load of [y], add of
; [z], 32-bit store back to [y].
; Last two instructions: q = fixed_int(y) -- 16-bit load from [y+2],
; zero-extended into eax by movzx, then stored to [q].

mov edx, DWORD PTR [z]

mov eax, DWORD PTR [y] ; -+

add eax, edx ; |

mov DWORD PTR [y], eax ; |

movzx eax, WORD PTR [y+2] ; <+ Size and address mismatch--no forwarding in LSU

mov DWORD PTR [q], eax

Listing 6. Preferred

/* Unsigned fixed-point value. `whole` and `parts` share the same storage,
   so the value can be accessed either as one unsigned int or as its two
   unsigned short halves (fraction and integer, per the member comments).
   NOTE(review): the overlay assumes unsigned int is exactly twice the width
   of unsigned short -- confirm for the target compiler. */
typedef union {

unsigned int whole;

struct {

unsigned short frac; /* Lower 16 bits are fraction. */

unsigned short intg; /* Upper 16 bits are integer. */

} parts;

} FIXED_U_16_16;

__inline FIXED_U_16_16 fixed_add(FIXED_U_16_16 x, FIXED_U_16_16 y) {

previous next