AMD 250 Computer Hardware User Manual


 
Chapter 6 Branch Optimizations 139
Software Optimization Guide for AMD64 Processors
25112 Rev. 3.06 September 2005
Example 3: C Code
float x, z, r, res;
z = fabs(x)
if (z < 0.575) {
res = r;
} else {
res = PI / 2 - 2 * r;
}
Example 3: 3DNow!™ Code
; Branch-free form of: res = (abs(x) < 0.575) ? r : pi / 2 - 2 * r
; Both candidate results are computed, then merged with a compare mask.
; In:  MM0 = x
;      MM1 = r
; Out: MM0 = res
; Overwrites: MM1, MM2, MM3, MM7
; mabs, bnd and pio2 are memory operands defined outside this excerpt.
movq mm7, mabs ; Mask for absolute value (presumably clears the sign bits; confirm at mabs)
pand mm0, mm7 ; z = abs(x)
movq mm2, bnd ; 0.575
pcmpgtd mm2, mm0 ; mm2 = z < 0.575 ? 0xffffffff : 0 (signed integer compare; matches float order only because both operands are non-negative)
movq mm3, pio2 ; pi / 2
movq mm0, mm1 ; Save r; z is no longer needed once the mask exists.
pfadd mm1, mm1 ; 2 * r
pfsubr mm1, mm3 ; mm1 = mm3 - mm1 = pi / 2 - 2 * r (reversed subtract)
pand mm0, mm2 ; z < 0.575 ? r : 0
pandn mm2, mm1 ; mm2 = ~mm2 & mm1 = z < 0.575 ? 0 : pi / 2 - 2 * r
por mm0, mm2 ; res = z < 0.575 ? r : pi / 2 - 2 * r
Example 4: C Code
#define PI 3.14159265358979323
float x, z, r, res;
/* 0 <= r <= PI / 4 */
z = abs(x)
if (z < 1) {
res = r;
} else {
res = PI / 2 - r;
}
Example 4: 3DNow!™ Code
; Branch-free form of: res = (abs(x) < 1) ? r : pi / 2 - r
; Relies on the precondition 0 <= r <= pi / 4 from the C code, which lets
; a single pfmax do the final selection instead of pand/pandn/por.
; In:  MM0 = x
;      MM1 = r
; Out: MM1 = res
; Overwrites: MM0, MM4, MM5, MM6
; mabs, one and pio2 are memory operands defined outside this excerpt.
movq mm5, mabs ; Mask to clear sign bit
movq mm6, one ; 1.0
pand mm0, mm5 ; z = abs(x)
pcmpgtd mm6, mm0 ; mm6 = z < 1 ? 0xffffffff : 0 (signed integer compare; matches float order only because both operands are non-negative)
movq mm4, pio2 ; pi / 2
pfsub mm4, mm1 ; pi / 2 - r
pandn mm6, mm4 ; mm6 = ~mm6 & mm4 = z < 1 ? 0 : pi / 2 - r
pfmax mm1, mm6 ; res = z < 1 ? r : pi / 2 - r (max(r, 0) = r since r >= 0; pi / 2 - r >= r since r <= pi / 4)