AMD 250 Computer Hardware User Manual


 
Chapter 9 Optimizing with SIMD Instructions 235
Software Optimization Guide for AMD64 Processors
25112 Rev. 3.06 September 2005
; x*R00+y*R10+z*R20+w*R30]
movntq [ebx-16],mm3 ; Store lower quadword of transformed vertex.
pfadd mm5,mm1 ; MM5 = [x*R03+y*R13+z*R23+w*R33,
; x*R02+y*R12+z*R22+w*R32]
movntq [ebx-8],mm5 ; Store upper QWORD of transformed vertex.
dec ecx ; Decrement # of vertices to transform.
jnz rotate_vertices_loop
femms ; Clear MMX state.
sfence ; Finish all memory writes.
;==============================================================================
; INSTRUCTIONS BELOW RESTORE THE REGISTER STATE WITH WHICH THIS ROUTINE
; WAS ENTERED.
; REGISTERS EAX, ECX, EDX ARE CONSIDERED VOLATILE AND ASSUMED TO BE CHANGED
; WHILE THE REGISTERS BELOW MUST BE PRESERVED IF THE USER IS CHANGING THEM
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
;==============================================================================
ret
_matrix_x_vector_3dnow ENDP
_TEXT ENDS
END