[Openmcl-cvs-notifications] r15156 - in /trunk/source: compiler/X86/x86-asm.lisp compiler/X86/x862.lisp level-0/X86/X8632/x8632-misc.lisp
gb at clozure.com
gb at clozure.com
Mon Dec 26 00:54:16 CST 2011
Author: gb
Date: Mon Dec 26 00:54:15 2011
New Revision: 15156
Log:
Define MOVDQU; AFAICT, neither it nor MOVDQA require a 64-bit CPU.
In X862-%NATURAL-LOGAND, if one argument is a fixnum constant the
result will fit in a fixnum if it needs to.
%COPY-PTR-TO-IVECTOR: copy at least 32 (possibly 128) bits at a
time in some (possibly common) cases.
Modified:
trunk/source/compiler/X86/x86-asm.lisp
trunk/source/compiler/X86/x862.lisp
trunk/source/level-0/X86/X8632/x8632-misc.lisp
Modified: trunk/source/compiler/X86/x86-asm.lisp
==============================================================================
--- trunk/source/compiler/X86/x86-asm.lisp (original)
+++ trunk/source/compiler/X86/x86-asm.lisp Mon Dec 26 00:54:15 2011
@@ -1852,12 +1852,20 @@
#x0f7e #o000 #x0 #x66)

;; movdqa
- (def-x86-opcode (movdqa :cpu64) ((:regxmm :insert-xmm-reg) (:anymem :insert-memory))
+ (def-x86-opcode movdqa ((:regxmm :insert-xmm-reg) (:anymem :insert-memory))
#x0f7f #o300 #x0 #x66)
- (def-x86-opcode (movdqa :cpu64) ((:anymem :insert-memory) (:regxmm :insert-xmm-reg))
+ (def-x86-opcode movdqa ((:anymem :insert-memory) (:regxmm :insert-xmm-reg))
#x0f6f #o000 #x0 #x66)

-
+ (def-x86-opcode movdqu ((:regxmm :insert-xmm-reg) (:anymem :insert-memory))
+ #x0f7f #o300 #x0 #xf3)
+ (def-x86-opcode movdqu ((:anymem :insert-memory) (:regxmm :insert-xmm-reg))
+ #x0f6f #o000 #x0 #xf3)
+
+
+ ;; sign-extending mov
+ (def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-reg))
+ #x0fbe #o300 0)
;; sign-extending mov
(def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-reg))
#x0fbe #o300 0)
Modified: trunk/source/compiler/X86/x862.lisp
==============================================================================
--- trunk/source/compiler/X86/x862.lisp (original)
+++ trunk/source/compiler/X86/x862.lisp Mon Dec 26 00:54:15 2011
@@ -10596,7 +10596,10 @@
(with-imm-target () (other-reg :natural)
(x862-one-targeted-reg-form seg other other-reg)
(! %natural-logand-c other-reg constant)
- (<- other-reg))))
+ (if (and (typep constant *nx-target-fixnum-type*)
+ (node-reg-p vreg))
+ (! box-fixnum vreg other-reg)
+ (<- other-reg)))))
(^))))))

(defx862 x862-natural-shift-right natural-shift-right (seg vreg xfer num amt)
Modified: trunk/source/level-0/X86/X8632/x8632-misc.lisp
==============================================================================
--- trunk/source/level-0/X86/X8632/x8632-misc.lisp (original)
+++ trunk/source/level-0/X86/X8632/x8632-misc.lisp Mon Dec 26 00:54:15 2011
@@ -20,6 +20,57 @@
;;; Depending on alignment, it might make sense to move more than
;;; a byte at a time.
;;; Does no arg checking of any kind. Really.
+(defun %copy-ptr-to-ivector (src src-byte-offset dest dest-byte-offset nbytes)
+ (declare (fixnum src-byte-offset dest-byte-offset nbytes)
+ (optimize (speed 3) (safety 0)))
+ (let* ((ptr-align (logand 7 (%ptr-to-int src))))
+ (declare (type (mod 8) ptr-align))
+ (if (and (= 0 (logand nbytes 3))
+ (= 0 (logand dest-byte-offset 3))
+ (= 0 (logand (the fixnum (+ ptr-align src-byte-offset)) 3)))
+ (%copy-ptr-to-ivector-32bit src src-byte-offset dest dest-byte-offset nbytes)
+ (%copy-ptr-to-ivector-8bit src src-byte-offset dest dest-byte-offset nbytes))
+ dest))
+
+;;; We can exploit the fact that SRC-BYTE-OFFSET and DEST-BYTE-OFFSET
+;;; are both multiples of 4 (and therefore still fixnums when unboxed).
+(defx8632lapfunction %copy-ptr-to-ivector-32bit ((psrc 12)
+ (psrc-byte-offset 8)
+ (pdest 4)
+ #|(ra 0)|#
+ (dest-byte-offset arg_y)
+ (nbytes arg_z))
+
+ (let ((foreign-ptr imm0) ;raw foreign pointer
+ (ivector temp1)) ;destination ivector
+ (movl (@ psrc (% esp)) (% temp1))
+ (movl (@ psrc-byte-offset (% esp)) (% foreign-ptr))
+ (sarl ($ x8632::word-shift)(% foreign-ptr))
+ (addl (@ x8632::macptr.address (% temp1)) (% foreign-ptr))
+ (movl (@ pdest (% esp)) (% ivector))
+ (sarl ($ x8632::word-shift) (% dest-byte-offset))
+ (jmp @test16)
+ @loop16
+ (movdqu (@ (% foreign-ptr)) (% xmm0))
+ (movdqu (% xmm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-offset)))
+ (addl ($ 16) (% foreign-ptr))
+ (addl ($ 16) (% dest-byte-offset))
+ (subl ($ '16) (% nbytes))
+ @test16
+ (cmpl ($ '16) (% nbytes))
+ (jge @loop16)
+ (testl (% nbytes) (% nbytes))
+ (je @done)
+ @loop4
+ (movd (@ (% foreign-ptr)) (% mm0))
+ (movd (% mm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-offset)))
+ (addl ($ 4) (% foreign-ptr))
+ (addl ($ 4) (% dest-byte-offset))
+ (subl ($ '4) (% nbytes))
+ (jne @loop4)
+ @done
+ (movl (% ivector) (% arg_z))
+ (single-value-return 5)))

;;; I went ahead and used the INC and DEC instructions here, since
;;; they're shorter than the equivalent ADD/SUB. Intel's optimization
@@ -28,12 +79,12 @@
;;; these functions end up being hot, replacing the inc/dec insns
;;; might be worth a try.

-(defx8632lapfunction %copy-ptr-to-ivector ((src 12)
- (src-byte-offset 8)
- (dest 4)
- #|(ra 0)|#
- (dest-byte-offset arg_y)
- (nbytes arg_z))
+(defx8632lapfunction %copy-ptr-to-ivector-8bit ((src 12)
+ (src-byte-offset 8)
+ (dest 4)
+ #|(ra 0)|#
+ (dest-byte-offset arg_y)
+ (nbytes arg_z))
(mark-as-imm temp0)
(mark-as-imm arg_y)
(let ((foreign-ptr temp0) ;raw foreign pointer
More information about the Openmcl-cvs-notifications
mailing list