[Openmcl-cvs-notifications] r15156 - in /trunk/source: compiler/X86/x86-asm.lisp compiler/X86/x862.lisp level-0/X86/X8632/x8632-misc.lisp

gb at clozure.com gb at clozure.com
Mon Dec 26 00:54:16 CST 2011


Author: gb
Date: Mon Dec 26 00:54:15 2011
New Revision: 15156

Log:
Define MOVDQU; as far as I can tell, neither it nor MOVDQA requires a 64-bit CPU.

In X862-%NATURAL-LOGAND, if one argument is a fixnum constant, the
result also fits in a fixnum, so it is boxed directly when the target
is a node register.

%COPY-PTR-TO-IVECTOR: copy at least 32 (possibly 128) bits at a
time in some (possibly common) cases.

Modified:
    trunk/source/compiler/X86/x86-asm.lisp
    trunk/source/compiler/X86/x862.lisp
    trunk/source/level-0/X86/X8632/x8632-misc.lisp

Modified: trunk/source/compiler/X86/x86-asm.lisp
==============================================================================
--- trunk/source/compiler/X86/x86-asm.lisp (original)
+++ trunk/source/compiler/X86/x86-asm.lisp Mon Dec 26 00:54:15 2011
@@ -1852,12 +1852,20 @@
      #x0f7e #o000 #x0 #x66)
 =

    ;; movdqa
-   (def-x86-opcode (movdqa :cpu64)  ((:regxmm :insert-xmm-reg) (:anymem :i=
nsert-memory))
+   (def-x86-opcode movdqa  ((:regxmm :insert-xmm-reg) (:anymem :insert-mem=
ory))
      #x0f7f #o300 #x0 #x66)
-   (def-x86-opcode (movdqa :cpu64) ((:anymem :insert-memory) (:regxmm :ins=
ert-xmm-reg)) =

+   (def-x86-opcode movdqa ((:anymem :insert-memory) (:regxmm :insert-xmm-r=
eg)) =

      #x0f6f #o000 #x0 #x66)
     =

-
+   (def-x86-opcode movdqu  ((:regxmm :insert-xmm-reg) (:anymem :insert-mem=
ory))  ; xmm register -> memory form
+     #x0f7f #o300 #x0 #xf3)  ; same fields as the MOVDQA form above, except #xf3 where it has #x66
+   (def-x86-opcode movdqu ((:anymem :insert-memory) (:regxmm :insert-xmm-r=
eg)) =

+     #x0f6f #o000 #x0 #xf3)  ; memory -> xmm register form; again only the last field differs from MOVDQA
+    =

+
+   ;; sign-extending mov  NOTE(review): this added form repeats the MOVSBL form just below
+   (def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-=
reg))
+     #x0fbe #o300 0)
    ;; sign-extending mov
    (def-x86-opcode movsbl ((:reg8 :insert-modrm-rm) (:reg32 :insert-modrm-=
reg))
      #x0fbe #o300 0)

Modified: trunk/source/compiler/X86/x862.lisp
==============================================================================
--- trunk/source/compiler/X86/x862.lisp (original)
+++ trunk/source/compiler/X86/x862.lisp Mon Dec 26 00:54:15 2011
@@ -10596,7 +10596,10 @@
               (with-imm-target () (other-reg :natural)
                 (x862-one-targeted-reg-form seg other other-reg)
                 (! %natural-logand-c  other-reg constant)
-                (<- other-reg))))
+                (if (and (typep constant *nx-target-fixnum-type*)
+                         (node-reg-p vreg))
+                  (! box-fixnum vreg other-reg)
+                  (<- other-reg)))))
           (^))))))
 =

 (defx862 x862-natural-shift-right natural-shift-right (seg vreg xfer num a=
mt)

Modified: trunk/source/level-0/X86/X8632/x8632-misc.lisp
==============================================================================
--- trunk/source/level-0/X86/X8632/x8632-misc.lisp (original)
+++ trunk/source/level-0/X86/X8632/x8632-misc.lisp Mon Dec 26 00:54:15 2011
@@ -20,6 +20,57 @@
 ;;; Depending on alignment, it might make sense to move more than
 ;;; a byte at a time.
 ;;; Does no arg checking of any kind.  Really.
+(defun %copy-ptr-to-ivector (src src-byte-offset dest dest-byte-offset nby=
tes)  ; picks the -32bit or -8bit copier from the alignment tests below
+  (declare (fixnum src-byte-offset dest-byte-offset nbytes)
+           (optimize (speed 3) (safety 0)))
+  (let* ((ptr-align (logand 7 (%ptr-to-int src))))  ; low 3 bits of (%ptr-to-int src)
+    (declare (type (mod 8) ptr-align))
+    (if (and (=3D 0 (logand nbytes 3))               ; NBYTES is a multiple of 4
+             (=3D 0 (logand dest-byte-offset 3))     ; DEST-BYTE-OFFSET is a multiple of 4
+             (=3D 0 (logand (the fixnum (+ ptr-align src-byte-offset)) 3)))  ; ptr-align + SRC-BYTE-OFFSET is too
+      (%copy-ptr-to-ivector-32bit src src-byte-offset dest dest-byte-offse=
t nbytes)  ; all three tests passed
+      (%copy-ptr-to-ivector-8bit src src-byte-offset dest dest-byte-offset=
 nbytes))  ; otherwise fall back to the -8bit version
+    dest))  ; DEST is returned on both paths
+
+;;; Caller checks that NBYTES, DEST-BYTE-OFFSET and the source address
+;;; are multiples of 4, so the unboxed DEST-BYTE-OFFSET is still a fixnum.
+(defx8632lapfunction %copy-ptr-to-ivector-32bit ((psrc 12)  ; SRC, read from the stack
+                                                 (psrc-byte-offset 8)  ; SRC-BYTE-OFFSET, read from the stack
+                                                 (pdest 4)  ; DEST, read from the stack
+                                                 #|(ra 0)|#
+                                                 (dest-byte-offset arg_y)  ; register arg, unboxed in place below
+                                                 (nbytes arg_z))  ; register arg, never shifted (stays boxed)
+
+  (let ((foreign-ptr imm0)		;raw foreign pointer
+	(ivector temp1))                ;destination ivector
+    (movl (@ psrc (% esp)) (% temp1))  ; temp1 holds SRC for the moment
+    (movl (@ psrc-byte-offset (% esp)) (% foreign-ptr))  ; foreign-ptr = SRC-BYTE-OFFSET
+    (sarl ($ x8632::word-shift)(% foreign-ptr))  ; unbox the offset
+    (addl (@ x8632::macptr.address (% temp1)) (% foreign-ptr))  ; add the macptr.address field of SRC
+    (movl (@ pdest (% esp)) (% ivector))  ; temp1 now holds DEST instead of SRC
+    (sarl ($ x8632::word-shift) (% dest-byte-offset))  ; unbox DEST-BYTE-OFFSET in place
+    (jmp @test16)  ; enter the 16-byte loop at its test
+    @loop16  ; copies 16 bytes per pass through xmm0
+    (movdqu (@ (% foreign-ptr)) (% xmm0))  ; load from the source address
+    (movdqu (% xmm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-o=
ffset)))
+    (addl ($ 16) (% foreign-ptr))  ; advance the source address
+    (addl ($ 16) (% dest-byte-offset))  ; advance the destination offset
+    (subl ($ '16) (% nbytes))  ; NBYTES is adjusted with the quoted constant, unlike the raw 16 above
+    @test16
+    (cmpl ($ '16) (% nbytes))
+    (jge @loop16)  ; signed test: keep going while NBYTES >= 16
+    (testl (% nbytes) (% nbytes))
+    (je @done)  ; nothing left to copy
+    @loop4  ; copies 4 bytes per pass through mm0
+    (movd (@ (% foreign-ptr)) (% mm0))  ; load from the source address
+    (movd (% mm0) (@ x8632::misc-data-offset (% ivector) (% dest-byte-offs=
et)))
+    (addl ($ 4) (% foreign-ptr))  ; advance the source address
+    (addl ($ 4) (% dest-byte-offset))  ; advance the destination offset
+    (subl ($ '4) (% nbytes))
+    (jne @loop4)  ; repeat until NBYTES reaches zero
+    @done
+    (movl (% ivector) (% arg_z))  ; the ivector is placed in arg_z before returning
+    (single-value-return 5)))  ; NOTE(review): 5 is presumably the total argument count - confirm
 =

 ;;; I went ahead and used the INC and DEC instructions here, since
 ;;; they're shorter than the equivalent ADD/SUB.  Intel's optimization
@@ -28,12 +79,12 @@
 ;;; these functions end up being hot, replacing the inc/dec insns
 ;;; might be worth a try.
 =

-(defx8632lapfunction %copy-ptr-to-ivector ((src 12)
-					   (src-byte-offset 8)
-					   (dest 4)
-					   #|(ra 0)|#
-					   (dest-byte-offset arg_y)
-					   (nbytes arg_z))
+(defx8632lapfunction %copy-ptr-to-ivector-8bit ((src 12)
+                                                (src-byte-offset 8)
+                                                (dest 4)
+                                                #|(ra 0)|#
+                                                (dest-byte-offset arg_y)
+                                                (nbytes arg_z))
   (mark-as-imm temp0)
   (mark-as-imm arg_y)
   (let ((foreign-ptr temp0)		;raw foreign pointer



More information about the Openmcl-cvs-notifications mailing list