[Openmcl-cvs-notifications] r15236 - in /trunk/source: level-0/l0-io.lisp level-1/l1-error-system.lisp level-1/l1-init.lisp level-1/l1-sysio.lisp level-1/l1-unicode.lisp lib/ccl-export-syms.lisp lib/macros.lisp

gb at clozure.com gb at clozure.com
Thu Mar 8 19:49:26 CST 2012


Author: gb
Date: Thu Mar  8 19:49:25 2012
New Revision: 15236

Log:
Change the initial-values *TERMINAL-CHARACTER-ENCODING-NAME* and
*DEFAULT-FILE-CHARACTER-ENCODING* to :UTF-8, mostly for the benefit of
the Init-File-Editing-Impaired.  (I've resolved not to make fun of
the IFEI.)  Note that this may require changes to startup scripts etc.

Define new conditions CCL:DECODING-PROBLEM and CCL:ENCODING-PROBLEM.
Signal these conditions (via SIGNAL) when decoding characters
from/enoding them to a stream, pointer or octet vector and a substitution
or replacement character would be used.

New macros (CCL:WITH-DECODING-PROBLEMS-AS-ERRORS &body body) and
(CCL:WITH-ENCODING-PROBLEMS-AS-ERRORS &body body) signal the corresponding
conditions as ERRORs if they are signaled during execution of the body.

(Arguably) fixes ticket:749.

FILE-STRING-LENGTH checks to see if the encoding wants to use a
byte-order-mark before subtracting the length of an encoded BOM
from the encoded string length if the file is at its beginning.

Modified:
    trunk/source/level-0/l0-io.lisp
    trunk/source/level-1/l1-error-system.lisp
    trunk/source/level-1/l1-init.lisp
    trunk/source/level-1/l1-sysio.lisp
    trunk/source/level-1/l1-unicode.lisp
    trunk/source/lib/ccl-export-syms.lisp
    trunk/source/lib/macros.lisp

Modified: trunk/source/level-0/l0-io.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/level-0/l0-io.lisp (original)
+++ trunk/source/level-0/l0-io.lisp Thu Mar  8 19:49:25 2012
@@ -161,7 +161,7 @@
                                           (the fixnum
                                             (ash (the fixnum (logxor 3rd-u=
nit #x80)) 6))
                                           (the fixnum (logxor 4th-unit #x8=
0)))))))))))))))))
-        (setf (schar string i) (or char #\Replacement_Character))))))
+        (setf (schar string i) (or char (note-vector-decoding-problem poin=
ter index :utf-8)))))))
 =

 (defun utf-8-length-of-memory-encoding (pointer noctets start)
   (do* ((i start)

Modified: trunk/source/level-1/l1-error-system.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/level-1/l1-error-system.lisp (original)
+++ trunk/source/level-1/l1-error-system.lisp Thu Mar  8 19:49:25 2012
@@ -50,6 +50,31 @@
 (define-condition process-reset (thread-condition)
   ((kill :initarg :kill :initform nil :reader process-reset-kill)))
 =

+(define-condition encoding-problem (condition)
+  ((character :initarg :character :reader encoding-problem-character)
+   (destination :initarg :destination :reader encoding-problem-destination)
+   (encoding-name :initarg :encoding-name :reader encoding-problem-encodin=
g-name))
+  (:report
+   (lambda (c s)
+     (with-slots (character destination encoding-name) c
+       (format s "Character ~c can't be written to ~a in encoding ~a."
+               character destination encoding-name)))))
+
+
+
+(define-condition decoding-problem (condition)
+  ((source :initarg :source :reader decoding-problem-source)
+   (position :initarg :position :reader decoding-problem-position)
+   (encoding-name :initarg :encoding-name :reader decoding-problem-encodin=
g-name))
+  (:report (lambda (c stream)
+             (with-slots (source position encoding-name) c
+               (format stream "Contents of ~a" source)
+               (when position
+                 (format stream ", near ~a ~d," (if (typep source 'stream)=
 "positition" "index") position))
+               (format stream " don't represent a valid character in ~s." =
encoding-name)))))
+
+
+             =

 =

 (define-condition print-not-readable (error)
   ((object :initarg :object :reader print-not-readable-object)

Modified: trunk/source/level-1/l1-init.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/level-1/l1-init.lisp (original)
+++ trunk/source/level-1/l1-init.lisp Thu Mar  8 19:49:25 2012
@@ -312,7 +312,7 @@
 =

 =

 =

-(defvar *terminal-character-encoding-name* nil
+(defvar *terminal-character-encoding-name* :utf-8
   "NIL (implying :ISO-8859-1), or a keyword which names a defined
 character encoding to be used for *TERMINAL-IO* and other predefined
 initial streams.  The value of *TERMINAL-CHARACTER-ENCODING-NAME*

Modified: trunk/source/level-1/l1-sysio.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/level-1/l1-sysio.lisp (original)
+++ trunk/source/level-1/l1-sysio.lisp Thu Mar  8 19:49:25 2012
@@ -84,7 +84,7 @@
 =

 (defvar *default-external-format* :unix)
 =

-(defvar *default-file-character-encoding* nil)
+(defvar *default-file-character-encoding* :utf-8)
 =

 (defmethod default-character-encoding ((domain (eql :file)))
   *default-file-character-encoding*)
@@ -930,7 +930,8 @@
                 (* (count #\Newline object :start start :end end)
                    (file-string-length stream #\Return))
                 0))
-           (if (eql (file-position stream) 0)
-             0
-             (length (character-encoding-bom-encoding encoding)))))))))
+           (if (and (eql (file-position stream) 0)
+                    (character-encoding-use-byte-order-mark encoding))
+             (length (character-encoding-bom-encoding encoding))
+             0)))))))
   =


Modified: trunk/source/level-1/l1-unicode.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/level-1/l1-unicode.lisp (original)
+++ trunk/source/level-1/l1-unicode.lisp Thu Mar  8 19:49:25 2012
@@ -153,6 +153,42 @@
   (print-unreadable-object (ce stream :type t :identity t)
     (format stream "~a" (character-encoding-name ce))))
 =

+(defun note-stream-decoding-problem (stream)
+  (let* ((source (if (typep stream 'ioblock)
+                   (ioblock-stream stream)
+                   stream))
+         (position (stream-position source))
+         (encoding-name
+          (character-encoding-name
+           (external-format-character-encoding (stream-external-format sou=
rce)))))
+    (signal (make-condition 'decoding-problem
+                            :source source
+                            :position position
+                            :encoding-name encoding-name))
+    #\Replacement_Character))
+
+(defun note-vector-decoding-problem (vector index encoding)
+  (signal (make-condition 'decoding-problem
+                          :source vector
+                          :position index
+                          :encoding-name (let* ((enc (if (typep encoding '=
character-encoding)
+                                                       encoding
+                                                       (lookup-character-e=
ncoding encoding))))
+                                           (if enc (character-encoding-nam=
e enc) encoding))))
+  #\Replacement_Character)
+
+(defun note-encoding-problem (char destination encoding code)
+  (signal (make-condition 'encoding-problem
+                          :character char
+                          :destination (if (typep destination 'ioblock)
+                                         (ioblock-stream destination)
+                                         destination)
+                          :encoding-name (let* ((enc (if (typep encoding '=
character-encoding)
+                                                       encoding
+                                                       (lookup-character-e=
ncoding encoding))))
+                                           (if enc (character-encoding-nam=
e enc) encoding))))
+  code)
+                          =

 ;;; N.B.  (ccl:nfunction <name> (lambda (...) ...)) is just  like
 ;;;       (cl:function (lambda (...) ...)), except that the resulting
 ;;; function will have "name" <name> (this is often helpful when debugging=
.)
@@ -204,7 +240,7 @@
      (let* ((code (char-code char)))
        (declare (type (mod #x110000) code))
        (if (>=3D code 256)
-         (setq code (char-code #\Sub)))
+         (setq code (note-encoding-problem char stream :iso-8859-1 (char-c=
ode #\Sub))))
        (funcall write-function stream code)
        1)))
   :stream-decode-function
@@ -226,7 +262,7 @@
               (code (char-code char)))
          (declare (type (mod #x110000) code))
          (if (>=3D code 256)
-           (setq code (char-code #\Sub)))
+           (setq code (note-encoding-problem char vector :iso-8859-1 (char=
-code #\Sub))))
          (progn
            (setf (aref vector idx) code)
            (incf idx))))))
@@ -246,10 +282,11 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i))))
+       (let* ((char (schar string i))
+              (code (char-code char)))
          (declare (type (mod #x110000) code))
          (if (>=3D code 256)
-           (setq code (char-code #\Sub)))
+           (setq code (note-encoding-problem char pointer :iso-8859-1 (cha=
r-code #\Sub))))
          (setf (%get-unsigned-byte pointer idx) code)
          (incf idx)))))
   :memory-decode-function
@@ -269,6 +306,7 @@
   #'8-bit-fixed-width-length-of-memory-encoding
   :decode-literal-code-unit-limit 256
   :encode-literal-char-code-limit 256
+
   )
 =

 (define-character-encoding :us-ascii
@@ -283,18 +321,18 @@
      (let* ((code (char-code char)))
        (declare (type (mod #x110000) code))
        (when (>=3D code 128)
-         (setq code (char-code #\Sub)))
+         (setq code (note-encoding-problem char stream :us-ascii (char-cod=
e #\Sub))))
        (funcall write-function stream code)
        1)))
   :stream-decode-function
   (nfunction
    ascii-stream-decode
    (lambda (1st-unit next-unit-function stream)
-     (declare (ignore next-unit-function stream)
+     (declare (ignore next-unit-function)
               (type (unsigned-byte 8) 1st-unit))
      (if (< 1st-unit 128)
        (code-char 1st-unit)
-       #\Replacement_Character)))
+       (note-stream-decoding-problem stream))))
   :vector-encode-function
   (nfunction
    ascii-vector-encode
@@ -307,7 +345,7 @@
               (code (char-code char)))
          (declare (type (mod #x110000) code))
          (if (>=3D code 128)
-           (setq code (char-code #\Sub)))
+           (setq code (note-encoding-problem char vector :us-ascii (char-c=
ode #\Sub))))
          (setf (aref vector idx) code)
          (incf idx)))))
   :vector-decode-function
@@ -320,19 +358,20 @@
           ((>=3D i noctets) index)
        (let* ((code (aref vector index)))
          (declare (type (unsigned-byte 8) code))
-         (when (>=3D code 128)
-           (setq code (char-code #\Sub)))
-         (setf (schar string i) (code-char code))))))
+         (setf (schar string i) (if (< code 128)
+                                  (code-char code)
+                                  (note-vector-decoding-problem vector ind=
ex :us-ascii)))))))
   :memory-encode-function
   (nfunction
    ascii-memory-encode
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i))))
+       (let* ((char (schar string i))
+              (code (char-code char)))
          (declare (type (mod #x110000) code))
          (if (>=3D code 128)
-           (setq code (char-code #\Sub)))
+           (setq code (note-encoding-problem char pointer :us-ascii (char-=
code #\Sub))))
          (setf (%get-unsigned-byte pointer idx) code)
          (incf idx)))))
   :memory-decode-function
@@ -345,7 +384,7 @@
        (let* ((code (%get-unsigned-byte pointer index)))
          (declare (type (unsigned-byte 8) code))
          (if (>=3D code 128)
-           (setf (schar string i) #\sub)
+           (setf (schar string i) (note-vector-decoding-problem pointer in=
dex :us-ascii))
            (setf (schar string i) (code-char code)))))))
   :octets-in-string-function
   #'8-bit-fixed-width-octets-in-string
@@ -447,7 +486,7 @@
                                       (the fixnum (- code #x2c0)))))))
                       =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-2 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -466,7 +505,8 @@
               (fixnum idx))
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                           ((< code #x180)
                            (svref *unicode-00a0-0180-to-iso-8859-2*
@@ -475,7 +515,7 @@
                            (svref *unicode-00c0-00e0-to-iso-8859-2*
                                   (the fixnum (- code #x2c0)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-2 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -497,7 +537,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                         ((< code #x180)
                          (svref *unicode-00a0-0180-to-iso-8859-2*
@@ -506,7 +547,7 @@
                          (svref *unicode-00c0-00e0-to-iso-8859-2*
                                 (the fixnum (- code #x2c0)))))))
        (declare (type (mod #x110000) code))
-       (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+       (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-proble=
m char pointer :iso-8859-2 (char-code #\Sub))))
        (1+ idx)))))
   :memory-decode-function
   (nfunction
@@ -618,7 +659,9 @@
                        (svref *unicode-2d8-2e0-to-iso-8859-3*
                               (the fixnum (- code #x2d8)))))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2
+                                          (note-encoding-problem
+                                           char stream :iso-8859-3 (char-c=
ode #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -651,7 +694,7 @@
                  =

                (the fixnum (- code #x2d8)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-3 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -673,7 +716,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                         ((< code #x100)
                          (svref *unicode-a0-100-to-iso-8859-3*
@@ -685,7 +729,7 @@
                          (svref *unicode-2d8-2e0-to-iso-8859-3*
                                 (the fixnum (- code #x2d8)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-3 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -797,7 +841,7 @@
                               (the fixnum (- code #x2c0)))))))
                       =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-4 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -826,7 +870,7 @@
                          (svref *unicode-2c0-2e0-to-iso-8859-4*
                                 (the fixnum (- code #x2c0)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-4 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -848,7 +892,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                         ((< code #x180)
                          (svref *unicode-a0-180-to-iso-8859-4*
@@ -857,7 +902,7 @@
                          (svref *unicode-2c0-2e0-to-iso-8859-4*
                                 (the fixnum (- code #x2c0)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-4 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -949,7 +994,7 @@
                               (the fixnum (- code #x400)))))))
                       =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-5 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -978,7 +1023,7 @@
                          (svref *unicode-400-460-to-iso-8859-5*
                                 (the fixnum (- code #x400)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-5 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1000,7 +1045,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                         ((< code #xb0)
                          (svref *unicode-a0-b0-to-iso-8859-5*
@@ -1009,7 +1055,7 @@
                          (svref *unicode-400-460-to-iso-8859-5*
                                 (the fixnum (- code #x400)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-5 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1098,7 +1144,7 @@
                               (the fixnum (- code #x608)))))))
                       =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-6 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -1127,7 +1173,7 @@
                          (svref *unicode-608-658-to-iso-8859-6*
                                 (the fixnum (- code #x608)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-6 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1149,7 +1195,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                         ((< code #xb0)
                          (svref *unicode-a0-b0-to-iso-8859-6*
@@ -1158,7 +1205,7 @@
                          (svref *unicode-608-658-to-iso-8859-6*
                                 (the fixnum (- code #x608)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-6 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1266,7 +1313,7 @@
                               (the fixnum (- code #x20ac)))))))
               =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-7 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -1301,7 +1348,7 @@
                        (svref *unicode-20ac-20b0-to-iso-8859-7*
                               (the fixnum (- code #x20ac)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-7 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1323,7 +1370,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #xc0)
                        (svref *unicode-a0-c0-to-iso-8859-7*
@@ -1338,7 +1386,7 @@
                        (svref *unicode-20ac-20b0-to-iso-8859-7*
                               (the fixnum (- code #x20ac)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-7 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1438,7 +1486,7 @@
                               (the fixnum (- code #x2008)))))))
               =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-8 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -1470,7 +1518,7 @@
                        (svref *unicode-2008-2018-to-iso-8859-8*
                               (the fixnum (- code #x2008)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-8 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1492,7 +1540,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #xf8)
                        (svref *unicode-a0-f8-to-iso-8859-8*
@@ -1504,7 +1553,7 @@
                        (svref *unicode-2008-2018-to-iso-8859-8*
                               (the fixnum (- code #x2008)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-8 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1587,7 +1636,7 @@
                               (the fixnum (- code #x118)))))))
               =

        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-9 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -1616,7 +1665,7 @@
                        (svref *unicode-118-160-to-iso-8859-9*
                               (the fixnum (- code #x118)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-9 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1638,7 +1687,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xd0) code)
                       ((< code #x100)
                        (svref *unicode-d0-100-to-iso-8859-9*
@@ -1647,7 +1697,7 @@
                        (svref *unicode-118-160-to-iso-8859-9*
                               (the fixnum (- code #x118)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-9 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1743,7 +1793,7 @@
                        (svref *unicode-a0-180-to-iso-8859-10*
                               (the fixnum (- code #xa0)))))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-10 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -1769,7 +1819,7 @@
                        (svref *unicode-a0-180-to-iso-8859-10*
                               (the fixnum (- code #xa0)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-10 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1791,13 +1841,14 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #x180)
                        (svref *unicode-a0-180-to-iso-8859-10*
                               (the fixnum (- code #xa0)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-10 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1838,13 +1889,13 @@
                             (not (and (>=3D code #xdb) (<=3D code #xde))))
                        (+ code #x0d60)))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-11 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
    iso-8859-11-stream-decode
    (lambda (1st-unit next-unit-function stream)
-     (declare (ignore next-unit-function stream)
+     (declare (ignore next-unit-function)
               (type (unsigned-byte 8) 1st-unit))
      (if (< 1st-unit #xa1)
        (code-char 1st-unit)
@@ -1853,7 +1904,7 @@
                 (not (and (>=3D 1st-unit #xe3b)
                           (<=3D 1st-unit #xe3e))))
          (code-char (- 1st-unit #xd60))
-         #\Replacement_Character))))
+         (note-stream-decoding-problem stream)))))
   :vector-encode-function
   (nfunction
    iso-8859-11-vector-encode
@@ -1869,7 +1920,7 @@
                             (not (and (>=3D code #xdb) (<=3D code #xde))))
                        (+ code #x0d60)))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-11 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -1889,20 +1940,21 @@
                           (not (and (>=3D 1st-unit #xe3b)
                                     (<=3D 1st-unit #xe3e))))
                    (code-char (- 1st-unit #xd60))
-                   #\Replacement_Character)))))))
+                   (note-vector-decoding-problem vector index :iso-8859-11=
))))))))
   :memory-encode-function
   (nfunction
    iso-8859-11-memory-encode
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa1) code)
                       ((and (<=3D code #xfb)
                             (not (and (>=3D code #xdb) (<=3D code #xde))))
                        (+ code #x0d60)))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-11 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -1921,7 +1973,7 @@
                           (not (and (>=3D 1st-unit #xe3b)
                                     (<=3D 1st-unit #xe3e))))
                    (code-char (- 1st-unit #xd60))
-                   #\Replacement_Character)))))))
+                   (note-vector-decoding-problem pointer index :iso-8859-1=
1))))))))
   :octets-in-string-function
   #'8-bit-fixed-width-octets-in-string
   :length-of-vector-encoding-function
@@ -2015,7 +2067,7 @@
                        (svref *unicode-2018-2020-to-iso-8859-13*
                               (the fixnum (- code #x2018)))))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-13 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -2045,7 +2097,7 @@
                        (svref *unicode-2018-2020-to-iso-8859-13*
                               (the fixnum (- code #x2018)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-13 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -2067,7 +2119,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #x180)
                        (svref *unicode-a0-180-to-iso-8859-13*
@@ -2077,7 +2130,7 @@
                        (svref *unicode-2018-2020-to-iso-8859-13*
                               (the fixnum (- code #x2018)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-13 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -2208,7 +2261,7 @@
                        (svref *unicode-1ef0-1ef8-to-iso-8859-14*
                               (the fixnum (- code #x1ef0)))))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-14 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -2246,7 +2299,7 @@
                        (svref *unicode-1ef0-1ef8-to-iso-8859-14*
                               (the fixnum (- code #x1ef0)))))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-14 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -2268,7 +2321,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #x100)
                        (svref *unicode-a0-100-to-iso-8859-14*
@@ -2286,7 +2340,7 @@
                        (svref *unicode-1ef0-1ef8-to-iso-8859-14*
                               (the fixnum (- code #x1ef0)))))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-14 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -2384,7 +2438,7 @@
                               (the fixnum (- code #x150))))
                       ((=3D code #x20ac) #xa4))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-15 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -2414,7 +2468,7 @@
                               (the fixnum (- code #x150))))
                       ((=3D code #x20ac) #xa4))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-15 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -2436,7 +2490,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #x100)
                        (svref *unicode-a0-100-to-iso-8859-15*
@@ -2446,7 +2501,7 @@
                               (the fixnum (- code #x150))))
                       ((=3D code #x20ac) #xa4))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-15 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -2559,7 +2614,7 @@
                               (the fixnum (- code #x2018))))
                       ((=3D code #x20ac) #xa4))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :iso-8859-16 (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -2592,7 +2647,7 @@
                               (the fixnum (- code #x2018))))
                       ((=3D code #x20ac) #xa4))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :iso-8859-16 (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -2614,7 +2669,8 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
+       (let* ((char (schar string i))
+              (code (char-code char))
               (c2 (cond ((< code #xa0) code)
                       ((< code #x180)
                        (svref *unicode-a0-180-to-iso-8859-16*
@@ -2627,7 +2683,7 @@
                               (the fixnum (- code #x2018))))
                       ((=3D code #x20ac) #xa4))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :iso-8859-16 (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -2798,7 +2854,7 @@
                               (the fixnum (- code #xfb00))))
                       ((=3D code #xf8ff) #xf0))))
        (declare (type (mod #x110000) code))
-       (funcall write-function stream (or c2 (char-code #\Sub)))
+       (funcall write-function stream (or c2 (note-encoding-problem char s=
tream :macintosh (char-code #\Sub))))
        1)))
   :stream-decode-function
   (nfunction
@@ -2845,7 +2901,7 @@
                               (the fixnum (- code #xfb00))))
                       ((=3D code #xf8ff) #xf0))))
          (declare (type (mod #x110000) code))
-         (setf (aref vector idx) (or c2 (char-code #\Sub)))
+         (setf (aref vector idx) (or c2 (note-encoding-problem char vector=
 :macintosh (char-code #\Sub))))
          (incf idx)))))
   :vector-decode-function
   (nfunction
@@ -2867,8 +2923,9 @@
    (lambda (string pointer idx start end)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i)))
-            (c2 (cond ((< code #x80) code)
+       (let* ((char (schar string i))
+              (code (char-code char))
+              (c2 (cond ((< code #x80) code)
                       ((and (>=3D code #xa0) (< code #x100)
                        (svref *unicode-a0-100-to-macintosh*
                               (the fixnum (- code #xa0)))))
@@ -2894,7 +2951,7 @@
                               (the fixnum (- code #xfb00))))
                       ((=3D code #xf8ff) #xf0))))
          (declare (type (mod #x110000) code))
-         (setf (%get-unsigned-byte pointer idx) (or c2 (char-code #\Sub)))
+         (setf (%get-unsigned-byte pointer idx) (or c2 (note-encoding-prob=
lem char pointer :macintosh (char-code #\Sub))))
          (incf idx)))))
   :memory-decode-function
   (nfunction
@@ -2983,7 +3040,7 @@
                       (logior
                        (the fixnum (ash (the fixnum (logand #x1f 1st-unit)=
) 6))
                        (the fixnum (logxor s1 #x80))))
-                     #\Replacement_Character)
+                     (note-stream-decoding-problem stream))
                    (let* ((s2 (funcall next-unit-function stream)))
                      (if (eq s2 :eof)
                        s2
@@ -3004,8 +3061,8 @@
                                                        (ash (the fixnum (l=
ogand s1 #x3f))
                                                             6))
                                                      (the fixnum (logand s=
2 #x3f)))))))
-                                 #\Replacement_Character)
-                             #\Replacement_Character)
+                                 (note-stream-decoding-problem stream))
+                             (note-stream-decoding-problem stream))
                            (if (< 1st-unit #xf8)
                              (let* ((s3 (funcall next-unit-function stream=
)))
                                (if (eq s3 :eof)
@@ -3030,9 +3087,11 @@
                                           (the fixnum
                                             (ash (the fixnum (logxor s2 #x=
80)) 6))
                                           (the fixnum (logxor s3 #x80))))))
-                                     #\Replacement_Character))))
-                             #\Replacement_Character)))))))))
-           #\Replacement_Character))))
+
+
+                                     (note-stream-decoding-problem stream)=
))))
+                             (note-stream-decoding-problem stream))))))))))
+           (note-stream-decoding-problem stream)))))
     :vector-encode-function
     (nfunction
      utf-8-vector-encode
@@ -3133,7 +3192,7 @@
                                             (the fixnum
                                               (ash (the fixnum (logxor 3rd=
-unit #x80)) 6))
                                             (the fixnum (logxor 4th-unit #=
x80))))))))))))))))
-               (setf (schar string i) (or char #\Replacement_Character))))=
)))
+               (setf (schar string i) (or char (note-vector-decoding-probl=
em vector index :utf-8))))))))
     :memory-encode-function
     #'utf-8-memory-encode
     :memory-decode-function
@@ -3224,8 +3283,8 @@
             (if (and (>=3D 2nd-unit #xdc00)
                      (< 2nd-unit #xe000))
               (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit)
-              #\Replacement_Character))))
-      #\Replacement_Character)))
+              (note-stream-decoding-problem stream)))))
+      (note-stream-decoding-problem stream))))
 =

 =

 =

@@ -3358,7 +3417,7 @@
                          (if (and (>=3D 2nd-unit #xdc00)
                                   (< 2nd-unit #xe000))
                            (utf-16-combine-surrogate-pairs 1st-unit 2nd-un=
it)))))))
-             (setf (schar string i) (or char #\Replacement_Character)))))))
+             (setf (schar string i) (or char (note-vector-decoding-problem=
 vector index #+big-endian-target :utf-16be #-big-endian-target :utf-16le))=
))))))
     :memory-encode-function
     (nfunction
      native-utf-16-memory-encode
@@ -3402,7 +3461,7 @@
                            (if (and (>=3D 2nd-unit #xdc00)
                                     (< 2nd-unit #xe000))
                              (utf-16-combine-surrogate-pairs 1st-unit 2nd-=
unit)))))))
-            (setf (schar string i) (or char #\Replacement_Character)))))))
+            (setf (schar string i) (or char (note-vector-decoding-problem =
pointer index #+big-endian-target :utf-16be #-big-endian-target :utf-16le))=
))))))
     :octets-in-string-function
     #'utf-16-octets-in-string
     :length-of-vector-encoding-function
@@ -3523,7 +3582,7 @@
                        (if (and (>=3D 2nd-unit #xdc00)
                                 (< 2nd-unit #xe000))
                          (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit=
)))))))
-           (setf (schar string i) (or char #\Replacement_Character)))))))
+           (setf (schar string i) (or char (note-vector-decoding-problem v=
ector index #+big-endian-target :utf-16le #-big-endian-target :utf-16be))))=
))))
   :memory-encode-function
   (nfunction
    reversed-utf-16-memory-encode
@@ -3567,7 +3626,7 @@
                        (if (and (>=3D 2nd-unit #xdc00)
                                 (< 2nd-unit #xe000))
                          (utf-16-combine-surrogate-pairs 1st-unit 2nd-unit=
)))))))
-           (setf (schar string i) (or char #\Replacement_Character)))))))
+           (setf (schar string i) (or char (note-vector-decoding-problem p=
ointer index #+big-endian-target :utf-16le #-big-endian-target :utf-16be)))=
)))))
   :octets-in-string-function
   #'utf-16-octets-in-string
   :length-of-vector-encoding-function
@@ -3708,7 +3767,7 @@
                          (if (and (>=3D 2nd-unit #xdc00)
                                   (< 2nd-unit #xe000))
                            (utf-16-combine-surrogate-pairs 1st-unit 2nd-un=
it)))))))
-             (setf (schar string i) (or char #\Replacement_Character))))))=
))
+             (setf (schar string i) (or char (note-vector-decoding-problem=
 vector index :utf-16)))))))))
   :memory-encode-function
   (nfunction
    utf-16-memory-encode
@@ -3768,7 +3827,7 @@
                          (if (and (>=3D 2nd-unit #xdc00)
                                   (< 2nd-unit #xe000))
                            (utf-16-combine-surrogate-pairs 1st-unit 2nd-un=
it)))))))
-             (setf (schar string i) (or char #\Replacement_Character))))))=
))
+             (setf (schar string i) (or char (note-vector-decoding-problem=
 pointer index :utf-16)))))))))
   :octets-in-string-function
   (nfunction
    utf-16-bom-octets-in-string
@@ -3859,16 +3918,16 @@
   (let* ((code (char-code char)))
     (declare (type (mod #x110000) code))
     (if (>=3D code #x10000)
-      (setq code (char-code #\Replacement_Character)))
+      (setq code (note-encoding-problem char stream :ucs-2 (char-code #\Re=
placement_Character))))
     (funcall write-function stream code)
     1))
 =

 (defun ucs-2-stream-decode (1st-unit next-unit-function stream)
   (declare (type (unsigned-byte 16) 1st-unit)
-           (ignore next-unit-function stream))
+           (ignore next-unit-function))
   ;; CODE-CHAR returns NIL on either half of a surrogate pair.
   (or (code-char 1st-unit)
-      #\Replacement_Character))
+      (note-stream-decoding-problem stream)))
 =

 =

 (defun ucs-2-octets-in-string (string start end)
@@ -3921,7 +3980,7 @@
               (code (char-code char)))
          (declare (type (mod #x110000) code))
          (when (>=3D code #x10000)
-           (setq code (char-code #\Replacement_Character)))
+           (setq code (note-encoding-problem char vector #+big-endian-targ=
et :ucs-2be #-big-endian-target :ucs-2le (char-code #\Replacement_Character=
))))
          (setf (%native-u8-ref-u16 vector idx) code)
          (incf idx 2)))))
   :vector-decode-function
@@ -3937,7 +3996,7 @@
        (declare (fixnum i end index))
        (setf (schar string i)
              (or (code-char (%native-u8-ref-u16 vector index))
-                 #\Replacement_Character)))))
+                 (note-vector-decoding-problem vector index  #+big-endian-=
target :ucs-2be #-big-endian-target :ucs-2le))))))
   :memory-encode-function
   (nfunction
    native-ucs-2-memory-encode
@@ -3945,11 +4004,12 @@
      (declare (fixnum idx))
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i))))
+       (let* ((char (schar string i))
+              (code (char-code char)))
          (declare (type (mod #x110000) code))
          (setf (%get-unsigned-word pointer idx)
                       (if (>=3D code #x10000)
-                        (char-code #\Replacement_Character)
+                        (note-encoding-problem char pointer #+big-endian-t=
arget :ucs-2be #-big-endian-target :ucs-2le (char-code #\Replacement_Charac=
ter))
                         code))
          (incf idx 2)))))
   :memory-decode-function
@@ -3963,7 +4023,7 @@
        (declare (fixnum i index))
        (let* ((1st-unit (%get-unsigned-word pointer index)))
          (declare (type (unsigned-byte 16) 1st-unit))
-         (setf (schar string i) (or (char-code 1st-unit) #\Replacement_Cha=
racter))))))
+         (setf (schar string i) (or (char-code 1st-unit) (note-vector-deco=
ding-problem pointer index  #+big-endian-target :ucs-2be #-big-endian-targe=
t :ucs-2le)))))))
   :octets-in-string-function
   #'ucs-2-octets-in-string
   :length-of-vector-encoding-function
@@ -4010,7 +4070,7 @@
               (code (char-code char)))
          (declare (type (mod #x110000) code))
          (when (>=3D code #x10000)
-           (setq code (char-code #\Replacement_Character)))
+           (setq code (note-encoding-problem char vector #+big-endian-targ=
et :ucs-2le #-big-endian-target :ucs-2be (char-code #\Replacement_Character=
))))
          (setf (%reversed-u8-ref-u16 vector idx) code)
          (incf idx 2)))))
   :vector-decode-function
@@ -4026,7 +4086,7 @@
        (declare (fixnum i end index))
        (setf (schar string i)
              (or (code-char (%reversed-u8-ref-u16 vector index))
-                 #\Replacement_Character)))))
+                 (note-vector-decoding-problem vector index #+big-endian-t=
arget :ucs-2le #-big-endian-target :ucs-2be))))))
   :memory-encode-function
   (nfunction
    reversed-ucs-2-memory-encode
@@ -4034,11 +4094,12 @@
      (declare (fixnum idx))
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i))))
+       (let* ((char (schar string i))
+              (code (char-code char)))
          (declare (type (mod #x110000) code))
          (setf (%get-unsigned-word pointer idx)
                (if (>=3D code #x10000)
-                 (%swap-u16 (char-code #\Replacement_Character))
+                 (%swap-u16 (note-encoding-problem char pointer #+big-endi=
an-target :ucs-2le #-big-endian-target :ucs-2be (char-code #\Replacement_Ch=
aracter)))
                  (%swap-u16 code)))
          (incf idx 2)))))
   :memory-decode-function
@@ -4052,7 +4113,7 @@
        (declare (fixnum i index))
        (let* ((1st-unit (%swap-u16 (%get-unsigned-word pointer index))))
          (declare (type (unsigned-byte 16) 1st-unit))
-         (setf (schar string i) (or (code-char 1st-unit) #\Replacement_Cha=
racter))))))
+         (setf (schar string i) (or (code-char 1st-unit) (note-vector-deco=
ding-problem pointer index #+big-endian-target :ucs-2le #-big-endian-target=
 :ucs-2be)))))))
   :octets-in-string-function
   #'ucs-2-octets-in-string
   :length-of-vector-encoding-function
@@ -4093,7 +4154,7 @@
               (code (char-code char)))
          (declare (type (mod #x110000) code))
          (when (>=3D code #x10000)
-           (setq code (char-code #\Replacement_Character)))
+           (setq code (note-encoding-problem char vector :ucs-2 (char-code=
 #\Replacement_Character))))
          (setf (%native-u8-ref-u16 vector idx) code)
          (incf idx 2)))))
   :vector-decode-function
@@ -4120,7 +4181,8 @@
                             (%reversed-u8-ref-u16 vector index)
                             (%native-u8-ref-u16 vector index))))
              (declare (type (unsigned-byte 16) 1st-unit))
-             (setf (schar string i) (or (code-char 1st-unit) #\Replacement=
_Character)))))))
+             (setf (schar string i) (or (code-char 1st-unit)
+                                        (note-vector-decoding-problem vect=
or index :ucs-2))))))))
   :memory-encode-function
   (nfunction
    ucs-2-memory-encode
@@ -4130,11 +4192,12 @@
      (incf idx 2)
      (do* ((i start (1+ i)))
           ((>=3D i end) idx)
-       (let* ((code (char-code (schar string i))))
+       (let* ((char (schar string i))
+              (code (char-code char)))
          (declare (type (mod #x110000) code))
          (setf (%get-unsigned-word pointer idx)
                       (if (>=3D code #x10000)
-                        (char-code #\Replacement_Character)
+                        (note-encoding-problem char pointer :ucs-2 (char-c=
ode #\Replacement_Character))
                         code))
          (incf idx 2)))))
   :memory-decode-function
@@ -4160,7 +4223,8 @@
        (let* ((1st-unit (%get-unsigned-word pointer index)))
          (declare (type (unsigned-byte 16) 1st-unit))
          (if swap (setq 1st-unit (%swap-u16 1st-unit)))
-         (setf (schar string i) (or (code-char 1st-unit) #\Replacement_Cha=
racter)))))))
+         (setf (schar string i) (or (code-char 1st-unit)
+                                    (note-vector-decoding-problem pointer =
index :ucs-2))))))))
   :octets-in-string-function
   (nfunction
    ucs-2-bom-octets-in-string
@@ -4341,7 +4405,7 @@
          (setf (schar string i)
                (or (if (< code char-code-limit)
                       (code-char code))
-                   #\Replacement_Character))))))
+                   (note-vector-decoding-problem vector index #+big-endian=
-target :utf-32be #-big-endian-target :utf-32le)))))))
   :memory-encode-function
   (nfunction
    native-ucs-4-memory-encode
@@ -4366,7 +4430,8 @@
          (declare (type (unsigned-byte 32) 1st-unit))
          (setf (schar string i) (or (if (< 1st-unit char-code-limit)
                                       (code-char 1st-unit))
-                                    #\Replacement_Character))))))
+                                    (note-vector-decoding-problem
+                                     pointer index #+big-endian-target :ut=
f-32be #-big-endian-target :utf-32le)))))))
   :octets-in-string-function
   #'ucs-4-octets-in-string
   :length-of-vector-encoding-function
@@ -4439,7 +4504,7 @@
          (setf (schar string i)
                (or (if (< code char-code-limit)
                      (code-char code))
-                   #\Replacement_Character))))))
+                   (note-vector-decoding-problem vector index #+big-endian=
-target :utf-32le #-big-endian-target :utf-32be)))))))
   :memory-encode-function
   (nfunction
    native-ucs-4-memory-encode
@@ -4464,7 +4529,7 @@
          (declare (type (unsigned-byte 32) 1st-unit))
          (setf (schar string i) (or (if (< 1st-unit char-code-limit)
                                       (code-char 1st-unit))
-                                    #\Replacement_Character))))))
+                                    (note-vector-decoding-problem pointer =
index #+big-endian-target :utf-32le #-big-endian-target :utf-32be)))))))
 =

   :octets-in-string-function
   #'ucs-4-octets-in-string
@@ -4547,7 +4612,8 @@
              (declare (type (unsigned-byte 32) 1st-unit))
              (setf (schar string i) (or (if (< 1st-unit char-code-limit)
                                           (code-char 1st-unit))
-                                        #\Replacement_Character)))))))
+                                        (note-vector-decoding-problem
+                                         vector index :utf-32))))))))
   :memory-encode-function
   (nfunction
    utf-32-memory-encode
@@ -4586,7 +4652,8 @@
          (if swap (setq 1st-unit (%swap-u32 1st-unit)))
          (setf (schar string i) (or (if (< 1st-unit char-code-limit)
                                       (code-char 1st-unit))
-                                    #\Replacement_Character)))))))
+                                    (note-vector-decoding-problem
+                                     pointer index :utf-32))))))))
   :octets-in-string-function
   (nfunction
    utf-32-bom-octets-in-string

Modified: trunk/source/lib/ccl-export-syms.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/lib/ccl-export-syms.lisp (original)
+++ trunk/source/lib/ccl-export-syms.lisp Thu Mar  8 19:49:25 2012
@@ -342,6 +342,10 @@
      set-event-ticks
      event-dispatch
      *ticks-per-second*
+     encoding-problem
+     decoding-problem
+     with-encoding-problems-as-errors
+     with-decoding-problems-as-errors
 =

      *application*
      arglist

Modified: trunk/source/lib/macros.lisp
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D
--- trunk/source/lib/macros.lisp (original)
+++ trunk/source/lib/macros.lisp Thu Mar  8 19:49:25 2012
@@ -1768,6 +1768,14 @@
             (native-utf-16-memory-encode ,data ,sym 0 ,offset ,end)
             (setf (%get-unsigned-word ,sym ,noctets) 0)
             , at body))))))
+
+(defmacro with-encoding-problems-as-errors (&body body)
+  `(handler-bind ((encoding-problem #'error))
+    , at body))
+
+(defmacro with-decoding-problems-as-errors (&body body)
+  `(handler-bind ((decoding-problem #'error))
+    , at body))
 =

 (defmacro with-pointers (speclist &body body)
    (with-specs-aux 'with-pointer speclist body))



More information about the Openmcl-cvs-notifications mailing list