(in-package :unf)

(declaim (inline decompose-p
                 decompose-char
                 next-decomposed-position
                 get-canonical-combining-class
                 bubble-sort
                 decompose->ordering
                 starter-p
                 allowed-char-p
                 next-valid-starter
                 compose-char
                 normalize))

;;;;;;;;;;;;;;;;;;
;;;; decomposition  
(defun decompose-p (ch map &aux (cd (char-code ch)))
  (or (hangul-syllable-char-p ch)
      (and (< cd (length map)) (aref map cd))))

(defun decompose-char (sb ch map)
  (if (hangul-syllable-char-p ch)
      (decompose-hangul sb ch)
    (string-builder-append sb (the simple-characters (aref map (char-code ch))))))

(defun next-decomposed-position (str start end map)
  (or (position-if (lambda (ch) (decompose-p ch map)) str :start start :end end)
      end))

(defun decompose (str str-beg str-end map)
  (declare #.*fastest*
           (array-index str-beg str-end)
           (simple-characters str)
           (simple-vector map))
  (let ((end (next-decomposed-position str str-beg str-end map)))
    (if (= end str-end)
        (substr str str-beg str-end)
      (with-string-builder (sb (- str-end str-beg))
        (string-builder-append sb str :start str-beg :end end)
        (loop WHILE (/= end str-end)
              FOR start = (1+ end)
          DO
          (decompose-char sb (char str end) map)
          (setf end (next-decomposed-position str start str-end map))
          (string-builder-append sb str :start start :end end))))))

;;;;;;;;;;;;;;;;;;;;;;;
;;;; canonical ordering
(defun get-canonical-combining-class (ch &aux (cd (char-code ch)))
  (if (>= cd (length +CCC+))
      0
    (aref +CCC+ cd)))

(defun bubble-sort (str beg end)
  (do ((limit beg)
       (next end))
      ((= next limit) str)
    (setf limit next)
    (loop FOR i FROM (1+ beg) BELOW limit 
          WHEN (> (get-canonical-combining-class (char str (1- i)))
                  (get-canonical-combining-class (char str i)))
      DO
      (rotatef (char str (1- i)) (char str i))
      (setf next i))))

(defun starter-p (ch)
  (zerop (get-canonical-combining-class ch)))

(defun canonical-ordering (str)
  (declare #.*fastest*
           (simple-characters str))
  (loop FOR i OF-TYPE array-index FROM 0 BELOW (1- (length str))
        UNLESS (starter-p (char str i))
    DO
    (loop FOR j OF-TYPE array-index FROM (1+ i)
          WHEN (or (= j (length str))
                   (starter-p (char str j)))
      DO
      (bubble-sort str i j)
      (setf i j)
      (return)))
  str)

(defun decompose->ordering (str start end map)
  (canonical-ordering (decompose str start end map)))

;;;;;;;;;;;;;;;;
;;;; composition
(defun allowed-char-p (ch bits &aux (cd (char-code ch)))
  (and (< cd (length bits)) (zerop (bit bits cd))))

(defun next-illegal-char (str start end bits)
  (declare #.*fastest*
           (simple-characters str)
           (simple-bit-vector bits)
           (array-index start end))
  (loop WITH starter = start
        FOR last-class = 0 THEN class
        FOR i FROM starter BELOW end
        FOR ch = (char str i)
        FOR class = (get-canonical-combining-class ch)
    DO
    (when (or (and (> last-class class) (/= class 0))
              (not (allowed-char-p ch bits)))
      (return starter))

    (when (zerop class)
      (setf starter i))
    
    FINALLY
    (return i)))

(defun next-valid-starter (str start end bits)
  (do ((i start (1+ i)))
      ((or (= i end)
           (and (starter-p (char str i))
                (allowed-char-p (char str i) bits)))
       i)))

(defun compose-char (starter combining)
  (or (gethash (cons starter combining) +CCM+)
      (compose-hangul starter combining)))

(defun compose-impl (sb str &aux (len (length str)))
  (declare #.*fastest*
           (simple-characters str)
           (string-builder sb))
  (let ((start (or (position-if #'starter-p str) len)))
    (string-builder-append sb str :end (min len (1+ start)))

    (loop WITH starter-pos  = (1- (string-builder-pos sb))
          WITH prev-class = 0 
          FOR i FROM (1+ start) BELOW len
          FOR ch = (char str i)
          FOR class = (get-canonical-combining-class ch)
        DO
        (a.if (and (or (< prev-class class)
                       (zerop prev-class))
                   (compose-char (string-builder-ref sb starter-pos) ch))
              (string-builder-replace sb starter-pos it)
          (progn
            (string-builder-add sb ch)
            (setf prev-class class)
            (when (zerop class)
              (setf starter-pos (1- (string-builder-pos sb)))))))))

(defun compose (str str-beg str-end bits map)
  (declare #.*fastest*
           (simple-bit-vector bits)
           (simple-characters str)
           (array-index str-beg str-end))
  (let ((end (next-illegal-char str str-beg str-end bits)))
    (if (= end str-end)
        (substr str str-beg str-end)
      (with-string-builder (sb (length str))
        (string-builder-append sb str :start str-beg :end end)
        (loop UNTIL (= end str-end)
              FOR start  = end
              FOR middle = (next-valid-starter str (1+ end) str-end bits)
          DO
          (compose-impl sb (decompose->ordering str start middle map))

          (setf end (next-illegal-char str middle str-end bits))
          (string-builder-append sb str :start middle :end end))))))

;;;;;;;;;;;;;;;;;;;;;;;
;;;; external functions
(defun nfd (str &key (start 0) (end (length str)))
  (declare #.*interface*
           (string str)
           (array-index start end))
  (charseq:with-dynamic-extent (cseq str :start start :end end)
    (charseq:as-string (simple-str start end) cseq
      (decompose->ordering simple-str start end +CDM+))))

(defun nfkd (str &key (start 0) (end (length str)))
  (declare #.*interface*
           (string str)
           (array-index start end))
  (charseq:with-dynamic-extent (cseq str :start start :end end)
    (charseq:as-string (simple-str start end) cseq
      (decompose->ordering simple-str start end +KDM+))))

(defun nfc (str &key (start 0) (end (length str)))
  (declare #.*interface*
           (string str)
           (array-index start end))
  (charseq:with-dynamic-extent (cseq str :start start :end end)
    (charseq:as-string (simple-str start end) cseq  
      (compose simple-str start end +CIL+ +CDM+))))

(defun nfkc (str &key (start 0) (end (length str)))
  (declare #.*interface*
           (string str)
           (array-index start end))
  (charseq:with-dynamic-extent (cseq str :start start :end end)
    (charseq:as-string (simple-str start end) cseq  
      (compose simple-str start end +KIL+ +KDM+))))

(defun normalize (str normalization-form &key (start 0) (end (length str)))
  (declare #.*interface*
           (string str)
           (array-index start end))
  (ecase normalization-form
    (:nfd  (nfd str  :start start :end end))
    (:nfkd (nfkd str :start start :end end))
    (:nfc  (nfc str  :start start :end end))
    (:nfkc (nfkc str :start start :end end))))