{-# OPTIONS_GHC -funbox-strict-fields #-}
{-# LANGUAGE BangPatterns #-}
-- |
-- Module      : Data.Unicode.Internal.NormalizeStream
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-style
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
--
-- Stream based normalization.
--
module Data.Unicode.Internal.NormalizeStream
    (
      D.DecomposeMode(..)
    , stream
    , unstream
    , unstreamC
    )
    where

import           Control.Monad                          (ap)
import           Data.Char                              (chr, ord)
import           Data.List                              (sortBy)
import           Data.Ord                               (comparing)
import qualified Data.Text.Array                        as A
import           Data.Text.Internal                     (Text (..))
import qualified Data.Text.Internal.Encoding.Utf16      as U16
import           Data.Text.Internal.Fusion.Size         (betweenSize,
                                                         upperBound)
import           Data.Text.Internal.Fusion.Types        (Step (..), Stream (..))
import           Data.Text.Internal.Private             (runText)
import           Data.Text.Internal.Unsafe.Char         (unsafeWrite)
import           Data.Text.Internal.Unsafe.Char         (unsafeChr)
import           Data.Text.Internal.Unsafe.Shift        (shiftR)
import           GHC.ST                                 (ST (..))

import qualified Data.Unicode.Properties.CombiningClass  as CC
import qualified Data.Unicode.Properties.Compositions    as C
import qualified Data.Unicode.Properties.Decompose       as D
import qualified Data.Unicode.Properties.DecomposeHangul as H

-------------------------------------------------------------------------------
-- Reorder buffer to hold characters till the next starter boundary
-------------------------------------------------------------------------------

data ReBuf = Empty | One {-# UNPACK #-} !Char | Many [Char]

writeStr :: A.MArray s -> Int -> [Char] -> ST s Int
writeStr :: MArray s -> Int -> [Char] -> ST s Int
writeStr MArray s
marr Int
di [Char]
str = Int -> [Char] -> ST s Int
go Int
di [Char]
str
    where
        go :: Int -> [Char] -> ST s Int
go Int
i [] = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
i
        go Int
i (Char
c : [Char]
cs) = do
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
i Char
c
            Int -> [Char] -> ST s Int
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n) [Char]
cs

{-# INLINE writeReorderBuffer #-}
writeReorderBuffer :: A.MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer :: MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
_ Int
di ReBuf
Empty = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
di

writeReorderBuffer MArray s
marr Int
di (One Char
c) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di Char
c
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

writeReorderBuffer MArray s
marr Int
di (Many [Char]
str) = MArray s -> Int -> [Char] -> ST s Int
forall s. MArray s -> Int -> [Char] -> ST s Int
writeStr MArray s
marr Int
di [Char]
str

-------------------------------------------------------------------------------
-- Decomposition of Hangul characters is done algorithmically
-------------------------------------------------------------------------------

-- {-# INLINE decomposeCharHangul #-}
decomposeCharHangul :: A.MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul :: MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul MArray s
marr Int
j Char
c = do
    case Char -> Either (Char, Char) (Char, Char, Char)
D.decomposeCharHangul Char
c of
        Left  (Char
l, Char
v)    -> do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
l
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
v
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2), ReBuf
Empty)
        Right (Char
l, Char
v, Char
t) -> do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
l
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
v
            Int
n3 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2) Char
t
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n3, ReBuf
Empty)

{-# INLINE decomposeChar #-}
decomposeChar
    :: D.DecomposeMode
    -> A.MArray s       -- destination array for decomposition
    -> Int              -- array index
    -> ReBuf            -- reorder buffer
    -> Char             -- char to be decomposed
    -> ST s (Int, ReBuf)
decomposeChar :: DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
decomposeChar DecomposeMode
_ MArray s
marr Int
i ReBuf
reBuf Char
c | Char -> Bool
D.isHangul Char
c = do
    Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
marr Int
i ReBuf
reBuf
    MArray s -> Int -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul MArray s
marr Int
j Char
c

-------------------------------------------------------------------------------
-- Decomposition of characters other than Hangul
-------------------------------------------------------------------------------

decomposeChar DecomposeMode
mode MArray s
marr Int
index ReBuf
reBuf Char
ch = do
    -- TODO: return fully decomposed form
    case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
ch of
      DecomposeResult
D.FalseA -> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
marr Int
index ReBuf
reBuf Char
ch
      DecomposeResult
D.TrueA  -> MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
marr Int
index ReBuf
reBuf (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
ch)
      DecomposeResult
_ -> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
marr Int
index ReBuf
reBuf Char
ch

    where
        {-# INLINE decomposeAll #-}
        decomposeAll :: MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
_ Int
i ReBuf
rbuf [] = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, ReBuf
rbuf)
        decomposeAll MArray s
arr Int
i ReBuf
rbuf (Char
x : [Char]
xs)  =
            case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
x of
                DecomposeResult
D.TrueA  -> do
                    (Int
i', ReBuf
rbuf') <- MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i ReBuf
rbuf
                                                (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
x)
                    MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i' ReBuf
rbuf' [Char]
xs
                DecomposeResult
_ -> do
                    -- XXX calling reorder is wrong if decomposition results in
                    -- a further decomposable Hangul char. In that case we will
                    -- not go through the Hangul decompose for that char.
                    -- To be strictly correct we have to call decomposeChar
                    -- recursively here.
                    (Int
i', ReBuf
rbuf') <- MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
arr Int
i ReBuf
rbuf Char
x
                    MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i' ReBuf
rbuf' [Char]
xs

        -- Unicode 9.0.0: 3.11
        -- D108 Reorderable pair: Two adjacent characters A and B in a coded
        -- character sequence <A,B> are a Reorderable Pair if and only if
        -- ccc(A) > ccc(B) > 0.
        --
        -- (array) (array index) (reorder buffer) (input char)
        {-# INLINE reorder #-}
        reorder :: MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
_ Int
i ReBuf
Empty Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> ReBuf
One Char
c)

        -- input char is a starter, flush the reorder buffer
        reorder MArray s
arr Int
i (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2), ReBuf
Empty)

        -- input char is combining and there is a starter char in the buffer
        -- flush the starter char and add the combining char to the buffer
        reorder MArray s
arr Int
i (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> ReBuf
One Char
c)

        -- optimized ordering for common case of two combining chars
        -- XXX replace many with Two here
        reorder  MArray s
_ Int
i (One Char
c0) Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Char] -> ReBuf
Many [Char]
orderedPair)
            where
                -- {-# INLINE orderedPair #-}
                orderedPair :: [Char]
orderedPair =
                    case Char -> Char -> Bool
inOrder Char
c0 Char
c of
                        Bool
True  -> [Char
c0, Char
c]
                        Bool
False -> [Char
c, Char
c0]

                inOrder :: Char -> Char -> Bool
inOrder Char
c1 Char
c2 =
                    Char -> Int
CC.getCombiningClass Char
c1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Int
CC.getCombiningClass Char
c2

        -- input char is a starter, flush the reorder buffer
        reorder MArray s
arr Int
i ReBuf
rbuf Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
i ReBuf
rbuf
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
j Char
c
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, ReBuf
Empty)

        -- unoptimized generic sort for more than two combining chars
        reorder MArray s
_ Int
i (Many [Char]
str) Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Char] -> ReBuf
Many ([Char] -> [Char]
sortCluster ([Char]
str [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c])))
            where
                {-# INLINE sortCluster #-}
                sortCluster :: [Char] -> [Char]
sortCluster =   ((Char, Int) -> Char) -> [(Char, Int)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Int) -> Char
forall a b. (a, b) -> a
fst
                              ([(Char, Int)] -> [Char])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, Int) -> (Char, Int) -> Ordering)
-> [(Char, Int)] -> [(Char, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Char, Int) -> Int) -> (Char, Int) -> (Char, Int) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Char, Int) -> Int
forall a b. (a, b) -> b
snd)
                              ([(Char, Int)] -> [(Char, Int)])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [(Char, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> (Char, Int)) -> [Char] -> [(Char, Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Int -> (Char, Int))
-> (Char -> Int) -> Char -> (Char, Int)
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap (,) Char -> Int
CC.getCombiningClass)

-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream :: Text -> Stream Char
stream (Text Array
arr Int
off Int
len) = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
off (Int -> Int -> Size
betweenSize (Int
len Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftR` Int
1) Int
len)
    where
      !end :: Int
end = Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next !Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end                   = Step Int Char
forall s a. Step s a
Done
          -- shift generates only two branches instead of three in case of
          -- range check, works quite a bit faster with llvm backend.
          | (Word16
n Word16 -> Int -> Word16
forall a. UnsafeShift a => a -> Int -> a
`shiftR` Int
10) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0x36    = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
n Word16
n2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
          | Bool
otherwise                  = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr Word16
n) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where
            n :: Word16
n  = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i
            n2 :: Word16
n2 = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE [0] stream #-}

-- | /O(n)/ Convert a 'Stream Char' into a decompose-normalized 'Text'.
unstream :: D.DecomposeMode -> Stream Char -> Text
unstream :: DecomposeMode -> Stream Char -> Text
unstream DecomposeMode
mode (Stream s -> Step s Char
next0 s
s0 Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of two 16-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let margin :: Int
margin = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
maxDecomposeLen
      mlen :: Int
mlen = (Int -> Size -> Int
upperBound Int
4 Size
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin)
  MArray s
arr0 <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> ReBuf -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> ReBuf -> ST s Text
encode !s
si !Int
di ReBuf
rbuf =
            -- simply check for the worst case
            if Int
maxi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin
            then s -> Int -> ReBuf -> ST s Text
realloc s
si Int
di ReBuf
rbuf
            else
                case s -> Step s Char
next0 s
si of
                    Step s Char
Done -> do
                        Int
di' <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
di ReBuf
rbuf
                        MArray s -> Int -> ST s Text
done MArray s
arr Int
di'
                    Skip s
si'    -> s -> Int -> ReBuf -> ST s Text
encode s
si' Int
di ReBuf
rbuf
                    Yield Char
c s
si' -> do
                                (Int
di', ReBuf
rbuf') <- DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s.
DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
decomposeChar DecomposeMode
mode MArray s
arr Int
di ReBuf
rbuf Char
c
                                s -> Int -> ReBuf -> ST s Text
encode s
si' Int
di' ReBuf
rbuf'
                                -- n <- unsafeWrite arr di c
                                -- encode si' (di + n) rbuf

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> ReBuf -> ST s Text
realloc !s
si !Int
di ReBuf
rbuf = do
            let newlen :: Int
newlen = Int
maxi Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
            MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newlen
            MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' Int
0 MArray s
arr Int
0 Int
di
            MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer MArray s
arr' (Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
si Int
di ReBuf
rbuf

  MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer MArray s
arr0 (Int
mlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
s0 Int
0 ReBuf
Empty
{-# INLINE [0] unstream #-}

-- we can generate this from UCD
maxDecomposeLen :: Int
maxDecomposeLen :: Int
maxDecomposeLen = Int
32

-------------------------------------------------------------------------------
-- Composition
-------------------------------------------------------------------------------

composeAndWrite
    :: A.MArray s
    -> Int
    -> Char
    -> ReBuf
    -> Char
    -> ST s (Int, Char) -- return new index, new starter
composeAndWrite :: MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite MArray s
arr Int
di Char
st1 ReBuf
Empty Char
st2 = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
di Char
st1
    (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char
st2)

composeAndWrite MArray s
arr Int
di Char
st1 (One Char
c) Char
st2 =
    MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' MArray s
arr Int
di Char
st1 [Char
c] Char
st2

composeAndWrite MArray s
arr Int
di Char
st1 (Many [Char]
str) Char
st2 =
    MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' MArray s
arr Int
di Char
st1 [Char]
str Char
st2

composeAndWrite'
    :: A.MArray s
    -> Int
    -> Char
    -> [Char]
    -> Char
    -> ST s (Int, Char)
composeAndWrite' :: MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' MArray s
arr Int
di Char
st1 [Char]
str Char
st2 = Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
di Char
st1 [] Int
0 [Char]
str
    where
        -- arguments: index, starter, uncombined chars,
        -- cc of prev uncombined char, unprocessed str
        go :: Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st [] Int
_ [] =
                case Char -> Char -> Maybe Char
C.composePair Char
st Char
st2 of
                    Just Char
x  -> (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char
x)
                    Maybe Char
Nothing -> do
                        Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char
st2)

        go Int
i Char
st [Char]
uncs Int
_ [] = do
            Int
j <- MArray s -> Int -> [Char] -> ST s Int
forall s. MArray s -> Int -> [Char] -> ST s Int
writeStr MArray s
arr Int
i (Char
st Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
uncs)
            (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char
st2)

        go Int
i Char
st [] Int
_ (Char
c : [Char]
cs) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c of
                Just Char
x  -> Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
x [] Int
0 [Char]
cs
                Maybe Char
Nothing -> do
                    Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st [Char
c] (Char -> Int
CC.getCombiningClass Char
c) [Char]
cs

        go Int
i Char
st [Char]
uncs Int
cc (Char
c : [Char]
cs) = do
            let ccc :: Int
ccc = Char -> Int
CC.getCombiningClass Char
c
            if Int
ccc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
cc then
                case Char -> Char -> Maybe Char
C.composePair Char
st Char
c of
                    Just Char
x  -> Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
x [Char]
uncs Int
cc [Char]
cs
                    Maybe Char
Nothing -> do
                        Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st ([Char]
uncs [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c]) Int
ccc [Char]
cs
            else Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st ([Char]
uncs [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c]) Int
ccc [Char]
cs

writeStarterRbuf :: A.MArray s
                 -> Int
                 -> Maybe Char
                 -> ReBuf
                 -> ST s Int
writeStarterRbuf :: MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf MArray s
marr Int
di Maybe Char
st ReBuf
rbuf =
    case Maybe Char
st of
        Maybe Char
Nothing -> MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
marr Int
di ReBuf
rbuf
        Just Char
starter ->
            -- XXX null char hack
            MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite MArray s
marr Int
di Char
starter ReBuf
rbuf Char
'\0' ST s (Int, Char) -> ((Int, Char) -> ST s Int) -> ST s Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int)
-> ((Int, Char) -> Int) -> (Int, Char) -> ST s Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Char) -> Int
forall a b. (a, b) -> a
fst)

-------------------------------------------------------------------------------
-- Composition of Hangul Jamo characters, done algorithmically
-------------------------------------------------------------------------------

-- Hold an L to wait for V, hold an LV to wait for T.
data JamoBuf
    = JamoEmpty
    | JamoLIndex {-# UNPACK #-} !Int
    | JamoLV     {-# UNPACK #-} !Char

{-# INLINE writeJamoBuf #-}
writeJamoBuf :: A.MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf :: MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
_ Int
di JamoBuf
JamoEmpty = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
di

writeJamoBuf MArray s
marr Int
di (JamoLIndex Int
i) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di (Int -> Char
chr (Int
D.jamoLFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i))
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

writeJamoBuf MArray s
marr Int
di (JamoLV Char
c) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di Char
c
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

-- TODO Unify compose and decompose if possible with good perf
-- TODO try unifying st, rbuf
-- TODO try using Either for (st, rbuf)/jbuf
-- or we can use different functions for hangul and non-hangul composition with
-- diff signatures. In an outer function we check if the char is hangul and
-- flush and switch the buffer before calling the appropriate function.

-- If we are composing we do not need to first decompose Hangul. We can just
-- compose assuming there could be some partially composed syllables e.g. LV
-- syllable followed by a jamo T. We need to compose this case as well.
--
-- XXX The unicode normalization test suite does not seem to have tests for a
-- LV composed hangul syllable followed by a jamo T.

{-# INLINE composeChar #-}

composeChar
    :: D.DecomposeMode
    -> A.MArray s       -- destination array for decomposition
    -> Int              -- array index
    -> Maybe Char       -- last starter
    -> ReBuf            -- reorder buffer
    -> JamoBuf          -- jamo buffer
    -> Char             -- char to be decomposed
    -> ST s (Int, Maybe Char, ReBuf, JamoBuf)
    -- ^ index, starter, reorder buf, jamobuf
composeChar :: DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar DecomposeMode
_ MArray s
marr Int
index Maybe Char
st ReBuf
rbuf JamoBuf
jbuf Char
ch | Char -> Bool
H.isHangul Char
ch Bool -> Bool -> Bool
|| Char -> Bool
H.isJamo Char
ch = do
    Int
j <- MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
forall s. MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf MArray s
marr Int
index Maybe Char
st ReBuf
rbuf
    (Int
k, JamoBuf
jbuf') <- if Char -> Bool
H.isJamo Char
ch then
        MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
forall s. MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
marr Int
j JamoBuf
jbuf Char
ch
    else
        MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
forall s. MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharHangul MArray s
marr Int
j JamoBuf
jbuf Char
ch
    (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
k, Maybe Char
forall a. Maybe a
Nothing, ReBuf
Empty, JamoBuf
jbuf')
    where
        composeCharJamo :: MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
arr Int
i JamoBuf
JamoEmpty Char
c =
            case Char -> Maybe Int
H.jamoLIndex Char
c of
                Just Int
li -> (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Int -> JamoBuf
JamoLIndex Int
li)
                Maybe Int
Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)

        composeCharJamo MArray s
arr Int
i jb :: JamoBuf
jb@(JamoLIndex Int
li) Char
c =
            case Char -> Maybe Int
H.jamoVIndex Char
c of
                Just Int
vi -> do
                    let lvi :: Int
lvi = Int
li Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
H.jamoNCount Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
vi Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
H.jamoTCount
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> JamoBuf
JamoLV (Int -> Char
chr (Int
H.hangulFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lvi)))
                Maybe Int
Nothing -> do
                    Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
                    MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
arr Int
ix JamoBuf
JamoEmpty Char
c

        composeCharJamo MArray s
arr Int
i jb :: JamoBuf
jb@(JamoLV Char
lv) Char
c =
            case Char -> Maybe Int
H.jamoTIndex Char
c of
                Just Int
ti -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i (Int -> Char
chr ((Char -> Int
ord Char
lv) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ti))
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)
                Maybe Int
Nothing -> do
                    Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
                    MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
arr Int
ix JamoBuf
JamoEmpty Char
c

        composeCharHangul :: MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharHangul MArray s
arr Int
i JamoBuf
jb Char
c = do
            Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
            case Char -> Bool
H.isHangulLV Char
c of
                Bool
True -> (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ix, Char -> JamoBuf
JamoLV Char
c)
                Bool
False -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
ix Char
c
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)

-------------------------------------------------------------------------------
-- Composition of characters other than Hangul
-------------------------------------------------------------------------------

composeChar DecomposeMode
mode MArray s
marr Int
index Maybe Char
starter ReBuf
reBuf JamoBuf
jbuf Char
ch = do
    Int
index' <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
marr Int
index JamoBuf
jbuf
    case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
ch of
        DecomposeResult
D.FalseA -> do
            (Int
i, Maybe Char
st, ReBuf
rbuf) <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf Char
ch
            (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
JamoEmpty)
        DecomposeResult
D.TrueA  -> do
            MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf JamoBuf
jbuf (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
ch)
        DecomposeResult
_ -> do
            (Int
i, Maybe Char
st, ReBuf
rbuf) <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf Char
ch
            (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
JamoEmpty)
    where
        {-# INLINE decomposeAll #-}
        decomposeAll :: MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
_ Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb [] = (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
jb)
        decomposeAll MArray s
arr Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb (Char
x : [Char]
xs)  =
            case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
x of
                DecomposeResult
D.TrueA  -> do
                    (Int
i', Maybe Char
st', ReBuf
rbuf', JamoBuf
jb') <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb
                                                (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
x)
                    MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i' Maybe Char
st' ReBuf
rbuf' JamoBuf
jb' [Char]
xs
                DecomposeResult
_ -> do
                    -- XXX this recursive call here hurts performance
                    -- We can make the hangul composition a separate function
                    -- and call that or reorder here based on the type fo char
                    (Int
i', Maybe Char
st', ReBuf
rbuf', JamoBuf
jb') <- DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar DecomposeMode
mode MArray s
arr Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb Char
x
                    MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i' Maybe Char
st' ReBuf
rbuf' JamoBuf
jb' [Char]
xs

        -- Unicode 9.0.0: 3.11
        -- D108 Reorderable pair: Two adjacent characters A and B in a coded
        -- character sequence <A,B> are a Reorderable Pair if and only if
        -- ccc(A) > ccc(B) > 0.
        --
        -- (array) (array index) (reorder buffer) (input char)
        {-# INLINE reorder #-}
        reorder :: MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder MArray s
_ Int
i Maybe Char
st ReBuf
Empty Char
c = (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, Char -> ReBuf
One Char
c)

        -- Unicode 9.0.0: 3.11
        -- D111: a starter can never become a non-starter after
        -- combining. If that happens we will potentially have to remember all
        -- previous starters so that the new non-starter can be combined with
        -- the previous starter.
        --
        -- To compose, try to combine an unblocked char with the last starter
        -- and remove if combined. A char with combining class equal or lower
        -- than the previous char is blocked. It implies that only adjacent
        -- starters can be combined.
        --
        -- input char is a starter
        -- does it combine with the previous starter?
        -- if no then flush and replace the last starter
        reorder MArray s
arr Int
i (Just Char
st) (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c0 of
                Just Char
x  -> case Char -> Char -> Maybe Char
C.composePair Char
x Char
c of
                    Just Char
y -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
y, ReBuf
Empty)
                    Maybe Char
Nothing -> do
                        Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
x
                        (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)
                Maybe Char
Nothing -> case Char -> Bool
CC.isCombining Char
c0 of
                    -- starter1 combining starter2
                    Bool
True -> do
                        Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c0
                        (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)
                    -- starter1 starter2 starter3
                    Bool
False -> do
                        Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        case Char -> Char -> Maybe Char
C.composePair Char
c0 Char
c of
                            Just Char
y -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
y, ReBuf
Empty)
                            Maybe Char
Nothing -> do
                                Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c0
                                (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        reorder MArray s
arr Int
i Maybe Char
Nothing (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) =
            case Char -> Char -> Maybe Char
C.composePair Char
c0 Char
c of
                Just Char
x  -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x, ReBuf
Empty)
                Maybe Char
Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
                    (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        reorder MArray s
arr Int
i (Just Char
st) (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c0 of
                Just Char
x  -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x, Char -> ReBuf
One Char
c)
                Maybe Char
Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                    (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c0, Char -> ReBuf
One Char
c)

        -- input char is combining and there is a starter char in the buffer
        -- flush the starter char and add the combining char to the buffer
        reorder MArray s
_arr Int
i Maybe Char
Nothing (One Char
c0) Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c0, Char -> ReBuf
One Char
c)

        -- optimized ordering for common case of two combining chars
        -- XXX replace many with Two here
        reorder  MArray s
_ Int
i Maybe Char
st (One Char
c0) Char
c = (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, [Char] -> ReBuf
Many [Char]
orderedPair)
            where
                -- {-# INLINE orderedPair #-}
                orderedPair :: [Char]
orderedPair =
                    case Char -> Char -> Bool
inOrder Char
c0 Char
c of
                        Bool
True  -> [Char
c0, Char
c]
                        Bool
False -> [Char
c, Char
c0]

                inOrder :: Char -> Char -> Bool
inOrder Char
c1 Char
c2 =
                    Char -> Int
CC.getCombiningClass Char
c1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Int
CC.getCombiningClass Char
c2

        -- input char is a starter, flush the reorder buffer
        reorder MArray s
arr Int
i (Just Char
st) ReBuf
rbuf Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            (Int
j, Char
st2) <- MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite MArray s
arr Int
i Char
st ReBuf
rbuf Char
c
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
st2, ReBuf
Empty)

        reorder MArray s
arr Int
i Maybe Char
Nothing ReBuf
rbuf Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
i ReBuf
rbuf
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        -- unoptimized generic sort for more than two combining chars
        reorder MArray s
_ Int
i Maybe Char
st (Many [Char]
str) Char
c =
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, [Char] -> ReBuf
Many ([Char] -> [Char]
sortCluster ([Char]
str [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c])))
            where
                {-# INLINE sortCluster #-}
                sortCluster :: [Char] -> [Char]
sortCluster =   ((Char, Int) -> Char) -> [(Char, Int)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Int) -> Char
forall a b. (a, b) -> a
fst
                              ([(Char, Int)] -> [Char])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, Int) -> (Char, Int) -> Ordering)
-> [(Char, Int)] -> [(Char, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Char, Int) -> Int) -> (Char, Int) -> (Char, Int) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Char, Int) -> Int
forall a b. (a, b) -> b
snd)
                              ([(Char, Int)] -> [(Char, Int)])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [(Char, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> (Char, Int)) -> [Char] -> [(Char, Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Int -> (Char, Int))
-> (Char -> Int) -> Char -> (Char, Int)
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap (,) Char -> Int
CC.getCombiningClass)

-- | /O(n)/ Convert a 'Stream Char' into a composed normalized 'Text'.
unstreamC :: D.DecomposeMode -> Stream Char -> Text
unstreamC :: DecomposeMode -> Stream Char -> Text
unstreamC DecomposeMode
mode (Stream s -> Step s Char
next0 s
s0 Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of two 16-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let margin :: Int
margin = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
maxDecomposeLen
      mlen :: Int
mlen = (Int -> Size -> Int
upperBound Int
4 Size
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin)
  MArray s
arr0 <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode !s
si !Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf =
            -- simply check for the worst case
            if Int
maxi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin
               then s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
realloc s
si Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf
            else
                case s -> Step s Char
next0 s
si of
                    Step s Char
Done -> do
                        -- Flush any leftover buffers, only one of rbuf/jbuf
                        -- will have contents
                        Int
di'  <- MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
forall s. MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf MArray s
arr Int
di Maybe Char
st ReBuf
rbuf
                        Int
di'' <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
di' JamoBuf
jbuf
                        MArray s -> Int -> ST s Text
done MArray s
arr Int
di''
                    Skip s
si'    -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode s
si' Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf
                    Yield Char
c s
si' -> do
                        (Int
di', Maybe Char
st', ReBuf
rbuf', JamoBuf
jbuf') <- DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar DecomposeMode
mode MArray s
arr Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf Char
c
                        s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode s
si' Int
di' Maybe Char
st' ReBuf
rbuf' JamoBuf
jbuf'

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
realloc !s
si !Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf = do
            let newlen :: Int
newlen = Int
maxi Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
            MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newlen
            MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' Int
0 MArray s
arr Int
0 Int
di
            MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer MArray s
arr' (Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
si Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf

  MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer MArray s
arr0 (Int
mlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
s0 Int
0 Maybe Char
forall a. Maybe a
Nothing ReBuf
Empty JamoBuf
JamoEmpty
{-# INLINE [0] unstreamC #-}