-- | Custom operations for `String`.
module MCSP.Data.String.Extra (
    -- ** Partition operations
    Partition,
    chars,

    -- ** Character set analysis
    alphabet,
    occurrences,
    singletons,
    repeated,
    hasOneOf,

    -- ** Substring analysis
    module MCSP.Data.String.Extra.Radix,
    longestCommonSubstring,
) where

import Control.Monad ((>>=))
import Data.Bool (Bool)
import Data.Foldable (any, foldl')
import Data.Function (flip, id, ($))
import Data.Int (Int)
import Data.Map.Strict (Map, alter, foldrWithKey')
import Data.Maybe (Maybe (Just, Nothing), fromMaybe)
import Data.Monoid (mempty)
import Data.Ord (Ord (..))
import Data.Set (Set, insert, member)
import GHC.Num ((+))

import MCSP.Data.RadixTree.Suffix (construct, findMax)
import MCSP.Data.String (String (..))
import MCSP.Data.String.Extra.Radix

-- ------------------------ --
-- Operations on partitions --

-- | A collection of substrings of the same string.
type Partition a = [String a]

-- | /O(n)/ Split the string in substrings of 1 char each.
--
-- >>> chars "abcd"
-- [a,b,c,d]
chars :: String a -> Partition a
chars :: forall a. String a -> Partition a
chars = [String a] -> String a -> [String a]
forall {a}. [String a] -> String a -> [String a]
go []
  where
    go :: [String a] -> String a -> [String a]
go [String a]
p (String a
rest :>: String a
ch) = [String a] -> String a -> [String a]
go (String a
ch String a -> [String a] -> [String a]
forall a. a -> [a] -> [a]
: [String a]
p) String a
rest
    go ![String a]
p String a
Null = [String a]
p

-- ---------------------- --
-- Character set analysis --

-- | /O(n lg n)/ The set of all characters in a string.
--
-- >>> alphabet "aabacabd"
-- fromList "abcd"
alphabet :: Ord a => String a -> Set a
alphabet :: forall a. Ord a => String a -> Set a
alphabet = (Set a -> a -> Set a) -> Set a -> String a -> Set a
forall b a. (b -> a -> b) -> b -> String a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((a -> Set a -> Set a) -> Set a -> a -> Set a
forall a b c. (a -> b -> c) -> b -> a -> c
flip a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
insert) Set a
forall a. Monoid a => a
mempty

-- | /O(n lg n)/ The frequency count of each character in a string.
--
-- >>> occurrences "aabacabd"
-- fromList [('a',4),('b',2),('c',1),('d',1)]
occurrences :: Ord a => String a -> Map a Int
occurrences :: forall a. Ord a => String a -> Map a Int
occurrences = (Map a Int -> a -> Map a Int) -> Map a Int -> String a -> Map a Int
forall b a. (b -> a -> b) -> b -> String a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((a -> Map a Int -> Map a Int) -> Map a Int -> a -> Map a Int
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((a -> Map a Int -> Map a Int) -> Map a Int -> a -> Map a Int)
-> (a -> Map a Int -> Map a Int) -> Map a Int -> a -> Map a Int
forall a b. (a -> b) -> a -> b
$ (Maybe Int -> Maybe Int) -> a -> Map a Int -> Map a Int
forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
alter Maybe Int -> Maybe Int
forall {a}. Num a => Maybe a -> Maybe a
increment) Map a Int
forall a. Monoid a => a
mempty
  where
    increment :: Maybe a -> Maybe a
increment Maybe a
x = a -> Maybe a
forall a. a -> Maybe a
Just (a
1 a -> a -> a
forall a. Num a => a -> a -> a
+ a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
0 Maybe a
x)
{-# INLINEABLE occurrences #-}

-- | /O(n lg n)/ The set of singleton characters in a string.
--
-- >>> singletons "aabacabd"
-- fromList "cd"
singletons :: Ord a => String a -> Set a
singletons :: forall a. Ord a => String a -> Set a
singletons String a
str = (a -> Int -> Set a -> Set a) -> Set a -> Map a Int -> Set a
forall k a b. (k -> a -> b -> b) -> b -> Map k a -> b
foldrWithKey' a -> Int -> Set a -> Set a
forall {a} {a}. (Num a, Ord a, Eq a) => a -> a -> Set a -> Set a
insertSingleton Set a
forall a. Monoid a => a
mempty (String a -> Map a Int
forall a. Ord a => String a -> Map a Int
occurrences String a
str)
  where
    insertSingleton :: a -> a -> Set a -> Set a
insertSingleton a
k a
1 = a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
insert a
k
    insertSingleton a
_ a
_ = Set a -> Set a
forall a. a -> a
id
{-# INLINEABLE singletons #-}

-- | /O(n lg n)/ The set of repeated characters in a string.
--
-- >>> repeated "aabacabd"
-- fromList "ab"
repeated :: Ord a => String a -> Set a
repeated :: forall a. Ord a => String a -> Set a
repeated String a
str = (a -> Int -> Set a -> Set a) -> Set a -> Map a Int -> Set a
forall k a b. (k -> a -> b -> b) -> b -> Map k a -> b
foldrWithKey' a -> Int -> Set a -> Set a
forall {a} {a}. (Num a, Ord a, Eq a) => a -> a -> Set a -> Set a
insertRepeated Set a
forall a. Monoid a => a
mempty (String a -> Map a Int
forall a. Ord a => String a -> Map a Int
occurrences String a
str)
  where
    insertRepeated :: a -> a -> Set a -> Set a
insertRepeated a
_ a
1 = Set a -> Set a
forall a. a -> a
id
    insertRepeated a
k a
_ = a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
insert a
k

-- | /O(n lg m)/ Check if at least one of the character of string is present in the given set.
--
-- >>> import Data.Set (fromList)
-- >>> hasOneOf "abca" (fromList "bdf")
-- True
--
-- >>> import Data.Set (fromList)
-- >>> hasOneOf "xxx" (fromList "bdf")
-- False
hasOneOf :: Ord a => String a -> Set a -> Bool
hasOneOf :: forall a. Ord a => String a -> Set a -> Bool
hasOneOf String a
str Set a
ls = (a -> Bool) -> String a -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set a
ls) String a
str

-- ------------------ --
-- Substring analysis --

-- | /O(?)/ Extracts the longest string that is a substring of both strings.
--
-- Returns `Just` the lexicographically largest of the maximal subtrings, or `Data.Maybe.Nothing`
-- if strings are disjoint.
--
-- >>> longestCommonSubstring "ABABC" "ABCBA"
-- Just ABC
--
-- >>> longestCommonSubstring "13" "1400"
-- Just 1
longestCommonSubstring :: Ord a => String a -> String a -> Maybe (String a)
longestCommonSubstring :: forall a. Ord a => String a -> String a -> Maybe (String a)
longestCommonSubstring String a
s1 String a
s2 = SuffixTree a -> Maybe (String a)
forall a. Ord a => SuffixTree a -> Maybe (String a)
findMax (String a -> String a -> SuffixTree a
forall a. Ord a => String a -> String a -> SuffixTree a
construct String a
s1 String a
s2) Maybe (String a)
-> (String a -> Maybe (String a)) -> Maybe (String a)
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= String a -> Maybe (String a)
forall {a}. String a -> Maybe (String a)
nonEmpty
  where
    nonEmpty :: String a -> Maybe (String a)
nonEmpty (NonNull String a
s) = String a -> Maybe (String a)
forall a. a -> Maybe a
Just String a
s
    nonEmpty String a
Null = Maybe (String a)
forall a. Maybe a
Nothing
{-# INLINEABLE longestCommonSubstring #-}