-- | A compressed trie of string suffixes.
module MCSP.Data.RadixTree.Suffix (
    SuffixTree,
    construct,
    findMax,
) where

import Data.Eq (Eq (..))
import Data.Foldable (foldl')
import Data.Function ((.))
import Data.Functor ((<$>))
import Data.Int (Int)
import Data.Maybe (Maybe (Just, Nothing))
import Data.Ord (Ord (..))
import Data.Semigroup (Semigroup (..))
import Safe.Foldable (maximumMay)
import Text.Show (Show)

import MCSP.Data.RadixTree.Map qualified as Map
import MCSP.Data.String (String, length)
import MCSP.Data.String.Extra.Radix (stripSuffix, suffixes)

-- --------------- --
-- Data definition --
-- --------------- --

data LeafKind = First | Second | Both deriving stock (LeafKind -> LeafKind -> Bool
(LeafKind -> LeafKind -> Bool)
-> (LeafKind -> LeafKind -> Bool) -> Eq LeafKind
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LeafKind -> LeafKind -> Bool
== :: LeafKind -> LeafKind -> Bool
$c/= :: LeafKind -> LeafKind -> Bool
/= :: LeafKind -> LeafKind -> Bool
Eq, Eq LeafKind
Eq LeafKind =>
(LeafKind -> LeafKind -> Ordering)
-> (LeafKind -> LeafKind -> Bool)
-> (LeafKind -> LeafKind -> Bool)
-> (LeafKind -> LeafKind -> Bool)
-> (LeafKind -> LeafKind -> Bool)
-> (LeafKind -> LeafKind -> LeafKind)
-> (LeafKind -> LeafKind -> LeafKind)
-> Ord LeafKind
LeafKind -> LeafKind -> Bool
LeafKind -> LeafKind -> Ordering
LeafKind -> LeafKind -> LeafKind
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: LeafKind -> LeafKind -> Ordering
compare :: LeafKind -> LeafKind -> Ordering
$c< :: LeafKind -> LeafKind -> Bool
< :: LeafKind -> LeafKind -> Bool
$c<= :: LeafKind -> LeafKind -> Bool
<= :: LeafKind -> LeafKind -> Bool
$c> :: LeafKind -> LeafKind -> Bool
> :: LeafKind -> LeafKind -> Bool
$c>= :: LeafKind -> LeafKind -> Bool
>= :: LeafKind -> LeafKind -> Bool
$cmax :: LeafKind -> LeafKind -> LeafKind
max :: LeafKind -> LeafKind -> LeafKind
$cmin :: LeafKind -> LeafKind -> LeafKind
min :: LeafKind -> LeafKind -> LeafKind
Ord, Int -> LeafKind -> ShowS
[LeafKind] -> ShowS
LeafKind -> String
(Int -> LeafKind -> ShowS)
-> (LeafKind -> String) -> ([LeafKind] -> ShowS) -> Show LeafKind
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LeafKind -> ShowS
showsPrec :: Int -> LeafKind -> ShowS
$cshow :: LeafKind -> String
show :: LeafKind -> String
$cshowList :: [LeafKind] -> ShowS
showList :: [LeafKind] -> ShowS
Show)

instance Semigroup LeafKind where
    LeafKind
Both <> :: LeafKind -> LeafKind -> LeafKind
<> LeafKind
_ = LeafKind
Both
    LeafKind
_ <> LeafKind
Both = LeafKind
Both
    LeafKind
First <> LeafKind
Second = LeafKind
Both
    LeafKind
Second <> LeafKind
First = LeafKind
Both
    LeafKind
x <> LeafKind
_ = LeafKind
x
    {-# INLINE (<>) #-}

-- | Represents a single suffix in a tree.
data Suffix a = Suffix {-# UNPACK #-} !LeafKind {-# UNPACK #-} !(String a)
    deriving stock (Int -> Suffix a -> ShowS
[Suffix a] -> ShowS
Suffix a -> String
(Int -> Suffix a -> ShowS)
-> (Suffix a -> String) -> ([Suffix a] -> ShowS) -> Show (Suffix a)
forall a. ShowString a => Int -> Suffix a -> ShowS
forall a. ShowString a => [Suffix a] -> ShowS
forall a. ShowString a => Suffix a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. ShowString a => Int -> Suffix a -> ShowS
showsPrec :: Int -> Suffix a -> ShowS
$cshow :: forall a. ShowString a => Suffix a -> String
show :: Suffix a -> String
$cshowList :: forall a. ShowString a => [Suffix a] -> ShowS
showList :: [Suffix a] -> ShowS
Show)

-- | /O(1)/ Key for comparing suffixes, using the string length.
cmpKey :: Suffix a -> (LeafKind, Int, String a)
cmpKey :: forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey (Suffix LeafKind
l String a
s) = (LeafKind
l, String a -> Int
forall a. String a -> Int
length String a
s, String a
s)
{-# INLINE cmpKey #-}

-- | Compare giving preference to longer strings.
instance Eq a => Eq (Suffix a) where
    Suffix a
lhs == :: Suffix a -> Suffix a -> Bool
== Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Eq a => a -> a -> Bool
== Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (==) #-}
    Suffix a
lhs /= :: Suffix a -> Suffix a -> Bool
/= Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Eq a => a -> a -> Bool
== Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (/=) #-}

-- | Compare giving preference to longer strings.
instance Ord a => Ord (Suffix a) where
    compare :: Suffix a -> Suffix a -> Ordering
compare Suffix a
lhs Suffix a
rhs = (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs) (Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs)
    {-# INLINE compare #-}
    Suffix a
lhs < :: Suffix a -> Suffix a -> Bool
< Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Ord a => a -> a -> Bool
< Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (<) #-}
    Suffix a
lhs <= :: Suffix a -> Suffix a -> Bool
<= Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Ord a => a -> a -> Bool
<= Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (<=) #-}
    Suffix a
lhs > :: Suffix a -> Suffix a -> Bool
> Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Ord a => a -> a -> Bool
> Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (>=) #-}
    Suffix a
lhs >= :: Suffix a -> Suffix a -> Bool
>= Suffix a
rhs = Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
lhs (LeafKind, Int, String a) -> (LeafKind, Int, String a) -> Bool
forall a. Ord a => a -> a -> Bool
>= Suffix a -> (LeafKind, Int, String a)
forall a. Suffix a -> (LeafKind, Int, String a)
cmpKey Suffix a
rhs
    {-# INLINE (>) #-}

-- | A set of suffixes for a pair of strings.
--
-- Represented by a [suffix tree](https://en.wikipedia.org/wiki/Generalized_suffix_tree).
type SuffixTree a = Map.RadixTreeMap a (Suffix a)

-- --------------- --
-- Tree operations --
-- --------------- --

-- | /O(1)/ Marks a new leaf for a suffix.
markLeaf :: Eq a => LeafKind -> String a -> String a -> Maybe (Suffix a) -> Maybe (Suffix a)
markLeaf :: forall a.
Eq a =>
LeafKind
-> String a -> String a -> Maybe (Suffix a) -> Maybe (Suffix a)
markLeaf LeafKind
m String a
_ String a
_ (Just (Suffix LeafKind
l String a
s)) = Suffix a -> Maybe (Suffix a)
forall a. a -> Maybe a
Just (LeafKind -> String a -> Suffix a
forall a. LeafKind -> String a -> Suffix a
Suffix (LeafKind
m LeafKind -> LeafKind -> LeafKind
forall a. Semigroup a => a -> a -> a
<> LeafKind
l) String a
s)
markLeaf LeafKind
m String a
k String a
s Maybe (Suffix a)
Nothing = LeafKind -> String a -> Suffix a
forall a. LeafKind -> String a -> Suffix a
Suffix LeafKind
m (String a -> Suffix a) -> Maybe (String a) -> Maybe (Suffix a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String a -> String a -> Maybe (String a)
forall a. Eq a => String a -> String a -> Maybe (String a)
stripSuffix String a
s String a
k

-- | /O(1)/ Marks leaves when merging suffixes.
mergeSuffix :: Suffix a -> Suffix a -> Suffix a
mergeSuffix :: forall a. Suffix a -> Suffix a -> Suffix a
mergeSuffix (Suffix LeafKind
xl String a
xs) (Suffix LeafKind
yl String a
_) = LeafKind -> String a -> Suffix a
forall a. LeafKind -> String a -> Suffix a
Suffix (LeafKind
xl LeafKind -> LeafKind -> LeafKind
forall a. Semigroup a => a -> a -> a
<> LeafKind
yl) String a
xs

-- | /O(?)/ Inserts all non-empty `suffixes` of a pair of strings.
insert :: Ord a => String a -> String a -> SuffixTree a -> SuffixTree a
insert :: forall a.
Ord a =>
String a -> String a -> SuffixTree a -> SuffixTree a
insert String a
s1 String a
s2 = LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
insertAllSuffixes LeafKind
Second String a
s2 (RadixTreeMap a (Suffix a) -> RadixTreeMap a (Suffix a))
-> (RadixTreeMap a (Suffix a) -> RadixTreeMap a (Suffix a))
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
insertAllSuffixes LeafKind
First String a
s1
  where
    insertAllSuffixes :: LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
insertAllSuffixes LeafKind
l String a
s RadixTreeMap a (Suffix a)
t = (RadixTreeMap a (Suffix a)
 -> String a -> RadixTreeMap a (Suffix a))
-> RadixTreeMap a (Suffix a)
-> [String a]
-> RadixTreeMap a (Suffix a)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
insertSuffix LeafKind
l) RadixTreeMap a (Suffix a)
t (String a -> [String a]
forall a. String a -> [String a]
suffixes String a
s)
    insertSuffix :: LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
insertSuffix LeafKind
l RadixTreeMap a (Suffix a)
t String a
s = (Suffix a -> Suffix a -> Suffix a)
-> String a
-> Suffix a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall a v.
Ord a =>
(v -> v -> v)
-> String a -> v -> RadixTreeMap a v -> RadixTreeMap a v
Map.insertWith Suffix a -> Suffix a -> Suffix a
forall a. Suffix a -> Suffix a -> Suffix a
mergeSuffix String a
s (LeafKind -> String a -> Suffix a
forall a. LeafKind -> String a -> Suffix a
Suffix LeafKind
l String a
s) RadixTreeMap a (Suffix a)
t
{-# INLINEABLE insert #-}

-- | /O(?)/ Mark the leaves for all `suffixes` of a pair of strings.
mark :: Ord a => String a -> String a -> SuffixTree a -> SuffixTree a
mark :: forall a.
Ord a =>
String a -> String a -> SuffixTree a -> SuffixTree a
mark String a
s1 String a
s2 = LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
markAll LeafKind
Second String a
s2 (RadixTreeMap a (Suffix a) -> RadixTreeMap a (Suffix a))
-> (RadixTreeMap a (Suffix a) -> RadixTreeMap a (Suffix a))
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
markAll LeafKind
First String a
s1
  where
    markAll :: LeafKind
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
markAll LeafKind
l String a
s RadixTreeMap a (Suffix a)
t = (RadixTreeMap a (Suffix a)
 -> String a -> RadixTreeMap a (Suffix a))
-> RadixTreeMap a (Suffix a)
-> [String a]
-> RadixTreeMap a (Suffix a)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
forall {a}.
Ord a =>
LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
markPrefixes LeafKind
l) RadixTreeMap a (Suffix a)
t (String a -> [String a]
forall a. String a -> [String a]
suffixes String a
s)
    markPrefixes :: LeafKind
-> RadixTreeMap a (Suffix a)
-> String a
-> RadixTreeMap a (Suffix a)
markPrefixes LeafKind
l RadixTreeMap a (Suffix a)
t String a
s = (String a -> Maybe (Suffix a) -> Maybe (Suffix a))
-> String a
-> RadixTreeMap a (Suffix a)
-> RadixTreeMap a (Suffix a)
forall a v.
Ord a =>
(String a -> Maybe v -> Maybe v)
-> String a -> RadixTreeMap a v -> RadixTreeMap a v
Map.updatePath (LeafKind
-> String a -> String a -> Maybe (Suffix a) -> Maybe (Suffix a)
forall a.
Eq a =>
LeafKind
-> String a -> String a -> Maybe (Suffix a) -> Maybe (Suffix a)
markLeaf LeafKind
l String a
s) String a
s RadixTreeMap a (Suffix a)
t

-- | /O(?)/ Constructs suffix tree for a pair of strings.
--
-- >>> construct "aba" "ba"
-- Tree (Suffix Both ) [a :~> Tree (Suffix Both a) [ba :~> Tree (Suffix First aba) []],ba :~> Tree (Suffix Both ba) []]
construct :: Ord a => String a -> String a -> SuffixTree a
construct :: forall a. Ord a => String a -> String a -> SuffixTree a
construct String a
s1 String a
s2 = String a -> String a -> SuffixTree a -> SuffixTree a
forall a.
Ord a =>
String a -> String a -> SuffixTree a -> SuffixTree a
mark String a
s1 String a
s2 (String a -> String a -> SuffixTree a -> SuffixTree a
forall a.
Ord a =>
String a -> String a -> SuffixTree a -> SuffixTree a
insert String a
s1 String a
s2 SuffixTree a
forall a v. RadixTreeMap a v
Map.empty)
{-# INLINE construct #-}

-- | /O(n log r)/ Retrieves the maximum common prefix of all suffixes.
--
-- >>> findMax (construct "abab" "baba")
-- Just bab
findMax :: Ord a => SuffixTree a -> Maybe (String a)
findMax :: forall a. Ord a => SuffixTree a -> Maybe (String a)
findMax SuffixTree a
t | Just (Suffix LeafKind
Both String a
s) <- SuffixTree a -> Maybe (Suffix a)
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> Maybe a
maximumMay SuffixTree a
t = String a -> Maybe (String a)
forall a. a -> Maybe a
Just String a
s
findMax SuffixTree a
_ = Maybe (String a)
forall a. Maybe a
Nothing
{-# INLINE findMax #-}