Safe Haskell | Safe-Inferred |
---|---|
Language | GHC2021 |
MCSP.Data.MatchingGraph
Description
Working with edges of a matching graph.
Synopsis
- type Edge = (Pair Index, Length)
- pattern Edge :: Pair Index -> Length -> Edge
- start :: Edge -> Pair Index
- blockLen :: Edge -> Length
- edgeSet :: Eq a => Pair (String a) -> Vector Edge
- compatibleEdges :: Pair (Partition a) -> Vector Edge -> Vector Bool
- type Solution = Pair (Vector (Index, Length))
- solution :: Vector Edge -> Solution
- solutions :: Vector Edge -> NonEmpty Solution
- mergeness :: Solution -> Length
- blockCount :: String a -> Solution -> Length
- toPartitions :: Pair (String a) -> Solution -> Pair (Partition a)
Data Type
type Edge = (Pair Index, Length) Source #
A single edge in the matching graph for a pair of strings.
An edge ((s, p), k)
represents a common subtring S[s .. s + k - 1] = P[p .. p + k - 1]
of
length k
that can be used as a block for partitions.
Finding Edges
edgeSet :: Eq a => Pair (String a) -> Vector Edge Source #
O(n^3) List all edges of the matching graph for a pair of strings.
>>>
edgeSet ("abab", "abba")
[((0,0),2),((1,2),2),((2,0),2)]
compatibleEdges :: Pair (Partition a) -> Vector Edge -> Vector Bool Source #
Map each edge to a boolean to indicate wheter that edge is compatible with a given pair of partitions, i.e. could have been used to generate that pair.
>>>
compatibleEdges (["a", "ba", "b"], ["a", "b", "ba"]) [((0,0),2),((1,2),2),((2,0),2)]
[False,True,False]
Building Solutions
type Solution = Pair (Vector (Index, Length)) Source #
A complete solution to the strings partitioning.
Each (k,v)
pair in the solution represents a block S[k ... k + v]
.
solution :: Vector Edge -> Solution Source #
The solution represented by the edge list.
Apply each edge in the same order they are listed, ignoring edges that overlaps with the partial solution.
>>>
solution $ edgeSet ("abab", "abba")
([(0,2)],[(0,2)])
>>>
solution $ edgeSet ("abab", "abab")
([(2,2),(0,2)],[(2,2),(0,2)])
solutions :: Vector Edge -> NonEmpty Solution Source #
List of all solutions represented with an ordering of the edge set.
This is done by reapeatedly constructing solution
s with the unused edges from the previous
solution
.
>>>
solutions $ edgeSet ("abab", "abba")
([(0,2)],[(0,2)]) :| [([(1,2)],[(2,2)]),([(2,2)],[(0,2)]),([],[])]
>>>
solutions $ edgeSet ("abab", "abab")
([(2,2),(0,2)],[(2,2),(0,2)]) :| [([(0,3)],[(0,3)]),([(0,4)],[(0,4)]),([(2,2),(0,2)],[(2,2),(0,2)]),([(1,2)],[(1,2)]),([(1,3)],[(1,3)]),([],[])]
Restoring Partitions
mergeness :: Solution -> Length Source #
Calculates how much the solution merges the characters of the string.
The mergeness is given by the string length minus the number of blocks.
>>>
mergeness mempty
0
>>>
mergeness (solution $ edgeSet ("abab", "abba"))
1
blockCount :: String a -> Solution -> Length Source #
Calculates the number of blocks the solution will create.
>>>
blockCount "abab" mempty
4
>>>
blockCount "abab" (solution $ edgeSet ("abab", "abba"))
3
toPartitions :: Pair (String a) -> Solution -> Pair (Partition a) Source #
Construct the partitions for a solution.
>>>
toPartitions ("abab", "abba") mempty
([a,b,a,b],[a,b,b,a])
>>>
toPartitions ("abab", "abba") (solution $ edgeSet ("abab", "abba"))
([ab,a,b],[ab,b,a])
>>>
toPartitions ("abba", "abab") (solution $ edgeSet ("abba", "abab"))
([ab,b,a],[ab,a,b])