| Safe Haskell | Safe-Inferred |
|---|---|
| Language | GHC2021 |
MCSP.Data.MatchingGraph
Description
Working with edges of a matching graph.
Synopsis
- type Edge = (Pair Index, Length)
- pattern Edge :: Pair Index -> Length -> Edge
- start :: Edge -> Pair Index
- blockLen :: Edge -> Length
- edgeSet :: Eq a => Pair (String a) -> Vector Edge
- compatibleEdges :: Pair (Partition a) -> Vector Edge -> Vector Bool
- type Solution = Pair (Vector (Index, Length))
- solution :: Vector Edge -> Solution
- solutions :: Vector Edge -> NonEmpty Solution
- mergeness :: Solution -> Length
- blockCount :: String a -> Solution -> Length
- toPartitions :: Pair (String a) -> Solution -> Pair (Partition a)
Data Type
type Edge = (Pair Index, Length) Source #
A single edge in the matching graph for a pair of strings.
An edge ((s, p), k) represents a common subtring S[s .. s + k - 1] = P[p .. p + k - 1] of
length k that can be used as a block for partitions.
Finding Edges
edgeSet :: Eq a => Pair (String a) -> Vector Edge Source #
O(n^3) List all edges of the matching graph for a pair of strings.
>>>edgeSet ("abab", "abba")[((0,0),2),((1,2),2),((2,0),2)]
compatibleEdges :: Pair (Partition a) -> Vector Edge -> Vector Bool Source #
Map each edge to a boolean to indicate wheter that edge is compatible with a given pair of partitions, i.e. could have been used to generate that pair.
>>>compatibleEdges (["a", "ba", "b"], ["a", "b", "ba"]) [((0,0),2),((1,2),2),((2,0),2)][False,True,False]
Building Solutions
type Solution = Pair (Vector (Index, Length)) Source #
A complete solution to the strings partitioning.
Each (k,v) pair in the solution represents a block S[k ... k + v].
solution :: Vector Edge -> Solution Source #
The solution represented by the edge list.
Apply each edge in the same order they are listed, ignoring edges that overlaps with the partial solution.
>>>solution $ edgeSet ("abab", "abba")([(0,2)],[(0,2)])
>>>solution $ edgeSet ("abab", "abab")([(2,2),(0,2)],[(2,2),(0,2)])
solutions :: Vector Edge -> NonEmpty Solution Source #
List of all solutions represented with an ordering of the edge set.
This is done by reapeatedly constructing solutions with the unused edges from the previous
solution.
>>>solutions $ edgeSet ("abab", "abba")([(0,2)],[(0,2)]) :| [([(1,2)],[(2,2)]),([(2,2)],[(0,2)]),([],[])]
>>>solutions $ edgeSet ("abab", "abab")([(2,2),(0,2)],[(2,2),(0,2)]) :| [([(0,3)],[(0,3)]),([(0,4)],[(0,4)]),([(2,2),(0,2)],[(2,2),(0,2)]),([(1,2)],[(1,2)]),([(1,3)],[(1,3)]),([],[])]
Restoring Partitions
mergeness :: Solution -> Length Source #
Calculates how much the solution merges the characters of the string.
The mergeness is given by the string length minus the number of blocks.
>>>mergeness mempty0
>>>mergeness (solution $ edgeSet ("abab", "abba"))1
blockCount :: String a -> Solution -> Length Source #
Calculates the number of blocks the solution will create.
>>>blockCount "abab" mempty4
>>>blockCount "abab" (solution $ edgeSet ("abab", "abba"))3
toPartitions :: Pair (String a) -> Solution -> Pair (Partition a) Source #
Construct the partitions for a solution.
>>>toPartitions ("abab", "abba") mempty([a,b,a,b],[a,b,b,a])
>>>toPartitions ("abab", "abba") (solution $ edgeSet ("abab", "abba"))([ab,a,b],[ab,b,a])
>>>toPartitions ("abba", "abab") (solution $ edgeSet ("abba", "abab"))([ab,b,a],[ab,a,b])