from segments.errors import replace


class TreeNode:
    """
    Private class that creates the tree data structure from the orthography profile for
    parsing.
    """

    def __init__(self, char, sentinel=False):
        self.char = char
        self.children = {}
        self.sentinel = sentinel


class Tree:
    def __init__(self, graphemes):
        def _multigraph(node, line):
            # Internal function to add a multigraph starting at node.
            for char in line:
                node = node.children.setdefault(char, TreeNode(char))
            node.sentinel = True

        self.root = TreeNode('', sentinel=True)
        for grapheme in graphemes:
            _multigraph(self.root, grapheme)

    def parse(self, line, error=replace):
        res, idx = self._parse(self.root, line, 0)
        rem = line[idx:]
        while rem:
            # Chop off one character and try parsing the remainder:
            res.append(error(rem[0]))
            rem = rem[1:]
            r, i = self._parse(self.root, rem, 0)
            res.extend(r)
            rem = rem[i:]
        return res

    def _parse(self, root, line, idx):
        """
        :param root: Tree node.
        :param line: String to parse.
        :param idx: Global counter of characters parsed.
        :return: (list of parsed graphemes, incremented character count)
        """
        # Base (or degenerate..) case.
        if len(line) == 0:
            return [], idx

        parse = []
        curr = 0
        node = root
        cidx = idx
        while curr < len(line):
            node = node.children.get(line[curr])
            curr += 1
            if not node:
                break
            if node.sentinel:
                subparse, cidx = self._parse(root, line[curr:], idx + curr)
                # Always keep the latest valid parse, which will be
                # the longest-matched (greedy match) graphemes.
                parse = [line[:curr]]
                parse.extend(subparse)
        if parse:
            idx = cidx
        return parse, idx
