Remarks

  • Trie(pronounce as try) or Prefix Tree is a N-way tree used to facilitate search of string valued keys.
  • The node path corresponds to the characters in the key.
  • We use a flag to indicate the end of key.
  • Trie can be implemented with Tree Nodes or as hashmap of hashmaps.
  • The basic operations like add, get or prefix search startswith all take \(O(k)\) time where k = len(key)

Implementation

Simplest impelmentation

Trie = lambda: defaultdict(Trie)

def add(trie, word):
    for c in word:
        trie = trie[c]
    trie['T'] = word

def contains(trie, word):
    for i, c in enumerate(word):
        if c not in trie: return False
        trie = trie[c]
    return trie['T'] == word

def startswith(trie, word):
    for c in word:
        if c not in trie: return False
        trie = trie[c]
    return True

trie = Trie()

words = ["hello", "hell", "hehe", "what"]
for word in words:
    add(trie, word)

queries = ["hello", "hel", "he", "w", "are"]
for query in queries:
    print(f"{query} in trie: {contains(trie, query)}")

for query in queries:
    print(f"{query} is a prefix in trie: {startswith(trie, query)}")

Below we implement a Trie using python dictionaries.

from collections import defaultdict

class Trie(object):
    def __init__(self):
        _trie = lambda: defaultdict(_trie)
        self._trie = _trie()
        self.flag = '#'

    def add(self, word):
        trie = self._trie
        for char in word:
            trie = trie[char]
        trie[self.flag] = trie.get(self.flag, 0) + 1

    def get(self, word):
        trie = self._trie
        for char in word:
            if char not in trie: return False
            trie = trie[char]
        return False if self.flag not in trie else trie[self.flag]

    def contains(self, word):
        return self.get(word) is not False

    def startswith(self, prefix):
        trie = self._trie
        for char in prefix:
            if char not in trie: return False
            trie = trie[char]
        return True

When there are more stuff the node needs to keep track of, we can implement Trie with actual TrieNode objects:

class Trie(object):
    class TrieNode:
        def __init__(self):
            self.childrens = defaultdict(Trie.TrieNode)
            self.terminal = False

    def __init__(self):
        self.root = self.TrieNode()

    def add(self, word: str) -> None:
        trie = self.root
        for char in word:
            trie = trie.childrens[char]
        trie.terminal = True

    def contains(self, word: str) -> bool:
        trie = self.root
        for char in word:
            if char not in trie.childrens: return False
            trie = trie.childrens[char]
        return trie.terminal

    def startswith(self, prefix: str) -> bool:
        trie = self.root
        for char in prefix:
            if char not in trie.childrens: return False
            trie = trie.childrens[char]
        return True