Advanced
Segment Tree
class TreeNode:
def __init__(self, val=None, parent=None, left=None, right=None, start=None, end=None):
self.val = val
self.parent = parent
self.left = left
self.right = right
self.start = start
self.end = end
def __repr__(self):
print('val {} start {} end {}'.format(self.val, self.start, self.end))
class SegmentTree:
def __init__(self, values):
self.build_tree(values)
def build_tree(self, values):
nodes = [TreeNode(val=val, start=i, end=i) for i, val in enumerate(values)]
self.leaves = nodes
while len(nodes) != 1:
parents = []
for l, r in zip_longest(nodes[::2], nodes[1::2], fillvalue=None):
val = l.val if not r else l.val + r.val
end = l.end if not r else r.end
parent = TreeNode(val, left=l, right=r, start=l.start, end=end)
l.parent = parent
if r: r.parent = parent
parents.append(parent)
nodes = parents
self.root = nodes[0]
def update(self, index, val):
leaf = self.leaves[index]
diff = val - leaf.val
node = leaf
while node:
node.val += diff
node = node.parent
def query(self, left, right):
l = self.leaves[left]
r = self.leaves[right]
if l == r: return l.val
total = l.val + r.val
while l.parent != r.parent:
if l == l.parent.left:
total += l.parent.right.val
if r == r.parent.right:
total += r.parent.left.val
l = l.parent
r = r.parent
return total
KMP
- Motivation:
def is_substr(source, query):
"""return if query is a substring of the source"""
pass
for i in range(len(source)):
if source[i:i+len(query)] == query: return True
return False
O(MN) time where N = len(source), M = len(query). Redaundant work is that when search fails, we go back to next i.
source = aabdcaaabdcaaabcdba
query = aabdcaaabcdb
| match process started from i = 0 we fail here
Do we need to go back to i+1 and start over? No
source = aabdcaaabdcaaabcdba
query = aabdcaaabcdb
|
We should try to start from here.
How do we know?
query = aabdca|aab|cdb
query = |aab|dcaaabcdb
aab is the prefix but also the suffix for subtring of query aabdcaaab
If we know such information about the query string, we can move i back to where that suffix started.
- Longest proper prefix that is also a suffix. LPS
Generate an array that contains the length of the LPS of each part of target.
def lps(p):
m = len(p)
lps = [0, 0]
j = 0
for i in range(1, m):
while j and p[i] != p[j]: j = lps[j]
if p[i] == p[j]: j += 1
lps.append(j)
return lps
query = "aabdcaaabcdb"
print(query)
print("".join(map(str, lps(query))))
aabdcaaabcdb
0010001223000
lps[i] is: if match at query location i failed (does not match source[j]), we can go back to lps[i] of query and try to match query[lps[i]] with the source[j].
Matching prodedure.
def match(s, p):
n, m = len(s), len(p)
lps = lps(p)
ans = []
j = 0
for i in range(n):
while j and s[i] != p[j]: j = lps[j] # don't match, use lps table to jump back
if s[i] == p[j]: j += 1
if j == m:
ans.append(i - m + 1)
j = lps[j]
return ans
match('abcdeabcdabcbacad', 'abc')
partial use:
def kmp_table(s):
m = len(s)
table, j = [0, 0], 0
for i in range(1, m):
while j > 0 and s[i] != s[j]: j = table[j]
if s[i] == s[j]: j += 1
table.append(j)
return table
class Solution:
def shortestPalindrome(self, s: str) -> str:
ss = s + "+" + s[::-1]
tb = kmp_table(ss)
k = tb[-1]
return s[k:][::-1] + s[:k] + s[k:]
Rabin-Karp Rolling Hash
- Motivation:
# rolling hash a fixed size substring on DNA sequence.
def update_code(code, c):
code <<= 2
code &= mask
if c == 'A': code |= 0
if c == 'C': code |= 1
if c == 'G': code |= 2
if c == 'T': code |= 3
return code
Data Structure Crossing
- HashSet + Deque
class Logger:
def __init__(T, limit=limit):
T.messages = set()
T.queue = deque()
T.limite = limite
def shouldPrintMessage(T, timestamp: int, message: str) -> bool:
while T.queue and timestamp - T.queue[0][1] >= T.limite:
msg = T.queue[0][0]
T.queue.popleft()
T.messages.remove(msg)
if message not in T.messages:
T.messages.add(message)
T.queue.append((message, timestamp))
return True
return False