, - , . , , . , ( ) . , , / , , . , , , .
bunji, , / . , :
from collections import defaultdict
from collections import Counter
import re
from copy import deepcopy
def chars_before_after(s, bin_size):
    """Count the items adjacent to every ``bin_size``-long run in ``s``.

    A window of ``bin_size + 1`` items is slid over ``s`` one position at a
    time.  For each window the leading ``bin_size`` items are recorded as
    being followed by the window's last item, and the trailing ``bin_size``
    items are recorded as being preceded by the window's first item.

    Args:
        s: The sequence to scan (a string in this file's examples).  It is
            materialised with ``list()``, so any iterable is accepted.
        bin_size: Length of the context that keys the counters; must be
            non-negative.

    Returns:
        A ``(char_before, char_after)`` pair of ``defaultdict(Counter)``
        objects keyed by ``bin_size``-tuples.  ``char_before[ctx][c]`` is how
        often ``c`` immediately precedes ``ctx``; ``char_after[ctx][c]`` is
        how often ``c`` immediately follows it.

    Raises:
        ValueError: If ``bin_size`` is negative.

    >>> before, after = chars_before_after('abcab', 2)
    >>> after[('a', 'b')]
    Counter({'c': 1})
    >>> before[('a', 'b')]
    Counter({'c': 1})
    """
    if bin_size < 0:
        # A negative size would turn the slices below into "everything but
        # the last few items" and silently produce meaningless counts.
        raise ValueError('bin_size must be non-negative, got %r' % (bin_size,))

    items = list(s)
    window_len = bin_size + 1
    char_before = defaultdict(Counter)
    char_after = defaultdict(Counter)

    # Sliding-window idea from http://stackoverflow.com/q/42220614/610569
    # The range is empty when the input is shorter than one window.
    for start in range(len(items) - window_len + 1):
        window = tuple(items[start:start + window_len])
        char_after[window[:bin_size]][window[-1]] += 1
        char_before[window[1:]][window[0]] += 1
    return char_before, char_after
def _as_counter_table(counts):
    """Return an independent ``defaultdict(Counter)`` copy of ``counts``."""
    table = defaultdict(Counter)
    for context, counter in counts.items():
        table[context] = Counter(counter)
    return table


def _positive_counts(table):
    """Return a plain dict of ``table`` keeping only strictly positive counts."""
    cleaned = {}
    for context, counter in table.items():
        # Adding an empty Counter drops zero and negative entries.
        positive = counter + Counter()
        if positive:
            cleaned[context] = positive
    return cleaned


def _print_counts(char_before, char_after):
    """Print both context tables in the verbose trace format."""
    print('char_before')
    for char_counter in char_before.items():
        print(char_counter)
    print('\nchar_after')
    for char_counter in char_after.items():
        print(char_counter)


def replace_chars_recount(s, source, target, char_before, char_after, verbose=False):
    """Replace ``source`` with ``target`` in ``s`` and update the context counts.

    Instead of recounting the whole result, the existing tables are patched:
    a first pass walks ``s`` left to right, replaces each non-overlapping
    occurrence of ``source`` and subtracts the counts of the context windows
    around it; a second pass walks the replaced string and adds the counts of
    the windows around each inserted ``target``.

    The tables are expected to have been built over ``s`` with a context
    length equal to ``len(source)``; the window arithmetic below uses
    ``len(source)`` as the context length throughout.

    Args:
        s: The original string.
        source: The substring to replace.  Must be non-empty unless ``s`` is
            empty as well.
        target: The replacement string (may be empty).
        char_before: Mapping ``context tuple -> {char: count}`` of characters
            seen immediately before each context.  Any mapping of mappings is
            accepted, including the plain dicts this function returns.
        char_after: Same shape as ``char_before``, for characters seen
            immediately after each context.
        verbose: When true, print a trace of every count adjustment.

    Returns:
        ``(replaced_s, char_before, char_after)`` where the two tables are new
        plain dicts of ``Counter`` holding only strictly positive counts.  The
        tables passed in are not modified.

    Raises:
        ValueError: If ``source`` is empty while ``s`` is not (the scan could
            never advance past an empty match).
    """
    if not source and s:
        raise ValueError('source must be non-empty when s is non-empty')

    if verbose:
        print('s=' + s + ', source=' + source, 'target=' + target)
        _print_counts(char_before, char_after)

    # Work on private copies that create missing contexts on demand, so the
    # caller's tables stay untouched and plain dict inputs are supported.
    char_before = _as_counter_table(char_before)
    char_after = _as_counter_table(char_after)

    s_len = len(s)
    source_len = len(source)
    target_len = len(target)

    pieces = []          # fragments of the replaced string, joined at the end
    replaced_len = 0     # running length of the replaced string
    target_pos = []      # start index of every inserted target in the result
    source_start = 0
    source_stop = source_len
    # Number of upcoming positions whose windows were already subtracted by
    # the previous match; decays by one for every unmatched character.
    last_replacement = 0

    # Pass 1: build the replaced string and subtract the old windows.
    while source_start < s_len:
        if verbose:
            print('start_index=' + str(source_start))
        if s[source_start:source_stop] == source:
            # Contexts starting up to len(source) before the match and
            # running through it lose the character that followed them.
            before_start = max(source_start - source_len + last_replacement, 0)
            before_end = before_start + source_len
            while before_start < source_stop and before_end < s_len:
                before_chars = tuple(s[before_start:before_end])
                if verbose:
                    print('Removing "'+ s[before_end] +'" from after "' + s[before_start:before_end] + '".')
                char_after[before_chars][s[before_end]] -= 1
                before_start += 1
                before_end += 1

            # Contexts ending up to len(source) after the match lose the
            # character that preceded them; walk backwards over them.
            after_end = min(s_len, source_stop + source_len)
            after_start = after_end - source_len
            while after_end > source_start + last_replacement and after_start > 0:
                after_chars = tuple(s[after_start:after_end])
                if verbose:
                    print('Removing "' + s[after_start-1] + '" from before "' + s[after_start:after_end] + '".')
                char_before[after_chars][s[after_start - 1]] -= 1
                after_start -= 1
                after_end -= 1

            target_pos.append(replaced_len)
            pieces.append(target)
            replaced_len += target_len
            source_start += source_len
            source_stop += source_len
            last_replacement = source_len
        else:
            pieces.append(s[source_start])
            replaced_len += 1
            source_start += 1
            source_stop += 1
            last_replacement = max(0, last_replacement - 1)

    replaced_s = ''.join(pieces)
    out_len = len(replaced_s)

    # Pass 2: add the windows that now surround each inserted target.
    # ``last_target`` bounds each scan so windows already added for the
    # previous target are not counted twice.
    last_target = 0 - target_len
    for pos in target_pos:
        if verbose:
            print('target_pos=' + str(pos))
        before_target_start = max(pos - source_len, last_target + target_len, 0)
        before_target_end = before_target_start + source_len
        while before_target_start <= pos + target_len - 1 and before_target_end < out_len:
            before_chars = tuple(replaced_s[before_target_start:before_target_end])
            if verbose:
                print('Adding "' + replaced_s[before_target_end] + '" to after "' + replaced_s[before_target_start:before_target_end] + '".')
            char_after[before_chars][replaced_s[before_target_end]] += 1
            before_target_start += 1
            before_target_end += 1

        after_end = min(out_len, pos + target_len + source_len)
        after_start = after_end - source_len
        while after_end > max(pos, last_target + source_len + target_len) and after_start > 0:
            after_chars = tuple(replaced_s[after_start:after_end])
            if verbose:
                print('Adding "' + replaced_s[after_start - 1] + '" to before "' + replaced_s[after_start:after_end] + '".')
            char_before[after_chars][replaced_s[after_start - 1]] += 1
            after_start -= 1
            after_end -= 1
        last_target = pos

    char_before = _positive_counts(char_before)
    char_after = _positive_counts(char_after)

    if verbose:
        print('replaced_s=' + replaced_s)
        _print_counts(char_before, char_after)
    return replaced_s, char_before, char_after
def test_replace_chars_recount(s, source, target, verbose=False):
    """Check the incremental recount against a from-scratch recount.

    Builds the context tables for ``s`` with a context length of
    ``len(source)``, runs ``replace_chars_recount`` on them, and compares the
    result with a plain string replacement whose tables are counted afresh.
    One formatted result row is printed.

    Args:
        s: The input string.
        source: The substring to replace.
        target: The replacement string.
        verbose: Passed through to ``replace_chars_recount``.

    Returns:
        ``True`` when the replaced string and both tables match the reference,
        ``False`` otherwise.
    """
    char_before, char_after = chars_before_after(s, len(source))
    replaced_s, char_before, char_after = replace_chars_recount(
        s, source, target, char_before, char_after, verbose)

    # Literal replacement: a regex substitution would misread metacharacters
    # in ``source`` and backslash escapes in ``target``.
    correct_replaced = s.replace(source, target)
    # Count the reference tables from the reference string, not from the
    # output under test, so the two sides stay independent.
    correct_before, correct_after = chars_before_after(correct_replaced, len(source))

    correct_answer = (
        correct_replaced == replaced_s
        and correct_before == char_before
        and correct_after == char_after
    )
    print('{:>20} {:<20} {:<10} {:<10} {:<5}'.format(s, replaced_s, source, target, str(correct_answer)))
    return correct_answer
# Each triple is (input string, substring to replace, replacement); the list
# covers adjacent and separated matches, matches at either end of the string,
# a source that never occurs, an empty target and an all-empty input.
test_cases = [
    dict(s=text, source=old, target=new)
    for text, old, new in (
        ('cdababef', 'ab', 'x'),
        ('cdabqabef', 'ab', 'x'),
        ('cdabgabgef', 'abg', 'x'),
        ('cdabgqabgef', 'abg', 'x'),
        ('cdababef', 'ab', 'xy'),
        ('cdababef', 'ab', 'xyz'),
        ('cdababef', 'a', 'x'),
        ('cdababef', 'a', 'xyz'),
        ('ababef', 'ab', 'x'),
        ('cdabab', 'ab', 'x'),
        ('cdababef', 'xy', 'x'),
        ('cdababef', 'ab', ''),
        ('cdabcdabcdef', 'abcd', 'x'),
        ('cdabcdeabcdeabcdeef', 'abcde', 'x'),
        ('cdababef', 'a', 'abcd'),
        ('aaaaa', 'a', 'x'),
        ('aaaaa', 'a', 'xy'),
        ('', '', ''),
    )
]

# Header row, aligned with the rows printed by test_replace_chars_recount.
print('{:>20} {:<20} {:<10} {:<10} {:<5}'.format(
    'Input String', 'Output String', 'Source', 'Target', 'Correct Result?'))

# The dict keys match the checker's parameter names, so unpack them directly.
for test_case in test_cases:
    test_replace_chars_recount(**test_case)
Output:
Input String Output String Source Target Correct Result?
cdababef cdxxef ab x True
cdabqabef cdxqxef ab x True
cdabgabgef cdxxef abg x True
cdabgqabgef cdxqxef abg x True
cdababef cdxyxyef ab xy True
cdababef cdxyzxyzef ab xyz True
cdababef cdxbxbef a x True
cdababef cdxyzbxyzbef a xyz True
ababef xxef ab x True
cdabab cdxx ab x True
cdababef cdababef xy x True
cdababef cdef ab True
cdabcdabcdef cdxxef abcd x True
cdabcdeabcdeabcdeef cdxxxef abcde x True
cdababef cdabcdbabcdbef a abcd True
aaaaa xxxxx a x True
aaaaa xyxyxyxyxy a xy True
True
Thus, this approach works regardless of the lengths of the source and target. The only limitation of the current implementation is that the length of the source must equal the bin size (`bin_size`) used when counting the characters before and after. However, you can change this to make it more flexible.