This problem piqued my interest, so I wrote an overly general solution.
Here is the function that
- aligns any number of sequences
- runs on iterators, so it can efficiently handle long (or infinite) sequences.
- supports duplicate values
- is compatible with Python 2 and 3 (although I would use the signature
`align_iterables(*inputs, missing_value=None)` if I did not care about historical versions of Python)
import itertools


def align_iterables(inputs, missing=None):
    """Align sorted iterables.

    Lazily yields tuples with one slot per iterable in `inputs`. Each
    tuple is built around the smallest value currently at the head of any
    iterable: slots whose iterable has that value at its head contain it,
    every other slot contains `missing`. Only the iterables that
    contributed to a tuple are advanced, so duplicate values inside one
    iterable appear on consecutive tuples.

    The inputs are expected to be sorted in ascending order; this is not
    checked.

    Args:
        inputs: A sequence (or any iterable) of iterables to align.
        missing: Placeholder used where an iterable has no matching value.

    Yields:
        Tuples of length ``len(inputs)``.

    Example:
        list(align_iterables(['bc', 'bf', '', 'abf']))
    gives:
        (None, None, None, 'a')
        ('b', 'b', None, 'b')
        ('c', None, None, None)
        (None, 'f', None, 'f')
    """
    # Unique end-of-input marker; appended to every iterable so that
    # next() never raises StopIteration inside this generator.
    exhausted = object()
    iterators = [itertools.chain(it, [exhausted]) for it in inputs]
    heads = [next(it) for it in iterators]

    while any(head is not exhausted for head in heads):
        smallest = min(head for head in heads if head is not exhausted)
        # Match by identity first, then equality. The identity test
        # guarantees that at least the iterable which supplied `smallest`
        # advances, so the loop always makes progress even for values
        # that are not equal to themselves (e.g. float('nan')).
        hits = [
            head is not exhausted and (head is smallest or head == smallest)
            for head in heads
        ]
        yield tuple(
            head if hit else missing for head, hit in zip(heads, hits)
        )
        heads = [
            next(it) if hit else head
            for it, head, hit in zip(iterators, heads, hits)
        ]
# Adapter for this problem:
def align_two_lists(list1, list2, missing="MISSING"):
    """Align two sorted sequences and return them as two equal-length lists.

    Delegates the alignment to `align_iterables` and splits the resulting
    pairs back into one list per input.

    Args:
        list1: First sorted iterable.
        list2: Second sorted iterable.
        missing: Placeholder inserted where one input lacks a value that
            the other one has.

    Returns:
        ``[aligned1, aligned2]``; both are empty lists when both inputs
        are empty.
    """
    aligned_first = []
    aligned_second = []
    for from_first, from_second in align_iterables([list1, list2], missing=missing):
        aligned_first.append(from_first)
        aligned_second.append(from_second)
    return [aligned_first, aligned_second]
# A set of tests for the issue:
if __name__ == '__main__':
    # Table of (first input, second input, expected aligned pair), all
    # using '_' as the placeholder for a missing value.
    _cases = [
        ('abcef', 'abcdef',
         [['a', 'b', 'c', '_', 'e', 'f'], ['a', 'b', 'c', 'd', 'e', 'f']]),
        ('a', 'abcdef',
         [['a', '_', '_', '_', '_', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]),
        ('abcdef', 'a',
         [['a', 'b', 'c', 'd', 'e', 'f'], ['a', '_', '_', '_', '_', '_']]),
        ('', 'abcdef',
         [['_', '_', '_', '_', '_', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]),
        ('abcdef', '',
         [['a', 'b', 'c', 'd', 'e', 'f'], ['_', '_', '_', '_', '_', '_']]),
        ('ace', 'abcdef',
         [['a', '_', 'c', '_', 'e', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]),
        ('bdf', 'ace',
         [['_', 'b', '_', 'd', '_', 'f'], ['a', '_', 'c', '_', 'e', '_']]),
        ('ace', 'bdf',
         [['a', '_', 'c', '_', 'e', '_'], ['_', 'b', '_', 'd', '_', 'f']]),
        # Duplicates inside one input occupy consecutive rows.
        ('aaacd', 'acd',
         [['a', 'a', 'a', 'c', 'd'], ['a', '_', '_', 'c', 'd']]),
        ('acd', 'aaacd',
         [['a', '_', '_', 'c', 'd'], ['a', 'a', 'a', 'c', 'd']]),
        ('', '', [[], []]),
    ]
    for _first, _second, _expected in _cases:
        assert align_two_lists(_first, _second, '_') == _expected

    # The example from the original question, using the default placeholder.
    list1 = ["datetimeA", "datetimeB", "datetimeD", "datetimeE"]
    list2 = ["datetimeB", "datetimeC", "datetimeD", "datetimeF"]
    new_list1 = ["datetimeA", "datetimeB", "MISSING", "datetimeD", "datetimeE", "MISSING"]
    new_list2 = ["MISSING", "datetimeB", "datetimeC", "datetimeD", "MISSING", "datetimeF"]
    assert align_two_lists(list1, list2) == [new_list1, new_list2]
# And some additional tests:
if __name__ == '__main__':
    # Also test multiple generators.
    # Compare complete lists instead of zipping expected rows against the
    # generator: zip() stops at the shorter side, so a generator yielding
    # too few (or too many) rows would otherwise go unnoticed.
    expected_rows = [
        (None, None, None, 'a'),
        ('b', 'b', None, 'b'),
        ('c', None, None, None),
        (None, 'f', None, 'f'),
    ]
    assert list(align_iterables(['bc', 'bf', '', 'abf'])) == expected_rows

    # No inputs at all produces no rows.
    assert list(align_iterables([])) == []