I have a class library that can do this easily, although I don’t know how well it performs with large texts or with many revisions.
The library is here: DiffLib on CodePlex (you can also install it through NuGet).
Here is the script for the example in your question (you can run this in LINQPad if you add a reference to DiffLib):
// LINQPad "C# Program" script: walks a list of text revisions and tracks, for
// every character of the current text, the id of the revision that wrote it.
// After each revision it prints the text and a parallel line of owner digits.
// Requires a reference to DiffLib.
// NOTE(review): uses the DiffLib members Diff<T>, Equal, Length1 and Length2
// exactly as the original script did; other DiffLib releases may name these
// differently - verify against the version you reference.
void Main()
{
    var revs = new string[]
    {
        "The quiet fox",
        "The quiet brown fox",
        "The quick brown fox",
        "The quick brown fox.",
        "The quick brown fox jumped over the lazy dog.",
        "The quick brown fox jumped over the lazy cat.",
        "The Quick Brown Fox jumped over the Lazy Cat.",
    };

    string current = revs[0];

    // owner[i] is the id of the revision that wrote current[i];
    // owner 1 owns the entire initial string.
    List<int> owner = Enumerable.Repeat(1, current.Length).ToList();

    // Prints the revision index, the current text, and one owner character per
    // text character, followed by an empty line.
    Action<int> dumpRev = delegate(int rev)
    {
        Debug.WriteLine("rev " + rev);
        Debug.WriteLine(current);
        // '0' + id is only a decimal digit while ids stay in 0..9, which holds
        // for the seven revisions above (ids 1..7).
        Debug.WriteLine(new string(owner.Select(i => (char)('0' + i)).ToArray()));
        Debug.WriteLine("");
    };
    dumpRev(0);

    for (int index = 1; index < revs.Length; index++)
    {
        // Revision at array index N is attributed to owner id N + 1.
        int ownerId = index + 1;
        var diff = new DiffLib.Diff<char>(current, revs[index]).ToArray();

        // Index into the owner list, which is rewritten in place so that it
        // lines up with the new text once every diff part has been processed.
        int position = 0;
        foreach (var part in diff)
        {
            if (part.Equal)
            {
                // Unchanged text keeps its existing owners.
                position += part.Length1;
                continue;
            }

            // Get rid of the old owners for the part that was removed or
            // replaced (one range operation instead of Length1 RemoveAt calls).
            owner.RemoveRange(position, part.Length1);

            // Insert the new owner for the part that was added or that
            // replaced the old text.
            owner.InsertRange(position, Enumerable.Repeat(ownerId, part.Length2));
            position += part.Length2;
        }

        current = revs[index];
        dumpRev(index);
    }
}
Output:
rev 0
The quiet fox
1111111111111
rev 1
The quiet brown fox
1111111111222222111
rev 2
The quick brown fox
1111111331222222111
rev 3
The quick brown fox.
11111113312222221114
rev 4
The quick brown fox jumped over the lazy dog.
1111111331222222111555555555555555555555555555554
rev 5
The quick brown fox jumped over the lazy cat.
1111111331222222111555555555555555555555555666664
rev 6
The Quick Brown Fox jumped over the Lazy Cat.
11117113317222227115555555555555555555755557664
source share