A: A fully streamed (lazy) implementation will suffice if you simply iterate over the results in a nested foreach:
using System;
using System.Collections.Generic;

/// <summary>
/// Streaming splitter: cuts a sequence into segments at every element that
/// satisfies a predicate, without buffering the source.
/// </summary>
public static class Splitter
{
    /// <summary>
    /// Lazily splits <paramref name="source"/> into consecutive segments, using every
    /// element for which <paramref name="match"/> returns true as a delimiter.
    /// </summary>
    /// <remarks>
    /// Delimiter elements are not part of any segment. A leading delimiter or two
    /// adjacent delimiters produce an empty segment; a trailing delimiter does not
    /// produce a trailing empty segment.
    /// All segments read from one shared enumerator over the source, so each segment
    /// should be enumerated at most once, and before the outer sequence is advanced.
    /// A segment that is skipped or only partially read is drained automatically when
    /// the outer sequence moves on.
    /// </remarks>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">The sequence to split.</param>
    /// <param name="match">Returns true for elements that separate segments.</param>
    /// <returns>A lazily evaluated sequence of lazily evaluated segments.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="match"/> is null.
    /// </exception>
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> source, Predicate<T> match)
    {
        // Validate here rather than in the iterator so that bad arguments fail at the
        // call site instead of at the first MoveNext().
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        if (match == null)
        {
            throw new ArgumentNullException("match");
        }

        return SplitIterator(source, match);
    }

    // Outer iterator: hands out one segment per iteration, all backed by the same enumerator.
    private static IEnumerable<IEnumerable<T>> SplitIterator<T>(IEnumerable<T> source, Predicate<T> match)
    {
        using (IEnumerator<T> enumerator = source.GetEnumerator())
        {
            while (enumerator.MoveNext())
            {
                SegmentProgress progress = new SegmentProgress();
                yield return Split(enumerator, match, progress);

                if (!progress.Finished)
                {
                    // The consumer did not read this segment to its end. Advance to its
                    // delimiter (or the end of the source) so that the next segment
                    // starts at the right element.
                    // If the segment was never started, Current is its first, still
                    // unchecked element; otherwise Current was already yielded.
                    bool atDelimiter = !progress.Started && match(enumerator.Current);
                    while (!atDelimiter && enumerator.MoveNext())
                    {
                        atDelimiter = match(enumerator.Current);
                    }
                }
            }
        }
    }

    // Inner iterator: yields elements starting at the enumerator's current position
    // until a delimiter or the end of the source is reached.
    private static IEnumerable<T> Split<T>(IEnumerator<T> enumerator, Predicate<T> match, SegmentProgress progress)
    {
        do
        {
            if (match(enumerator.Current))
            {
                progress.Finished = true;
                yield break;
            }

            progress.Started = true;
            yield return enumerator.Current;
        }
        while (enumerator.MoveNext());

        // Source exhausted without meeting another delimiter.
        progress.Finished = true;
    }

    // Tracks how far the consumer got with one segment.
    private sealed class SegmentProgress
    {
        public bool Started;
        public bool Finished;
    }
}
Use it as follows:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace MyTokenizer
{
    // Console demo: splits a mixed list of objects wherever an "uber" token occurs
    // and prints each resulting segment followed by a separator line.
    class Program
    {
        enum TokenTypes
        {
            SimpleToken,
            UberToken
        }

        // Base token; a plain token unless a subclass says otherwise.
        class Token
        {
            public TokenTypes TokenType = TokenTypes.SimpleToken;
        }

        // Token that marks itself as the delimiter kind used by the demo.
        class MyUberToken : Token
        {
            public MyUberToken()
            {
                TokenType = TokenTypes.UberToken;
            }
        }

        static void Main(string[] args)
        {
            List<object> objects = new List<object>
            {
                "A",
                Guid.NewGuid(),
                "C",
                new MyUberToken(),
                "D",
                new MyUberToken(),
                "E",
                new MyUberToken()
            };

            TokenTypes splitOn = TokenTypes.UberToken;

            // An element is a delimiter when it is a Token of the kind we split on.
            Predicate<object> isDelimiter = delegate(object candidate)
            {
                Token token = candidate as Token;
                return token != null && token.TokenType == splitOn;
            };

            foreach (IEnumerable<object> segment in objects.Split(isDelimiter))
            {
                foreach (object element in segment)
                {
                    Console.WriteLine(element);
                }

                Console.WriteLine("==============");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
B: If you need to process the results later or out of order, or you split on one thread and then hand the segments off to several other threads, then this will probably be a good starting point:
using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// Buffering splitter: copies the source into an array and exposes each segment as
/// an independent view over that array.
/// </summary>
public static class Splitter2
{
    /// <summary>
    /// Splits <paramref name="source"/> into segments, using every element for which
    /// <paramref name="match"/> returns true as a delimiter.
    /// </summary>
    /// <remarks>
    /// The source is copied into an array when the result is first enumerated. Each
    /// segment only reads a range of that array, so segments can be enumerated in any
    /// order and more than once.
    /// Delimiter elements are not part of any segment. A leading delimiter or two
    /// adjacent delimiters produce an empty segment; a trailing delimiter does not
    /// produce a trailing empty segment.
    /// </remarks>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">The sequence to split.</param>
    /// <param name="match">Returns true for elements that separate segments.</param>
    /// <returns>A lazily produced sequence of segments.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="match"/> is null.
    /// </exception>
    public static IEnumerable<IEnumerable<T>> SplitToSegments<T>(this IEnumerable<T> source, Predicate<T> match)
    {
        // Validate here rather than in the iterator so that bad arguments fail at the
        // call site instead of at the first MoveNext().
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        if (match == null)
        {
            throw new ArgumentNullException("match");
        }

        return SplitToSegmentsIterator(source, match);
    }

    // Iterator part of SplitToSegments: snapshots the source, then scans for delimiters.
    private static IEnumerable<IEnumerable<T>> SplitToSegmentsIterator<T>(IEnumerable<T> source, Predicate<T> match)
    {
        T[] items = source.ToArray();
        int startIndex = 0;

        while (startIndex < items.Length)
        {
            // Find the delimiter that closes this segment (or the end of the array).
            int endIndex = startIndex;
            while (endIndex < items.Length && !match(items[endIndex]))
            {
                endIndex++;
            }

            yield return EnumerateArraySegment(items, startIndex, endIndex - 1);

            // Resume just past the delimiter.
            startIndex = endIndex + 1;
        }
    }

    /// <summary>
    /// Yields the elements of <paramref name="array"/> from <paramref name="startIndex"/>
    /// through <paramref name="endIndex"/>, both inclusive. Yields nothing when
    /// <paramref name="endIndex"/> is less than <paramref name="startIndex"/>.
    /// </summary>
    static IEnumerable<T> EnumerateArraySegment<T>(T[] array, int startIndex, int endIndex)
    {
        for (int index = startIndex; index <= endIndex; index++)
        {
            yield return array[index];
        }
    }
}
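C: If you really have to return a collection of List<T>s - which I doubt, unless you explicitly want to mutate them some time later - try initializing each list with the required capacity before copying (this method belongs in the same class as B, because it reuses its EnumerateArraySegment helper):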
/// <summary>
/// Splits <paramref name="source"/> into lists, using every element for which
/// <paramref name="match"/> returns true as a delimiter.
/// </summary>
/// <remarks>
/// The whole result is built before the method returns. Each list is created with
/// exactly the capacity of its segment, so filling it never has to grow the list.
/// Delimiter elements are not part of any list. A leading delimiter or two adjacent
/// delimiters produce an empty list; a trailing delimiter does not produce a
/// trailing empty list.
/// Relies on the EnumerateArraySegment helper, so it has to live in the class that
/// declares that helper.
/// </remarks>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="source">The sequence to split.</param>
/// <param name="match">Returns true for elements that separate segments.</param>
/// <returns>One list per segment, in source order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="match"/> is null.
/// </exception>
public static List<List<T>> SplitToLists<T>(this IEnumerable<T> source, Predicate<T> match)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (match == null)
    {
        throw new ArgumentNullException("match");
    }

    T[] items = source.ToArray();
    List<List<T>> lists = new List<List<T>>();
    int startIndex = 0;

    while (startIndex < items.Length)
    {
        // Find the delimiter that closes this segment (or the end of the array).
        int endIndex = startIndex;
        while (endIndex < items.Length && !match(items[endIndex]))
        {
            endIndex++;
        }

        // Pre-size the list to the segment length before copying into it.
        List<T> list = new List<T>(endIndex - startIndex);
        list.AddRange(EnumerateArraySegment(items, startIndex, endIndex - 1));
        lists.Add(list);

        // Resume just past the delimiter.
        startIndex = endIndex + 1;
    }

    return lists;
}
D: If this is still not enough, I suggest you roll your own lightweight list implementation that can copy a range from another instance directly into its internal array.