Problem (see the Edit section below for further explanation)
I have a list of about 1,500 strings, and for each of them I have to check whether any of the files in a directory (and its subdirectories) contains that string (there are about 4,000 files).
Code
Now I have two options:
Original
// Original approach: for each search string, load every file completely into
// memory and stop scanning at the first file whose text contains the string.
foreach (var needle in stringList)
{
    foreach (var path in allFiles)
    {
        // Leave the inner loop on the first hit; remaining files are not read.
        if (File.ReadAllText(path).Contains(needle))
        {
            break;
        }
    }
}
Second option (using ReadLines instead of ReadAllText, as suggested by VladL in this question):
// Second option: stream the files line by line with File.ReadLines and test
// each line for the search string, stopping at the first matching line.
// The loop variable is named `str`: `string` is a reserved keyword and cannot
// be used as an identifier, and the lambda below already refers to `str`.
// Note that the match is performed per line, not on the whole file text.
foreach (var str in stringList)
{
    allFiles.SelectMany(File.ReadLines).Any(line => line.Contains(str));
}
I measured a full run of the program with the Original version, and it took 21 minutes. Then I timed a single check (whether one string is contained in any file), searching for a string that I knew was not contained anywhere in order to test the worst-case scenario. These are my timings (each variant executed 3 times):
Original: 1285, 1369, 1336 ms
Second option: 2718, 2804, 2831 ms
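So ReadAllText (the Original version) is roughly twice as fast as reading the files line by line (the second option).
Is there a faster way to do this (other than these two options)?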
Edit
, , , , . csv, , ( ). , , , - .
// For every non-header line of every CSV file, compose the search string and
// check whether any file in allFiles contains it.
foreach (var csvFile in csvFiles)
{
    var lines = File.ReadAllLines(csvFile);
    foreach (var line in lines)
    {
        if (IsHeader(line)) continue;
        var str = ComposeString(line);
        // Named `found` because `bool` is a reserved keyword and cannot be
        // used as an identifier.
        var found = allFiles.Any(f => File.ReadAllText(f).Contains(str));
    }
}
Edit 2
/// <summary>
/// Reads every CSV file (except those whose path contains "ValueList_"), records each
/// qualifying line as a row of the data table and collects its composed resource path,
/// then searches the text of every file in the full file list for those paths using a
/// <c>Trie</c>.
/// </summary>
public void ExecuteAhoCorasick()
{
    var table = CreateDataTable();
    var allFiles = GetAllFiles();
    var csvFiles = GetCsvFiles();
    var resList = new List<string>();
    foreach (var csvFile in csvFiles)
    {
        // The loop variable is csvFile; referring to an undeclared `file` here
        // would not compile.
        if (csvFile.Contains("ValueList_")) continue;
        var lines = File.ReadAllLines(csvFile);
        foreach (var line in lines)
        {
            if (line == HeaderLine) continue;
            var res = line.Split(';');
            // Fields 0..7 are read below, so at least 8 fields are required.
            if (res.Length <= 7) continue;

            // Search pattern: the first three fields joined by dots, with
            // leading/trailing dots removed.
            var resPath = $"{res[0]}.{res[1]}.{res[2]}".Trim('.');
            resList.Add(resPath);

            var row = table.NewRow();
            // Columns 0..7 mirror the first eight CSV fields.
            for (var i = 0; i <= 7; i++)
            {
                row[i] = res[i];
            }
            row[8] = resPath;
            row[9] = false;
            row[10] = "";
            row[11] = csvFile;
            table.Rows.Add(row);
        }
    }

    // The pattern list is identical for every searched file, so the trie is
    // built once instead of once per file.
    // NOTE(review): assumes a Trie instance can be reused across Find calls
    // and needs no extra build step after Add — confirm against the Trie API.
    var trie = new Trie();
    trie.Add(resList);

    var foundRes = new List<string>();
    foreach (var file in allFiles)
    {
        var text = File.ReadAllText(file);
        foundRes.AddRange(trie.Find(text));
    }
}