From information on ensuring data is on disk ( http://winntfs.com/2012/11/29/windows-write-caching-part-2-an-overview-for-application-developers/ ), it appears that, to get data durably onto the medium even in the case of, e.g., a power outage, on Windows platforms you need to rely on its "fsync" equivalent, FlushFileBuffers, to have the best guarantee that buffers are actually flushed from the device caches onto the storage medium itself. The combination of FILE_FLAG_NO_BUFFERING with FILE_FLAG_WRITE_THROUGH does not ensure the device cache is flushed, but merely has an effect on the file system cache, if this information is correct.
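To make the distinction concrete, here is a minimal sketch of the two approaches in .NET (the file name and buffer contents are just placeholders; on .NET 4 and later, FileStream.Flush(true) issues FlushFileBuffers internally, and it can also be P/Invoked explicitly):

    using System.IO;
    using System.Runtime.InteropServices;
    using Microsoft.Win32.SafeHandles;

    static class DurableWriteSketch
    {
        // The actual "fsync": asks the OS to flush the file's buffers,
        // including a request to flush the device's own write cache.
        [DllImport("kernel32.dll", SetLastError = true)]
        static extern bool FlushFileBuffers(SafeFileHandle hFile);

        static void Demo()
        {
            // FileOptions.WriteThrough maps to FILE_FLAG_WRITE_THROUGH; per the
            // article above it affects the file system cache but does not
            // guarantee the device cache is flushed.
            using (var fs = new FileStream("demo.data", FileMode.Create, FileAccess.Write,
                                           FileShare.None, 4096, FileOptions.WriteThrough))
            {
                fs.Write(new byte[4096], 0, 4096);
                fs.Flush(true);                      // .NET 4+: calls FlushFileBuffers internally
                FlushFileBuffers(fs.SafeFileHandle); // equivalent explicit P/Invoke
            }
        }
    }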
Given that I will be working with fairly large files that need to be updated "transactionally", this means performing an "fsync" at the end of a transaction commit. So I created a small application to test the performance of doing so. It essentially performs sequential writes of a batch of 8 memory-page-sized blocks of random bytes, then flushes. The batch is repeated in a loop, and after every so many written pages it records the performance. Additionally, it has two configurable options: whether to fsync on a flush, and whether to write a byte to the last position of the file before starting the page writes.
// Code updated to reflect new results as discussed in answer below.
// 26/Aug/2013: Code updated again to reflect results as discussed in follow up question.
// 28/Aug/2013: Increased file stream buffer to ensure 8 page flushes.

using System;
using System.Collections.Generic;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Runtime.InteropServices;
using Microsoft.Win32.SafeHandles;

class Program
{
    static void Main(string[] args)
    {
        BenchSequentialWrites(reuseExistingFile: false);
    }

    public static void BenchSequentialWrites(bool reuseExistingFile = false)
    {
        Tuple<string, bool, bool, bool, bool>[] scenarios = new Tuple<string, bool, bool, bool, bool>[]
        {
            // output csv, fsync?, fill end?, write through?, mem map?
            Tuple.Create("timing FS-EBF.csv",     true,  false, false, false),
            Tuple.Create("timing NS-EBF.csv",     false, false, false, false),
            Tuple.Create("timing FS-LB-BF.csv",   true,  true,  false, false),
            Tuple.Create("timing NS-LB-BF.csv",   false, true,  false, false),
            Tuple.Create("timing FS-E-WT-F.csv",  true,  false, true,  false),
            Tuple.Create("timing NS-E-WT-F.csv",  false, false, true,  false),
            Tuple.Create("timing FS-LB-WT-F.csv", true,  true,  true,  false),
            Tuple.Create("timing NS-LB-WT-F.csv", false, true,  true,  false),
            Tuple.Create("timing FS-EB-MM.csv",   true,  false, false, true),
            Tuple.Create("timing NS-EB-MM.csv",   false, false, false, true),
            Tuple.Create("timing FS-LB-B-MM.csv", true,  true,  false, true),
            Tuple.Create("timing NS-LB-B-MM.csv", false, true,  false, true),
            Tuple.Create("timing FS-E-WT-MM.csv", true,  false, true,  true),
            Tuple.Create("timing NS-E-WT-MM.csv", false, false, true,  true),
            Tuple.Create("timing FS-LB-WT-MM.csv", true, true,  true,  true),
            Tuple.Create("timing NS-LB-WT-MM.csv", false, true, true,  true),
        };

        foreach (var scenario in scenarios)
        {
            Console.WriteLine("{0,-12} {1,-16} {2,-16} {3,-16} {4:F2}",
                "Total pages", "Interval pages", "Total time", "Interval time", "MB/s");
            CollectGarbage();
            var timingResults = SequentialWriteTest("test.data", !reuseExistingFile,
                fillEnd: scenario.Item3, nPages: 200 * 1000, fSync: scenario.Item2,
                writeThrough: scenario.Item4, writeToMemMap: scenario.Item5);

            // Write the per-interval timings for this scenario to its CSV file.
            using (var report = File.CreateText(scenario.Item1))
            {
                report.WriteLine("Total pages,Interval pages,Total bytes,Interval bytes,Total time,Interval time,MB/s");
                foreach (var entry in timingResults)
                {
                    Console.WriteLine("{0,-12} {1,-16} {2,-16} {3,-16} {4:F2}",
                        entry.Item1, entry.Item2, entry.Item5, entry.Item6, entry.Item7);
                    report.WriteLine("{0},{1},{2},{3},{4},{5},{6}",
                        entry.Item1, entry.Item2, entry.Item3, entry.Item4,
                        entry.Item5.TotalSeconds, entry.Item6.TotalSeconds, entry.Item7);
                }
            }
        }
    }

    public unsafe static IEnumerable<Tuple<long, long, long, long, TimeSpan, TimeSpan, double>> SequentialWriteTest(
        string fileName,
        bool createNewFile,
        bool fillEnd,
        long nPages,
        bool fSync = true,
        bool writeThrough = false,
        bool writeToMemMap = false,
        long pageSize = 4096)
    {
        // Create or open the file and, if requested, fill in its last byte.
        var fileMode = createNewFile ? FileMode.Create : FileMode.OpenOrCreate;
        using (var tmpFile = new FileStream(fileName, fileMode, FileAccess.ReadWrite, FileShare.ReadWrite, (int)pageSize))
        {
            Console.WriteLine("Opening temp file with mode {0}{1}", fileMode,
                fillEnd ? " and writing last byte." : ".");
            tmpFile.SetLength(nPages * pageSize);
            if (fillEnd)
            {
                tmpFile.Position = tmpFile.Length - 1;
                tmpFile.WriteByte(1);
                tmpFile.Position = 0;
                tmpFile.Flush(true);
            }
        }

        // Make sure any flushing / activity has completed.
        System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1));
        System.Threading.Thread.SpinWait(50); // warm up.

        var buf = new byte[pageSize];
        new Random().NextBytes(buf);
        var ms = new System.IO.MemoryStream(buf);

        var stopwatch = new System.Diagnostics.Stopwatch();
        var timings = new List<Tuple<long, long, long, long, TimeSpan, TimeSpan, double>>();
        var pageTimingInterval = 8 * 2000;
        var prevPages = 0L;
        var prevElapsed = TimeSpan.FromMilliseconds(0);

        // Open the file, optionally with write-through + no-buffering flags.
        const FileOptions NoBuffering = ((FileOptions)0x20000000);
        var options = writeThrough ? (FileOptions.WriteThrough | NoBuffering) : FileOptions.None;
        using (var file = new FileStream(fileName, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, (int)(16 * pageSize), options))
        {
            stopwatch.Start();
            if (writeToMemMap)
            {
                // Write pages through a memory map.
                using (var mmf = MemoryMappedFile.CreateFromFile(file, Guid.NewGuid().ToString(), file.Length,
                    MemoryMappedFileAccess.ReadWrite, null, HandleInheritability.None, true))
                using (var accessor = mmf.CreateViewAccessor(0, file.Length, MemoryMappedFileAccess.ReadWrite))
                {
                    byte* base_ptr = null;
                    accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref base_ptr);
                    var offset = 0L;
                    for (long i = 0; i < nPages / 8; i++)
                    {
                        using (var memStream = new UnmanagedMemoryStream(base_ptr + offset, 8 * pageSize, 8 * pageSize, FileAccess.ReadWrite))
                        {
                            for (int j = 0; j < 8; j++)
                            {
                                ms.CopyTo(memStream);
                                ms.Position = 0;
                            }
                        }
                        FlushViewOfFile((IntPtr)(base_ptr + offset), (int)(8 * pageSize));
                        offset += 8 * pageSize;
                        if (fSync)
                            FlushFileBuffers(file.SafeFileHandle);

                        if (((i + 1) * 8) % pageTimingInterval == 0)
                            timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, (i + 1) * 8, ref prevPages, pageSize));
                    }
                    accessor.SafeMemoryMappedViewHandle.ReleasePointer();
                }
            }
            else
            {
                for (long i = 0; i < nPages / 8; i++)
                {
                    for (int j = 0; j < 8; j++)
                    {
                        ms.CopyTo(file);
                        ms.Position = 0;
                    }
                    file.Flush(fSync);

                    if (((i + 1) * 8) % pageTimingInterval == 0)
                        timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, (i + 1) * 8, ref prevPages, pageSize));
                }
            }
        }
        timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, nPages, ref prevPages, pageSize));
        return timings;
    }

    // Compute per-interval throughput and roll the previous counters forward.
    private static Tuple<long, long, long, long, TimeSpan, TimeSpan, double> Report(
        TimeSpan elapsed, ref TimeSpan prevElapsed, long curPages, ref long prevPages, long pageSize)
    {
        var intervalPages = curPages - prevPages;
        var intervalElapsed = elapsed - prevElapsed;
        var intervalPageSize = intervalPages * pageSize;
        var mbps = (intervalPageSize / (1024.0 * 1024.0)) / intervalElapsed.TotalSeconds;
        prevElapsed = elapsed;
        prevPages = curPages;
        return Tuple.Create(curPages, intervalPages, curPages * pageSize, intervalPageSize, elapsed, intervalElapsed, mbps);
    }

    private static void CollectGarbage()
    {
        GC.Collect();
        GC.WaitForPendingFinalizers();
        System.Threading.Thread.Sleep(200);
        GC.Collect();
        GC.WaitForPendingFinalizers();
        System.Threading.Thread.SpinWait(10);
    }

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool FlushViewOfFile(IntPtr lpBaseAddress, int dwNumBytesToFlush);

    [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
    static extern bool FlushFileBuffers(SafeFileHandle hFile);
}
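For a quick single-scenario run, rather than the full 16-scenario sweep performed by BenchSequentialWrites, the test method can also be called directly from Main; a minimal sketch (the reduced page count is just an illustrative choice to keep the run short):

    // Hypothetical single-scenario run: fsync on every flush, buffered
    // FileStream writes, 16,000 pages of 4 KB (about 62.5 MB in total).
    var results = SequentialWriteTest("test.data", createNewFile: true,
                                      fillEnd: false, nPages: 16 * 1000, fSync: true);
    foreach (var r in results)
        Console.WriteLine("pages={0} MB/s={1:F2}", r.Item1, r.Item7);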
The performance results I am getting (64-bit Win 7, slow spindle disk) are not very encouraging. It appears that "fsync" performance depends largely on the size of the file being flushed, such that this dominates the time, and not the amount of dirty data to be flushed. The graph below shows the results for 4 different settings of the little benchmark app.

As you can see, "fsync" performance decreases exponentially as the file grows (until, at a few GB, it grinds to a halt). Also, the disk itself does not seem to be very busy (i.e., Resource Monitor shows its active time as just around a few percent, and its disk queue as mostly empty most of the time).
I had obviously expected "fsync" performance to be quite a bit worse than normal buffered flushes, but I had expected it to be more or less constant and independent of file size. As it stands, it would seem that it cannot be used in combination with a single large file.
Does anybody have an explanation, different experiences, or a different solution that allows ensuring data is on disk with more or less constant, predictable performance?
UPDATE: See new information in the answer below.