I have a bunch of files that are several megabytes in size, which are very simple:
- They have a multiple of 8
- They contain only doubles at the small end, so they can be read using the method
BinaryReader ReadDouble()
When lexicographically sorted, they contain all the values in the sequence in which they should be.
I can’t store everything in memory like float listor float array, so I need float seqone that goes through the necessary files when accessing them. The part that goes through the sequence really makes it imperative, using GetEnumerator()it because I don't want a resource leak and I want to close all the files correctly.
My first functional approach:
let readFile file =
let rec readReader (maybeReader : BinaryReader option) =
match maybeReader with
| None ->
let openFile() =
printfn "Opening the file"
new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
|> Some
|> readReader
seq { yield! openFile() }
| Some reader when reader.BaseStream.Position >= reader.BaseStream.Length ->
printfn "Closing the file"
reader.Dispose()
Seq.empty
| Some reader ->
reader.BaseStream.Position |> printfn "Reading from position %d"
let bytesToRead = Math.Min(1048576L, reader.BaseStream.Length - reader.BaseStream.Position) |> int
let bytes = reader.ReadBytes bytesToRead
let doubles = Array.zeroCreate<float> (bytesToRead / 8)
Buffer.BlockCopy(bytes, 0, doubles, 0, bytesToRead)
seq {
yield! doubles
yield! readReader maybeReader
}
readReader None
, string list, , - :
let values = files |> Seq.collect readFile
use ve = values.GetEnumerator()
// Do stuff that only gets partial data from one file
, ( ). , , :
type FileEnumerator(file : string) =
let reader = new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
let mutable _current : float = Double.NaN
do file |> printfn "Enumerator active for %s"
interface IDisposable with
member this.Dispose() =
reader.Dispose()
file |> printfn "Enumerator disposed for %s"
interface IEnumerator with
member this.Current = _current :> obj
member this.Reset() = reader.BaseStream.Position <- 0L
member this.MoveNext() =
let stream = reader.BaseStream
if stream.Position >= stream.Length then false
else
_current <- reader.ReadDouble()
true
interface IEnumerator<float> with
member this.Current = _current
type FileEnumerable(file : string) =
interface IEnumerable with
member this.GetEnumerator() = new FileEnumerator(file) :> IEnumerator
interface IEnumerable<float> with
member this.GetEnumerator() = new FileEnumerator(file) :> IEnumerator<float>
let readFile' file = new FileEnumerable(file) :> float seq
,
let values = files |> Seq.collect readFile'
use ve = values.GetEnumerator()
.
, ( , , , ) , , .