Sequence of the wrong length generated by a function

Why does the following function return a sequence of the wrong length when the repl variable is set to false?

open MathNet.Numerics.Distributions
open MathNet.Numerics.LinearAlgebra
// Draws `size` elements from `data`: with replacement when `repl` is true,
// and (as intended) without replacement when `repl` is false.
let sample (data : seq<float>) (size : int) (repl : bool) =

    let n = data |> Seq.length

    // without replacement
    // Keeps topping up `idx` with random indices until Seq.length reports `size` values.
    // NOTE: `idx` is a lazy seq built from DiscreteUniform.Samples, so every enumeration
    // (including the Seq.length call below) draws fresh random numbers; the values that
    // are counted here are not the values the caller later enumerates.
    let rec generateIndex idx =
        let m = size - Seq.length(idx)
        match m > 0 with
        | true ->
            let newIdx = DiscreteUniform.Samples(0, n-1) |> Seq.take m 
            let idx = (Seq.append idx newIdx) |> Seq.distinct
            generateIndex idx
        | false -> 
            idx

    // This local value shadows the enclosing function's own name.
    let sample =
        match repl with
        | true ->
            DiscreteUniform.Samples(0, n-1) 
            |> Seq.take size 
            |> Seq.map (fun index -> Seq.item index data)
        | false ->
            generateIndex (seq []) 
            |> Seq.map (fun index -> Seq.item index data)

    sample

Calling the function ...

// Request 1000 elements without replacement (repl = false) from 10000 normally
// distributed values, then print the requested length next to the returned length.
let requested = 1000
let dat = Normal.Samples(0., 1.) |> Seq.take 10000
let resultlen = sample dat requested false |> Seq.length 
printfn "requested -> %A\nreturned -> %A" requested resultlen

The resulting lengths are incorrect.

> 
requested -> 1000
returned -> 998

> 
requested -> 1000
returned -> 1001

> 
requested -> 1000
returned -> 997

Any idea what mistake I am making?

+4
source share
1 answer

Firstly, there is a comment I want to make about the coding style. Then I will explain why your sequences return with different lengths.

In the comments, I mentioned replacing match (bool) with true -> ... | false -> ... with a simple if ... then ... else expression, but there is another aspect of your coding style that I think can be improved. You wrote:

let sample (various_parameters) =  // This is a function
    // Other code ...
    let sample = some_calculation  // This is a variable that shadows the function's name
    sample  // Return the variable

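F# allows you to reuse names like this, and the name inside the function will "shadow" the name outside it, but it is generally a bad idea for the reused name to have a completely different type from the original. In other words, this can be a good idea: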

// Rebinds `a` from a float option to a plain float, using 0.0 when no value was given.
let f (a : float option) =
    let a = match a with
            | None -> 0.0
            | Some value -> value
    // Now proceed, knowing that `a` has a real value even if it had been None before

Or, because F# provides defaultArg for exactly this situation:

// Rebinds `a` to the supplied value, or to the default 0.0 when the option is None.
let f (a : float option) =
    let a = defaultArg a 0.0
    // This does exactly the same thing as the previous snippet

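Here the name a inside the function refers to a different type than the parameter named a: the parameter was a: float option, while the a inside the function is a float. But they are more or less the "same" kind of thing: there is very little mental distance between "the caller may or may not have given me a floating-point value" and "now I definitely have a floating-point value". There is, however, a very large mental gap between "the name sample is a function that takes three parameters" and "the name sample is a sequence of floats". I recommend using a name like result for the value you are going to return from the function, instead of reusing the function's name.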

So the end of your function would become:

// Tail of the function with the return value bound to `result` instead of `sample`.
let result =
    match repl with
    | true ->
        DiscreteUniform.Samples(0, n-1) 
        |> Seq.take size 
        |> Seq.map (fun index -> Seq.item index data)
    | false ->
        generateIndex (seq []) 
        |> Seq.map (fun index -> Seq.item index data)

result

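But in fact there is no need to bind the result of the match expression to a name at all: "let result = (something); result" can simply be replaced by (something). I.e., like this: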

// The match expression is itself the function's return value; no intermediate binding.
match repl with
| true ->
    DiscreteUniform.Samples(0, n-1) 
    |> Seq.take size 
    |> Seq.map (fun index -> Seq.item index data)
| false ->
    generateIndex (seq []) 
    |> Seq.map (fun index -> Seq.item index data)

Next, as I mentioned in the comments, replace the match on a boolean with if...then...else:

// Same two branches as the boolean match, written as a plain conditional.
if repl then
    DiscreteUniform.Samples(0, n-1) 
    |> Seq.take size 
    |> Seq.map (fun index -> Seq.item index data)
else
    generateIndex (seq []) 
    |> Seq.map (fun index -> Seq.item index data)

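That is the last of the style points. So your complete code, with these changes (and no others), would look like this: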

open MathNet.Numerics.Distributions
open MathNet.Numerics.LinearAlgebra
// Style-only rewrite of the original function: the boolean matches are replaced by
// if/else and the shadowing `let sample = ...` binding is removed. The behaviour,
// including the wrong-length result for repl = false, is unchanged.
let sample (data : seq<float>) (size : int) (repl : bool) =

    let n = data |> Seq.length

    // without replacement
    // NOTE: `idx` is a lazy seq; Seq.length enumerates it, and every enumeration draws
    // new random numbers, so the length checked here may differ from what is returned.
    let rec generateIndex idx =
        let m = size - Seq.length(idx)
        if m > 0 then
            let newIdx = DiscreteUniform.Samples(0, n-1) |> Seq.take m 
            let idx = (Seq.append idx newIdx) |> Seq.distinct
            generateIndex idx
        else
            idx

    if repl then
        DiscreteUniform.Samples(0, n-1) 
        |> Seq.take size 
        |> Seq.map (fun index -> Seq.item index data)
    else
        generateIndex (seq []) 
        |> Seq.map (fun index -> Seq.item index data)

Now, with all of that out of the way, on to your bug.

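The short version: sequences are lazy, they are re-evaluated every time they are enumerated, and in your generateIndex function that laziness is what bites you. In other words, two different things are involved: the values you count are one random draw, and the values you return are another.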

If you paste generateIndex into VS and add a few printfn statements, you can see what is going on. Here is the code I used:

// Instrumented copy of generateIndex: `n` and `size` are passed in as parameters so the
// function can be called on its own, and each step prints the sequence it is handling.
let rec generateIndex n size idx =
    let m = size - Seq.length(idx)
    printfn "m = %d" m
    match m > 0 with
    | true ->
        let newIdx = DiscreteUniform.Samples(0, n-1) |> Seq.take m
        // List.ofSeq enumerates the seq so that all of its elements get printed.
        printfn "Generating newIdx as %A" (List.ofSeq newIdx)
        let idx = (Seq.append idx newIdx) |> Seq.distinct
        printfn "Now idx is %A" (List.ofSeq idx)
        generateIndex n size idx
    | false -> 
        printfn "Done, returning %A" (List.ofSeq idx)
        idx

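Note the List.ofSeq idx calls, which force F# Interactive to print every element of the seq (by default, if you print a seq with %A, it prints only the first few values and then an ellipsis if more values are available in the seq). Also note that I turned n and size into parameters (which I do not change between recursive calls), so that I could test the function on its own. I then called the function as generateIndex 100 5 (seq []) and got the following output: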

m = 5
Generating newIdx as [74; 76; 97; 78; 31]
Now idx is [68; 28; 65; 58; 82]
m = 0
Done, returning [37; 58; 24; 48; 49]
val it : seq<int> = seq [12; 69; 97; 38; ...]

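See how the numbers keep changing? That is the first clue that something is wrong. The thing is, seqs are lazy. They do not evaluate their contents until they have to. You should not think of a seq as a list of numbers. Instead, think of it as a generator that, when asked for numbers, will produce them according to some rule. In your case, the rule is: "choose random integers between 0 and n-1, then take m of those numbers". The other thing about seqs is that they do not cache their contents (although there is a Seq.cache function that will cache them). So every time you evaluate a seq, it runs its rule again and produces new results. That is why, when we printed newIdx, it showed [74; 76; 97; 78; 31], but after appending it to an empty seq, the result was [68; 28; 65; 58; 82].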

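Why? Because Seq.append does not force evaluation. It simply creates a new seq whose rule is: "take all the items from the first seq, then, when it is exhausted, take all the items from the second seq. When that one is exhausted, stop". And Seq.distinct does not force evaluation either; it simply creates a seq whose rule is: "take the items from the seq you were given and hand them out when asked. But remember each one as you go, and if you have already handed an item out, do not hand it out again". So what you are passing between your calls to generateIndex is an object that, when evaluated, will pick a set of random numbers between 0 and n-1 (in my simple case, between 0 and 100), and then reduce that set to its distinct values.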

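Now, here is the thing. Every time you evaluate that seq, it starts from the beginning: first calling DiscreteUniform.Samples(0, n-1) to produce an infinite stream of random numbers, then selecting m numbers from that stream, then throwing away any duplicates. (I am ignoring Seq.append here, because it adds unnecessary mental complexity and is not really part of the bug.) Now, at the start of each pass through your function, you check the length of the sequence, which causes it to be evaluated. That means it picks (in the case of my example) 5 random numbers between 0 and 99, and then makes sure they are all distinct. If they are all distinct, then m = 0 and the function exits, returning... not the list of numbers, but the seq object. And when that seq object is evaluated, it starts again from the beginning, picking a different set of 5 numbers and then throwing away any duplicates. So there is still a chance that this set of 5 numbers will contain a duplicate, because the set that was checked inside the function (which was guaranteed to have no duplicates, otherwise m would have been greater than 0) is not the set that was returned. The set that was returned has a probability of 1.0 * 0.99 * 0.98 * 0.97 * 0.96 of containing no duplicates, which comes to about 0.9035. So there is a just-under-10% chance that, even though you checked Seq.length and it was 5, the returned seq ends up with only 4 elements after all, because it picked a different set of random numbers from the one you checked.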

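To prove this, I ran the function again, this time picking only 4 numbers, so that the result would be shown in full at the F# Interactive prompt. My run of generateIndex 100 4 (seq []) produced the following output: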

m = 4
Generating newIdx as [36; 63; 97; 31]
Now idx is [39; 93; 53; 94]
m = 0
Done, returning [47; 94; 34]
val it : seq<int> = seq [48; 24; 14; 68]

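Notice how the "Done, returning (value of idx)" line shows only 3 items? Even though the function ultimately returned a seq with 4 items (because it picked yet another set of random numbers, and that set had no duplicates), this demonstrates the problem.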

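By the way, there is one more problem with your code: it is much slower than it needs to be. The Seq.item function may, depending on the underlying sequence, have to walk the sequence from the beginning in order to reach the n-th item. It would be much better to store your data in an array at the start of the function (let arrData = data |> Array.ofSeq), and then replace the first of the two lines below with the second: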

        |> Seq.map (fun index -> Seq.item index data)

        |> Seq.map (fun index -> arrData.[index])

Array lookups take constant time, so this takes your sample function from O(N^2) down to O(N).

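TL;DR: apply Seq.distinct before you take m values from the stream, and the bug goes away. You can replace your entire generateIndex function with a simple DiscreteUniform.Samples(0, n-1) |> Seq.distinct |> Seq.take size. (And use an array for the data lookups, so that the function runs faster.) In other words, here is the final (well, almost final) version of how I would rewrite your code: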

// Draws `size` elements from `data`, with replacement when `repl` is true and
// without replacement (distinct indices) when it is false.
let sample (data : seq<float>) (size : int) (repl : bool) =
    // Copy the data into an array once so that elements can be looked up by index.
    let arrData = data |> Array.ofSeq
    let n = arrData |> Array.length

    if repl then
        DiscreteUniform.Samples(0, n-1) 
        |> Seq.take size 
        |> Seq.map (fun index -> arrData.[index])
    else
        // De-duplicate the random stream first, then take `size` of the distinct indices.
        // NOTE(review): only n distinct indices exist, so size > n can never be satisfied here.
        DiscreteUniform.Samples(0, n-1) 
        |> Seq.distinct
        |> Seq.take size 
        |> Seq.map (fun index -> arrData.[index])

That's it! Simple, easy to understand, and (as far as I can tell) free of bugs.

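Edit: ...but it is not completely DRY, because the "final" version still contains a bit of repeated code. (Thanks to CaringDev for pointing this out in the comments.) The Seq.take size |> Seq.map step is repeated in both branches of the if expression, so it can be factored out. We can do this: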

// Only the source of indices depends on `repl`; the take/map steps are shared.
let randomIndices =
    if repl then
        DiscreteUniform.Samples(0, n-1) 
    else
        DiscreteUniform.Samples(0, n-1) |> Seq.distinct

randomIndices
|> Seq.take size 
|> Seq.map (fun index -> arrData.[index])

So here is the truly final version of my suggestion:

// Draws `size` random elements from `data`.
//   data : the population to sample from (enumerated once and copied into an array)
//   size : the number of elements to draw
//   repl : true  -> sample with replacement (the same index may be drawn repeatedly)
//          false -> sample without replacement (every index is drawn at most once)
// Returns a lazy seq<float>; as with any uncached seq, each enumeration performs a
// fresh random draw.
// Raises ArgumentException when repl is false and size exceeds the number of elements.
let sample (data : seq<float>) (size : int) (repl : bool) =
    // Materialise the input once: the length is known and index lookups are O(1).
    let arrData = data |> Array.ofSeq
    let n = arrData |> Array.length
    // Without replacement only n distinct indices exist. Asking for more would make
    // Seq.distinct |> Seq.take wait forever on the infinite stream of random indices,
    // so fail fast instead of hanging.
    if not repl && size > n then
        invalidArg "size" "size must not exceed the number of data elements when sampling without replacement"
    let randomIndices =
        if repl then
            DiscreteUniform.Samples(0, n-1)
        else
            // De-duplicate the infinite stream before taking, so that exactly `size`
            // distinct indices are produced.
            DiscreteUniform.Samples(0, n-1) |> Seq.distinct
    randomIndices
    |> Seq.take size
    |> Seq.map (fun index -> arrData.[index])
+9

Source: https://habr.com/ru/post/1669319/


All Articles