F#,
.
Stefan Savev
, , . -.
.
.
/// Builds postings from the input named `in_name` and hands the grouped
/// result to `output out_name`.
///
/// Pipeline, as visible in this listing (the trailing `// N` comments are
/// the listing's step labels):
///   * `in_name` is passed through `as_lines`, and `extract_docs` is applied
///     to each element with the results concatenated;
///   * every extracted `(doc_id, doc_text)` pair is expanded by `process_doc`
///     into `(word, (doc_id, tf))` pairs;
///   * the pairs are grouped by `External.group_by`, and each group is mapped
///     to `(number of entries, entries)`;
///   * the grouped result is piped into `output out_name`.
///
/// Parameters:
///   in_name  - input handed to `as_lines`
///              (presumably a file name - TODO confirm).
///   tmp_dir  - forwarded unchanged to `External.group_by`
///              (presumably a scratch directory for on-disk grouping - TODO confirm).
///   out_name - forwarded to `output`
///              (presumably the destination file name - TODO confirm).
///
/// NOTE(review): `tokenize`, `stopword`, `stem`, `List.count`, `ListExt.map`,
/// `as_lines`, `extract_docs`, `External.group_by`, `External.ElemDesc` and
/// `output` are defined outside this listing; their exact contracts should be
/// verified against their definitions.
let create_postings in_name tmp_dir out_name =                        // 1
    // Expands one document into (word, (doc_id, tf)) pairs.
    let process_doc (doc_id, doc_text) =                              // 2
        doc_text |> tokenize |> stopword |> stem                      // 3
        // Presumably yields (word, count) pairs: the next step
        // destructures each element as (word, tf) - TODO confirm.
        |> List.count                                                 // 4a
        // Tag every pair with the id of the document it came from.
        |> ListExt.map (fun (word, tf) -> (word, (doc_id, tf)))       // 4b
    in_name                                                           // 5
    |> as_lines                                                       // 6
    |> Seq.map_concat extract_docs                                    // 7
    |> Seq.map_concat process_doc                                     // 8
    // NOTE(review): argument roles below are inferred from the lambdas
    // (first component = word, second = (doc_id, tf) pair, then a per-group
    // function) - confirm against External.group_by's definition.
    |> External.group_by (fun (w, _) -> w)                            // 9a
                         (fun (_, docid_and_tf) -> docid_and_tf)      // 9b
                         (fun lst -> (List.length lst, lst))          // 9c
                         tmp_dir                                      // 9d
                         (External.ElemDesc())                        // 9e
    |> output out_name                                                // 10
stefan