Collecting blocks of text in a largely free form using FParsec

I am trying to parse some information out of mostly free-form text. I tried to implement it in FParsec, but I have not used it before, and I'm not sure whether I am doing it wrong or whether FParsec is even well suited to this particular problem.

Description of the problem

I want to parse the contents of a specific set of "Liquid tags" from a Markdown document (the tags "examplecode" and "requiredcode"). The Markdown will be mostly free-form text with an occasional block inside Liquid tags, for example:

Some free form text.
Possibly lots of lines. Maybe `code` stuff.

{% examplecode opt-lang-tag %}
ABC
DEF
{% endexamplecode %}

More text. Possibly multilines.

{% othertag %}
can ignore this tag
{% endothertag %}

{% requiredcode %}
GHI
{% endrequiredcode %}

In this case, I need to parse [ "ABC\nDEF"; "GHI" ].

, , . , , , , , . .

, :

\{%\s*(examplecode|requiredcode).*\%}(.*?)\{%\s*end\1\s*%\}

FParsec

FParsec. - between s t (everythingUntil t), , everythingUntil, , between .

, "{%", , , , :

/// Returns `s` with leading and trailing whitespace removed.
let trimStr (s : string) : string =
    s.Trim()
/// Builds a combinator that runs parser `p` between the literal strings `s`
/// (opening) and `t` (closing) and returns the result of `p`.
let betweenStr s t p =
    pstring s >>. p .>> pstring t
/// Parser returning every character up to the first occurrence of `s`.
/// The terminator `s` itself is left unconsumed (skip flag is false).
let allTill s =
    let skipTerminator = false
    charsTillString s skipTerminator maxInt
/// Parser that discards every character up to the first occurrence of `s`.
/// The terminator `s` itself is left unconsumed (skip flag is false).
let skipAllTill s =
    let skipTerminator = false
    skipCharsTillString s skipTerminator maxInt
/// Parses one or more consecutive non-whitespace characters.
/// Note that this accepts any non-whitespace character, including '%' and '}'.
let word : Parser<string, unit> =
    many1Satisfy (fun c -> not (Char.IsWhiteSpace c))

/// A parsed Liquid tag: the tag name and the text found between the opening
/// and closing tags. The constructor is private to this module.
type LiquidTag =
    private
    | LiquidTag of name : string * contents : string
/// Creates a `LiquidTag` named `n` whose contents are `c` with the
/// surrounding whitespace trimmed.
let makeTag n c =
    let trimmedContents = trimStr c
    LiquidTag (n, trimmedContents)

/// Parses one complete Liquid tag pair, `{% name ... %} contents {% endname %}`,
/// and returns it as a `LiquidTag` with trimmed contents.
///
/// The contents are read up to the next "{%", so a "{%" inside the contents
/// is treated as the start of the closing tag.
let liquidTag =
    // Opening tag: the first word is the tag name; anything else before "%}" is skipped.
    let openTag =
        betweenStr "{%" "%}" (spaces >>. word .>> spaces .>> skipAllTill "%}")
    // Closing tag: must be exactly "end" followed by the opening tag's name.
    let closeTag name =
        betweenStr "{%" "%}" (spaces >>. pstring ("end" + name) .>> spaces)
    let body = allTill "{%"
    // Once the name is known, read the body, require the matching end tag, build the result.
    let bodyThenClose name =
        body .>> closeTag name |>> makeTag name
    openTag >>= bodyThenClose

/// Parses zero or more Liquid tags, skipping the free-form text in front of each.
// NOTE(review): each iteration first skips to the next "{%"; confirm how
// skipCharsTillString behaves when no further "{%" exists (trailing text).
let tags =
    let nextTag = skipAllTill "{%" >>. liquidTag
    many nextTag

, , .

, (, ), ( ).

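It also fails when "{%" appears inside the tag contents. For example, the following input should parse to [ "ABC {% DEF " ], but it does not: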

{% examplecode %}
ABC {% DEF
{% endexamplecode %}

, " " FParsec, FParsec , ?

( "{%" FParsec. .)

+4
1

I would simply use start >>. everythingUntil end instead of between start end body.

:

/// Largest 32-bit signed integer; used as an "unlimited" character count.
let maxInt : int = System.Int32.MaxValue
/// A parsed Liquid tag: (tag name, text between the opening and closing tags).
type LiquidTag =
    | LiquidTag of string * string

/// Parser that discards everything up to the first occurrence of `str`
/// and then consumes `str` as well (skip flag is true).
let skipTillString str =
    let skipTerminator = true
    skipCharsTillString str skipTerminator maxInt

/// Low-level parser that advances the stream via
/// `SkipCharsOrNewlinesUntilString(str, maxInt, ...)` and always returns a
/// successful unit reply, whether or not `str` was found.
let skipTillStringOrEof str : Parser<unit, _> =
    fun stream ->
        // Required by the stream API as an out argument; its value is deliberately ignored.
        let mutable foundString = false
        let _skippedCount =
            stream.SkipCharsOrNewlinesUntilString(str, maxInt, &foundString)
        Reply(())

/// Parses the tag opener "{%" followed by optional whitespace; returns unit.
let openingBrace =
    skipString "{%" .>> spaces

/// Parses the literal tag name `name` and checks (via `nextCharSatisfies`)
/// that the character after it is '%' or whitespace, so that a longer
/// identifier merely starting with `name` is rejected.
let tagName name =
    let endsTagName (c : char) =
        c = '%' || System.Char.IsWhiteSpace(c)
    skipString name >>? nextCharSatisfies endsTagName

/// Parses the closing tag for `name`: "{%", optional whitespace,
/// "end" + name, optional whitespace, "%}".
let endTag name =
    let closingName = tagName ("end" + name)
    let closingBrace = spaces >>. skipString "%}"
    openingBrace >>? (closingName >>. closingBrace)

/// Parses the rest of a `name` ... `end<name>` tag pair, assuming the opening
/// "{%" and any whitespace after it have already been consumed.
///
/// Steps: match the tag name, skip the remainder of the opening tag up to and
/// including "%}", then collect every character until the matching end tag.
///
/// Returns `LiquidTag(name, contents)`, where `contents` has its surrounding
/// whitespace trimmed. The text between the tags normally begins and ends
/// with a newline, so without trimming the sample input would produce
/// "\nABC\nDEF\n" rather than the required "ABC\nDEF".
let tagPair_afterOpeningBrace name =
    tagName name >>. skipTillString "%}"
    >>. (manyCharsTill anyChar (endTag name)
         |>> fun (contents : string) -> LiquidTag(name, contents.Trim()))

/// `skipTillStringOrEof` specialised to the tag opener "{%".
let skipToOpeningBraceOrEof = skipTillStringOrEof "{%" 

/// Parses a whole document and returns the list of "examplecode" and
/// "requiredcode" tag pairs found in it, in order of appearance.
///
/// At every "{%" one of the two wanted tag pairs is attempted; when neither
/// applies, `opt` yields None and that entry is dropped by `List.choose id`.
/// The parser requires end of input after the last tag.
let tagPairs =
    let wantedTag =
        tagPair_afterOpeningBrace "examplecode"
        <|> tagPair_afterOpeningBrace "requiredcode"
    // One step: consume "{%", optionally a wanted tag pair, then skip to the next "{%".
    let tagOrSkip =
        openingBrace >>. opt wantedTag .>> skipToOpeningBraceOrEof
    skipToOpeningBraceOrEof
    >>. many tagOrSkip
    |>> List.choose id
    .>> eof

:

  • , . , . , .

  • >>? , .

  • , . , , manyCharsTill anyChar (endTag name), . many ... |> List.choose id tagPairs .

+5

Source: https://habr.com/ru/post/1528004/


All Articles