OCamlLex case-insensitive

Is there a way to have a case-insensitive token in an ocamllex specification? I have already tried making the token case-insensitive as follows:

(* Attempt from the question: each letter of the keyword is matched by a
   two-character class, so "case" is accepted in any capitalisation.
   An ocamllex entry point is introduced with `rule <name> = parse`. *)
rule token = parse
    ...
   | ['C' 'c']['A' 'a']['S' 's']['E' 'e'] { CASE }
    ...

but I'm looking for a different approach, if one exists.

+4
source share
2 answers

Use a single ordinary lexer token that accepts both lower and upper case, and look the keywords up in a table, ignoring case:

{
(* Tokens produced by the lexer: the two keywords, plus identifiers that
   carry their source text. *)
type token = Case | Test | Ident of string

(* Keyword lookup table, keyed by the lowercase spelling of each keyword.
   The annotation pins the table's type instead of leaving it weakly
   polymorphic until first use. *)
let keyword_tbl : (string, token) Hashtbl.t = Hashtbl.create 64

(* Register every keyword when the module is initialised.  `let ()` checks
   that the expression is of type unit, and [Hashtbl.replace] keeps a single
   binding per keyword even if a name were listed twice. *)
let () =
  List.iter
    (fun (name, keyword) -> Hashtbl.replace keyword_tbl name keyword)
    [
      "case", Case;
      "test", Test;
    ]
}

(* Characters allowed in an identifier or keyword: ASCII letters and '_'. *)
let ident_char = ['a'-'z' 'A'-'Z' '_']

(* Matches a run of identifier characters and classifies it: the keyword
   token if its lowercased form is found in [keyword_tbl], otherwise
   [Ident] carrying the text exactly as it was written. *)
rule next_token = parse
  | ident_char+ as s {
      (* [String.lowercase_ascii] replaces [String.lowercase], which was
         deprecated in OCaml 4.03 and removed in 5.0; for the ASCII-only
         [ident_char] set both give the same result. *)
      let canon = String.lowercase_ascii s in
      try Hashtbl.find keyword_tbl canon
      with Not_found ->
        (* `Ident canon` if you want case-insensitive vars as well
         * as keywords *)
        Ident s
    }
+5
source

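As @gsg suggests, the right way to solve this is a single regular lexer token that accepts both lower and upper case, with the keywords then looked up in a table. Writing a separate regular expression for every keyword is in fact an anti-pattern that the ocamllex documentation itself warns about: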

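12.7 Common errors

ocamllex: transition table overflow, automaton is too big. The deterministic automata generated by ocamllex are limited to at most 32767 transitions. The message above indicates that your lexer definition is too complex and overflows this limit. This is commonly caused by lexer definitions that have separate rules for each of the alphabetic keywords of the language, as in the following example. [...]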

http://caml.inria.fr/pub/docs/manual-ocaml-4.00/manual026.html#toc111

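The documentation goes on to give an explicit code example and a rewrite of it that uses a lookup table.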

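The lookup table should encapsulate the "case-insensitivity" of the lookup, so we should use a functorial associative structure (read: Map or Hashtbl) that lets us supply our own comparison function. Since the lookup table is likely to be immutable, we pick Map: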

{
(* Tokens produced by the lexer: the two keywords, plus identifiers that
   carry their source text. *)
type token = Case | Test | Ident of string

(* String-keyed map whose ordering ignores ASCII case: keys that differ only
   in capitalisation compare equal, so lookups are case-insensitive.
   [String.lowercase_ascii] is used because [String.lowercase] was
   deprecated in OCaml 4.03 and removed in 5.0. *)
module KeywordTable =
  Map.Make(struct
    type t = string
    let compare a b =
      String.(compare (lowercase_ascii a) (lowercase_ascii b))
  end)

(* Immutable keyword table.  [List.fold_left] expects the folding function,
   then the initial accumulator, then the list; the function receives the
   accumulated map first and the (name, keyword) pair second. *)
let keyword_table =
  List.fold_left
    (fun table (name, keyword) -> KeywordTable.add name keyword table)
    KeywordTable.empty
    [
      "case", Case;
      "test", Test;
    ]
}

(* Characters allowed in an identifier or keyword: ASCII letters and '_'. *)
let ident_char = ['a'-'z' 'A'-'Z' '_']

(* Matches a run of identifier characters and classifies it: the keyword
   token if the text is bound in [keyword_table], otherwise [Ident]
   carrying the text exactly as it was written. *)
rule next_token = parse
  | ident_char+ as s {
      (* [find] on a [Map.Make] instance takes the key first, then the map. *)
      try KeywordTable.find s keyword_table
      with Not_found -> Ident s
    }
+1

Source: https://habr.com/ru/post/1626416/


All Articles