A MiniC++ (C++ subset) compiler for a course.

lexer.mll 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. (* Analyseur lexical pour MiniC++ *)
  2. {
  3. open Lexing
  4. open Parser
  5. open Lexhack
  6. exception Lexing_error of string
  (* Association list pairing every reserved word recognised by the lexer
     with the parser token it stands for.  It is loaded into a hash table
     by [id_or_kwd]. *)
  let keywords_tbl =
    [ ("class",   CLASS);
      ("else",    ELSE);
      ("false",   FALSE);
      ("for",     FOR);
      ("if",      IF);
      ("int",     TINT);
      ("new",     NEW);
      ("NULL",    NULL);
      ("public",  PUBLIC);
      ("return",  RETURN);
      ("this",    THIS);
      ("true",    TRUE);
      ("virtual", VIRTUAL);
      ("void",    VOID);
      ("while",   WHILE) ]
  (* [id_or_kwd s] turns an identifier-shaped lexeme into a token.

     - If [s] is one of the reserved words of [keywords_tbl], the matching
       keyword token is returned.
     - Otherwise, if [s] is currently a member of the list held in
       [Lexhack.types_lexhack], the result is [TIDENT s]
       (presumably the names of declared classes; confirm in Lexhack).
     - Otherwise the result is [IDENT s].

     The keyword hash table is built once, when this definition is
     evaluated, and captured by the returned closure. *)
  let id_or_kwd =
    let table = Hashtbl.create 14 in
    let register (word, tok) = Hashtbl.add table word tok in
    List.iter register keywords_tbl;
    fun lexeme ->
      try Hashtbl.find table lexeme
      with Not_found ->
        let is_type_name = List.mem lexeme !(Lexhack.types_lexhack) in
        if is_type_name then TIDENT lexeme else IDENT lexeme
  (* [newline lexbuf] records a line break at the current position of
     [lexbuf]: the line counter is incremented and the beginning-of-line
     offset is set to the current character offset.  The file name and the
     character offset themselves are left untouched. *)
  let newline lexbuf =
    let here = lexbuf.lex_curr_p in
    let next_line =
      { pos_fname = here.pos_fname;
        pos_lnum  = here.pos_lnum + 1;
        pos_bol   = here.pos_cnum;
        pos_cnum  = here.pos_cnum }
    in
    lexbuf.lex_curr_p <- next_line

  (* Accumulator for the decoded contents of the string literal currently
     being scanned by the [chaine] rule. *)
  let localstring = ref ""
  37. }
  (* ---- Named regular expressions shared by the rules below ---- *)

  (* A decimal digit. *)
  let chiffre = ['0'-'9']

  (* An ASCII letter. *)
  let alpha = ['a'-'z' 'A'-'Z']

  (* An identifier: a letter or underscore, followed by any number of
     letters, digits or underscores. *)
  let ident = (alpha | '_') (alpha | chiffre | '_')*

  (* Digits allowed in octal and hexadecimal integer literals. *)
  let chiffre_octal = ['0'-'7']
  let chiffre_hexa = ['0'-'9' 'a'-'f' 'A'-'F']

  (* One character of a string literal: either an ASCII character with
     code 32 to 127 other than the double quote (34) and the backslash
     (92), or one of the escape sequences \\, \", \n, \t and \xHH. *)
  let caractere =
    ['\032'-'\033' '\035'-'\091' '\093'-'\127']
    | '\\' '\\' | '\\' '\"' | '\\' 'n' | '\\' 't'
    | '\\' 'x' chiffre_hexa chiffre_hexa

  (* A complete double-quoted string literal. *)
  let chaine = '\"' caractere* '\"'

  (* Horizontal blank: space or tab. *)
  let space = [' ' '\t']
  (* Main entry point of the lexer: returns the next token of the input.

     Blanks, newlines and comments are skipped (newlines update the
     position through [newline]).  String literals are delegated to the
     [chaine] rule, comments to [comment] / [comment_inline].

     Raises [Lexing_error] on a character that starts no token and on an
     integer literal that does not fit in an OCaml [int]. *)
  rule token = parse
    (* Layout and the only accepted preprocessor line. *)
    | "#include <iostream>" { INCLUDE }
    | '\n'                  { newline lexbuf ; token lexbuf }
    | space+                { token lexbuf }

    (* Qualified names treated as single tokens. *)
    | "std::cout"           { COUT }
    | "std::endl"           { ENDL }

    (* Comments. *)
    | "/*"                  { comment lexbuf }
    | "//"                  { comment_inline lexbuf }

    (* Operators and punctuation.  ocamllex picks the longest match, so
       "<=" wins over '<' regardless of clause order. *)
    | "||"                  { OR }
    | "&&"                  { AND }
    | "=="                  { EQ }
    | "!="                  { NEQ }
    | '<'                   { LT }
    | "<="                  { LEQ }
    | '>'                   { GT }
    | ">="                  { GEQ }
    | "++"                  { INCR }
    | "--"                  { DECR }
    | '='                   { ASSIGN }
    | '+'                   { PLUS }
    | '-'                   { MINUS }
    | '*'                   { TIMES }
    | '/'                   { DIV }
    | '%'                   { MOD }
    | '!'                   { NOT }
    | '&'                   { ECOMM }
    | '('                   { LPAREN }
    | ')'                   { RPAREN }
    | "->"                  { ARROW }
    | '.'                   { DOT }
    | ';'                   { SEMICOLON }
    | ':'                   { COLON }
    | ','                   { COMMA }
    | "<<"                  { IN }
    | '{'                   { LBRACE }
    | '}'                   { RBRACE }

    (* Identifiers, keywords and type names. *)
    | ident as s            { id_or_kwd s }

    (* Integer literals: decimal, octal (leading 0) and hexadecimal (0x).
       [int_of_string] raises [Failure] when the literal is out of range;
       that is reported as a [Lexing_error] like every other lexical
       error instead of escaping as an unrelated exception. *)
    | '0'                   { INT 0 }
    | (['1'-'9'] chiffre*) as i
        { try INT (int_of_string i)
          with Failure _ ->
            raise (Lexing_error ("Constante entière trop grande : " ^ i)) }
    | '0' (chiffre_octal+ as i)
        { try INT (int_of_string ("0o" ^ i))
          with Failure _ ->
            raise (Lexing_error ("Constante entière trop grande : 0" ^ i)) }
    | ('0' 'x' chiffre_hexa+ as i)
        { try INT (int_of_string i)
          with Failure _ ->
            raise (Lexing_error ("Constante entière trop grande : " ^ i)) }

    (* Opening quote of a string literal: the body is read by [chaine]. *)
    | '"'                   { chaine lexbuf }

    | eof                   { EOF }
    | _ as c
        { raise (Lexing_error ("Caractère illégal : " ^ String.make 1 c)) }
  (* Reads the body of a string literal, the opening quote having already
     been consumed by [token].  Decoded characters are appended to
     [localstring]; on the closing quote the accumulated text is returned
     as a [STRING] token and the accumulator is cleared.

     Raises [Lexing_error] on end of input or on a character that is not
     allowed inside a string.  The accumulator is cleared before raising
     so that a caller which catches the error and keeps lexing does not
     see the partial text prepended to the next string literal. *)
  and chaine = parse
    | '"'
        { let contents = !localstring in
          localstring := "";
          STRING contents }
    (* Ordinary character: ASCII 32..127 except '"' and '\'. *)
    | ['\032'-'\033' '\035'-'\091' '\093'-'\127'] as c
        { localstring := !localstring ^ (String.make 1 c) ; chaine lexbuf }
    (* Escape sequences. *)
    | '\\' '\\' { localstring := !localstring ^ "\\" ; chaine lexbuf }
    | '\\' '\"' { localstring := !localstring ^ "\"" ; chaine lexbuf }
    | '\\' 'n'  { localstring := !localstring ^ "\n" ; chaine lexbuf }
    | '\\' 't'  { localstring := !localstring ^ "\t" ; chaine lexbuf }
    (* \xHH: two hex digits give a code in 0..255, always a valid char. *)
    | '\\' 'x' (chiffre_hexa chiffre_hexa as hex)
        { localstring :=
            !localstring
            ^ (String.make 1 (char_of_int (int_of_string ("0x" ^ hex))));
          chaine lexbuf }
    | eof
        { localstring := "";
          raise (Lexing_error "Unterminated string") }
    | _ as c
        { localstring := "";
          raise (Lexing_error
                   (Printf.sprintf "Character %s forbidden :"
                      (if c = '\n' then "newline" else String.make 1 c))) }
  (* Skips the body of a block comment, the opening delimiter having
     already been consumed by [token], then resumes normal lexing after
     the closing delimiter.

     Line breaks inside the comment are reported to [newline] so that the
     positions of the tokens that follow keep the right line number.

     Raises [Lexing_error] if the input ends before the comment is
     closed. *)
  and comment = parse
    | "*/" { token lexbuf }
    | '\n' { newline lexbuf ; comment lexbuf }
    | _    { comment lexbuf }
    | eof  { raise (Lexing_error "Commentaire non terminé.") }
  (* Skips a line comment, the leading "//" having already been consumed
     by [token], then resumes normal lexing.

     The terminating line break is reported to [newline], as [token] does
     for ordinary line breaks, so line numbers stay correct.  A comment on
     the last line of a file with no final line break ends at end of
     input and yields [EOF] instead of failing with "empty token". *)
  and comment_inline = parse
    | '\n' { newline lexbuf ; token lexbuf }
    | eof  { EOF }
    | _    { comment_inline lexbuf }