Source file mreader_lexer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
(* {{{ COPYING *(

     This file is part of Merlin, an helper for ocaml editors

     Copyright (C) 2013 - 2015  Frédéric Bour  <frederic.bour(_)lakaban.net>
                                Thomas Refis  <refis.thomas(_)gmail.com>
                                Simon Castellan  <simon.castellan(_)iuwt.fr>

     Permission is hereby granted, free of charge, to any person obtaining a
     copy of this software and associated documentation files (the "Software"),
     to deal in the Software without restriction, including without limitation the
     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
     sell copies of the Software, and to permit persons to whom the Software is
     furnished to do so, subject to the following conditions:

     The above copyright notice and this permission notice shall be included in
     all copies or substantial portions of the Software.

     The Software is provided "as is", without warranty of any kind, express or
     implied, including but not limited to the warranties of merchantability,
     fitness for a particular purpose and noninfringement. In no event shall
     the authors or copyright holders be liable for any claim, damages or other
     liability, whether in an action of contract, tort or otherwise, arising
     from, out of or in connection with the software or the use or other dealings
     in the Software.

   )* }}} *)

open Std

type keywords = Lexer_raw.keywords

type triple = Parser_raw.token * Lexing.position * Lexing.position

type item =
  | Triple of triple
  | Comment of (string * Location.t)
  | Error of Lexer_raw.error * Location.t

type t =
  { keywords : keywords;
    config : Mconfig.t;
    source : Msource.t;
    items : item list
  }

let get_tokens keywords pos text =
  let state = Lexer_raw.make keywords in
  let lexbuf = Lexing.from_string text in
  Lexing.move lexbuf pos;
  let rec aux items = function
    | Lexer_raw.Return (Parser_raw.COMMENT comment) ->
      continue (Comment comment :: items)
    | Lexer_raw.Refill k -> aux items (k ())
    | Lexer_raw.Return t ->
      let triple = (t, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p) in
      let items = Triple triple :: items in
      if t = Parser_raw.EOF then items else continue items
    | Lexer_raw.Fail (err, loc) -> continue (Error (err, loc) :: items)
  and continue items = aux items (Lexer_raw.token state lexbuf) in

  function
  | [] ->
    (* First line: skip #! ... *)
    aux [] (Lexer_raw.skip_sharp_bang state lexbuf)
  | items ->
    (* Resume *)
    continue items

let initial_position config =
  { Lexing.pos_fname = Mconfig.filename config;
    pos_lnum = 1;
    pos_bol = 0;
    pos_cnum = 0
  }

let make warnings keywords config source =
  Msupport.catch_errors warnings (ref []) @@ fun () ->
  let items =
    get_tokens keywords (initial_position config) (Msource.text source) []
  in
  { keywords; items; config; source }

let item_start = function
  | Triple (_, s, _) -> s
  | Comment (_, l) | Error (_, l) -> l.Location.loc_start

let item_end = function
  | Triple (_, _, e) -> e
  | Comment (_, l) | Error (_, l) -> l.Location.loc_end

let initial_position t = initial_position t.config

let rev_filter_map ~f lst =
  let rec aux acc = function
    | [] -> acc
    | x :: xs ->
      let acc =
        match f x with
        | Some x' -> x' :: acc
        | None -> acc
      in
      aux acc xs
  in
  aux [] lst

let tokens t =
  rev_filter_map t.items ~f:(function
    | Triple t -> Some t
    | _ -> None)

let keywords t = Lexer_raw.list_keywords t.keywords

let errors t =
  rev_filter_map t.items ~f:(function
    | Error (err, loc) -> Some (Lexer_raw.Error (err, loc))
    | _ -> None)

let comments t =
  rev_filter_map t.items ~f:(function
    | Comment t -> Some t
    | _ -> None)

open Parser_raw

let is_operator = function
  | PREFIXOP s
  | LETOP s
  | ANDOP s
  | INFIXOP0 s
  | INFIXOP1 s
  | INFIXOP2 s
  | INFIXOP3 s
  | INFIXOP4 s -> Some s
  | BANG -> Some "!"
  | PERCENT -> Some "%"
  | PLUS -> Some "+"
  | PLUSDOT -> Some "+."
  | MINUS -> Some "-"
  | MINUSDOT -> Some "-."
  | STAR -> Some "*"
  | EQUAL -> Some "="
  | LESS -> Some "<"
  | GREATER -> Some ">"
  | OR -> Some "or"
  | BARBAR -> Some "||"
  | AMPERSAND -> Some "&"
  | AMPERAMPER -> Some "&&"
  | COLONEQUAL -> Some ":="
  | PLUSEQ -> Some "+="
  | _ -> None

(* [reconstruct_identifier] is impossible to read at the moment, here is a
   pseudo code version of the function:
   (many thanks to Gabriel for this contribution)

        00| let h = parse (focus h) with
        01|   | . { h+1 }
        02|   | _ { h }
        03| in
        04| parse h with
        05| | BOF x=operator       { [x] }
        06| | ¬( x=operator        { [x] }
        07| | ' x=ident            { [] }
        08| | _ {
        09|   let acc, h = parse (h ! tail h) with
        10|     | x=ident !          { [x], h }
        11|     | ( ! x=operator )   { [x], h }
        12|     | ( x=operator ! )   { [x], h - 1 }
        13|     | ( x=operator ) !   { [x], h - 2 }
        14|     | _ { [], h }
        15|   in
        16|   let h = h - 1 in
        17|   let rec head acc = parse (h !) with
        18|     | tl x=ident . ! { head (x :: acc) tl }
        19|     | x=ident . !    { ident :: acc }
        20|     | _              { acc }
        21|   in head acc
        22| }

   Now for the explanations:
     line 0-3:  if we're on a dot, skip it and move to the right

     line 5,6:  if we're on an operator not preceded by an opening parenthesis,
                just return that.

     line 7:    if we're on a type variable, don't return anything.
                reconstruct_identifier is called when locating and getting the
                type of an expression, in both cases there's nothing we can do
                with a type variable.
                See #317

     line 8-22: two step approach:
       - line 9-15:  retrieve the identifier
                     OR retrieve the parenthesized operator and move before the
                        opening parenthesis

       - line 16-21: retrieve the "path" prefix of the identifier/operator we
                     got in the previous step.


   Additionally, the message of commit fc0b152 explains what we consider is an
   identifier:

     «
        Interpreting an OCaml identifier out of context is a bit ambiguous.

        A prefix of the form (UIDENT DOT)* is the module path,
        A UIDENT suffix is either a module name, a module type name (in case the
        whole path is a module path), or a value constructor.
        A LIDENT suffix is either a value name, a type constructor or a module
        type name.
        A LPAREN OPERATOR RPAREN suffix is a value name (and soon, maybe a
        value constructor if beginning by ':' ?!) .

        In the middle, LIDENT DOT (UIDENT DOT)* is projection of the field of a
        record.  In this case, merlin will drop everything up to the first
        UIDENT and complete in the scope of the (UIDENT DOT)* interpreted as a
        module path.
        Soon, the last UIDENT might also be the type of an inline record.
        (Module2.f.Module1.A <- type of the record of the value constructor named A of
        type f, defined in Module1 and aliased in Module2, pfffff).
     »
*)

let reconstruct_identifier_from_tokens tokens pos =
  let rec look_for_component acc = function
    (* Skip 'a and `A *)
    | ((LIDENT _ | UIDENT _), _, _) :: ((BACKQUOTE | QUOTE), _, _) :: items ->
      check acc items
    (* UIDENT is a regular a component *)
    | ((UIDENT _, _, _) as item) :: items -> look_for_dot (item :: acc) items
    (* LIDENT always begin a new identifier *)
    | ((LIDENT _, _, _) as item) :: items ->
      if acc = [] then look_for_dot [ item ] items else check acc (item :: items)
    (* Reified operators behave like LIDENT *)
    | (RPAREN, _, _) :: ((token, _, _) as item) :: (LPAREN, _, _) :: items
      when is_operator token <> None && acc = [] -> look_for_dot [ item ] items
    (* An operator alone is an identifier on its own *)
    | ((token, _, _) as item) :: items
      when is_operator token <> None && acc = [] -> check [ item ] items
    (* Otherwise, check current accumulator and scan the rest of the input *)
    | _ :: items -> check acc items
    | [] -> raise Not_found
  and look_for_dot acc = function
    | (DOT, _, _) :: items -> look_for_component acc items
    | items -> check acc items
  and check acc items =
    if
      acc <> []
      && (let startp =
            match acc with
            | (_, startp, _) :: _ -> startp
            | _ -> assert false
          in
          Lexing.compare_pos startp pos <= 0)
      &&
      let endp =
        match List.last acc with
        | Some (_, _, endp) -> endp
        | _ -> assert false
      in
      Lexing.compare_pos pos endp <= 0
    then acc
    else
      match items with
      | [] -> raise Not_found
      | (_, _, endp) :: _ when Lexing.compare_pos endp pos < 0 ->
        raise Not_found
      | _ -> look_for_component [] items
  in

  match look_for_component [] tokens with
  | exception Not_found -> []
  | acc ->
    let fmt (token, loc_start, loc_end) =
      let id =
        match token with
        | UIDENT s | LIDENT s -> s
        | _ -> (
          match is_operator token with
          | Some t -> t
          | None -> assert false)
      in
      Location.mkloc id { Location.loc_start; loc_end; loc_ghost = false }
    in
    let before_pos = function
      | _, s, _ -> Lexing.compare_pos s pos <= 0
    in
    List.map ~f:fmt (List.filter ~f:before_pos acc)

let reconstruct_identifier config source pos =
  let rec lex acc lexbuf =
    let token = Lexer_ident.token lexbuf in
    let item = (token, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p) in
    match token with
    | EOF -> item :: acc
    | EOL when Lexing.compare_pos lexbuf.Lexing.lex_curr_p pos > 0 ->
      item :: acc
    | EOL -> lex [] lexbuf
    | _ -> lex (item :: acc) lexbuf
  in
  let lexbuf = Lexing.from_string (Msource.text source) in
  Location.init lexbuf (Mconfig.filename config);
  let tokens = lex [] lexbuf in
  reconstruct_identifier_from_tokens tokens pos

let is_uppercase { Location.txt = x; _ } = x <> "" && Char.is_uppercase x.[0]

let rec drop_lowercase acc = function
  | [ x ] -> List.rev (x :: acc)
  | x :: xs when not (is_uppercase x) -> drop_lowercase [] xs
  | x :: xs -> drop_lowercase (x :: acc) xs
  | [] -> List.rev acc

let for_completion t pos =
  let no_labels = ref false in
  let check_label = function
    | Triple ((LABEL _ | OPTLABEL _), _, _) -> no_labels := true
    | _ -> ()
  in
  let rec aux acc = function
    (* Cursor is before item: continue *)
    | item :: items when Lexing.compare_pos (item_start item) pos >= 0 ->
      aux (item :: acc) items
    (* Cursor is in the middle of item: stop *)
    | item :: _ when Lexing.compare_pos (item_end item) pos > 0 ->
      check_label item;
      raise Exit
    (* Cursor is at the end *)
    | (Triple (token, _, loc_end) as item) :: _ as items
      when Lexing.compare_pos pos loc_end = 0 ->
      check_label item;
      begin
        match token with
        (* Already on identifier, no need to introduce *)
        | UIDENT _ | LIDENT _ -> raise Exit
        | _ -> (acc, items)
      end
    | items -> (acc, items)
  in
  let t =
    match aux [] t.items with
    | exception Exit -> t
    | acc, items ->
      { t with
        items = List.rev_append acc (Triple (LIDENT "", pos, pos) :: items)
      }
  in
  (!no_labels, t)

let identifier_suffix ident =
  match List.last ident with
  | Some x when is_uppercase x -> drop_lowercase [] ident
  | _ -> ident