1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
(* Local alias: everything below refers to the [Core] module as [Re]. *)
module Re = Core
(* Re-export the exceptions declared in [Perl] under this module. *)
exception Parse_error = Perl.Parse_error
exception Not_supported = Perl.Not_supported
(** Alias for [Re.re]. *)
type regexp = Re.re
(** Flags accepted by [re] and [regexp]. [re] translates each one into the
    like-named option of [Perl.re] ([`Caseless], [`Multiline], [`Anchored],
    [`Dotall]). *)
type flag = [ `CASELESS | `MULTILINE | `ANCHORED | `DOTALL ]
(** One element of the list returned by [full_split]. *)
type split_result =
(* A piece of the subject reported as text by [Re.split_full]. *)
| Text of string
(* The whole text (group 0) of one delimiter match. *)
| Delim of string
(* [Group (i, s)]: group number [i] of a delimiter match, with its text [s]. *)
| Group of int * string
(* Placeholder for a group whose recorded offsets are [(-1, -1)]. *)
| NoGroup
(** Alias for [Core.Group.t], the value handled by [get_substring] and
    [get_substring_ofs]. *)
type groups = Core.Group.t
(** [re ?flags pat] hands [pat] to [Perl.re], after translating every flag in
    [flags] (default: the empty list) into the corresponding [Perl] option. *)
let re ?(flags = []) pat =
  (* One-to-one mapping from this module's flags to the [Perl.re] options. *)
  let perl_opt_of_flag = function
    | `CASELESS -> `Caseless
    | `MULTILINE -> `Multiline
    | `ANCHORED -> `Anchored
    | `DOTALL -> `Dotall
  in
  Perl.re ~opts:(List.map perl_opt_of_flag flags) pat
(** [regexp ?flags pat] builds an expression with [re ?flags pat] and passes
    the result through [Re.compile]. *)
let regexp ?flags pat =
  re ?flags pat |> Re.compile
(** [extract ~rex s] runs [Re.exec rex s] and returns [Re.Group.all] of the
    resulting match.

    Any exception raised by [Re.exec] (for instance when [rex] does not match
    [s]) is propagated unchanged.

    NOTE(review): the binding had lost its identifier (it read
    [let ~rex s = ...], which does not parse). The name [extract] is restored
    from the PCRE-compatible API this module mirrors; confirm against the
    interface file. *)
let extract ~rex s =
  Re.Group.all (Re.exec rex s)
(** [exec ~rex ?pos s] is [Re.exec] with the expression passed as the labeled
    argument [rex]; the optional [pos] is forwarded as is. *)
let exec ~rex ?pos s =
  Re.exec ?pos rex s
(** [get_substring s i] is [Re.Group.get s i]: the text recorded for group
    number [i] in the match result [s]. *)
let get_substring groups idx =
  Re.Group.get groups idx
(** [names rex] returns, as an array, the first component of every pair in
    [Re.group_names rex], in the order that function lists them. *)
let names rex =
  let name_of (name, _index) = name in
  Array.of_list (List.map name_of (Re.group_names rex))
(** [get_named_substring rex name s] looks up the group called [name] in the
    match result [s].

    The pairs of [Re.group_names rex] are scanned in order. For each pair
    whose name equals [name], [get_substring] is tried with the associated
    index; if that raises [Not_found], the scan continues with the remaining
    pairs.

    @raise Not_found when no pair named [name] yields a substring. *)
let get_named_substring rex name s =
  let rec first_available candidates =
    match candidates with
    | [] -> raise Not_found
    | (group_name, idx) :: rest ->
      if group_name = name then
        (match get_substring s idx with
         | text -> text
         | exception Not_found -> first_available rest)
      else
        first_available rest
  in
  first_available (Re.group_names rex)
(** [get_substring_ofs s i] is [Re.Group.offset s i]: the pair of offsets
    recorded for group number [i] in the match result [s]. *)
let get_substring_ofs groups idx =
  Re.Group.offset groups idx
(** [pmatch ~rex s] is [Re.execp rex s]: a boolean telling whether [rex]
    matches in [s]. *)
let pmatch ~rex subject =
  Re.execp rex subject
(** [substitute ~rex ~subst str] returns a copy of [str] in which every match
    of [rex] is replaced by [subst m], where [m] is the matched text (group 0).
    Text between matches is copied unchanged.

    - Matching is only attempted while the current position is before the end
      of [str]; in particular an empty [str] is returned as is and [subst] is
      never called.
    - A zero-width match is replaced by [subst ""], then the character that
      follows it (if any) is copied verbatim and scanning resumes after that
      character. Previously the scan restarted at the same position and never
      terminated.

    The expression is now run once per step ([Re.exec], which signals the
    absence of a match with [Not_found]) instead of [Re.execp] followed by
    [Re.exec]. *)
let substitute ~rex ~subst str =
  let len = String.length str in
  let b = Buffer.create 1024 in
  let rec loop pos =
    if pos >= len then
      Buffer.contents b
    else
      match Re.exec ~pos rex str with
      | exception Not_found ->
        (* No further match: keep the remainder untouched. *)
        Buffer.add_substring b str pos (len - pos);
        Buffer.contents b
      | ss ->
        let start, fin = Re.Group.offset ss 0 in
        Buffer.add_substring b str pos (start - pos);
        Buffer.add_string b (subst (Re.Group.get ss 0));
        if fin > start then
          loop fin
        else if fin < len then begin
          (* Zero-width match: step over one character so that the next
             search cannot return the very same match again. *)
          Buffer.add_char b str.[fin];
          loop (fin + 1)
        end else
          Buffer.contents b
  in
  loop 0
(** [split ~rex str] cuts [str] at the matches of [rex] and returns the
    pieces found between them, in order.

    - The piece preceding a non-empty match is always emitted, even when it
      is the empty string (leading or adjacent delimiters). Nothing is
      emitted after a match that ends exactly at the end of [str].
    - An empty [str] yields [[]].
    - A zero-width match consumes no input. When it lies strictly inside the
      current piece, the piece is cut there; when it sits at the very start
      of the current piece it is ignored. In both cases the next search
      starts one character further on. Previously any zero-width match made
      the function loop forever while growing its accumulator.

    The expression is now run once per step ([Re.exec], which signals the
    absence of a match with [Not_found]) instead of [Re.execp] followed by
    [Re.exec]. *)
let split ~rex str =
  let len = String.length str in
  (* [last] is where the piece being collected starts; [pos] is where the
     next search starts. They only differ after a zero-width match. *)
  let rec loop accu last pos =
    if pos >= len then
      List.rev
        (if last < len then String.sub str last (len - last) :: accu
         else accu)
    else
      match Re.exec ~pos rex str with
      | exception Not_found ->
        List.rev (String.sub str last (len - last) :: accu)
      | ss ->
        let start, fin = Re.Group.offset ss 0 in
        if fin > start then
          (* Regular delimiter: emit the piece before it, resume after it. *)
          loop (String.sub str last (start - last) :: accu) fin fin
        else if start > last then
          (* Zero-width match inside the piece: cut here, then search from
             the next character to guarantee progress. *)
          loop (String.sub str last (start - last) :: accu) start (start + 1)
        else
          (* Zero-width match at the start of the piece: skip it. *)
          loop accu last (pos + 1)
  in
  loop [] 0 0
(** [string_unsafe_sub s ofs len] returns a fresh string holding the [len]
    bytes of [s] that start at offset [ofs]. No bounds checking is performed:
    the caller must guarantee that the range lies within [s]. *)
let string_unsafe_sub s ofs len =
  let copy = Bytes.create len in
  let () = Bytes.unsafe_blit s ofs copy 0 len in
  (* [copy] never escapes as bytes, so viewing it as a string is sound. *)
  Bytes.unsafe_to_string copy

(** [quote s] returns a copy of [s] in which every character matched by
    [needs_escape] below is preceded by a backslash; all other characters are
    copied unchanged. *)
let quote s =
  let len = String.length s in
  (* Worst case: every input character is preceded by a backslash. *)
  let buf = Bytes.create (2 * len) in
  let needs_escape = function
    | '\\' | '^' | '$' | '.' | '[' | '|'
    | '(' | ')' | '?' | '*' | '+' | '{' -> true
    | _ -> false
  in
  (* [fill i out] copies characters from index [i] of [s] onwards into [buf]
     starting at index [out], and returns the number of bytes in use. *)
  let rec fill i out =
    if i >= len then out
    else begin
      let c = String.unsafe_get s i in
      let out =
        if needs_escape c then begin
          Bytes.unsafe_set buf out '\\';
          out + 1
        end else
          out
      in
      Bytes.unsafe_set buf out c;
      fill (i + 1) (out + 1)
    end
  in
  string_unsafe_sub buf 0 (fill 0 0)
(** [full_split ?max ~rex s] splits [s] with [Re.split_full rex s] and
    flattens the outcome into a list of [split_result] values:

    - each text piece becomes [Text piece];
    - each delimiter match becomes [Delim d], where [d] is its group 0,
      followed by one entry per group from 1 upwards, in increasing order:
      [NoGroup] when the recorded offsets of the group are [(-1, -1)], and
      [Group (i, text)] otherwise.

    An empty [s] yields [[]]. [max] (default [0]) is only consulted for the
    value [1], in which case no splitting happens and the result is
    [[Text s]]; every other value is ignored. *)
let full_split ?(max=0) ~rex s =
  (* Turn one delimiter match into [Delim] followed by its group entries. *)
  let expand_delim d =
    let offsets = Re.Group.all_offset d in
    let delim = Re.Group.get d 0 in
    (* Walk the groups from the highest index down to 1 so that consing
       produces them in increasing order. *)
    let rec collect i acc =
      if i < 1 then acc
      else
        let entry =
          if offsets.(i) = (-1, -1) then NoGroup
          else Group (i, Re.Group.get d i)
        in
        collect (i - 1) (entry :: acc)
    in
    Delim delim :: collect (Array.length offsets - 1) []
  in
  if String.length s = 0 then []
  else if max = 1 then [Text s]
  else
    Re.split_full rex s
    |> List.map (function
        | `Text piece -> [Text piece]
        | `Delim d -> expand_delim d)
    |> List.concat
(** Alias for [Group.t].
    NOTE(review): [groups] above is declared as [Core.Group.t] while this
    alias uses the unqualified [Group]; presumably both name the same type.
    Confirm that [Group] is in scope here. *)
type substrings = Group.t