Source file hash_set.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
open Import

module Array = struct
  type nonrec t = Bytes.t

  let words = 8
  let[@inline] length t = Bytes.length t / words
  let[@inline] unsafe_get t i = Int64.to_int (Bytes.get_int64_ne t (i * words))
  let[@inline] unsafe_set t i x = Bytes.set_int64_ne t (i * words) (Int64.of_int x)

  let[@inline] make len x =
    let t = Bytes.create (len * words) in
    for i = 0 to length t - 1 do
      unsafe_set t i x
    done;
    t
  ;;

  let[@inline] make_absent len = Bytes.make (len * words) '\255'
  let clear t = Bytes.fill t 0 (Bytes.length t) '\255'

  let fold_left t ~init ~f =
    let init = ref init in
    for i = 0 to length t - 1 do
      init := f !init (unsafe_get t i)
    done;
    !init
  ;;
end

(* A specialized hash table that makes the following trade-offs:
   - Open addresing. Bucketing is quite memory intensive and dune is already
     a memory hog.
   - No boxing for empty slots. We make use of the fact that id's are never
     negative to achieve this.
   - No saving of the hash. Recomputing the hash for id's is a no-op.
*)

type nonrec table =
  { mutable table : Array.t
  ; mutable size : int
  }

type t = table Option.t ref

let init t =
  if Option.is_none !t then t := Option.some { size = 0; table = Array.make 0 (-1) };
  Option.get !t
;;

let[@inline] should_grow t =
  let slots = Array.length t.table in
  slots = 0 || (t.size > 0 && slots / t.size < 2)
;;

let absent = -1

let () =
  let x = Array.make_absent 1 in
  assert (Array.unsafe_get x 0 = absent)
;;

let create () = ref Option.none

let[@inline] index_of_offset slots index i =
  let i = index + !i in
  if i >= slots then i - slots else i
;;

let clear t =
  match !t with
  | None -> ()
  | Some t ->
    t.size <- 0;
    Array.clear t.table
;;

let add t x =
  let hash = Int.hash x in
  let slots = Array.length t.table in
  let index = hash land (slots - 1) in
  let inserting = ref true in
  let i = ref 0 in
  while !inserting do
    let idx = index_of_offset slots index i in
    let elem = Array.unsafe_get t.table idx in
    if elem = absent
    then (
      Array.unsafe_set t.table idx x;
      inserting := false)
    else incr i
  done;
  t.size <- t.size + 1
;;

let resize t =
  let old_table = t.table in
  let slots = Array.length old_table in
  let table = Array.make_absent (if slots = 0 then 1 else slots lsl 1) in
  t.table <- table;
  for i = 0 to slots - 1 do
    let elem = Array.unsafe_get old_table i in
    if elem <> absent then add t elem
  done
;;

let add t x =
  let t = init t in
  if should_grow t then resize t;
  add t x
;;

let[@inline] is_empty t =
  let t = !t in
  if Option.is_none t
  then true
  else (
    let t = Option.get t in
    t.size = 0)
;;

let mem t x =
  let t = !t in
  if Option.is_none t || (Option.get t).size = 0
  then false
  else (
    let t = Option.get t in
    let hash = Int.hash x in
    let slots = Array.length t.table in
    let index = hash land (slots - 1) in
    let i = ref 0 in
    let found = ref false in
    while (not !found) && !i < slots do
      let idx = index_of_offset slots index i in
      let elem = Array.unsafe_get t.table idx in
      if Int.equal elem x
      then found := true
      else if Int.equal elem absent
      then i := slots
      else incr i
    done;
    !found)
;;

let pp fmt t =
  let { table; size } = init t in
  let table =
    Array.fold_left table ~init:[] ~f:(fun acc i -> if i = absent then acc else i :: acc)
    |> List.rev
    |> Stdlib.Array.of_list
  in
  let table fmt () = Fmt.sexp fmt "table" Fmt.(array int) table in
  let size fmt () = Fmt.sexp fmt "size" Fmt.int size in
  Format.fprintf fmt "%a@.%a@." table () size ()
;;