12345678910111213141516171819202122232425262728293031323334353637383940414243444546open!Import(* C stub for int popcount to use the POPCNT instruction where possible *)externalint_popcount:int->int="Base_int_math_int_popcount"[@@noalloc](* To maintain javascript compatibility and enable unboxing, we implement popcount in
OCaml rather than use C stubs. Implementation adapted from:
https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation *)letint64_popcount=letopenStdlib.Int64inlet(+)=addinlet(-)=subinlet(*)=mulinlet(lsr)=shift_right_logicalinlet(land)=logandinletm1=0x5555555555555555Lin(* 0b01010101... *)letm2=0x3333333333333333Lin(* 0b00110011... *)letm4=0x0f0f0f0f0f0f0f0fLin(* 0b00001111... *)leth01=0x0101010101010101Lin(* 1 bit set per byte *)fun[@inline]x->(* gather the bit count for every pair of bits *)letx=x-((xlsr1)landm1)in(* gather the bit count for every 4 bits *)letx=(xlandm2)+((xlsr2)landm2)in(* gather the bit count for every byte *)letx=(x+(xlsr4))landm4in(* sum the bit counts in the top byte and shift it down *)to_int((x*h01)lsr56);;letint32_popcount=(* On 64-bit systems, this is faster than implementing using [int32] arithmetic. *)letmask=0xffff_ffffLinfun[@inline]x->int64_popcount(Stdlib.Int64.logand(Stdlib.Int64.of_int32x)mask);;letnativeint_popcount=matchStdlib.Nativeint.sizewith|32->fun[@inline]x->int32_popcount(Stdlib.Nativeint.to_int32x)|64->fun[@inline]x->int64_popcount(Stdlib.Int64.of_nativeintx)|_->assertfalse;;