1 /**
2 Copyright: Copyright Thomas Stuart Bockman 2015
3 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
4 Authors: Thomas Stuart Bockman
5 */
6 
7 module future.bitop;
8 public import core.bitop;
9 
10 static if(__VERSION__ < 2071)
11 {
12     nothrow:
13     @safe:
14     @nogc:
15 
    /// Overlays a 64-bit value (`u64`) with its two 32-bit halves.
    /// `lo` always aliases the least-significant half and `hi` the
    /// most-significant half, independent of target byte order.
    private union Split64
    {
        ulong u64;
        struct
        {
            // Declaration order tracks the target's endianness so the
            // lo/hi names keep their significance-based meaning when the
            // struct overlays u64's storage.
            version(LittleEndian)
            {
                uint lo;
                uint hi;
            }
            else
            {
                uint hi;
                uint lo;
            }
        }
    }
33     private auto split64(ulong x) pure {
34         if(__ctfe) {
35             Split64 ret = void;
36             ret.lo = cast(uint)x;
37             ret.hi = cast(uint)(x >>> 32);
38             return ret;
39         } else
40             return Split64(x);
41     }
42 
43     int bsf(size_t v) pure
44     {
45         import core.bitop : impl = bsf;
46         return impl(v);
47     }
48     static if(size_t.sizeof == uint.sizeof)
49     {
50         int bsf(ulong v) pure
51         {
52             const sv = split64(v);
53             return (sv.lo == 0)?
54                 bsf(sv.hi) + 32 :
55                 bsf(sv.lo);
56         }
57     } else
58         static assert(size_t.sizeof == ulong.sizeof);
59 
60     int bsr(size_t v) pure
61     {
62         import core.bitop : impl = bsr;
63         return impl(v);
64     }
65     static if(size_t.sizeof == uint.sizeof)
66     {
67         int bsr(ulong v) pure
68         {
69             const sv = split64(v);
70             return (sv.hi == 0)?
71                 bsr(sv.lo) :
72                 bsr(sv.hi) + 32;
73         }
74     } else
75         static assert(size_t.sizeof == ulong.sizeof);
76 
    /// Count of set bits in `x`.
    /// Not marked `pure`: the DMD path reads core.cpuid.hasPopcnt,
    /// which reflects run-time CPU state.
    int popcnt(uint x)
    {
        // Select the fastest method depending on the compiler and CPU architecture
        version(LDC)
        {
            // LDC always provides a usable _popcnt; call it directly.
            return _popcnt(x);
        }
        else
        {
            version(DigitalMars)
            {
                // Only front ends that declare _popcnt(uint) take this path;
                // the is(typeof(...)) probe keeps older DMDs compiling.
                static if (is(typeof(_popcnt(uint.max))))
                {
                    import core.cpuid;
                    // Skip the hardware instruction during CTFE, and only
                    // use it when the CPU reports POPCNT support.
                    if (!__ctfe && hasPopcnt)
                        return _popcnt(x);
                }
            }

            // Portable fallback: branch-free software bit count.
            return soft_popcnt!uint(x);
        }
    }
    /// Count of set bits in `x` (64-bit overload).
    /// On 32-bit targets the value is split and the two halves are
    /// counted independently; their sum equals the 64-bit count.
    int popcnt(ulong x)
    {
        // Select the fastest method depending on the compiler and CPU architecture
        version(LDC)
        {
            // NOTE(review): assumes LDC's _popcnt accepts a ulong
            // argument directly — confirm against the targeted LDC versions.
            return _popcnt(x);
        }
        else
        {
            import core.cpuid;

            static if (size_t.sizeof == uint.sizeof)
            {
                // 32-bit target: count each 32-bit half separately.
                const sx = split64(x);
                version(DigitalMars)
                {
                    // Probe for a DMD-provided _popcnt(uint) intrinsic.
                    static if (is(typeof(_popcnt(uint.max))))
                    {
                        // Hardware path: unusable during CTFE; requires
                        // run-time POPCNT support.
                        if (!__ctfe && hasPopcnt)
                            return _popcnt(sx.lo) + _popcnt(sx.hi);
                    }
                }

                return soft_popcnt!uint(sx.lo) + soft_popcnt!uint(sx.hi);
            }
            else static if (size_t.sizeof == ulong.sizeof)
            {
                version(DigitalMars)
                {
                    // Probe for a DMD-provided _popcnt(ulong) intrinsic.
                    static if (is(typeof(_popcnt(ulong.max))))
                    {
                        if (!__ctfe && hasPopcnt)
                            return _popcnt(x);
                    }
                }

                // Portable fallback: branch-free software bit count.
                return soft_popcnt!ulong(x);
            }
            else
                static assert(false);
        }
    }
141 
142     private int soft_popcnt(N)(N x) pure
143         if (is(N == uint) || is(N == ulong))
144     {
145         // Avoid branches, and the potential for cache misses which
146         // could be incurred with a table lookup.
147 
148         // We need to mask alternate bits to prevent the
149         // sum from overflowing.
150         // add neighbouring bits. Each bit is 0 or 1.
151         enum mask1 = cast(N) 0x5555_5555_5555_5555L;
152         x = x - ((x>>1) & mask1);
153         // now each two bits of x is a number 00,01 or 10.
154         // now add neighbouring pairs
155         enum mask2a = cast(N) 0xCCCC_CCCC_CCCC_CCCCL;
156         enum mask2b = cast(N) 0x3333_3333_3333_3333L;
157         x = ((x & mask2a)>>2) + (x & mask2b);
158         // now each nibble holds 0000-0100. Adding them won't
159         // overflow any more, so we don't need to mask any more
160 
161         enum mask4 = cast(N) 0x0F0F_0F0F_0F0F_0F0FL;
162         x = (x + (x >> 4)) & mask4;
163 
164         enum shiftbits = is(N == uint)? 24 : 56;
165         enum maskMul = cast(N) 0x0101_0101_0101_0101L;
166         x = (x * maskMul) >> shiftbits;
167 
168         return cast(int) x;
169     }
170 }