/src/php-src/ext/hash/murmur/PMurHash.c
Line | Count | Source |
1 | | /*----------------------------------------------------------------------------- |
2 | | * MurmurHash3 was written by Austin Appleby, and is placed in the public |
3 | | * domain. |
4 | | * |
5 | | * This implementation was written by Shane Day, and is also public domain. |
6 | | * |
7 | | * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) |
8 | | * with support for progressive processing. |
9 | | */ |
10 | | |
11 | | /*----------------------------------------------------------------------------- |
12 | | |
13 | | If you want to understand the MurmurHash algorithm you would be much better |
14 | | off reading the original source. Just point your browser at: |
15 | | http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
16 | | |
17 | | |
18 | | What this version provides: |
19 | | |
20 | | 1. Progressive data feeding. Useful when the entire payload to be hashed |
21 | | does not fit in memory or when the data is streamed through the application. |
22 | | Also useful when hashing a number of strings with a common prefix. A partial |
23 | | hash of a prefix string can be generated and reused for each suffix string (see the caller-side sketch at the end of this file). |
24 | | |
25 | | How does it work? |
26 | | |
27 | | We can only process entire 32-bit chunks of input, except for the very end |
28 | | that may be shorter. So along with the partial hash we need to give back to |
29 | | the caller a carry containing up to 3 bytes that we were unable to process. |
30 | | This carry also needs to record the number of bytes the carry holds. I use |
31 | | the low 2 bits as a count (0..3) and the carry bytes are shifted into the |
32 | | the high byte in stream order (a short worked example follows this comment block). |
33 | | |
34 | | To handle endianness I simply use a macro that reads a uint32_t and define |
35 | | that macro to be a direct read on little endian machines, a read and swap |
36 | | on big endian machines, or a byte-by-byte read if the endianness is unknown. |
37 | | |
38 | | -----------------------------------------------------------------------------*/ |
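A short worked illustration of the carry layout described above (an editorial addition, not part of the file): suppose two bytes, 0xAA then 0xBB, are left over after the last full 32-bit block. The sketch packs them exactly the way dobytes and the final store in PMurHash32_Process below do; the variable names and values are made up.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t c = 0;                  /* running carry; the count ends up in the low 2 bits */
        unsigned n = 0;                  /* number of bytes currently held */
        uint8_t tail[2] = { 0xAA, 0xBB };

        for (unsigned i = 0; i < 2; i++) {
            c = c >> 8 | (uint32_t)tail[i] << 24;   /* newest byte moves to the top */
            n++;
        }
        c = (c & ~(uint32_t)0xff) | n;   /* merge the byte count into the low bits */

        printf("carry = 0x%08x\n", c);   /* prints carry = 0xbbaa0002 */
        return 0;
    }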
39 | | |
40 | | |
41 | | #include "PMurHash.h" |
42 | | |
43 | | // /* MSVC warnings we choose to ignore */ |
44 | | // #if defined(_MSC_VER) |
45 | | // #pragma warning(disable: 4127) /* conditional expression is constant */ |
46 | | // #endif |
47 | | |
48 | | /*----------------------------------------------------------------------------- |
49 | | * Endianness, misalignment capabilities and util macros |
50 | | * |
51 | | * The following 3 macros are defined in this section. The other macros defined |
52 | | * are only needed to help derive these 3. |
53 | | * |
54 | | * READ_UINT32(x) Read a little endian unsigned 32-bit int |
55 | | * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries |
56 | | * ROTL32(x,r) Rotate x left by r bits |
57 | | */ |
58 | | |
59 | | /* I386 or AMD64 */ |
60 | | #if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \ |
61 | | || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) |
62 | | #define UNALIGNED_SAFE |
63 | | #endif |
69 | | |
70 | | /* Find best way to ROTL */ |
71 | | #if defined(_MSC_VER) |
72 | | #define FORCE_INLINE static __forceinline |
73 | | #include <stdlib.h> /* Microsoft put _rotl declaration in here */ |
74 | | #define ROTL32(x,y) _rotl(x,y) |
75 | | #else |
76 | | #define FORCE_INLINE static inline __attribute__((always_inline)) |
77 | | /* gcc recognises this code and generates a rotate instruction for CPUs with one */ |
78 | 752k | #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) |
79 | | #endif |
80 | | |
81 | | #include "endianness.h" |
82 | | |
83 | 376k | #define READ_UINT32(ptr) getblock32((uint32_t *)ptr, 0) |
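In this PHP build, READ_UINT32 delegates to getblock32 from endianness.h. For reference, the "byte-by-byte read if the endianness is unknown" fallback mentioned in the header comment would amount to something like the following sketch; the helper name is illustrative only and does not exist in this file.

    #include <stdint.h>

    /* Portable fallback: assemble a little-endian 32-bit value one byte at a
     * time, independent of host byte order and of pointer alignment. */
    static uint32_t read_le32(const uint8_t *p)
    {
        return (uint32_t)p[0]
             | (uint32_t)p[1] << 8
             | (uint32_t)p[2] << 16
             | (uint32_t)p[3] << 24;
    }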
84 | | |
85 | | /*----------------------------------------------------------------------------- |
86 | | * Core murmurhash algorithm macros */ |
87 | | |
88 | | static const uint32_t kC1 = 0xcc9e2d51; |
89 | | static const uint32_t kC2 = 0x1b873593; |
90 | | |
91 | | /* This is the main processing body of the algorithm. It operates |
92 | | * on each full 32-bits of input. */ |
93 | 376k | #define doblock(h1, k1) \ |
94 | 376k | do {\ |
95 | 376k | k1 *= kC1;\ |
96 | 376k | k1 = ROTL32(k1,15);\ |
97 | 376k | k1 *= kC2;\ |
98 | 376k | \ |
99 | 376k | h1 ^= k1;\ |
100 | 376k | h1 = ROTL32(h1,13);\ |
101 | 376k | h1 = h1*5+0xe6546b64;\ |
102 | 376k | } while(0) |
103 | | |
104 | | /* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ |
105 | | /* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ |
106 | 47 | #define dobytes(cnt, h1, c, n, ptr, len) \ |
107 | 47 | do {\ |
108 | 47 | unsigned __cnt = cnt;\ |
109 | 125 | while(__cnt--) {\ |
110 | 78 | c = c>>8 | (uint32_t)*ptr++<<24;\ |
111 | 78 | n++; len--;\ |
112 | 78 | if(n==4) {\ |
113 | 19 | doblock(h1, c);\ |
114 | 19 | n = 0;\ |
115 | 19 | }\ |
116 | 78 | }\ |
117 | 47 | } while(0) |
118 | | |
119 | | /*---------------------------------------------------------------------------*/ |
120 | | |
121 | | /* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed |
122 | | * if wanted. Both ph1 and pcarry are required arguments. */ |
123 | | void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len) |
124 | 28 | { |
125 | 28 | uint32_t h1 = *ph1; |
126 | 28 | uint32_t c = *pcarry; |
127 | | |
128 | 28 | const uint8_t *ptr = (uint8_t*)key; |
129 | 28 | const uint8_t *end; |
130 | | |
131 | | /* Extract carry count from low 2 bits of c value */ |
132 | 28 | int n = c & 3; |
133 | | |
134 | 28 | #if defined(UNALIGNED_SAFE) |
135 | | /* This CPU handles unaligned word access */ |
136 | | // #pragma message ( "UNALIGNED_SAFE" ) |
137 | | /* Consume any carry bytes */ |
138 | 28 | int i = (4-n) & 3; |
139 | 28 | if(i && i <= len) { |
140 | 19 | dobytes(i, h1, c, n, ptr, len); |
141 | 19 | } |
142 | | |
143 | | /* Process 32-bit chunks */ |
144 | 28 | end = ptr + (len & ~3); |
145 | 376k | for( ; ptr < end ; ptr+=4) { |
146 | 376k | uint32_t k1 = READ_UINT32(ptr); |
147 | 376k | doblock(h1, k1); |
148 | 376k | } |
149 | | |
150 | | #else /*UNALIGNED_SAFE*/ |
151 | | /* This CPU does not handle unaligned word access */ |
152 | | // #pragma message ( "ALIGNED" ) |
153 | | /* Consume enough so that the next data byte is word aligned */ |
154 | | int i = -(intptr_t)(void *)ptr & 3; |
155 | | if(i && i <= len) { |
156 | | dobytes(i, h1, c, n, ptr, len); |
157 | | } |
158 | | |
159 | | /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ |
160 | | end = ptr + (len & ~3); |
161 | | switch(n) { /* how many bytes in c */ |
162 | | case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ |
163 | | for( ; ptr < end ; ptr+=4) { |
164 | | uint32_t k1 = READ_UINT32(ptr); |
165 | | doblock(h1, k1); |
166 | | } |
167 | | break; |
168 | | case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ |
169 | | for( ; ptr < end ; ptr+=4) { |
170 | | uint32_t k1 = c>>24; |
171 | | c = READ_UINT32(ptr); |
172 | | k1 |= c<<8; |
173 | | doblock(h1, k1); |
174 | | } |
175 | | break; |
176 | | case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ |
177 | | for( ; ptr < end ; ptr+=4) { |
178 | | uint32_t k1 = c>>16; |
179 | | c = READ_UINT32(ptr); |
180 | | k1 |= c<<16; |
181 | | doblock(h1, k1); |
182 | | } |
183 | | break; |
184 | | case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ |
185 | | for( ; ptr < end ; ptr+=4) { |
186 | | uint32_t k1 = c>>8; |
187 | | c = READ_UINT32(ptr); |
188 | | k1 |= c<<24; |
189 | | doblock(h1, k1); |
190 | | } |
191 | | } |
192 | | #endif /*UNALIGNED_SAFE*/ |
193 | | |
194 | | /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ |
195 | 28 | len -= len & ~3; |
196 | | |
197 | | /* Append any remaining bytes into carry */ |
198 | 28 | dobytes(len, h1, c, n, ptr, len); |
199 | | |
200 | | /* Copy out new running hash and carry */ |
201 | 28 | *ph1 = h1; |
202 | 28 | *pcarry = (c & ~0xff) | n; |
203 | 28 | } |
204 | | |
205 | | /*---------------------------------------------------------------------------*/ |
206 | | |
207 | | /* Finalize a hash. To match the original Murmur3A the total_length must be provided */ |
208 | | uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length) |
209 | 28 | { |
210 | 28 | uint32_t k1; |
211 | 28 | int n = carry & 3; |
212 | 28 | if(n) { |
213 | 17 | k1 = carry >> (4-n)*8; |
214 | 17 | k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h ^= k1; |
215 | 17 | } |
216 | 28 | h ^= total_length; |
217 | | |
218 | | /* fmix */ |
219 | 28 | h ^= h >> 16; |
220 | 28 | h *= 0x85ebca6b; |
221 | 28 | h ^= h >> 13; |
222 | 28 | h *= 0xc2b2ae35; |
223 | 28 | h ^= h >> 16; |
224 | | |
225 | 28 | return h; |
226 | 28 | } |
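A minimal caller-side sketch of the progressive API defined above (an editorial addition; the message and chunk sizes are made up, and it assumes PMurHash.h is on the include path):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include "PMurHash.h"

    int main(void)
    {
        const char *msg = "hello progressive murmur";
        uint32_t total = (uint32_t)strlen(msg);

        uint32_t h = 0;      /* running hash; start at 0 or at a chosen seed */
        uint32_t carry = 0;  /* must start at 0 */

        /* Feed the payload in two arbitrary chunks. */
        PMurHash32_Process(&h, &carry, msg, 10);
        PMurHash32_Process(&h, &carry, msg + 10, (int)(total - 10));

        /* Finalization needs the total number of bytes fed in. */
        printf("0x%08x\n", PMurHash32_Result(h, carry, total));
        return 0;
    }

Because h and carry capture the entire running state, the pair can also be snapshotted after hashing a common prefix and reused for each suffix, which is the prefix-reuse case mentioned in the header comment.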