Panda3D
Loading...
Searching...
No Matches
lookup3.c
1/* See http://www.burtleburtle.net/bob/hash/ */
2
3/*
4-------------------------------------------------------------------------------
5lookup3.c, by Bob Jenkins, May 2006, Public Domain.
6
7These are functions for producing 32-bit hashes for hash table lookup.
8hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
9are externally useful functions. Routines to test the hash are included
10if SELF_TEST is defined. You can use this free for any purpose. It's in
11the public domain. It has no warranty.
12
13You probably want to use hashlittle(). hashlittle() and hashbig()
14hash byte arrays. hashlittle() is is faster than hashbig() on
15little-endian machines. Intel and AMD are little-endian machines.
16On second thought, you probably want hashlittle2(), which is identical to
17hashlittle() except it returns two 32-bit hashes for the price of one.
18You could implement hashbig2() if you wanted but I haven't bothered here.
19
20If you want to find a hash of, say, exactly 7 integers, do
21 a = i1; b = i2; c = i3;
22 mix(a,b,c);
23 a += i4; b += i5; c += i6;
24 mix(a,b,c);
25 a += i7;
26 final(a,b,c);
27then use c as the hash value. If you have a variable length array of
284-byte integers to hash, use hashword(). If you have a byte array (like
29a character string), use hashlittle(). If you have several byte arrays, or
30a mix of things, see the comments above hashlittle().
31
32Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
33then mix those integers. This is fast (you can do a lot more thorough
34mixing with 12*3 instructions on 3 integers than you can with 3 instructions
35on 1 byte), but shoehorning those bytes into integers efficiently is messy.
36-------------------------------------------------------------------------------
37*/
38/*#define SELF_TEST 1*/
39
40#include "lookup3.h"
41
42#include <stdio.h>
43#include <stddef.h>
44#include <stdlib.h>
45#include <time.h>
46
47#ifdef WORDS_BIGENDIAN
48# define HASH_LITTLE_ENDIAN 0
49# define HASH_BIG_ENDIAN 1
50#else
51# define HASH_LITTLE_ENDIAN 1
52# define HASH_BIG_ENDIAN 0
53#endif
54
55#define hashsize(n) ((uint32_t)1<<(n))
56#define hashmask(n) (hashsize(n)-1)
57#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
58
59/*
60-------------------------------------------------------------------------------
61mix -- mix 3 32-bit values reversibly.
62
63This is reversible, so any information in (a,b,c) before mix() is
64still in (a,b,c) after mix().
65
66If four pairs of (a,b,c) inputs are run through mix(), or through
67mix() in reverse, there are at least 32 bits of the output that
68are sometimes the same for one pair and different for another pair.
69This was tested for:
70* pairs that differed by one bit, by two bits, in any combination
71 of top bits of (a,b,c), or in any combination of bottom bits of
72 (a,b,c).
73* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
74 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
75 is commonly produced by subtraction) look like a single 1-bit
76 difference.
77* the base values were pseudorandom, all zero but one bit set, or
78 all zero plus a counter that starts at zero.
79
80Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
81satisfy this are
82 4 6 8 16 19 4
83 9 15 3 18 27 15
84 14 9 3 7 17 3
85Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
86for "differ" defined as + with a one-bit base and a two-bit delta. I
87used http://burtleburtle.net/bob/hash/avalanche.html to choose
88the operations, constants, and arrangements of the variables.
89
90This does not achieve avalanche. There are input bits of (a,b,c)
91that fail to affect some output bits of (a,b,c), especially of a. The
92most thoroughly mixed value is c, but it doesn't really even achieve
93avalanche in c.
94
95This allows some parallelism. Read-after-writes are good at doubling
96the number of bits affected, so the goal of mixing pulls in the opposite
97direction as the goal of parallelism. I did what I could. Rotates
98seem to cost as much as shifts on every machine I could lay my hands
99on, and rotates are much kinder to the top and bottom bits, so I used
100rotates.
101-------------------------------------------------------------------------------
102*/
103#define mix(a,b,c) \
104{ \
105 a -= c; a ^= rot(c, 4); c += b; \
106 b -= a; b ^= rot(a, 6); a += c; \
107 c -= b; c ^= rot(b, 8); b += a; \
108 a -= c; a ^= rot(c,16); c += b; \
109 b -= a; b ^= rot(a,19); a += c; \
110 c -= b; c ^= rot(b, 4); b += a; \
111}
112
113/*
114-------------------------------------------------------------------------------
115final -- final mixing of 3 32-bit values (a,b,c) into c
116
117Pairs of (a,b,c) values differing in only a few bits will usually
118produce values of c that look totally different. This was tested for
119* pairs that differed by one bit, by two bits, in any combination
120 of top bits of (a,b,c), or in any combination of bottom bits of
121 (a,b,c).
122* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
123 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
124 is commonly produced by subtraction) look like a single 1-bit
125 difference.
126* the base values were pseudorandom, all zero but one bit set, or
127 all zero plus a counter that starts at zero.
128
129These constants passed:
130 14 11 25 16 4 14 24
131 12 14 25 16 4 14 24
132and these came close:
133 4 8 15 26 3 22 24
134 10 8 15 26 3 22 24
135 11 8 15 26 3 22 24
136-------------------------------------------------------------------------------
137*/
138#define final(a,b,c) \
139{ \
140 c ^= b; c -= rot(b,14); \
141 a ^= c; a -= rot(c,11); \
142 b ^= a; b -= rot(a,25); \
143 c ^= b; c -= rot(b,16); \
144 a ^= c; a -= rot(c,4); \
145 b ^= a; b -= rot(a,14); \
146 c ^= b; c -= rot(b,24); \
147}
148
149/*
150--------------------------------------------------------------------
151 This works on all machines. To be useful, it requires
152 -- that the key be an array of uint32_t's, and
153 -- that the length be the number of uint32_t's in the key
154
155 The function hashword() is identical to hashlittle() on little-endian
156 machines, and identical to hashbig() on big-endian machines,
157 except that the length has to be measured in uint32_ts rather than in
158 bytes. hashlittle() is more complicated than hashword() only because
159 hashlittle() has to dance around fitting the key bytes into registers.
160--------------------------------------------------------------------
161*/
162uint32_t hashword(
163const uint32_t *k, /* the key, an array of uint32_t values */
164size_t length, /* the length of the key, in uint32_ts */
165uint32_t initval) /* the previous hash, or an arbitrary value */
166{
167 uint32_t a,b,c;
168
169 /* Set up the internal state */
170 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
171
172 /*------------------------------------------------- handle most of the key */
173 while (length > 3)
174 {
175 a += k[0];
176 b += k[1];
177 c += k[2];
178 mix(a,b,c);
179 length -= 3;
180 k += 3;
181 }
182
183 /*------------------------------------------- handle the last 3 uint32_t's */
184 switch(length) /* all the case statements fall through */
185 {
186 case 3 : c+=k[2];
187 case 2 : b+=k[1];
188 case 1 : a+=k[0];
189 final(a,b,c);
190 case 0: /* case 0: nothing left to add */
191 break;
192 }
193 /*------------------------------------------------------ report the result */
194 return c;
195}
196
197
198/*
199-------------------------------------------------------------------------------
200hashlittle() -- hash a variable-length key into a 32-bit value
201 k : the key (the unaligned variable-length array of bytes)
202 length : the length of the key, counting by bytes
203 initval : can be any 4-byte value
204Returns a 32-bit value. Every bit of the key affects every bit of
205the return value. Two keys differing by one or two bits will have
206totally different hash values.
207
208The best hash table sizes are powers of 2. There is no need to do
209mod a prime (mod is sooo slow!). If you need less than 32 bits,
210use a bitmask. For example, if you need only 10 bits, do
211 h = (h & hashmask(10));
212In which case, the hash table should have hashsize(10) elements.
213
214If you are hashing n strings (uint8_t **)k, do it like this:
215 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
216
217By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
218code any way you wish, private, educational, or commercial. It's free.
219
220Use for hash table lookup, or anything where one collision in 2^^32 is
221acceptable. Do NOT use for cryptographic purposes.
222-------------------------------------------------------------------------------
223*/
224
225uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
226{
227 uint32_t a,b,c; /* internal state */
228 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
229
230 /* Set up the internal state */
231 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
232
233 u.ptr = key;
234 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
235 const uint32_t *k =(uint32_t*) key; /* read 32-bit chunks */
236#ifdef VALGRIND
237 const uint8_t *k8;
238#endif
239
240 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
241 while (length > 12)
242 {
243 a += k[0];
244 b += k[1];
245 c += k[2];
246 mix(a,b,c);
247 length -= 12;
248 k += 3;
249 }
250
251 /*----------------------------- handle the last (probably partial) block */
252 /*
253 * "k[2]&0xffffff" actually reads beyond the end of the string, but
254 * then masks off the part it's not allowed to read. Because the
255 * string is aligned, the masked-off tail is in the same word as the
256 * rest of the string. Every machine with memory protection I've seen
257 * does it on word boundaries, so is OK with this. But VALGRIND will
258 * still catch it and complain. The masking trick does make the hash
259 * noticably faster for short strings (like English words).
260 */
261#ifndef VALGRIND
262
263 switch(length)
264 {
265 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
266 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
267 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
268 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
269 case 8 : b+=k[1]; a+=k[0]; break;
270 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
271 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
272 case 5 : b+=k[1]&0xff; a+=k[0]; break;
273 case 4 : a+=k[0]; break;
274 case 3 : a+=k[0]&0xffffff; break;
275 case 2 : a+=k[0]&0xffff; break;
276 case 1 : a+=k[0]&0xff; break;
277 case 0 : return c; /* zero length strings require no mixing */
278 }
279
280#else /* make valgrind happy */
281
282 k8 = (const uint8_t *)k;
283 switch(length)
284 {
285 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
286 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
287 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
288 case 9 : c+=k8[8]; /* fall through */
289 case 8 : b+=k[1]; a+=k[0]; break;
290 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
291 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
292 case 5 : b+=k8[4]; /* fall through */
293 case 4 : a+=k[0]; break;
294 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
295 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
296 case 1 : a+=k8[0]; break;
297 case 0 : return c;
298 }
299
300#endif /* !valgrind */
301
302 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
303 const uint16_t *k = (uint16_t*)key; /* read 16-bit chunks */
304 const uint8_t *k8;
305
306 /*--------------- all but last block: aligned reads and different mixing */
307 while (length > 12)
308 {
309 a += k[0] + (((uint32_t)k[1])<<16);
310 b += k[2] + (((uint32_t)k[3])<<16);
311 c += k[4] + (((uint32_t)k[5])<<16);
312 mix(a,b,c);
313 length -= 12;
314 k += 6;
315 }
316
317 /*----------------------------- handle the last (probably partial) block */
318 k8 = (const uint8_t *)k;
319 switch(length)
320 {
321 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
322 b+=k[2]+(((uint32_t)k[3])<<16);
323 a+=k[0]+(((uint32_t)k[1])<<16);
324 break;
325 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
326 case 10: c+=k[4];
327 b+=k[2]+(((uint32_t)k[3])<<16);
328 a+=k[0]+(((uint32_t)k[1])<<16);
329 break;
330 case 9 : c+=k8[8]; /* fall through */
331 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
332 a+=k[0]+(((uint32_t)k[1])<<16);
333 break;
334 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
335 case 6 : b+=k[2];
336 a+=k[0]+(((uint32_t)k[1])<<16);
337 break;
338 case 5 : b+=k8[4]; /* fall through */
339 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
340 break;
341 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
342 case 2 : a+=k[0];
343 break;
344 case 1 : a+=k8[0];
345 break;
346 case 0 : return c; /* zero length requires no mixing */
347 }
348
349 } else { /* need to read the key one byte at a time */
350 const uint8_t *k =(uint8_t*) key;
351
352 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
353 while (length > 12)
354 {
355 a += k[0];
356 a += ((uint32_t)k[1])<<8;
357 a += ((uint32_t)k[2])<<16;
358 a += ((uint32_t)k[3])<<24;
359 b += k[4];
360 b += ((uint32_t)k[5])<<8;
361 b += ((uint32_t)k[6])<<16;
362 b += ((uint32_t)k[7])<<24;
363 c += k[8];
364 c += ((uint32_t)k[9])<<8;
365 c += ((uint32_t)k[10])<<16;
366 c += ((uint32_t)k[11])<<24;
367 mix(a,b,c);
368 length -= 12;
369 k += 12;
370 }
371
372 /*-------------------------------- last block: affect all 32 bits of (c) */
373 switch(length) /* all the case statements fall through */
374 {
375 case 12: c+=((uint32_t)k[11])<<24;
376 case 11: c+=((uint32_t)k[10])<<16;
377 case 10: c+=((uint32_t)k[9])<<8;
378 case 9 : c+=k[8];
379 case 8 : b+=((uint32_t)k[7])<<24;
380 case 7 : b+=((uint32_t)k[6])<<16;
381 case 6 : b+=((uint32_t)k[5])<<8;
382 case 5 : b+=k[4];
383 case 4 : a+=((uint32_t)k[3])<<24;
384 case 3 : a+=((uint32_t)k[2])<<16;
385 case 2 : a+=((uint32_t)k[1])<<8;
386 case 1 : a+=k[0];
387 break;
388 case 0 : return c;
389 }
390 }
391
392 final(a,b,c);
393 return c;
394}
395
396
397/*
398 * hashlittle2: return 2 32-bit hash values
399 *
400 * This is identical to hashlittle(), except it returns two 32-bit hash
401 * values instead of just one. This is good enough for hash table
402 * lookup with 2^^64 buckets, or if you want a second hash if you're not
403 * happy with the first, or if you want a probably-unique 64-bit ID for
404 * the key. *pc is better mixed than *pb, so use *pc first. If you want
405 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
406 */
407void hashlittle2(
408 const void *key, /* the key to hash */
409 size_t length, /* length of the key */
410 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
411 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
412{
413 uint32_t a,b,c; /* internal state */
414 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
415
416 /* Set up the internal state */
417 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
418 c += *pb;
419
420 u.ptr = key;
421 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
422 const uint32_t *k = (uint32_t*)key; /* read 32-bit chunks */
423#ifdef VALGRIND
424 const uint8_t *k8;
425#endif
426
427 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
428 while (length > 12)
429 {
430 a += k[0];
431 b += k[1];
432 c += k[2];
433 mix(a,b,c);
434 length -= 12;
435 k += 3;
436 }
437
438 /*----------------------------- handle the last (probably partial) block */
439 /*
440 * "k[2]&0xffffff" actually reads beyond the end of the string, but
441 * then masks off the part it's not allowed to read. Because the
442 * string is aligned, the masked-off tail is in the same word as the
443 * rest of the string. Every machine with memory protection I've seen
444 * does it on word boundaries, so is OK with this. But VALGRIND will
445 * still catch it and complain. The masking trick does make the hash
446 * noticably faster for short strings (like English words).
447 */
448#ifndef VALGRIND
449
450 switch(length)
451 {
452 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
453 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
454 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
455 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
456 case 8 : b+=k[1]; a+=k[0]; break;
457 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
458 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
459 case 5 : b+=k[1]&0xff; a+=k[0]; break;
460 case 4 : a+=k[0]; break;
461 case 3 : a+=k[0]&0xffffff; break;
462 case 2 : a+=k[0]&0xffff; break;
463 case 1 : a+=k[0]&0xff; break;
464 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
465 }
466
467#else /* make valgrind happy */
468
469 k8 = (const uint8_t *)k;
470 switch(length)
471 {
472 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
473 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
474 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
475 case 9 : c+=k8[8]; /* fall through */
476 case 8 : b+=k[1]; a+=k[0]; break;
477 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
478 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
479 case 5 : b+=k8[4]; /* fall through */
480 case 4 : a+=k[0]; break;
481 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
482 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
483 case 1 : a+=k8[0]; break;
484 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
485 }
486
487#endif /* !valgrind */
488
489 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
490 const uint16_t *k = (uint16_t*)key; /* read 16-bit chunks */
491 const uint8_t *k8;
492
493 /*--------------- all but last block: aligned reads and different mixing */
494 while (length > 12)
495 {
496 a += k[0] + (((uint32_t)k[1])<<16);
497 b += k[2] + (((uint32_t)k[3])<<16);
498 c += k[4] + (((uint32_t)k[5])<<16);
499 mix(a,b,c);
500 length -= 12;
501 k += 6;
502 }
503
504 /*----------------------------- handle the last (probably partial) block */
505 k8 = (const uint8_t *)k;
506 switch(length)
507 {
508 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
509 b+=k[2]+(((uint32_t)k[3])<<16);
510 a+=k[0]+(((uint32_t)k[1])<<16);
511 break;
512 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
513 case 10: c+=k[4];
514 b+=k[2]+(((uint32_t)k[3])<<16);
515 a+=k[0]+(((uint32_t)k[1])<<16);
516 break;
517 case 9 : c+=k8[8]; /* fall through */
518 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
519 a+=k[0]+(((uint32_t)k[1])<<16);
520 break;
521 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
522 case 6 : b+=k[2];
523 a+=k[0]+(((uint32_t)k[1])<<16);
524 break;
525 case 5 : b+=k8[4]; /* fall through */
526 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
527 break;
528 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
529 case 2 : a+=k[0];
530 break;
531 case 1 : a+=k8[0];
532 break;
533 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
534 }
535
536 } else { /* need to read the key one byte at a time */
537 const uint8_t *k = (uint8_t*)key;
538
539 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
540 while (length > 12)
541 {
542 a += k[0];
543 a += ((uint32_t)k[1])<<8;
544 a += ((uint32_t)k[2])<<16;
545 a += ((uint32_t)k[3])<<24;
546 b += k[4];
547 b += ((uint32_t)k[5])<<8;
548 b += ((uint32_t)k[6])<<16;
549 b += ((uint32_t)k[7])<<24;
550 c += k[8];
551 c += ((uint32_t)k[9])<<8;
552 c += ((uint32_t)k[10])<<16;
553 c += ((uint32_t)k[11])<<24;
554 mix(a,b,c);
555 length -= 12;
556 k += 12;
557 }
558
559 /*-------------------------------- last block: affect all 32 bits of (c) */
560 switch(length) /* all the case statements fall through */
561 {
562 case 12: c+=((uint32_t)k[11])<<24;
563 case 11: c+=((uint32_t)k[10])<<16;
564 case 10: c+=((uint32_t)k[9])<<8;
565 case 9 : c+=k[8];
566 case 8 : b+=((uint32_t)k[7])<<24;
567 case 7 : b+=((uint32_t)k[6])<<16;
568 case 6 : b+=((uint32_t)k[5])<<8;
569 case 5 : b+=k[4];
570 case 4 : a+=((uint32_t)k[3])<<24;
571 case 3 : a+=((uint32_t)k[2])<<16;
572 case 2 : a+=((uint32_t)k[1])<<8;
573 case 1 : a+=k[0];
574 break;
575 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
576 }
577 }
578
579 final(a,b,c);
580 *pc=c; *pb=b; return; /* zero length strings require no mixing */
581}
582
583
584
585/*
586 * hashbig():
587 * This is the same as hashword() on big-endian machines. It is different
588 * from hashlittle() on all machines. hashbig() takes advantage of
589 * big-endian byte ordering.
590 */
591uint32_t hashbig( const void *key, size_t length, uint32_t initval)
592{
593 uint32_t a,b,c;
594 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
595
596 /* Set up the internal state */
597 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
598
599 u.ptr = key;
600 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
601 const uint32_t *k = (uint32_t*)key; /* read 32-bit chunks */
602#ifdef VALGRIND
603 const uint8_t *k8;
604#endif
605
606 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
607 while (length > 12)
608 {
609 a += k[0];
610 b += k[1];
611 c += k[2];
612 mix(a,b,c);
613 length -= 12;
614 k += 3;
615 }
616
617 /*----------------------------- handle the last (probably partial) block */
618 /*
619 * "k[2]<<8" actually reads beyond the end of the string, but
620 * then shifts out the part it's not allowed to read. Because the
621 * string is aligned, the illegal read is in the same word as the
622 * rest of the string. Every machine with memory protection I've seen
623 * does it on word boundaries, so is OK with this. But VALGRIND will
624 * still catch it and complain. The masking trick does make the hash
625 * noticably faster for short strings (like English words).
626 */
627#ifndef VALGRIND
628
629 switch(length)
630 {
631 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
632 case 11: c+=k[2]<<8; b+=k[1]; a+=k[0]; break;
633 case 10: c+=k[2]<<16; b+=k[1]; a+=k[0]; break;
634 case 9 : c+=k[2]<<24; b+=k[1]; a+=k[0]; break;
635 case 8 : b+=k[1]; a+=k[0]; break;
636 case 7 : b+=k[1]<<8; a+=k[0]; break;
637 case 6 : b+=k[1]<<16; a+=k[0]; break;
638 case 5 : b+=k[1]<<24; a+=k[0]; break;
639 case 4 : a+=k[0]; break;
640 case 3 : a+=k[0]<<8; break;
641 case 2 : a+=k[0]<<16; break;
642 case 1 : a+=k[0]<<24; break;
643 case 0 : return c; /* zero length strings require no mixing */
644 }
645
646#else /* make valgrind happy */
647
648 k8 = (const uint8_t *)k;
649 switch(length) /* all the case statements fall through */
650 {
651 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
652 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
653 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
654 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
655 case 8 : b+=k[1]; a+=k[0]; break;
656 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
657 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
658 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
659 case 4 : a+=k[0]; break;
660 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
661 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
662 case 1 : a+=((uint32_t)k8[0])<<24; break;
663 case 0 : return c;
664 }
665
666#endif /* !VALGRIND */
667
668 } else { /* need to read the key one byte at a time */
669 const uint8_t *k = (uint8_t*)key;
670
671 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
672 while (length > 12)
673 {
674 a += ((uint32_t)k[0])<<24;
675 a += ((uint32_t)k[1])<<16;
676 a += ((uint32_t)k[2])<<8;
677 a += ((uint32_t)k[3]);
678 b += ((uint32_t)k[4])<<24;
679 b += ((uint32_t)k[5])<<16;
680 b += ((uint32_t)k[6])<<8;
681 b += ((uint32_t)k[7]);
682 c += ((uint32_t)k[8])<<24;
683 c += ((uint32_t)k[9])<<16;
684 c += ((uint32_t)k[10])<<8;
685 c += ((uint32_t)k[11]);
686 mix(a,b,c);
687 length -= 12;
688 k += 12;
689 }
690
691 /*-------------------------------- last block: affect all 32 bits of (c) */
692 switch(length) /* all the case statements fall through */
693 {
694 case 12: c+=k[11];
695 case 11: c+=((uint32_t)k[10])<<8;
696 case 10: c+=((uint32_t)k[9])<<16;
697 case 9 : c+=((uint32_t)k[8])<<24;
698 case 8 : b+=k[7];
699 case 7 : b+=((uint32_t)k[6])<<8;
700 case 6 : b+=((uint32_t)k[5])<<16;
701 case 5 : b+=((uint32_t)k[4])<<24;
702 case 4 : a+=k[3];
703 case 3 : a+=((uint32_t)k[2])<<8;
704 case 2 : a+=((uint32_t)k[1])<<16;
705 case 1 : a+=((uint32_t)k[0])<<24;
706 break;
707 case 0 : return c;
708 }
709 }
710
711 final(a,b,c);
712 return c;
713}
714
715
716#ifdef SELF_TEST
717
718/* used for timings */
719void driver1()
720{
721 uint8_t buf[256];
722 uint32_t i;
723 uint32_t h=0;
724 time_t a,z;
725
726 time(&a);
727 for (i=0; i<256; ++i) buf[i] = 'x';
728 for (i=0; i<1; ++i)
729 {
730 h = hashlittle(&buf[0],1,h);
731 }
732 time(&z);
733 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
734}
735
736/* check that every input bit changes every output bit half the time */
737#define HASHSTATE 1
738#define HASHLEN 1
739#define MAXPAIR 60
740#define MAXLEN 70
741void driver2()
742{
743 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
744 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
745 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
746 uint32_t x[HASHSTATE],y[HASHSTATE];
747 uint32_t hlen;
748
749 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
750 for (hlen=0; hlen < MAXLEN; ++hlen)
751 {
752 z=0;
753 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
754 {
755 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
756 {
757 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
758 {
759 for (l=0; l<HASHSTATE; ++l)
760 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
761
762 /*---- check that every output bit is affected by that input bit */
763 for (k=0; k<MAXPAIR; k+=2)
764 {
765 uint32_t finished=1;
766 /* keys have one bit different */
767 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
768 /* have a and b be two keys differing in only one bit */
769 a[i] ^= (k<<j);
770 a[i] ^= (k>>(8-j));
771 c[0] = hashlittle(a, hlen, m);
772 b[i] ^= ((k+1)<<j);
773 b[i] ^= ((k+1)>>(8-j));
774 d[0] = hashlittle(b, hlen, m);
775 /* check every bit is 1, 0, set, and not set at least once */
776 for (l=0; l<HASHSTATE; ++l)
777 {
778 e[l] &= (c[l]^d[l]);
779 f[l] &= ~(c[l]^d[l]);
780 g[l] &= c[l];
781 h[l] &= ~c[l];
782 x[l] &= d[l];
783 y[l] &= ~d[l];
784 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
785 }
786 if (finished) break;
787 }
788 if (k>z) z=k;
789 if (k==MAXPAIR)
790 {
791 printf("Some bit didn't change: ");
792 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
793 e[0],f[0],g[0],h[0],x[0],y[0]);
794 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
795 }
796 if (z==MAXPAIR) goto done;
797 }
798 }
799 }
800 done:
801 if (z < MAXPAIR)
802 {
803 printf("Mix success %2d bytes %2d initvals ",i,m);
804 printf("required %d trials\n", z/2);
805 }
806 }
807 printf("\n");
808}
809
810/* Check for reading beyond the end of the buffer and alignment problems */
811void driver3()
812{
813 uint8_t buf[MAXLEN+20], *b;
814 uint32_t len;
815 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
816 uint32_t h;
817 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
818 uint32_t i;
819 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
820 uint32_t j;
821 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
822 uint32_t ref,x,y;
823 uint8_t *p;
824
825 printf("Endianness. These lines should all be the same (for values filled in):\n");
826 printf("%.8x %.8x %.8x\n",
827 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
828 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
829 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
830 p = q;
831 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
832 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
833 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
834 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
835 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
836 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
837 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
838 p = &qq[1];
839 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
840 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
841 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
842 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
843 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
844 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
845 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
846 p = &qqq[2];
847 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
848 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
849 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
850 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
851 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
852 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
853 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
854 p = &qqqq[3];
855 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
856 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
857 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
858 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
859 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
860 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
861 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
862 printf("\n");
863 for (h=0, b=buf+1; h<8; ++h, ++b)
864 {
865 for (i=0; i<MAXLEN; ++i)
866 {
867 len = i;
868 for (j=0; j<i; ++j) *(b+j)=0;
869
870 /* these should all be equal */
871 ref = hashlittle(b, len, (uint32_t)1);
872 *(b+i)=(uint8_t)~0;
873 *(b-1)=(uint8_t)~0;
874 x = hashlittle(b, len, (uint32_t)1);
875 y = hashlittle(b, len, (uint32_t)1);
876 if ((ref != x) || (ref != y))
877 {
878 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
879 h, i);
880 }
881 }
882 }
883}
884
885/* check for problems with nulls */
886 void driver4()
887{
888 uint8_t buf[1];
889 uint32_t h,i,state[HASHSTATE];
890
891
892 buf[0] = ~0;
893 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
894 printf("These should all be different\n");
895 for (i=0, h=0; i<8; ++i)
896 {
897 h = hashlittle(buf, 0, h);
898 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
899 }
900}
901
902
903int main()
904{
905 driver1(); /* test that the key is hashed: used for timings */
906 driver2(); /* test that whole key is hashed thoroughly */
907 driver3(); /* test that nothing but the key is hashed */
908 driver4(); /* test hashing multiple buffers (all buffers are null) */
909 return 1;
910}
911
912#endif /* SELF_TEST */
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.