Panda3D

lookup3.c

00001 /* See http://www.burtleburtle.net/bob/hash/ */
00002 
00003 /*
00004 -------------------------------------------------------------------------------
00005 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
00006 
00007 These are functions for producing 32-bit hashes for hash table lookup.
00008 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 
00009 are externally useful functions.  Routines to test the hash are included 
00010 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
00011 the public domain.  It has no warranty.
00012 
00013 You probably want to use hashlittle().  hashlittle() and hashbig()
00014 hash byte arrays.  hashlittle() is is faster than hashbig() on
00015 little-endian machines.  Intel and AMD are little-endian machines.
00016 On second thought, you probably want hashlittle2(), which is identical to
00017 hashlittle() except it returns two 32-bit hashes for the price of one.  
00018 You could implement hashbig2() if you wanted but I haven't bothered here.
00019 
00020 If you want to find a hash of, say, exactly 7 integers, do
00021   a = i1;  b = i2;  c = i3;
00022   mix(a,b,c);
00023   a += i4; b += i5; c += i6;
00024   mix(a,b,c);
00025   a += i7;
00026   final(a,b,c);
00027 then use c as the hash value.  If you have a variable length array of
00028 4-byte integers to hash, use hashword().  If you have a byte array (like
00029 a character string), use hashlittle().  If you have several byte arrays, or
00030 a mix of things, see the comments above hashlittle().  
00031 
00032 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers, 
00033 then mix those integers.  This is fast (you can do a lot more thorough
00034 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
00035 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
00036 -------------------------------------------------------------------------------
00037 */
00038 /*#define SELF_TEST 1*/
00039 
00040 #include "lookup3.h"
00041 
00042 #include <stdio.h>
00043 #include <stddef.h>
00044 #include <stdlib.h>
00045 #include <time.h>
00046 
00047 #ifdef WORDS_BIGENDIAN
00048 # define HASH_LITTLE_ENDIAN 0
00049 # define HASH_BIG_ENDIAN 1
00050 #else
00051 # define HASH_LITTLE_ENDIAN 1
00052 # define HASH_BIG_ENDIAN 0
00053 #endif
00054 
00055 #define hashsize(n) ((uint32_t)1<<(n))
00056 #define hashmask(n) (hashsize(n)-1)
00057 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
00058 
00059 /*
00060 -------------------------------------------------------------------------------
00061 mix -- mix 3 32-bit values reversibly.
00062 
00063 This is reversible, so any information in (a,b,c) before mix() is
00064 still in (a,b,c) after mix().
00065 
00066 If four pairs of (a,b,c) inputs are run through mix(), or through
00067 mix() in reverse, there are at least 32 bits of the output that
00068 are sometimes the same for one pair and different for another pair.
00069 This was tested for:
00070 * pairs that differed by one bit, by two bits, in any combination
00071   of top bits of (a,b,c), or in any combination of bottom bits of
00072   (a,b,c).
00073 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
00074   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
00075   is commonly produced by subtraction) look like a single 1-bit
00076   difference.
00077 * the base values were pseudorandom, all zero but one bit set, or 
00078   all zero plus a counter that starts at zero.
00079 
00080 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
00081 satisfy this are
00082     4  6  8 16 19  4
00083     9 15  3 18 27 15
00084    14  9  3  7 17  3
00085 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
00086 for "differ" defined as + with a one-bit base and a two-bit delta.  I
00087 used http://burtleburtle.net/bob/hash/avalanche.html to choose 
00088 the operations, constants, and arrangements of the variables.
00089 
00090 This does not achieve avalanche.  There are input bits of (a,b,c)
00091 that fail to affect some output bits of (a,b,c), especially of a.  The
00092 most thoroughly mixed value is c, but it doesn't really even achieve
00093 avalanche in c.
00094 
00095 This allows some parallelism.  Read-after-writes are good at doubling
00096 the number of bits affected, so the goal of mixing pulls in the opposite
00097 direction as the goal of parallelism.  I did what I could.  Rotates
00098 seem to cost as much as shifts on every machine I could lay my hands
00099 on, and rotates are much kinder to the top and bottom bits, so I used
00100 rotates.
00101 -------------------------------------------------------------------------------
00102 */
00103 #define mix(a,b,c) \
00104 { \
00105   a -= c;  a ^= rot(c, 4);  c += b; \
00106   b -= a;  b ^= rot(a, 6);  a += c; \
00107   c -= b;  c ^= rot(b, 8);  b += a; \
00108   a -= c;  a ^= rot(c,16);  c += b; \
00109   b -= a;  b ^= rot(a,19);  a += c; \
00110   c -= b;  c ^= rot(b, 4);  b += a; \
00111 }
00112 
00113 /*
00114 -------------------------------------------------------------------------------
00115 final -- final mixing of 3 32-bit values (a,b,c) into c
00116 
00117 Pairs of (a,b,c) values differing in only a few bits will usually
00118 produce values of c that look totally different.  This was tested for
00119 * pairs that differed by one bit, by two bits, in any combination
00120   of top bits of (a,b,c), or in any combination of bottom bits of
00121   (a,b,c).
00122 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
00123   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
00124   is commonly produced by subtraction) look like a single 1-bit
00125   difference.
00126 * the base values were pseudorandom, all zero but one bit set, or 
00127   all zero plus a counter that starts at zero.
00128 
00129 These constants passed:
00130  14 11 25 16 4 14 24
00131  12 14 25 16 4 14 24
00132 and these came close:
00133   4  8 15 26 3 22 24
00134  10  8 15 26 3 22 24
00135  11  8 15 26 3 22 24
00136 -------------------------------------------------------------------------------
00137 */
00138 #define final(a,b,c) \
00139 { \
00140   c ^= b; c -= rot(b,14); \
00141   a ^= c; a -= rot(c,11); \
00142   b ^= a; b -= rot(a,25); \
00143   c ^= b; c -= rot(b,16); \
00144   a ^= c; a -= rot(c,4);  \
00145   b ^= a; b -= rot(a,14); \
00146   c ^= b; c -= rot(b,24); \
00147 }
00148 
00149 /*
00150 --------------------------------------------------------------------
00151  This works on all machines.  To be useful, it requires
00152  -- that the key be an array of PN_uint32's, and
00153  -- that the length be the number of PN_uint32's in the key
00154 
00155  The function hashword() is identical to hashlittle() on little-endian
00156  machines, and identical to hashbig() on big-endian machines,
00157  except that the length has to be measured in PN_uint32s rather than in
00158  bytes.  hashlittle() is more complicated than hashword() only because
00159  hashlittle() has to dance around fitting the key bytes into registers.
00160 --------------------------------------------------------------------
00161 */
00162 PN_uint32 hashword(
00163 const PN_uint32 *k,                   /* the key, an array of PN_uint32 values */
00164 size_t          length,               /* the length of the key, in PN_uint32s */
00165 PN_uint32        initval)         /* the previous hash, or an arbitrary value */
00166 {
00167   PN_uint32 a,b,c;
00168 
00169   /* Set up the internal state */
00170   a = b = c = 0xdeadbeef + (((PN_uint32)length)<<2) + initval;
00171 
00172   /*------------------------------------------------- handle most of the key */
00173   while (length > 3)
00174   {
00175     a += k[0];
00176     b += k[1];
00177     c += k[2];
00178     mix(a,b,c);
00179     length -= 3;
00180     k += 3;
00181   }
00182 
00183   /*------------------------------------------- handle the last 3 PN_uint32's */
00184   switch(length)                     /* all the case statements fall through */
00185   { 
00186   case 3 : c+=k[2];
00187   case 2 : b+=k[1];
00188   case 1 : a+=k[0];
00189     final(a,b,c);
00190   case 0:     /* case 0: nothing left to add */
00191     break;
00192   }
00193   /*------------------------------------------------------ report the result */
00194   return c;
00195 }
00196 
00197 
00198 /*
00199 -------------------------------------------------------------------------------
00200 hashlittle() -- hash a variable-length key into a 32-bit value
00201   k       : the key (the unaligned variable-length array of bytes)
00202   length  : the length of the key, counting by bytes
00203   initval : can be any 4-byte value
00204 Returns a 32-bit value.  Every bit of the key affects every bit of
00205 the return value.  Two keys differing by one or two bits will have
00206 totally different hash values.
00207 
00208 The best hash table sizes are powers of 2.  There is no need to do
00209 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
00210 use a bitmask.  For example, if you need only 10 bits, do
00211   h = (h & hashmask(10));
00212 In which case, the hash table should have hashsize(10) elements.
00213 
00214 If you are hashing n strings (PN_uint8 **)k, do it like this:
00215   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
00216 
00217 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
00218 code any way you wish, private, educational, or commercial.  It's free.
00219 
00220 Use for hash table lookup, or anything where one collision in 2^^32 is
00221 acceptable.  Do NOT use for cryptographic purposes.
00222 -------------------------------------------------------------------------------
00223 */
00224 
00225 PN_uint32 hashlittle( const void *key, size_t length, PN_uint32 initval)
00226 {
00227   PN_uint32 a,b,c;                                          /* internal state */
00228   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
00229 
00230   /* Set up the internal state */
00231   a = b = c = 0xdeadbeef + ((PN_uint32)length) + initval;
00232 
00233   u.ptr = key;
00234   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
00235     const PN_uint32 *k =(PN_uint32*) key;                           /* read 32-bit chunks */
00236 #ifdef VALGRIND
00237     const PN_uint8  *k8;
00238 #endif
00239 
00240     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
00241     while (length > 12)
00242     {
00243       a += k[0];
00244       b += k[1];
00245       c += k[2];
00246       mix(a,b,c);
00247       length -= 12;
00248       k += 3;
00249     }
00250 
00251     /*----------------------------- handle the last (probably partial) block */
00252     /* 
00253      * "k[2]&0xffffff" actually reads beyond the end of the string, but
00254      * then masks off the part it's not allowed to read.  Because the
00255      * string is aligned, the masked-off tail is in the same word as the
00256      * rest of the string.  Every machine with memory protection I've seen
00257      * does it on word boundaries, so is OK with this.  But VALGRIND will
00258      * still catch it and complain.  The masking trick does make the hash
00259      * noticably faster for short strings (like English words).
00260      */
00261 #ifndef VALGRIND
00262 
00263     switch(length)
00264     {
00265     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00266     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
00267     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
00268     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
00269     case 8 : b+=k[1]; a+=k[0]; break;
00270     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
00271     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
00272     case 5 : b+=k[1]&0xff; a+=k[0]; break;
00273     case 4 : a+=k[0]; break;
00274     case 3 : a+=k[0]&0xffffff; break;
00275     case 2 : a+=k[0]&0xffff; break;
00276     case 1 : a+=k[0]&0xff; break;
00277     case 0 : return c;              /* zero length strings require no mixing */
00278     }
00279 
00280 #else /* make valgrind happy */
00281 
00282     k8 = (const PN_uint8 *)k;
00283     switch(length)
00284     {
00285     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00286     case 11: c+=((PN_uint32)k8[10])<<16;  /* fall through */
00287     case 10: c+=((PN_uint32)k8[9])<<8;    /* fall through */
00288     case 9 : c+=k8[8];                   /* fall through */
00289     case 8 : b+=k[1]; a+=k[0]; break;
00290     case 7 : b+=((PN_uint32)k8[6])<<16;   /* fall through */
00291     case 6 : b+=((PN_uint32)k8[5])<<8;    /* fall through */
00292     case 5 : b+=k8[4];                   /* fall through */
00293     case 4 : a+=k[0]; break;
00294     case 3 : a+=((PN_uint32)k8[2])<<16;   /* fall through */
00295     case 2 : a+=((PN_uint32)k8[1])<<8;    /* fall through */
00296     case 1 : a+=k8[0]; break;
00297     case 0 : return c;
00298     }
00299 
00300 #endif /* !valgrind */
00301 
00302   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
00303     const PN_uint16 *k = (PN_uint16*)key;                           /* read 16-bit chunks */
00304     const PN_uint8  *k8;
00305 
00306     /*--------------- all but last block: aligned reads and different mixing */
00307     while (length > 12)
00308     {
00309       a += k[0] + (((PN_uint32)k[1])<<16);
00310       b += k[2] + (((PN_uint32)k[3])<<16);
00311       c += k[4] + (((PN_uint32)k[5])<<16);
00312       mix(a,b,c);
00313       length -= 12;
00314       k += 6;
00315     }
00316 
00317     /*----------------------------- handle the last (probably partial) block */
00318     k8 = (const PN_uint8 *)k;
00319     switch(length)
00320     {
00321     case 12: c+=k[4]+(((PN_uint32)k[5])<<16);
00322              b+=k[2]+(((PN_uint32)k[3])<<16);
00323              a+=k[0]+(((PN_uint32)k[1])<<16);
00324              break;
00325     case 11: c+=((PN_uint32)k8[10])<<16;     /* fall through */
00326     case 10: c+=k[4];
00327              b+=k[2]+(((PN_uint32)k[3])<<16);
00328              a+=k[0]+(((PN_uint32)k[1])<<16);
00329              break;
00330     case 9 : c+=k8[8];                      /* fall through */
00331     case 8 : b+=k[2]+(((PN_uint32)k[3])<<16);
00332              a+=k[0]+(((PN_uint32)k[1])<<16);
00333              break;
00334     case 7 : b+=((PN_uint32)k8[6])<<16;      /* fall through */
00335     case 6 : b+=k[2];
00336              a+=k[0]+(((PN_uint32)k[1])<<16);
00337              break;
00338     case 5 : b+=k8[4];                      /* fall through */
00339     case 4 : a+=k[0]+(((PN_uint32)k[1])<<16);
00340              break;
00341     case 3 : a+=((PN_uint32)k8[2])<<16;      /* fall through */
00342     case 2 : a+=k[0];
00343              break;
00344     case 1 : a+=k8[0];
00345              break;
00346     case 0 : return c;                     /* zero length requires no mixing */
00347     }
00348 
00349   } else {                        /* need to read the key one byte at a time */
00350     const PN_uint8 *k =(PN_uint8*) key;
00351 
00352     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
00353     while (length > 12)
00354     {
00355       a += k[0];
00356       a += ((PN_uint32)k[1])<<8;
00357       a += ((PN_uint32)k[2])<<16;
00358       a += ((PN_uint32)k[3])<<24;
00359       b += k[4];
00360       b += ((PN_uint32)k[5])<<8;
00361       b += ((PN_uint32)k[6])<<16;
00362       b += ((PN_uint32)k[7])<<24;
00363       c += k[8];
00364       c += ((PN_uint32)k[9])<<8;
00365       c += ((PN_uint32)k[10])<<16;
00366       c += ((PN_uint32)k[11])<<24;
00367       mix(a,b,c);
00368       length -= 12;
00369       k += 12;
00370     }
00371 
00372     /*-------------------------------- last block: affect all 32 bits of (c) */
00373     switch(length)                   /* all the case statements fall through */
00374     {
00375     case 12: c+=((PN_uint32)k[11])<<24;
00376     case 11: c+=((PN_uint32)k[10])<<16;
00377     case 10: c+=((PN_uint32)k[9])<<8;
00378     case 9 : c+=k[8];
00379     case 8 : b+=((PN_uint32)k[7])<<24;
00380     case 7 : b+=((PN_uint32)k[6])<<16;
00381     case 6 : b+=((PN_uint32)k[5])<<8;
00382     case 5 : b+=k[4];
00383     case 4 : a+=((PN_uint32)k[3])<<24;
00384     case 3 : a+=((PN_uint32)k[2])<<16;
00385     case 2 : a+=((PN_uint32)k[1])<<8;
00386     case 1 : a+=k[0];
00387              break;
00388     case 0 : return c;
00389     }
00390   }
00391 
00392   final(a,b,c);
00393   return c;
00394 }
00395 
00396 
00397 /*
00398  * hashlittle2: return 2 32-bit hash values
00399  *
00400  * This is identical to hashlittle(), except it returns two 32-bit hash
00401  * values instead of just one.  This is good enough for hash table
00402  * lookup with 2^^64 buckets, or if you want a second hash if you're not
00403  * happy with the first, or if you want a probably-unique 64-bit ID for
00404  * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
00405  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
00406  */
00407 void hashlittle2( 
00408   const void *key,       /* the key to hash */
00409   size_t      length,    /* length of the key */
00410   PN_uint32   *pc,        /* IN: primary initval, OUT: primary hash */
00411   PN_uint32   *pb)        /* IN: secondary initval, OUT: secondary hash */
00412 {
00413   PN_uint32 a,b,c;                                          /* internal state */
00414   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
00415 
00416   /* Set up the internal state */
00417   a = b = c = 0xdeadbeef + ((PN_uint32)length) + *pc;
00418   c += *pb;
00419 
00420   u.ptr = key;
00421   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
00422     const PN_uint32 *k = (PN_uint32*)key;                           /* read 32-bit chunks */
00423 #ifdef VALGRIND
00424     const PN_uint8  *k8;
00425 #endif
00426 
00427     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
00428     while (length > 12)
00429     {
00430       a += k[0];
00431       b += k[1];
00432       c += k[2];
00433       mix(a,b,c);
00434       length -= 12;
00435       k += 3;
00436     }
00437 
00438     /*----------------------------- handle the last (probably partial) block */
00439     /* 
00440      * "k[2]&0xffffff" actually reads beyond the end of the string, but
00441      * then masks off the part it's not allowed to read.  Because the
00442      * string is aligned, the masked-off tail is in the same word as the
00443      * rest of the string.  Every machine with memory protection I've seen
00444      * does it on word boundaries, so is OK with this.  But VALGRIND will
00445      * still catch it and complain.  The masking trick does make the hash
00446      * noticably faster for short strings (like English words).
00447      */
00448 #ifndef VALGRIND
00449 
00450     switch(length)
00451     {
00452     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00453     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
00454     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
00455     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
00456     case 8 : b+=k[1]; a+=k[0]; break;
00457     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
00458     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
00459     case 5 : b+=k[1]&0xff; a+=k[0]; break;
00460     case 4 : a+=k[0]; break;
00461     case 3 : a+=k[0]&0xffffff; break;
00462     case 2 : a+=k[0]&0xffff; break;
00463     case 1 : a+=k[0]&0xff; break;
00464     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
00465     }
00466 
00467 #else /* make valgrind happy */
00468 
00469     k8 = (const PN_uint8 *)k;
00470     switch(length)
00471     {
00472     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00473     case 11: c+=((PN_uint32)k8[10])<<16;  /* fall through */
00474     case 10: c+=((PN_uint32)k8[9])<<8;    /* fall through */
00475     case 9 : c+=k8[8];                   /* fall through */
00476     case 8 : b+=k[1]; a+=k[0]; break;
00477     case 7 : b+=((PN_uint32)k8[6])<<16;   /* fall through */
00478     case 6 : b+=((PN_uint32)k8[5])<<8;    /* fall through */
00479     case 5 : b+=k8[4];                   /* fall through */
00480     case 4 : a+=k[0]; break;
00481     case 3 : a+=((PN_uint32)k8[2])<<16;   /* fall through */
00482     case 2 : a+=((PN_uint32)k8[1])<<8;    /* fall through */
00483     case 1 : a+=k8[0]; break;
00484     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
00485     }
00486 
00487 #endif /* !valgrind */
00488 
00489   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
00490     const PN_uint16 *k = (PN_uint16*)key;                           /* read 16-bit chunks */
00491     const PN_uint8  *k8;
00492 
00493     /*--------------- all but last block: aligned reads and different mixing */
00494     while (length > 12)
00495     {
00496       a += k[0] + (((PN_uint32)k[1])<<16);
00497       b += k[2] + (((PN_uint32)k[3])<<16);
00498       c += k[4] + (((PN_uint32)k[5])<<16);
00499       mix(a,b,c);
00500       length -= 12;
00501       k += 6;
00502     }
00503 
00504     /*----------------------------- handle the last (probably partial) block */
00505     k8 = (const PN_uint8 *)k;
00506     switch(length)
00507     {
00508     case 12: c+=k[4]+(((PN_uint32)k[5])<<16);
00509              b+=k[2]+(((PN_uint32)k[3])<<16);
00510              a+=k[0]+(((PN_uint32)k[1])<<16);
00511              break;
00512     case 11: c+=((PN_uint32)k8[10])<<16;     /* fall through */
00513     case 10: c+=k[4];
00514              b+=k[2]+(((PN_uint32)k[3])<<16);
00515              a+=k[0]+(((PN_uint32)k[1])<<16);
00516              break;
00517     case 9 : c+=k8[8];                      /* fall through */
00518     case 8 : b+=k[2]+(((PN_uint32)k[3])<<16);
00519              a+=k[0]+(((PN_uint32)k[1])<<16);
00520              break;
00521     case 7 : b+=((PN_uint32)k8[6])<<16;      /* fall through */
00522     case 6 : b+=k[2];
00523              a+=k[0]+(((PN_uint32)k[1])<<16);
00524              break;
00525     case 5 : b+=k8[4];                      /* fall through */
00526     case 4 : a+=k[0]+(((PN_uint32)k[1])<<16);
00527              break;
00528     case 3 : a+=((PN_uint32)k8[2])<<16;      /* fall through */
00529     case 2 : a+=k[0];
00530              break;
00531     case 1 : a+=k8[0];
00532              break;
00533     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
00534     }
00535 
00536   } else {                        /* need to read the key one byte at a time */
00537     const PN_uint8 *k = (PN_uint8*)key;
00538 
00539     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
00540     while (length > 12)
00541     {
00542       a += k[0];
00543       a += ((PN_uint32)k[1])<<8;
00544       a += ((PN_uint32)k[2])<<16;
00545       a += ((PN_uint32)k[3])<<24;
00546       b += k[4];
00547       b += ((PN_uint32)k[5])<<8;
00548       b += ((PN_uint32)k[6])<<16;
00549       b += ((PN_uint32)k[7])<<24;
00550       c += k[8];
00551       c += ((PN_uint32)k[9])<<8;
00552       c += ((PN_uint32)k[10])<<16;
00553       c += ((PN_uint32)k[11])<<24;
00554       mix(a,b,c);
00555       length -= 12;
00556       k += 12;
00557     }
00558 
00559     /*-------------------------------- last block: affect all 32 bits of (c) */
00560     switch(length)                   /* all the case statements fall through */
00561     {
00562     case 12: c+=((PN_uint32)k[11])<<24;
00563     case 11: c+=((PN_uint32)k[10])<<16;
00564     case 10: c+=((PN_uint32)k[9])<<8;
00565     case 9 : c+=k[8];
00566     case 8 : b+=((PN_uint32)k[7])<<24;
00567     case 7 : b+=((PN_uint32)k[6])<<16;
00568     case 6 : b+=((PN_uint32)k[5])<<8;
00569     case 5 : b+=k[4];
00570     case 4 : a+=((PN_uint32)k[3])<<24;
00571     case 3 : a+=((PN_uint32)k[2])<<16;
00572     case 2 : a+=((PN_uint32)k[1])<<8;
00573     case 1 : a+=k[0];
00574              break;
00575     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
00576     }
00577   }
00578 
00579   final(a,b,c);
00580   *pc=c; *pb=b; return;             /* zero length strings require no mixing */
00581 }
00582 
00583 
00584 
00585 /*
00586  * hashbig():
00587  * This is the same as hashword() on big-endian machines.  It is different
00588  * from hashlittle() on all machines.  hashbig() takes advantage of
00589  * big-endian byte ordering. 
00590  */
00591 PN_uint32 hashbig( const void *key, size_t length, PN_uint32 initval)
00592 {
00593   PN_uint32 a,b,c;
00594   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
00595 
00596   /* Set up the internal state */
00597   a = b = c = 0xdeadbeef + ((PN_uint32)length) + initval;
00598 
00599   u.ptr = key;
00600   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
00601     const PN_uint32 *k = (PN_uint32*)key;                           /* read 32-bit chunks */
00602 #ifdef VALGRIND
00603     const PN_uint8  *k8;
00604 #endif
00605 
00606     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
00607     while (length > 12)
00608     {
00609       a += k[0];
00610       b += k[1];
00611       c += k[2];
00612       mix(a,b,c);
00613       length -= 12;
00614       k += 3;
00615     }
00616 
00617     /*----------------------------- handle the last (probably partial) block */
00618     /* 
00619      * "k[2]<<8" actually reads beyond the end of the string, but
00620      * then shifts out the part it's not allowed to read.  Because the
00621      * string is aligned, the illegal read is in the same word as the
00622      * rest of the string.  Every machine with memory protection I've seen
00623      * does it on word boundaries, so is OK with this.  But VALGRIND will
00624      * still catch it and complain.  The masking trick does make the hash
00625      * noticably faster for short strings (like English words).
00626      */
00627 #ifndef VALGRIND
00628 
00629     switch(length)
00630     {
00631     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00632     case 11: c+=k[2]<<8; b+=k[1]; a+=k[0]; break;
00633     case 10: c+=k[2]<<16; b+=k[1]; a+=k[0]; break;
00634     case 9 : c+=k[2]<<24; b+=k[1]; a+=k[0]; break;
00635     case 8 : b+=k[1]; a+=k[0]; break;
00636     case 7 : b+=k[1]<<8; a+=k[0]; break;
00637     case 6 : b+=k[1]<<16; a+=k[0]; break;
00638     case 5 : b+=k[1]<<24; a+=k[0]; break;
00639     case 4 : a+=k[0]; break;
00640     case 3 : a+=k[0]<<8; break;
00641     case 2 : a+=k[0]<<16; break;
00642     case 1 : a+=k[0]<<24; break;
00643     case 0 : return c;              /* zero length strings require no mixing */
00644     }
00645 
00646 #else  /* make valgrind happy */
00647 
00648     k8 = (const PN_uint8 *)k;
00649     switch(length)                   /* all the case statements fall through */
00650     {
00651     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
00652     case 11: c+=((PN_uint32)k8[10])<<8;  /* fall through */
00653     case 10: c+=((PN_uint32)k8[9])<<16;  /* fall through */
00654     case 9 : c+=((PN_uint32)k8[8])<<24;  /* fall through */
00655     case 8 : b+=k[1]; a+=k[0]; break;
00656     case 7 : b+=((PN_uint32)k8[6])<<8;   /* fall through */
00657     case 6 : b+=((PN_uint32)k8[5])<<16;  /* fall through */
00658     case 5 : b+=((PN_uint32)k8[4])<<24;  /* fall through */
00659     case 4 : a+=k[0]; break;
00660     case 3 : a+=((PN_uint32)k8[2])<<8;   /* fall through */
00661     case 2 : a+=((PN_uint32)k8[1])<<16;  /* fall through */
00662     case 1 : a+=((PN_uint32)k8[0])<<24; break;
00663     case 0 : return c;
00664     }
00665 
00666 #endif /* !VALGRIND */
00667 
00668   } else {                        /* need to read the key one byte at a time */
00669     const PN_uint8 *k = (PN_uint8*)key;
00670 
00671     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
00672     while (length > 12)
00673     {
00674       a += ((PN_uint32)k[0])<<24;
00675       a += ((PN_uint32)k[1])<<16;
00676       a += ((PN_uint32)k[2])<<8;
00677       a += ((PN_uint32)k[3]);
00678       b += ((PN_uint32)k[4])<<24;
00679       b += ((PN_uint32)k[5])<<16;
00680       b += ((PN_uint32)k[6])<<8;
00681       b += ((PN_uint32)k[7]);
00682       c += ((PN_uint32)k[8])<<24;
00683       c += ((PN_uint32)k[9])<<16;
00684       c += ((PN_uint32)k[10])<<8;
00685       c += ((PN_uint32)k[11]);
00686       mix(a,b,c);
00687       length -= 12;
00688       k += 12;
00689     }
00690 
00691     /*-------------------------------- last block: affect all 32 bits of (c) */
00692     switch(length)                   /* all the case statements fall through */
00693     {
00694     case 12: c+=k[11];
00695     case 11: c+=((PN_uint32)k[10])<<8;
00696     case 10: c+=((PN_uint32)k[9])<<16;
00697     case 9 : c+=((PN_uint32)k[8])<<24;
00698     case 8 : b+=k[7];
00699     case 7 : b+=((PN_uint32)k[6])<<8;
00700     case 6 : b+=((PN_uint32)k[5])<<16;
00701     case 5 : b+=((PN_uint32)k[4])<<24;
00702     case 4 : a+=k[3];
00703     case 3 : a+=((PN_uint32)k[2])<<8;
00704     case 2 : a+=((PN_uint32)k[1])<<16;
00705     case 1 : a+=((PN_uint32)k[0])<<24;
00706              break;
00707     case 0 : return c;
00708     }
00709   }
00710 
00711   final(a,b,c);
00712   return c;
00713 }
00714 
00715 
00716 #ifdef SELF_TEST
00717 
00718 /* used for timings */
00719 void driver1()
00720 {
00721   PN_uint8 buf[256];
00722   PN_uint32 i;
00723   PN_uint32 h=0;
00724   time_t a,z;
00725 
00726   time(&a);
00727   for (i=0; i<256; ++i) buf[i] = 'x';
00728   for (i=0; i<1; ++i) 
00729   {
00730     h = hashlittle(&buf[0],1,h);
00731   }
00732   time(&z);
00733   if (z-a > 0) printf("time %d %.8x\n", z-a, h);
00734 }
00735 
00736 /* check that every input bit changes every output bit half the time */
00737 #define HASHSTATE 1
00738 #define HASHLEN   1
00739 #define MAXPAIR 60
00740 #define MAXLEN  70
00741 void driver2()
00742 {
00743   PN_uint8 qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
00744   PN_uint32 c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
00745   PN_uint32 e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
00746   PN_uint32 x[HASHSTATE],y[HASHSTATE];
00747   PN_uint32 hlen;
00748 
00749   printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
00750   for (hlen=0; hlen < MAXLEN; ++hlen)
00751   {
00752     z=0;
00753     for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
00754     {
00755       for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
00756       {
00757   for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
00758   {
00759     for (l=0; l<HASHSTATE; ++l)
00760       e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((PN_uint32)0);
00761 
00762           /*---- check that every output bit is affected by that input bit */
00763     for (k=0; k<MAXPAIR; k+=2)
00764     { 
00765       PN_uint32 finished=1;
00766       /* keys have one bit different */
00767       for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (PN_uint8)0;}
00768       /* have a and b be two keys differing in only one bit */
00769       a[i] ^= (k<<j);
00770       a[i] ^= (k>>(8-j));
00771        c[0] = hashlittle(a, hlen, m);
00772       b[i] ^= ((k+1)<<j);
00773       b[i] ^= ((k+1)>>(8-j));
00774        d[0] = hashlittle(b, hlen, m);
00775       /* check every bit is 1, 0, set, and not set at least once */
00776       for (l=0; l<HASHSTATE; ++l)
00777       {
00778         e[l] &= (c[l]^d[l]);
00779         f[l] &= ~(c[l]^d[l]);
00780         g[l] &= c[l];
00781         h[l] &= ~c[l];
00782         x[l] &= d[l];
00783         y[l] &= ~d[l];
00784         if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
00785       }
00786       if (finished) break;
00787     }
00788     if (k>z) z=k;
00789     if (k==MAXPAIR) 
00790     {
00791        printf("Some bit didn't change: ");
00792        printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
00793               e[0],f[0],g[0],h[0],x[0],y[0]);
00794        printf("i %d j %d m %d len %d\n", i, j, m, hlen);
00795     }
00796     if (z==MAXPAIR) goto done;
00797   }
00798       }
00799     }
00800    done:
00801     if (z < MAXPAIR)
00802     {
00803       printf("Mix success  %2d bytes  %2d initvals  ",i,m);
00804       printf("required  %d  trials\n", z/2);
00805     }
00806   }
00807   printf("\n");
00808 }
00809 
00810 /* Check for reading beyond the end of the buffer and alignment problems */
00811 void driver3()
00812 {
00813   PN_uint8 buf[MAXLEN+20], *b;
00814   PN_uint32 len;
00815   PN_uint8 q[] = "This is the time for all good men to come to the aid of their country...";
00816   PN_uint32 h;
00817   PN_uint8 qq[] = "xThis is the time for all good men to come to the aid of their country...";
00818   PN_uint32 i;
00819   PN_uint8 qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
00820   PN_uint32 j;
00821   PN_uint8 qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
00822   PN_uint32 ref,x,y;
00823   PN_uint8 *p;
00824 
00825   printf("Endianness.  These lines should all be the same (for values filled in):\n");
00826   printf("%.8x                            %.8x                            %.8x\n",
00827          hashword((const PN_uint32 *)q, (sizeof(q)-1)/4, 13),
00828          hashword((const PN_uint32 *)q, (sizeof(q)-5)/4, 13),
00829          hashword((const PN_uint32 *)q, (sizeof(q)-9)/4, 13));
00830   p = q;
00831   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
00832          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
00833          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
00834          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
00835          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
00836          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
00837          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
00838   p = &qq[1];
00839   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
00840          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
00841          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
00842          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
00843          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
00844          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
00845          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
00846   p = &qqq[2];
00847   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
00848          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
00849          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
00850          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
00851          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
00852          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
00853          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
00854   p = &qqqq[3];
00855   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
00856          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
00857          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
00858          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
00859          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
00860          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
00861          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
00862   printf("\n");
00863   for (h=0, b=buf+1; h<8; ++h, ++b)
00864   {
00865     for (i=0; i<MAXLEN; ++i)
00866     {
00867       len = i;
00868       for (j=0; j<i; ++j) *(b+j)=0;
00869 
00870       /* these should all be equal */
00871       ref = hashlittle(b, len, (PN_uint32)1);
00872       *(b+i)=(PN_uint8)~0;
00873       *(b-1)=(PN_uint8)~0;
00874       x = hashlittle(b, len, (PN_uint32)1);
00875       y = hashlittle(b, len, (PN_uint32)1);
00876       if ((ref != x) || (ref != y)) 
00877       {
00878   printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
00879                h, i);
00880       }
00881     }
00882   }
00883 }
00884 
00885 /* check for problems with nulls */
00886  void driver4()
00887 {
00888   PN_uint8 buf[1];
00889   PN_uint32 h,i,state[HASHSTATE];
00890 
00891 
00892   buf[0] = ~0;
00893   for (i=0; i<HASHSTATE; ++i) state[i] = 1;
00894   printf("These should all be different\n");
00895   for (i=0, h=0; i<8; ++i)
00896   {
00897     h = hashlittle(buf, 0, h);
00898     printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
00899   }
00900 }
00901 
00902 
00903 int main()
00904 {
00905   driver1();   /* test that the key is hashed: used for timings */
00906   driver2();   /* test that whole key is hashed thoroughly */
00907   driver3();   /* test that nothing but the key is hashed */
00908   driver4();   /* test hashing multiple buffers (all buffers are null) */
00909   return 1;
00910 }
00911 
00912 #endif  /* SELF_TEST */
 All Classes Functions Variables Enumerations