chronozphere
10-11-2010, 06:51 PM
Hey guys
I did an attempt to port this hash function (http://www.azillionmonkeys.com/qed/hash.html) to pascal. Here's the result:
function getWord(const data: PChar): Word;
begin
Result := PWord( data )^;
end;
function SuperFastHash(const str: String): Cardinal;
var
data: PChar;
hash, tmp: Cardinal;
rem, len: Integer;
begin
Result := 0;
if str = '' then Exit;
hash := len;
data := PChar(str);
len := Length(str);
rem := len and 3;
len := len shr 2;
//Main loop
while len > 0 do
begin
hash := hash + getWord( data );
tmp := (getWord( data+2 ) shl 11) xor hash;
hash := (hash shl 16) xor tmp;
data := data + 2 * sizeof(Word);
hash := hash + (hash shr 11);
dec(len);
end;
//Handle end cases
case rem of
3: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 16);
hash := hash xor (byte(data[ sizeof(Word) ]) shl 18);
hash := hash + (hash shr 11);
end;
2: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 11);
hash := hash + (hash shr 17);
end;
1: begin
hash := hash + PByte(data)^;
hash := hash xor (hash shl 10);
hash := hash + (hash shr 1);
end;
end;
//Force avalanching of final 127 bits
hash := hash xor (hash shl 3);
hash := hash + (hash shr 5);
hash := hash xor (hash shl 4);
hash := hash + (hash shr 17);
hash := hash xor (hash shl 25);
hash := hash + (hash shr 6);
Result := hash;
end;
You can test it with the following code:
//Benchmark routine
procedure TForm1.Button2Click(Sender: TObject);
var
I, J: Integer;
T1, T2, F: Int64;
ms: Single;
Strings: array [0..2] of string;
const
NUMBER = 5000000;
begin
for I := 0 to 2 do
for J := 0 to 255 do
Strings[I] := Strings[I] + Char(Random(255));
Randomize();
QueryPerformanceCounter(T1);
for I := 0 to NUMBER do
SuperFastHash( Strings[ ((I mod 5)*(I mod 8)+2) mod 3 ] );
QueryPerformanceCounter(T2);
QueryPerformanceFrequency(F);
ms := ((T2 - T1) / F) * 1000;
ShowMessage(IntToStr(NUMBER)+' executions in: '+Format('%3.3n',[ms])+'ms')
end;
//Tests whether the function is deterministic (allways returns the same output for the same input)
procedure TForm1.Button3Click(Sender: TObject);
var
I, J: Integer;
H1, H2: Cardinal;
Data: String;
begin
for I := 0 to 1000 do
begin
Data := '';
for J := 0 to 255 do
Data := Data + Char(Random(255));
Assert( SuperFastHash( Data ) = SuperFastHash( Data ) );
end;
ShowMessage('Test passed!');
end;
So far it seems to work quite good. The benchmark function tells me that it can execute 5.000.000 times in 1.77 seconds on my 2.8Ghz machine.
I do have three questions:
The function seems to work. However, I don't know if I made a mistake somewhere during the translation. Could someone take a brief look and tell me if my type-casts are OK?? This might not be neccesary, but I'd still appreciate it.
Also, do you guys know other fast hashing algorithms? Maybe it would be fun to compare this one to others, to see which is fastest. ;)
I would like to use a fast hashing function as a base for a portable HashMap class (that also works in delphi). I guess I should just create an array with size N and do "mod N" on the result returned by the hash-function. Is that a good way to implement it?
Thanks!
I did an attempt to port this hash function (http://www.azillionmonkeys.com/qed/hash.html) to pascal. Here's the result:
function getWord(const data: PChar): Word;
begin
Result := PWord( data )^;
end;
function SuperFastHash(const str: String): Cardinal;
var
data: PChar;
hash, tmp: Cardinal;
rem, len: Integer;
begin
Result := 0;
if str = '' then Exit;
hash := len;
data := PChar(str);
len := Length(str);
rem := len and 3;
len := len shr 2;
//Main loop
while len > 0 do
begin
hash := hash + getWord( data );
tmp := (getWord( data+2 ) shl 11) xor hash;
hash := (hash shl 16) xor tmp;
data := data + 2 * sizeof(Word);
hash := hash + (hash shr 11);
dec(len);
end;
//Handle end cases
case rem of
3: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 16);
hash := hash xor (byte(data[ sizeof(Word) ]) shl 18);
hash := hash + (hash shr 11);
end;
2: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 11);
hash := hash + (hash shr 17);
end;
1: begin
hash := hash + PByte(data)^;
hash := hash xor (hash shl 10);
hash := hash + (hash shr 1);
end;
end;
//Force avalanching of final 127 bits
hash := hash xor (hash shl 3);
hash := hash + (hash shr 5);
hash := hash xor (hash shl 4);
hash := hash + (hash shr 17);
hash := hash xor (hash shl 25);
hash := hash + (hash shr 6);
Result := hash;
end;
You can test it with the following code:
//Benchmark routine
procedure TForm1.Button2Click(Sender: TObject);
var
I, J: Integer;
T1, T2, F: Int64;
ms: Single;
Strings: array [0..2] of string;
const
NUMBER = 5000000;
begin
for I := 0 to 2 do
for J := 0 to 255 do
Strings[I] := Strings[I] + Char(Random(255));
Randomize();
QueryPerformanceCounter(T1);
for I := 0 to NUMBER do
SuperFastHash( Strings[ ((I mod 5)*(I mod 8)+2) mod 3 ] );
QueryPerformanceCounter(T2);
QueryPerformanceFrequency(F);
ms := ((T2 - T1) / F) * 1000;
ShowMessage(IntToStr(NUMBER)+' executions in: '+Format('%3.3n',[ms])+'ms')
end;
//Tests whether the function is deterministic (allways returns the same output for the same input)
procedure TForm1.Button3Click(Sender: TObject);
var
I, J: Integer;
H1, H2: Cardinal;
Data: String;
begin
for I := 0 to 1000 do
begin
Data := '';
for J := 0 to 255 do
Data := Data + Char(Random(255));
Assert( SuperFastHash( Data ) = SuperFastHash( Data ) );
end;
ShowMessage('Test passed!');
end;
So far it seems to work quite good. The benchmark function tells me that it can execute 5.000.000 times in 1.77 seconds on my 2.8Ghz machine.
I do have three questions:
The function seems to work. However, I don't know if I made a mistake somewhere during the translation. Could someone take a brief look and tell me if my type-casts are OK?? This might not be neccesary, but I'd still appreciate it.
Also, do you guys know other fast hashing algorithms? Maybe it would be fun to compare this one to others, to see which is fastest. ;)
I would like to use a fast hashing function as a base for a portable HashMap class (that also works in delphi). I guess I should just create an array with size N and do "mod N" on the result returned by the hash-function. Is that a good way to implement it?
Thanks!