JavaScript 224 LOC
This function will calculate all results of a given length, optionally limited to results starting with a given string, that is if it's done within the 1 second that I have allotted. If no results exist it is done virtually instantly. The function rely heavily on a very slow custom built 64 bit integer library, an equivalent C function would probably be 100 to 1000 times faster. I might set up a nice web interface.
<html>
<head>
<title>Break Hash</title>
</head>
<body>
<pre id="out"></pre>
<script>
function to64(inp){
var res=[]
res[0]=inp%(256*256*256)
inp=Math.floor(inp/(256*256*256))
res[1]=inp%(256*256*256)
inp=Math.floor(inp/(256*256*256))
res[2]=inp%(256*256)
return res
}
function add64(inp1,inp2){
var res=[]
res[0]=inp1[0]+inp2[0]
res[1]=inp1[1]+inp2[1]+Math.floor(res[0]/(256*256*256))
res[2]=(inp1[2]+inp2[2]+Math.floor(res[1]/(256*256*256)))&(256*256-1)
res[0]&=(256*256*256-1)
res[1]&=(256*256*256-1)
return res
}
function neg64(inp){
var res=[]
res[0]=inp[0]^(256*256*256-1)
res[1]=inp[1]^(256*256*256-1)
res[2]=inp[2]^(256*256-1)
return add64(res,[1,0,0])
}
function abs64(inp){
if(inp[2]>=(256*128)){
return neg64(inp)
}
return inp
}
function mul64(inp1,inp2){
var res=[]
res[0]=inp1[0]*inp2[0]
res[1]=inp1[1]*inp2[0]+inp1[0]*inp2[1]+Math.floor(res[0]/(256*256*256))
res[2]=inp1[2]*inp2[0]+inp1[1]*inp2[1]+inp1[0]*inp2[2]+Math.floor(res[1]/(256*256*256))
overflow=(res[2]>=(256*256)) //Global shortcut
res[2]&=(256*256-1)
res[0]&=(256*256*256-1)
res[1]&=(256*256*256-1)
return res
}
function mod64(inp1,inp2){ //64,float -> float
var res=inp1[2]%inp2
res=(res*(256*256*256)+inp1[1])%inp2
return (res*(256*256*256)+inp1[0])%inp2
}
function div64(inp1,inp2){ //64,float -> 64
var res=[]
res[2]=Math.floor(inp1[2]/inp2)
var mem1=inp1[1]+(inp1[2]%inp2)*(256*256*256)
res[1]=Math.floor(mem1/inp2)
res[0]=Math.floor((inp1[0]+(mem1%inp2)*(256*256*256))/inp2)
return add64(res,[0,0,0])
}
function cmp64(inp1,inp2){
for(var a=2;a>=0;a--){
if(inp1[a]>inp2[a]){
return 1
}
if(inp1[a]<inp2[a]){
return -1
}
}
return 0
}
function string64(inp){
var res=""
while(cmp64(inp,[0,0,0])){
res=mod64(inp,10)+res
inp=div64(inp,10)
}
return res||"0"
}
function hashPassword(password){
var i
password=stringtoarray(password)
var hash = to64(0);
var multiplier = to64(13);
for (i = 0; i < password.length; i++){
multiplier = add64(multiplier,[1,0,0])
hash = add64(mul64(hash,multiplier),to64(password[i]));
}
hash = abs64(hash);
hash = mul64(hash,to64(Math.pow(2,mod64(hash,11))));
hash = abs64(hash);
return string64(hash)
}
function stringtoarray(password){
var i
if((typeof password)=="string"){
var password2 = password.toUpperCase();
password=[]
for(i=0;i<password2.length;i++){
password[i]=password2.charCodeAt(i)
}
}
return password
}
function reverse(hash,start,len){
var maxtime=(+new Date())+1000
var m1=neg64(to64(1))
start=stringtoarray(start)
var multipliers=[]
var multipliersum=[]
var max=[]
var min=[]
var firstlimit=0
multipliers[len-1]=[1,0,0]
multipliersum[len-1]=[1,0,0]
max[len-1]=[126,0,0]
min[len-1]=[32,0,0]
for(var a=len-2;a>=0;a--){
multipliers[a]=mul64(multipliers[a+1],to64(15+a))
multipliersum[a]=add64(multipliers[a],multipliersum[a+1])
min[a]=mul64(multipliersum[a],[32,0,0])
max[a]=mul64(multipliersum[a],[126,0,0])
if(overflow && !firstlimit){
firstlimit=a+1
}
}
var startvalue=[0,0,0]
for(var a=0;a<start.length;a++){
startvalue=add64(startvalue,mul64(to64(start[a]),multipliers[a]))
}
startvalue=neg64(startvalue)
allowed=[]
for(a=0;a<32;a++){
allowed[a]=false
}
for(a=32;a<97;a++){
allowed[a]=true
}
for(a=97;a<123;a++){
allowed[a]=false
}
for(a=123;a<127;a++){
allowed[a]=true
}
for(a=127;a<256;a++){
allowed[a]=false
}
var results=[]
function arrtostring(arr){
str=""
for(var a=0;a<arr.length;a++){
//str+=String.fromCharCode(arr[a])
str+="&#"+arr[a]+";"
}
return str
}
function makepwd(ihash){ //This is where clock cycles go to die
ihash=add64(ihash,startvalue)
stringarr=[]
for(var a=0;a<start.length;a++){
stringarr[a]=start[a]
}
function addrest(ihash,startfrom){
if(maxtime<(+new Date())){
return
}
var a
var jhash
if(startfrom==len){
if(cmp64(ihash,[0,0,0])==0){
results[results.length]=arrtostring(stringarr)
}
}
else if(startfrom<firstlimit || ((cmp64(ihash,min[startfrom])!=-1) && (cmp64(ihash,max[startfrom])!=1))){
for(a=32;a<127;a++){
if(allowed[a]){
jhash=add64(ihash,neg64(mul64(to64(a),multipliers[startfrom])))
stringarr[startfrom]=a
addrest(jhash,startfrom+1)
}
}
}
}
addrest(ihash,start.length)
}
function reversemod(ihash){
if(ihash[2]<(128*256)){
if(mod64(ihash,11)==0){
makepwd(ihash)
makepwd(neg64(ihash))
}
}
var modresult=1
var a
var addlim
var jhash
while(mod64(ihash,2)==0){
ihash=div64(ihash,2)
addlim=Math.pow(2,modresult-1)
for(a=0;a<addlim;a++){
jhash=add64(to64(a*Math.pow(2,64-modresult)),ihash)
if(mod64(jhash,11)==modresult){
makepwd(jhash)
makepwd(neg64(jhash))
}
}
modresult++
}
}
reversemod(hash)
reversemod(neg64(hash))
return results
}
document.getElementById("out").innerHTML=reverse(to64(224443104),"ebusiness",22).join("<br>")
//document.getElementById("out").innerHTML=hashPassword("EBUSINESS\"XUWL]NHHHF{I")
</script>
</body>
</html>
Some collisions for 224443104
EBUSINESS"XUWL]NHHHF{I
EBUSINESS"XUWL]NHHHF|&
EBUSINESS"XUWL]NHHHGYI
EBUSINESS"XUWL]NHHHGZ&
EBUSINESS"XUWL]NHHHH7I
Sorry, Not familiar with C#. What does
(short) password.chartAt(i)do? Will it return the ASCII code of the character?A -> 65etc? – Wile E. Coyote – 2011-03-07T10:00:15.220@Dogbert, essentially, yes. (It actually converts the character into a 16-bit value which is its codepoint in the Unicode basic plane, but given that the problem specification restricts interest to ASCII and ASCII values map directly to Unicode codepoints, you can treat it as returning the ASCII code). – Peter Taylor – 2011-03-07T11:08:24.807
@Dogbert This snippet is actually Java, but we broke it with C#. I'll clarify. For the rest, @Peter Taylor has it right. – zneak – 2011-03-07T12:17:32.007
1I fail to get how you can get 224443104 for "ZNEAK". ASCII codes are 90, 78, 69, 65, 75; ((((013+90)14+78)15+69)16+65)*17+75 is 5478988; abs that is the same; 5478988%11 is 9 and 5478988<<9 is 2805241856 (abs that is the same as well). What am I missing? – J B – 2011-03-07T13:15:14.697
2@J-B, you're missing the + of +=, which causes your multipliers to be one too small. It should be ((((014+90)15+... – Peter Taylor – 2011-03-07T13:53:37.800
@Peter Taylor: good catch, thanks! – J B – 2011-03-07T14:00:09.337
2By simple estimation there is an average of over 6*10^17 results in the range 5 to 20 characters (though the number will vary wildly depending on the number of possible backtracks through the last 3 operations), do you really want them all as output? I'm sorry, zneak. I'm afraid I can't do that. – aaaaaaaaaaaa – 2011-03-07T23:18:32.757
@eBusiness I know, I know! Life is hard, and posting that many would drain one's bandwidth quick enough. Luckily, all you have to post along your answer, as specified in the question, are 5 collisions. I'm sure you can do at least that. – zneak – 2011-03-07T23:23:31.887
It's not so much bandwidth I'm concerned about, but no computer in the world has enough memory to store all the results. One really can't optimise a task that is incompletable. – aaaaaaaaaaaa – 2011-03-08T01:33:29.353
@eBusiness Whatever your concern is, rest assured that you need no more than 5 with your post. Your computer should be able to handle that (and your ISP too). – zneak – 2011-03-08T04:42:14.400
1eBusiness makes a good point. Your spec says "yields all possible inputs (made of 5-20 characters with only printable ASCII characters) that would produce the given hash", but a program which emits one possible input per nanosecond would, for an average hash, take more than a decade to execute (there are approximately 2^123 possible inputs, ignoring lower-case a-z since they are upper-cased by the hash function, and only 2^64 possible outputs - if that). – Peter Taylor – 2011-03-08T08:44:32.397
@Peter Taylor I didn't do a very complex analysis, but I'm pretty sure a smart program doesn't need to go through 2^123 possibilities. And since I don't see any practical use of breaking those hashes, I'm at ease stopping the program once I get the feeling it's working. – zneak – 2011-03-08T12:20:47.887
@zneak, you're missing the point. 2^123 objects in 2^64 buckets means that on average each bucket contains 2^(123-64) = 2^59 objects. If you prefer to look at it this way: a program which meets your spec will output about 8 exabytes. You really need to change the spec to either limit it to the first N values for a reasonable N or to limit the search space to strings up to some smaller size.
– Peter Taylor – 2011-03-08T12:35:12.937@Peter Taylor I was very wrong with my numbers, but I still hope that you realize I'm more interested in the algorithm than the output itself. I've removed the upper bound on input size to make it more clear: just generate values that match the hash until I press Ctrl+C. – zneak – 2011-03-08T14:21:58.250
Hmmm, well, now the specification as it stand calls for infinite results. – aaaaaaaaaaaa – 2011-03-08T14:54:03.840
Which means that people using (most) functional languages have to complicate things by using lazy output. – Peter Taylor – 2011-03-08T15:33:24.023
@eBusiness Yes, it does. If you write a general solution, it shouldn't be a problem. @Peter Taylor, I really don't know about functional languages, but aren't dynamic stacks a standard feature among them? That said, if you don't stack overflow after a few minutes of running with your favorite language, I won't mind it if practical restrictions prevent you from outputting exabytes of data. – zneak – 2011-03-08T15:52:40.493
@zneak, functional languages often do output via interactive emitting of the results of the function. In other words, they don't output part-way through a calculation - so if you have an infinite calculation, you'll never see any output. See J-B's solution as an example: it's a function which returns a list of all the solutions it finds. – Peter Taylor – 2011-03-08T17:21:30.847