Levenshtein Distance

40

3

While there are many edit distance questions, such as this one, there isn't a simple question to write a program that calculates the Levenshtein distance.

Some Exposition

The Levenshtein edit distance between two strings is the minimum possible number of insertions, deletions, or substitutions to convert one word into another word. In this case, each insertion, deletion and substitution has a cost of 1.

For example, the distance between roll and rolling is 3, because deletions cost 1, and we need to delete 3 characterrs. The distance between toll and tall is 1, because substitutions cost 1.

Rules

  • The input will be two strings. You may assume that the strings are lowercase, only contain letters, are non-empty and are a maximum of 100 characters in length.
  • The output will be minimum Levenshtein edit distance of the two strings, as defined above.
  • Your code must a program or a function. It does not need to be a named function, but it cannot be a built-in function that directly computes the Levenshtein distance. Other built-ins are allowed.
  • This is code golf, so the shortest answer wins.

Some Examples

>>> lev("atoll", "bowl")
3
>>> lev("tar", "tarp")
1
>>> lev("turing", "tarpit")
4
>>> lev("antidisestablishmentarianism", "bulb")
27

As always, if the problem is unclear, please let me know. Good luck and good golfing!

Catalogue

var QUESTION_ID=67474;var ANSWER_FILTER="!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";var COMMENT_FILTER="!)Q2B_A2kjfAiU78X(md6BoYk";var OVERRIDE_USER=47581;var answers=[],answers_hash,answer_ids,answer_page=1,more_answers=true,comment_page;function answersUrl(index){return"http://api.stackexchange.com/2.2/questions/"+QUESTION_ID+"/answers?page="+index+"&pagesize=100&order=desc&sort=creation&site=codegolf&filter="+ANSWER_FILTER}function commentUrl(index,answers){return"http://api.stackexchange.com/2.2/answers/"+answers.join(';')+"/comments?page="+index+"&pagesize=100&order=desc&sort=creation&site=codegolf&filter="+COMMENT_FILTER}function getAnswers(){jQuery.ajax({url:answersUrl(answer_page++),method:"get",dataType:"jsonp",crossDomain:true,success:function(data){answers.push.apply(answers,data.items);answers_hash=[];answer_ids=[];data.items.forEach(function(a){a.comments=[];var id=+a.share_link.match(/\d+/);answer_ids.push(id);answers_hash[id]=a});if(!data.has_more)more_answers=false;comment_page=1;getComments()}})}function getComments(){jQuery.ajax({url:commentUrl(comment_page++,answer_ids),method:"get",dataType:"jsonp",crossDomain:true,success:function(data){data.items.forEach(function(c){if(c.owner.user_id===OVERRIDE_USER)answers_hash[c.post_id].comments.push(c)});if(data.has_more)getComments();else if(more_answers)getAnswers();else process()}})}getAnswers();var SCORE_REG=/<h\d>\s*([^\n,<]*(?:<(?:[^\n>]*>[^\n<]*<\/[^\n>]*>)[^\n,<]*)*),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;var OVERRIDE_REG=/^Override\s*header:\s*/i;function getAuthorName(a){return a.owner.display_name}function process(){var valid=[];answers.forEach(function(a){var body=a.body;a.comments.forEach(function(c){if(OVERRIDE_REG.test(c.body))body='<h1>'+c.body.replace(OVERRIDE_REG,'')+'</h1>'});var match=body.match(SCORE_REG);if(match)valid.push({user:getAuthorName(a),size:+match[2],language:match[1],link:a.share_link,});else console.log(body)});valid.sort(function(a,b){var aB=a.size,bB=b.size;return aB-bB});var languages={};var place=1;var lastSize=null;var lastPlace=1;valid.forEach(function(a){if(a.size!=lastSize)lastPlace=place;lastSize=a.size;++place;var answer=jQuery("#answer-template").html();answer=answer.replace("{{PLACE}}",lastPlace+".").replace("{{NAME}}",a.user).replace("{{LANGUAGE}}",a.language).replace("{{SIZE}}",a.size).replace("{{LINK}}",a.link);answer=jQuery(answer);jQuery("#answers").append(answer);var lang=a.language;lang=jQuery('<a>'+lang+'</a>').text();languages[lang]=languages[lang]||{lang:a.language,lang_raw:lang.toLowerCase(),user:a.user,size:a.size,link:a.link}});var langs=[];for(var lang in languages)if(languages.hasOwnProperty(lang))langs.push(languages[lang]);langs.sort(function(a,b){if(a.lang_raw>b.lang_raw)return 1;if(a.lang_raw<b.lang_raw)return-1;return 0});for(var i=0;i<langs.length;++i){var language=jQuery("#language-template").html();var lang=langs[i];language=language.replace("{{LANGUAGE}}",lang.lang).replace("{{NAME}}",lang.user).replace("{{SIZE}}",lang.size).replace("{{LINK}}",lang.link);language=jQuery(language);jQuery("#languages").append(language)}}
body{text-align:left!important}#answer-list{padding:10px;width:290px;float:left}#language-list{padding:10px;width:290px;float:left}table thead{font-weight:700}table td{padding:5px}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script> <link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b"> <div id="language-list"> <h2>Shortest Solution by Language</h2> <table class="language-list"> <thead> <tr><td>Language</td><td>User</td><td>Score</td></tr> </thead> <tbody id="languages"> </tbody> </table> </div> <div id="answer-list"> <h2>Leaderboard</h2> <table class="answer-list"> <thead> <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr> </thead> <tbody id="answers"> </tbody> </table> </div> <table style="display: none"> <tbody id="answer-template"> <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr> </tbody> </table> <table style="display: none"> <tbody id="language-template"> <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr> </tbody> </table>

Sherlock9

Posted 2015-12-23T07:38:17.770

Reputation: 11 664

Answers

8

Pyth, 34 bytes

J]wf}z=Jsmsm++.DdkXLdkGXLkdGhld-Jk

Demonstration

This is not particularly well golfed, and very slow. It can't handle anything past 2 changes in a reasonable period of time.

isaacg

Posted 2015-12-23T07:38:17.770

Reputation: 39 268

3But it works, and that's what counts. :P – Conor O'Brien – 2015-12-28T23:54:22.503

10

Matlab, 177 163 bytes

function l=c(a,b);m=nnz(a)+1;n=nnz(b)+1;for i=0:m-1;for j=0:n-1;z=max(i,j);try;z=min([l(i,j+1)+1,l(i+1,j)+1,l(i,j)+(a(i)~=b(j))]);end;l(i+1,j+1)=z;end;end;l=l(m,n)

This is a straightforward implementation of this formula:

enter image description here

Ungolfed:

function l=l(a,b);
m=nnz(a)+1;
n=nnz(b)+1;
for i=0:m-1;
    for j=0:n-1;
        z=max(i,j);
        try;
            z=min([l(i,j+1)+1,l(i+1,j)+1,l(i,j)+(a(i)~=b(j))]);
        end;
        l(i+1,j+1)=z;
    end;
end;
l=l(m,n)

flawr

Posted 2015-12-23T07:38:17.770

Reputation: 40 560

If the scored code is not what you've included, please include the scored code. Otherwise I think there's a lot of whitespace that can be golfed out. – Alex A. – 2015-12-23T17:04:38.763

1@AlexA. the leading space and newlines for indentation purpose are not counted (and can be removed safely). Unce upon a time this was allowed and no one complained. – edc65 – 2015-12-23T18:28:38.100

1

@edc65 The meta consensus now is that the code as scored should be provided.

– Alex A. – 2015-12-23T18:34:05.970

2Well then, the majority prefers the unreadable version. I still let the readable version here, in case anyone might want to see what is actually going on=) – flawr – 2015-12-23T20:28:09.413

2It's common practice to provide both the golfed submission (the one that's scored) as well as an ungolfed version, we just require that the scored one be included. ;) – Alex A. – 2015-12-24T17:35:44.123

7

Python 2, 151 140 138 bytes

Slow recursive implemention of the Levenshtein distance based on Wikipedia (Thanks to @Kenney for shaving of 11 chars and @Sherlock9 for another 2).

def l(s,t):
 def f(m,n):
  if m*n<1:return m or n
  return 1+min([f(m-1,n),f(m,n-1),f(m-1,n-1)-(s[m-1]==t[n-1])])
 return f(len(s),len(t))

Giving the correct answers for the presented test cases:

assert l("tar", "tarp") == 1
assert l("turing", "tarpit") == 4
assert l("antidisestablishmentarianism", "bulb") == 27        
assert l("atoll", "bowl") == 3

Willem

Posted 2015-12-23T07:38:17.770

Reputation: 1 528

1You might save 3-4 bytes or so by doing something like if !n*m:return n if n else m, and another 2 by return 1+min([ f(..), f(..), f(..) - (s[..] == t[..]) ]). – Kenney – 2015-12-24T21:10:54.290

You would save 2 bytes by using f(m-1,n-1)-(s[m-1]==t[n-1]) instead of f(m-1,n-1)+(s[m-1]!=t[n-1])-1. – Sherlock9 – 2015-12-29T03:45:34.720

Golfed off 20 chars: http://codegolf.stackexchange.com/a/102910/60919

– FlipTack – 2016-12-11T14:46:07.633

5

JavaScript (ES6) 106 113 122

Edit 16 bytes saved following @Neil suggestions

As an anonymous function.

(s,t)=>[...s].map((u,i)=>w=w.map((v,j)=>p=j--?Math.min(p,v,w[j]-(u==t[j]))+1:i+1),w=[...[,...t].keys()])|p

This is a golfed implementation of the Wagner–Fischer algorithm exactly as described in the linked wikipedia article, in the section Iterative with two matrix rows (even if in fact, just 1 row is used - array w)

Less golfed

(s,t)=>
{
  w = [...[0,...t].keys()];
  for(i = 0; i < s.length; i++)
    w = w.map((v,j)=>
              p = j
              ? Math.min(p+1, v+1, w[j-1] + (s[i]!=t[j-1]))
              : i+1
             );
  return p
}

Test snippet

L=(s,t)=>[...s].map((u,i)=>w=w.map((v,j)=>p=j--?Math.min(p,v,w[j]-(u==t[j]))+1:i+1),w=[...[,...t].keys()])|p

console.log=x=>O.textContent+=x+'\n';

[["atoll", "bowl"],["tar", "tarp"]
,["turing", "tarpit"],["antidisestablishmentarianism", "bulb"]]
.forEach(t=>console.log(t+' => '+L(...t)))
<pre id=O></pre>

edc65

Posted 2015-12-23T07:38:17.770

Reputation: 31 086

1Can you use [...[0,...t].keys()] instead? Saves 2 bytes if you can. – Neil – 2015-12-23T16:12:51.983

1@Neil that looks ugly but it's shorter. Thx – edc65 – 2015-12-23T16:41:26.183

1Actually you can save another byte, [...[,...t].keys()] works too I think. – Neil – 2015-12-23T22:17:47.543

I managed to shave off another byte by using [...s].map(): (s,t)=>(w=[...[,...t].keys()],[...s].map((u,i)=>w=w.map((v,j)=>p=j--?Math.min(p,v,w[j]-(s[i-1]==t[j]))+1:i)),p) – Neil – 2015-12-23T22:28:53.303

@Neil great, thanks again! – edc65 – 2015-12-23T22:44:43.173

Wait, 106? I was working on another approach but I only got it down to 108: (s,t)=>[...s].map((u,i)=>(q=p=i+1,w=w.map((v,j)=>p=Math.min(++p,q-(u==t[j]),q=++v))),w=[...[...t].keys()])|p – Neil – 2015-12-25T01:06:30.237

4

Python 2, 118 bytes

A golf of this solution, but it doesn't look like Willem's been on for a year, so I'll have to post it myself:

def l(s,t):f=lambda m,n:m or n if m*n<1else-~min(f(m-1,n),f(m,n-1),f(m-1,n-1)-(s[m-1]==t[n-1]));print f(len(s),len(t))

Try on repl.it

Takes two strings and outputs the distance to STDOUT (allowed by meta). Please comment suggestions, I'm sure this can be golfed further.

FlipTack

Posted 2015-12-23T07:38:17.770

Reputation: 13 242

Is it necessary to wrap everything in a function? Could you use two input()s or an input().split()? – Sherlock9 – 2016-12-11T14:46:51.430

@Sherlock9 I tried that, but it costs 1 byte extra as far as I can tell

– FlipTack – 2016-12-11T15:29:08.073

Right, I forgot that you need to define s and t somewhere in the code. Never mind. Good job :D – Sherlock9 – 2016-12-11T16:27:54.613

I'm not sure why Willem used m or n. You can replace it with m+n. – Arnauld – 2019-04-27T07:05:43.120

4

Haskell, 67 bytes

e@(a:r)#f@(b:s)|a==b=r#s|1<3=1+minimum[r#f,e#s,r#s]
x#y=length$x++y

Try it online! Example usage: "turing" # "tarpit" yields 4.

This is a recursive solution. Given two strings e and f, we first compare their first characters a and b. If they are equal, then the Levenshtein distance of e and f is the same as the Levenshtein distance of r and s, the remainder of e and f after removing the first characters. Otherwise, either a or b needs to be removed, or one is substituted by the other. [r#f,e#s,r#s] recursively computes the Levenshtein for those three cases, minimum picks the smallest one, and 1 is added to account for the necessary remove or replace operation.

If one of the strings is empty, we and up on the second line. In this case, the distance is just the length of the non-empty string, or equivalently the length of both strings concatenated together.

Laikoni

Posted 2015-12-23T07:38:17.770

Reputation: 23 676

1Wow, this is a seriously good solution, really elegant and short. – ggPeti – 2018-03-06T23:08:04.407

1

It seems your program outputs 0 for the input pair "abb","aab" (link). (I just found out because I wanted to steal your code for another challenge:)

– flawr – 2019-12-29T21:49:27.427

@flawr Thanks for pointing out, the previous 67 byte version works correctly so I rolled back. – Laikoni – 2019-12-31T00:28:05.640

3

Retina, 78 72 bytes

&`(.)*$(?<!(?=((?<-4>\4)|(?<-1>.(?<-4>)?))*,(?(4),))^.*,((.)|(?<-1>.))*)

Try it online!

In a way, this is a pure regex solution where the result is the number of positions from which the regex matches. Because why not...

Fair warning, this is super inefficient. The way this works is that it offloads the actual optimisation to the regex engine's backtracker, which simply brute forces all possible alignments, starting with as few alterations as possible and allowing one more until it's possible to match up the strings with additions, deletions and substitutions.

For a slightly more sensible solution, this one does the matching only once and doesn't have any negative lookarounds. Here, the result is the number of captures in group 2, which you could access with match.Groups[2].Captures.Count in C#, for example. It's still horribly inefficient though.

Explanation

I'm explaining the second version above, because it's conceptually a bit easier (as it's just a single regex match). Here is an ungolfed version I've named the groups (or made them non-capturing) and added comments. Remember that the components in a lookbehind should be read from back to front, but alternatives and lookaheads inside those should be read from front to back. Yeah.

.+                      # Ensures backtracking from smallest to largest for next repetition
(?<ops>(?<distance>.))* # This puts the current attempted distances onto two different stacks,
                        # one to work with, and one for the result.
$                       # Make sure the lookbehind starts from the end.
(?<=                    # The basic idea is now to match up the strings character by character,
                        # allowing insertions/deletions/substitutions at the cost of one capture
                        # on <ops>. Remember to read from the bottom up.
  (?=                   # Start matching forwards again. We need to go through the other string
                        # front-to-back due to the nature of the stack (the last character we
                        # remembered from the second string must be the first character we check
                        # against in the first string).
    (?:
      (?<-str>\k<str>)  # Either match the current character against the corresponding one from
                        # the other string.
    |
      (?<-ops>          # Or consume one operation to...
        .               # consume a character without matching it to the other string (a deletion)
        (?<-str>)?      # optionally dropping a character from the other string as well 
                        # (a substitution).
      )
    )*                  # Rinse and repeat.
    ,(?(str),)          # Ensure we reached the end of the first string while consuming all of the 
                        # second string. This is only possible if the two strings can be matched up 
                        # in no more than <distance> operations.
  )
  ^.*,                  # Match the rest of string to get back to the front.
  (?:                   # This remembers the second string from back-to-front.
    (?<str>.)           # Either capture the current character.
  |
    (?<-ops>.)          # Or skip it, consuming an operation. This is an insertion.
  )*
)

The only difference to the 72-byte version is that we can drop the leading .+ (and second group in the beginning) by finding positions at the end where we don't have enough <ops> and count all of those positions.

Martin Ender

Posted 2015-12-23T07:38:17.770

Reputation: 184 808

3

Python 3, 105 94 93 bytes

-11 bytes by xnor

l=lambda a,b:b>""<a and min(l(a[1:],b[1:])+(a[0]!=b[0]),l(a[1:],b)+1,l(a,b[1:])+1)or len(a+b)

Golfed version of the shortest implementation on Wikibooks.

Try it online!

movatica

Posted 2015-12-23T07:38:17.770

Reputation: 635

Nice solution. The l= needs to be included and counted because the function is recursive. You can combine the bases cases into if b>""<a else len(a+b). – xnor – 2019-04-27T22:54:30.633

Nice play with the operators, thanx! – movatica – 2019-04-27T23:38:57.543

3

Python 3, 267 216 184 162 bytes

This function calculates the Levenshtein distance using an array that is 2 x len(word_2)+1 in size.

Edit: This doesn't get close to the Willem's Python 2 answer, but here is a more golfed answer with many little refinements here and there.

def e(p,q):
 m=len(q);r=range;*a,=r(m+1);b=[1]*-~m
 for i in r(len(p)):
  for j in r(m):b[j+1]=1+min(a[j+1],b[j],a[j]-(p[i]==q[j]))
  a,b=b,[i+2]*-~m
 return a[m]

Ungolfed:

def edit_distance(word_1,word_2):
    len_1 = len(word_1)
    len_2 = len(word_2)
    dist = [[x for x in range(len_2+1)], [1 for y in range(len_2+1)]]
    for i in range(len_1):
        for j in range(len_2):
            if word_1[i] == word_2[j]:
                dist[1][j+1] = dist[0][j]
            else:
                deletion = dist[0][j+1]+1
                insertion = dist[1][j]+1
                substitution = dist[0][j]+1
                dist[1][j+1] = min(deletion, insertion, substitution)
        dist[0], dist[1] = dist[1], [i+2 for m in range(len_2+1)]
    return dist[0][len_2]

Sherlock9

Posted 2015-12-23T07:38:17.770

Reputation: 11 664

3

Seriously, 86 82 78 bytes

,#,#`k;;;░="+l"£@"│d);)[]oq╜Riu)@d);)@[]oq╜Riu(@)@)@[]oq╜Ri3}@)=Y+km"£@IRi`;╗ƒ

Hex Dump:

2c232c23606b3b3b3bb03d222b6c229c4022b364293b295b5d6f71bd526975294064293b29405b
5d6f71bd5269752840294029405b5d6f71bd5269337d40293d592b6b6d229c40495269603bbb9f

Try It Online

(Note that the link is to a different version because something about the online interpreter breaks with the new, shorter version, even though it works fine with the downloadable interpreter.)

It's about the most straightforward implementation Seriously allows for the recursive definition. It is very slow because it does no memoization at all. Perhaps the tabular method could be shorter (maybe by using the registers as rows), but I'm fairly happy with this, despite how much kludging-my-way-around-the-language's-shortcomings it contains. That one can use

[]oq`<code>`Ri

as a proper two-argument function call was quite the nice find.

Explanation:

,#,#                             Read in two arguments, break them into lists of chars
    `                       `;╗ƒ put the quoted function in reg0 and immediately call it
     k;;;                        put the two lists in a list and make 3 copies
         ░                       replace the latter two with one with empty lists removed
          =                      replace two more with 1 if no empty lists removed, else 0
           "..."£@"..."£@        push the two functions described below, moving 
                                 the boolean above them both
                         I       select the correct function based on the condition
                          Ri     call the function, returning the correct distance
                                 for these substrings

   There are two functions that can be called from the main function above. Each expects 
   two strings, i and j, to be on the stack. This situation is ensured by putting 
   those strings in a list and using R to call the functions with that list as the stack.
   The first is very simple:

+l                             Concatenate the strings and take their length.
                               This is equivalent to the length of the longer
                               string, since one of the strings will be empty.

   The second function is very long and complicated. It will do the "insertion, deletion, 
   substitution" part of the recursive definition. Here's what happens in 4 parts:

│d);)                          After this, the stack is top[i-,j,i,j,ci,i-], where i- is 
                               list i with its last character, ci, chopped off.
     []oq                      this puts i- and j into a list so that they can be passed
                               as arguments recursively into the main function
         ╜Riu                  this calls the main function (from reg0) with the args
                               which will return a number to which we add 1 to get #d,
                               the min distance if we delete a character
)@d);)@                        After this, the stack is top[i,j-,ci,i-,#d,cj,j-], where 
                               j- and cj are the same idea as i- and ci
       []oq╜Riu                listify arguments, recurse and increment to get #i
                               (distance if we insert)
(@)@)@                         After this, the stack is top[i-,j-,#d,cj,#i,ci]
      []oq╜Ri                  listify arguments, recurse to get min distance between 
                               them but we still need to add 1 when we'd need to 
                               substitute because the chars we chopped off are different
(((@)                          After this, the stack is top[cj,ci,#s,#d,#i]
     =Y                        1 if they are not equal, 0 if they are
       +                       add it to the distance we find to get the distance
                               if we substitute here
        k                      put them all in a list
         m                     push the minimum distance over the three options

quintopia

Posted 2015-12-23T07:38:17.770

Reputation: 3 899

I like how the code tries to escape the pre-element :) – mınxomaτ – 2015-12-23T11:36:54.147

3

AutoIt, 333 bytes

Func l($0,$1,$_=StringLen,$z=StringMid)
Dim $2=$_($0),$3=$_($1),$4[$2+1][$3+1]
For $5=0 To $2
$4[$5][0]=$5
Next
For $6=0 To $3
$4[0][$6]=$6
Next
For $5=1 To $2
For $6=1 To $3
$9=$z($0,$5,1)<>$z($1,$6,1)
$7=1+$4[$5][$6-1]
$8=$9+$4[$5-1][$6-1]
$m=1+$4[$5-1][$6]
$m=$m>$7?$7:$m
$4[$5][$6]=$m>$8?$8:$m
Next
Next
Return $4[$2][$3]
EndFunc

Sample test code:

ConsoleWrite(l("atoll", "bowl") & @LF)
ConsoleWrite(l("tar", "tarp") & @LF)
ConsoleWrite(l("turing", "tarpit") & @LF)
ConsoleWrite(l("antidisestablishmentarianism", "bulb") & @LF)

yields

3
1
4
27

mınxomaτ

Posted 2015-12-23T07:38:17.770

Reputation: 7 398

3

k4, 66 bytes

{$[~#x;#y;~#y;#x;&/.z.s'[-1 0 -1_\:x;0 -1 -1_\:y]+1 1,~(*|x)=*|y]}

A boring and basically ungolfed impl of the algo. Ex.:

  f:{$[~#x;#y;~#y;#x;&/.z.s'[-1 0 -1_\:x;0 -1 -1_\:y]+1 1,~(*|x)=*|y]}
  f["kitten";"sitting"]
3
  f["atoll";"bowl"]
3
  f["tar";"tarp"]
1
  f["turing";"tarpit"]
4
  f["antidisestablishmentarianism";"bulb"]
27

Aaron Davies

Posted 2015-12-23T07:38:17.770

Reputation: 881

2

GNU Prolog, 133 bytes

m([H|A],B):-B=A;B=[H|A].
d([H|A]-[H|B],D):-d(A-B,D).
d(A-B,D):-A=B,D=0;D#=E+1,m(A,X),m(B,Y),d(X-Y,E).
l(W,D):-d(W,D),(E#<D,l(W,E);!).

Takes a tuple as argument. Example of usage:

| ?- l("turing"-"tarpit",D).

D = 4

yes

m specifies that B is A either directly or with its first character removed. d uses m as a subroutine to calculate an edit distance between the tuple elements (i.e. the distance of a series of edits that converts one into the other). Then l is a standard trick for finding the minimum of d (you take an arbitrary distance, then take an arbitrary smaller distance, repeat until you can't go smaller).

user62131

Posted 2015-12-23T07:38:17.770

Reputation:

2

APL (Dyalog Classic), 34 bytes

{⊃⌽⊖¯1(⊢⌊(1+⌽⌊⊖)⌊⍵+⊣⌽⊖)⍣≡+\+⍀⍵}∘.≠

Try it online!

ngn

Posted 2015-12-23T07:38:17.770

Reputation: 11 449

2

Haskell, 136 bytes

Call e. Bit slow on antidisestablishmentarianism etc.

l=length
e a b=v a(l a)b(l b)
v a i b j|i*j==0=i+j|0<1=minimum[1+v a(i-1)b j,1+v a i b(j-1),fromEnum(a!!(i-1)/=b!!(j-1))+v a(i-1)b(j-1)]

Leif Willerts

Posted 2015-12-23T07:38:17.770

Reputation: 1 060

2

Jolf, 4 bytes

Try it here!

~LiI
~L   calculate the Levenshtein distance of
  i   input string
   I  and another input string

I added that builtin yesterday, but saw this challenge today, i.e., just now. Still, this answer is noncompeting.

In a newer version:

~Li

takes implicit second input.

Conor O'Brien

Posted 2015-12-23T07:38:17.770

Reputation: 36 228

"Your code must a program or a function. It does not need to be a named function, but it cannot be a built-in function that directly computes the Levenshtein distance. Other built-ins are allowed." – Kevin Cruijssen – 2018-06-14T14:17:42.433

Ah, didn't see you mentioned it's non-competing.. Better put it in the title, or add a valid program/function without builtin. – Kevin Cruijssen – 2018-06-14T14:19:08.110

1

Perl, 168 166 163 bytes

sub l{my($S,$T,$m)=(@_,100);$S*$T?do{$m=++$_<$m?$_:$m for l($S-1,$T),l($S,--$T),l(--$S,$T)-($a[$S]eq$b[$T]);$m}:$S||$T}print l~~(@a=shift=~/./g),~~(@b=shift=~/./g)

Recursive implementation. Save in a file.pl and run as perl file.pl atoll bowl.

sub l {
    my($S,$T,$m)=(@_,100);

    $S*$T
    ? do {
        $m = ++$_ < $m ? $_ : $m
        for
            l($S-1,   $T),
            l($S  , --$T),
            l(--$S,   $T) - ($a[$S] eq $b[$T])
        ;    
        $m
    }
    : $S||$T
}
print l~~(@a=shift=~/./g),~~(@b=shift=~/./g)


The other two implementations are both longer (full matrix: 237 bytes, twoone-row iterative: 187).

  • update 166: omit () in calling l.
  • update 163: eliminate return by abusing do in trinary.

Kenney

Posted 2015-12-23T07:38:17.770

Reputation: 946

0

PowerShell v3+, 247 Bytes

$c,$d=$args;$e,$f=$c,$d|% le*;$m=[object[,]]::new($f+1,$e+1);0..$e|%{$m[0,$_]=$_};0..$f|%{$m[$_,0]=$_};1..$e|%{$i=$_;1..$f|%{$m[$_,$i]=(($m[($_-1),$i]+1),($m[$_,($i-1)]+1),($m[($_-1),($i-1)]+((1,0)[($c[($i-1)]-eq$d[($_-1)])]))|sort)[0]}};$m[$f,$e]

I ended up making this to solve another challenges involving LD's.

Code explanation with comments.

# Get both of the string passed as arguments. $c being the compare string
# and $d being the difference string. 
$c,$d=$args

# Save the lengths of these strings. $e is the length of $c and $f is the length of $d
$e,$f=$c,$d|% le*

# Create the multidimentional array $m for recording LD calculations
$m=[object[,]]::new($f+1,$e+1)

# Populate the first column 
0..$e|%{$m[0,$_]=$_}

# Populate the first row
0..$f|%{$m[$_,0]=$_}

# Calculate the Levenshtein distance by working through each position in the matrix. 
# Working the columns
1..$e|%{
    # Save the column index for use in the next pipeline
    $i=$_

    # Working the rows.
    1..$f|%{
        # Calculate the smallest value between the following values in the matrix relative to this one
        # cell above, cell to the left, cell in the upper left. 
        # Upper left also contain the cost calculation for this pass.    
        $m[$_,$i]=(($m[($_-1),$i]+1),($m[$_,($i-1)]+1),($m[($_-1),($i-1)]+((1,0)[($c[($i-1)]-eq$d[($_-1)])]))|sort)[0]
    }
}
# Return the last element of the matrix to get LD 
$m[$f,$e]

Matt

Posted 2015-12-23T07:38:17.770

Reputation: 1 075

0

JavaScript (ES6),  95 91  89 bytes

Takes input as (source)(target). Essentially a port of @Willem's Python answer (later optimized by @FlipTack).

s=>t=>(g=m=>m*n?1+Math.min(g(m,--n),g(--m)-(s[m]==t[n++]),g(m)):m+n)(s.length,n=t.length)

Try it online!

Arnauld

Posted 2015-12-23T07:38:17.770

Reputation: 111 334

0

C#, 215 210 198

public int L(string s,string t){int c,f,a,i,j;var v=new int[100];for(c=i=0;i<s.Length;i++)for(f=c=i,j=0;j<t.Length;j++){a=c;c=f;f=i==0?j+1:v[j];if(f<a)a=f;v[j]=c=s[i]==t[j]?c:1+(c<a?c:a);}return c;}

more readable:

public int L(string s,string t){
    int c,f,a,i,j;
    var v=new int[100];
    for(c=i=0;i<s.Length;i++)
        for(f=c=i,j=0;j<t.Length;j++){
            a=c;
            c=f;
            f=(i==0)?j+1:v[j];
            if (f<a) a=f;
            v[j]=c=(s[i]==t[j])?c:1+((c<a)?c:a);
        }
    return c;
}

user8865

Posted 2015-12-23T07:38:17.770

Reputation:

0

C, 192 bytes

#define m(x,y) (x>y?x:y)
#define r(x,y) l(s,ls-x,t,lt-y)
int l(char*s,int ls,char*t,int lt){if(!ls)return lt;if(!lt)return ls;a=r(1,1);if(s[ls]==t[ls])return a;return m(m(r(0,1),r(1,0)),a)+1;}
---------

Detailed

#include <stdio.h>

#define m(x,y) (x>y?x:y)
#define f(x) char x[128];fgets(x,100,stdin)
#define r(x,y) l(s,ls-x,t,lt-y)

int l(char*s,int ls,char*t,int lt)
{
    if(!ls) return lt;
    if(!lt) return ls;

    int a = r(1,1);
    if(s[ls]==t[ls]) return a;

    return m(m(r(0,1),r(1,0)),a)+1;
}

int main(void)
{
    f(a);
    f(b);
    printf("%d", l(a,strlen(a),b,strlen(b)));
    return 0;
}

Khaled.K

Posted 2015-12-23T07:38:17.770

Reputation: 1 435