Remove duplicated & switched case

27

4

Goal

The goal of this challenge is: given a string as input, remove duplicate pairs of letters, if the second item in the pair is of opposite capitalization. (i.e. uppercase becomes lowercase and vice-versa).

Pairs should be replaced from left to right. For example, aAa should become aa and not aA.

examples

Inputs & outputs:

Input:         Output:  
bBaAdD         bad     
NniIcCeE       Nice    
Tt eE Ss tT    T e S t 
sS Ee tT       s E t   
1!1!1sStT!     1!1!1st!
nN00bB         n00b    
(eE.gG.)       (e.g.)  
Hh3lL|@!       H3l|@!
Aaa            Aa
aaaaa          aaaaa
aaAaa          aaaa

The input consists of printable ASCII symbols.

You shouldn't remove duplicated digits or other non-letter characters.

Acknowledgement

This challenge is the opposite of @nicael 's "Duplicate & switch case". Can you reverse it?

Thank you for all contributors from the sandbox!

Catalogue

The Stack Snippet at the bottom of this post generates the catalogue from the answers a) as a list of shortest solution per language and b) as an overall leaderboard.

To make sure that your answer shows up, please start your answer with a headline, using the following Markdown template:

## Language Name, N bytes

where N is the size of your submission. If you improve your score, you can keep old scores in the headline, by striking them through. For instance:

## Ruby, <s>104</s> <s>101</s> 96 bytes

If you want to include multiple numbers in your header (e.g. because your score is the sum of two files or you want to list interpreter flag penalties separately), make sure that the actual score is the last number in the header:

## Perl, 43 + 2 (-p flag) = 45 bytes

You can also make the language name a link which will then show up in the snippet:

## [><>](http://esolangs.org/wiki/Fish), 121 bytes

<style>body { text-align: left !important} #answer-list { padding: 10px; width: 290px; float: left; } #language-list { padding: 10px; width: 290px; float: left; } table thead { font-weight: bold; } table td { padding: 5px; }</style>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<div id="language-list"> <h2>Shortest Solution by Language</h2> <table class="language-list"> <thead> <tr><td>Language</td><td>User</td><td>Score</td></tr> </thead> <tbody id="languages"> </tbody> </table> </div>
<div id="answer-list"> <h2>Leaderboard</h2> <table class="answer-list"> <thead> <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr> </thead> <tbody id="answers"> </tbody> </table> </div>
<table style="display: none"> <tbody id="answer-template"> <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr> </tbody> </table>
<table style="display: none"> <tbody id="language-template"> <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr> </tbody> </table>
<script>
// Leaderboard snippet: downloads every answer to QUESTION_ID (plus the
// comments on those answers) through the Stack Exchange API, extracts
// "language, score" from each answer's headline and fills the two tables
// above (#answers and #languages).

// --- Configuration and shared state -------------------------------------
var QUESTION_ID = 85509;
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";
var COMMENT_FILTER = "!)Q2B_A2kjfAiU78X(md6BoYk";
// Only comments written by this user id are attached to answers (see
// getComments); process() then looks for "Override header:" among them.
var OVERRIDE_USER = 36670;
var answers = [], answers_hash, answer_ids, answer_page = 1, more_answers = true, comment_page;

// Builds the API URL for page `index` of the question's answers.
function answersUrl(index) {
  return "//api.stackexchange.com/2.2/questions/" + QUESTION_ID + "/answers?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + ANSWER_FILTER;
}

// Builds the API URL for page `index` of the comments on the given answer ids.
function commentUrl(index, answers) {
  return "//api.stackexchange.com/2.2/answers/" + answers.join(';') + "/comments?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + COMMENT_FILTER;
}

// Fetches the next page of answers, indexes them by id, then starts
// fetching the comments that belong to that page.
function getAnswers() {
  jQuery.ajax({
    url: answersUrl(answer_page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: function (data) {
      answers.push.apply(answers, data.items);
      // The id lookup tables are rebuilt for every page of answers.
      answers_hash = [];
      answer_ids = [];
      data.items.forEach(function(a) {
        a.comments = [];
        // The answer id is the first run of digits in the share link.
        var id = +a.share_link.match(/\d+/);
        answer_ids.push(id);
        answers_hash[id] = a;
      });
      if (!data.has_more) more_answers = false;
      comment_page = 1;
      getComments();
    }
  });
}

// Fetches the next page of comments for the current page of answers.
// When the comments are exhausted it either moves on to the next page of
// answers or, if there are none left, renders the tables.
function getComments() {
  jQuery.ajax({
    url: commentUrl(comment_page++, answer_ids),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: function (data) {
      data.items.forEach(function(c) {
        if (c.owner.user_id === OVERRIDE_USER)
          answers_hash[c.post_id].comments.push(c);
      });
      if (data.has_more) getComments();
      else if (more_answers) getAnswers();
      else process();
    }
  });
}

getAnswers();

// Captures (1) the language part of the headline and (2) the last number
// in the headline that is not inside a struck-through <s>...</s> element.
var SCORE_REG = /<h\d>\s*([^\n,<]*(?:<(?:[^\n>]*>[^\n<]*<\/[^\n>]*>)[^\n,<]*)*),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;
var OVERRIDE_REG = /^Override\s*header:\s*/i;

function getAuthorName(a) {
  return a.owner.display_name;
}

// Parses every downloaded answer and renders both tables.
function process() {
  var valid = [];

  answers.forEach(function(a) {
    var body = a.body;
    // An "Override header: ..." comment replaces the answer body with a
    // synthetic <h1> headline built from the comment text.
    a.comments.forEach(function(c) {
      if(OVERRIDE_REG.test(c.body))
        body = '<h1>' + c.body.replace(OVERRIDE_REG, '') + '</h1>';
    });

    var match = body.match(SCORE_REG);
    if (match)
      valid.push({
        user: getAuthorName(a),
        size: +match[2],
        language: match[1],
        link: a.share_link,
      });
    else console.log(body); // headline not recognised: log it for debugging
  });

  // Smallest score first.
  valid.sort(function (a, b) {
    var aB = a.size,
        bB = b.size;
    return aB - bB
  });

  var languages = {};
  var place = 1;
  var lastSize = null;
  var lastPlace = 1;

  valid.forEach(function (a) {
    // Answers with equal size share the place of the first of them.
    if (a.size != lastSize)
      lastPlace = place;
    lastSize = a.size;
    ++place;

    // NOTE(review): the values are substituted into the template as raw
    // HTML (the language field is expected to carry markup such as links).
    var answer = jQuery("#answer-template").html();
    answer = answer.replace("{{PLACE}}", lastPlace + ".")
                   .replace("{{NAME}}", a.user)
                   .replace("{{LANGUAGE}}", a.language)
                   .replace("{{SIZE}}", a.size)
                   .replace("{{LINK}}", a.link);
    answer = jQuery(answer);
    jQuery("#answers").append(answer);

    // Strip markup from the language name to get a plain-text key.
    var lang = a.language;
    lang = jQuery('<a>'+lang+'</a>').text();
    // `valid` is sorted by size, so the first entry seen for a language is
    // its shortest one; later (longer) entries must not overwrite it.
    // toLowerCase() takes no arguments; the stray argument was removed.
    languages[lang] = languages[lang] || {lang: a.language, lang_raw: lang.toLowerCase(), user: a.user, size: a.size, link: a.link};
  });

  var langs = [];
  for (var key in languages)
    if (languages.hasOwnProperty(key))
      langs.push(languages[key]);

  // Alphabetical by lower-cased plain-text language name.
  langs.sort(function (a, b) {
    if (a.lang_raw > b.lang_raw) return 1;
    if (a.lang_raw < b.lang_raw) return -1;
    return 0;
  });

  for (var i = 0; i < langs.length; ++i) {
    var language = jQuery("#language-template").html();
    var lang = langs[i];
    language = language.replace("{{LANGUAGE}}", lang.lang)
                       .replace("{{NAME}}", lang.user)
                       .replace("{{SIZE}}", lang.size)
                       .replace("{{LINK}}", lang.link);
    language = jQuery(language);
    jQuery("#languages").append(language);
  }
}
</script>

aloisdg moving to codidact.com

Posted 2016-07-16T12:01:20.127

Reputation: 1 767

4Haha, that's NniIcCeE :) – nicael – 2016-07-16T14:09:56.843

@nicael I am glad you approve :) – aloisdg moving to codidact.com – 2016-07-16T14:43:02.337

what's the output for: abB? abB or ab? – Downgoat – 2016-07-16T18:41:19.343

@Downgoat abB should output ab – aloisdg moving to codidact.com – 2016-07-16T18:46:27.870

I don't understand why aaaAAA becomes aaaAA and not aaA. – raznagul – 2016-07-18T10:00:10.257

1@raznagul why would it? Split it up: aa;aA;AA, only the middle pair matches the pattern and becomes a, so aa;a;AA – LLlAMnYP – 2016-07-18T11:04:50.763

Answers

12

Jelly, 8 bytes

ṛŒsḟḟȧµ\

Try it online! or verify all test cases.

How it works

ṛŒsḟḟȧµ\  Main link. Argument: s (string)

      µ   Convert all links to the left into a chain (unknown arity) and begin a
          new chain.
       \  Do a cumulative reduce by the chain to the left.
          Left argument:   r (previous result or first character)
          Right argument:  c (next character)
ṛ           Set the return value to c.
 Œs         Swap c's case.
    ḟ       Remove c from r (if present).
            This yields an empty string if c and r are identical (repeated letter
            with the same case or non-letter) and r otherwise.
            Note that r will be empty if the previous character has been removed.
   ḟ        Remove the resulting characters (if any) from c with swapped case.
            This yields c with swapped case if the result to the right does not
            contain c; otherwise, it yields the empty string.
     ȧ      Flat logical AND with c.
            Replace swapped case c with c; do not modify an empty string.

Dennis

Posted 2016-07-16T12:01:20.127

Reputation: 196 637

Shorter than the Regex damn! – aloisdg moving to codidact.com – 2016-07-16T18:32:44.513

2Beat Retina on a [tag:string] challenge ._. – TuxCrafting – 2016-07-16T19:00:26.923

11

Retina, 18 bytes

(.)(?!\1)(?i)\1
$1

Try it online!

Explanation

This is a single (and fairly simple) substitution which matches the relevant pairs and replaces them with only the first character. The pairs are matched by activating case-insensitivity halfway through the pattern:

(.)     # Match a character and capture it into group 1.
(?!\1)  # Use a negative lookahead to ensure that the next character *isn't* the same
        # as the character we just captured. This doesn't advance the position of the
        # regex engine's "cursor".
(?i)    # Now activate case-insensitivity for the remainder of the pattern.
\1      # Match the second character with a backreference to the first. With the i
        # modifier activated, this will match if the two characters only differ
        # by case.

The substitution simply writes back the character we already captured in group 1 anyway.

Martin Ender

Posted 2016-07-16T12:01:20.127

Reputation: 184 808

1

Nice answer! Debuggex works great with this one!

– aloisdg moving to codidact.com – 2016-07-16T12:11:10.993

5

C#, 87 75 bytes

s=>System.Text.RegularExpressions.Regex.Replace(s,@"(.)(?!\1)(?i)\1","$1");

With the mighty regex from Martin Ender. C# lambda where the input and the output are string.

12 bytes saved by Martin Ender and TùxCräftîñg.


C#, 141 134 bytes

s=>{var r="";for(int i=0,l=s.Length;i<l;i++){var c=s[i];r+=c;if(char.IsLetter(c)&i+1<l&&(c|32)==(s[i+1]|32)&c!=s[i+1])i++;}return r;};

C# lambda where the input and the output are string. The algorithm is naive. This is the one I use as reference.

Code:

s=>{
    // Copy the input character by character. Whenever a letter is directly
    // followed by the same letter in the opposite case, the follower is
    // skipped instead of copied.
    var kept = "";
    var pos = 0;
    while (pos < s.Length)
    {
        var current = s[pos];
        kept += current;
        pos++;

        // Only letters can start a pair, and a pair needs a following character.
        if (!char.IsLetter(current) || pos >= s.Length)
            continue;

        var next = s[pos];
        var sameLetter = char.ToLower(current) == char.ToLower(next);
        var caseDiffers = char.IsLower(current) != char.IsLower(next);
        if (sameLetter && caseDiffers)
            pos++; // drop the switched-case duplicate
    }
    return kept;
};

7 bytes thanks to Martin Ender!


Try them online!

aloisdg moving to codidact.com

Posted 2016-07-16T12:01:20.127

Reputation: 1 767

@TùxCräftîñg Indeed but it is easy to read like this. Check my golfed version for a less verbose answer :) – aloisdg moving to codidact.com – 2016-07-16T12:25:09.473

5

Brachylog, 44 bytes

.v|.l1|hA,?bhB(@uA;A@uB),?bb&~b.hA|?b&~b.h~h?

Brachylog has no regular expressions.

Explanation

    .v          Input = Output = ""
|               OR
    .l1         Input = Output = string of one character
|               OR
    hA,         A is the first char of the Input
    ?bhB        B is the second char of the Input
    (
        @uA         B uppercased is A
        ;           OR
        A@uB        A uppercased is B
    ),
    ?bb&        Call recursively on Input minus the first two elements
    ~b.hA       Output is the result of that call with A appended before it
|               OR
    b&          Call recursively on Input minus the first element
    ~b.h~h?     Output is the result of that call with the first element of Input appended
                  before it

Fatalize

Posted 2016-07-16T12:01:20.127

Reputation: 32 976

4

Perl, 40 24+1=25 bytes

Use the same regex as Martin.
Use the -p flag

s/(.)(?!\1)(?i)\1/\1/g

Test it on ideone

TuxCrafting

Posted 2016-07-16T12:01:20.127

Reputation: 4 547

If you use the -p flag, you can remove almost all your code except the s/// for a good saving! – Dom Hastings – 2016-07-16T15:18:38.640

4

Python 3, 64 59 58 bytes

r=input()
for c in r:r=c[c.swapcase()==r!=c:];print(end=r)

Test it on Ideone.

Dennis

Posted 2016-07-16T12:01:20.127

Reputation: 196 637

4

C, 66 bytes

l;main(c){for(;~(c=getchar());)l=l^c^32|!isalpha(c)?putchar(c):0;}

orlp

Posted 2016-07-16T12:01:20.127

Reputation: 37 067

3

Pyth, 24 20 bytes

4 bytes thanks to @Jakube.

This still uses regex, but just for tokenizing.

shM:zj\|+s_BVGrG1\.1

Test suite.

shM:zj\|+s_BVGrG1\.1   input as z
         s_BVGrG1      generate ['aA', 'Aa', 'bB', 'Bb', ..., 'zZ', 'Zz']
        +        \.    add "." to the back of the array
     j\|               insert "|" between every element of the array,
                       forming a new long string, which will be our
                       tokenizer: "aA|Aa|bB|Bb|cC|Cc|...|yY|Yy|zZ|Zz|."
                       the "." at the end is to capture the remaining characters
  :z               1   return all matches of z against that regex
                       this is effectively a tokenizer
 hM                    take the first character of each token
s                      join all the transformed tokens together, and then
                       implicitly print to STDOUT.
  • 24-byte version here.

Leaky Nun

Posted 2016-07-16T12:01:20.127

Reputation: 45 011

3

MATL, 21 bytes

"Kk@k=K@XK=>?4XKx}K&h

Try it online!. Or verify all test cases.

Explanation

This processes each character in a loop. Each iteration compares the current character with the previous character. The latter is stored in clipboard K, which is initialized to 4 by default.

The current character is compared with the previous one twice: first case-insensitively and then case sensitively. The current character should be deleted if and only if the first comparison was true and the second was false. Note that, since clipboard K initially contains 4, the first character will always be kept.

If the current character is deleted clipboard K should be reset (so the next character will be kept); otherwise it should be updated with the current character.

"            % Take input string implicitly. For each char from this string:
  K          %   Push previous char, initiallized to number 4
  k          %   Convert to lower case. For numbers it rounds down
  @          %   Push current char
  k          %   Convert to lower case. 
  =          %   True if current and previous chars are (case-insensitively) equal
  K          %   Push previous char
  @          %   Push current char
  XK         %   Update clipboard K with current char. This doesn't affect the stack
  =          %   True if current and previous chars are (case-sensitively) equal
  >?         %   If first comparison was true and second was false
    4XKx     %     Reset clipboard K to 4
  }          %   Else
    K        %     Push previous char
    &h       %     Concatenate horizontally to gradually build the output string

Luis Mendo

Posted 2016-07-16T12:01:20.127

Reputation: 87 464

3

C, 129 127 125 107 106 105 93 92 90 88 85 78 bytes

c;d;f(char*s){for(;putchar(c=*s);)s+=isalpha(c)*(d=*++s)&&(!((c^d)&95)&&c^d);}

A C port of my C# answer. My C may be a bit bad. I don't use the language a lot anymore. Any help is welcome!

  • 1 byte saved thanks to Lowjacker's trick: a!=b = a^b
  • 1 byte saved thanks to Walpen's trick: a&&b = a*b
  • 12 bytes saved by Lynn's trick and inspired here by TùxCräftîñg
  • 1 byte saved thanks to Joey Adams's trick and inspired here by orlp : Moving variable to global
  • 2 bytes saved by SEJPM by resolving my (c|32)==(d|32) bitwise problem
  • 5 bytes saved by Pietu1998

Code:

/*
 * f: writes the string s to stdout, dropping the second character of every
 * pair "letter followed by the same letter in the opposite case".
 *
 * c and d are implicit-int globals holding the current and the following
 * character; f itself has an implicit int return type and returns no value.
 */
c;d;f(char*s) {
    /*
     * putchar() returns the character it wrote, so the loop stops once the
     * terminating NUL has been read into c (that NUL is written as well).
     */
    for(;putchar(c=*s);)
        /*
         * ++s steps to the following character, which is stored in d.
         * The right-hand side is 1 only if all of these hold, else 0:
         *   isalpha(c)*(d=...)  c is a letter and d is not the NUL
         *   !((c^d)&95)         c and d agree on every bit selected by the
         *                       mask 95 (0x5F), i.e. all of the low seven
         *                       bits except 0x20, the ASCII case bit
         *   c^d                 c and d are not identical
         * so s moves one extra position past an opposite-case duplicate.
         *
         * NOTE(review): s is modified by both `+=` and `++` within a single
         * expression; the result relies on the compiler's evaluation order
         * and looks like undefined behaviour per the C standard -- confirm
         * before reusing this outside of code golf.
         */
        s+=isalpha(c)*(d=*++s)&&(!((c^d)&95)&&c^d);
}

Try it online!

aloisdg moving to codidact.com

Posted 2016-07-16T12:01:20.127

Reputation: 1 767

1I think you can increment the pointer to save some bytes. I have found this (untested): f(char*s){while(*s) {char c=*s,d=s+1;putchar(c);s+=isalpha(c)&&d&&((c|32)==(d|32)&&c!=d);}} – TuxCrafting – 2016-07-16T16:46:59.780

@TùxCräftîñg I forgot about this one. I corrected your proposition based on Lynn answer. Thank you for the help! – aloisdg moving to codidact.com – 2016-07-16T18:31:51.900

1I think you can change s+++1 to ++s. – PurkkaKoodari – 2016-07-17T09:00:20.900

@Pietu1998 Indeed I can! – aloisdg moving to codidact.com – 2016-07-17T11:48:03.597

1c and d will always be printable ASCII, so 95 should work in place of ~32. Also, I think c;d;f(char*s){for(;*s;){putchar(c=*s);s+=isalpha(c)*(d=*(++s))&&(!((c^d)&95)&&c^d);}} would work (but untested). – PurkkaKoodari – 2016-07-17T15:13:38.853

@Pietu1998 Neat! – aloisdg moving to codidact.com – 2016-07-17T16:54:45.340

3

JavaScript (ES6), 71 68 bytes

s=>s.replace(/./g,c=>l=c!=l&&c>'0'&&parseInt(c+l,36)%37<1?'':c,l='')

Explanation:

s=>s.replace(/./g,c=>   Loop over each character in the string
 l=                     Save result for next loop
  c!=l&&                Check whether characters differ
  c>'0'&&               Check minimum character code
  parseInt(c+l,36)%37<1 Check if characters have same value
  ?'':c,                If so then delete this character
 l='')                  Initial empty previous character

Given c>'0', the only way for parseInt(c+l,36) to be a multiple of 37 is for both c and l to have the same value (they can't have zero value because we excluded space and zero, and if they have no value then the expression will evaluate to NaN<1 which is false), which means they must be the same letter. However, we know that they're not the same letter case-sensitively, so they must be the same case-insensitively.

Note that this algorithm only works if I check every character; if I try to simplify it by matching on letters it will then fail on things like "a+A".

Edit: Saved 3 bytes thanks to @edc65.

Neil

Posted 2016-07-16T12:01:20.127

Reputation: 95 035

Use replace instead of map. 68. But I'm too lazy to figure out how to put '`' inside a comment (nice trick mod 37) – edc65 – 2016-07-16T21:30:08.357

@edc65 I don't need any `s if I use replace. (I only had them before to try to be consistent, but then I golfed my answer while editing it for submission and became inconsistent again. Sigh...) – Neil – 2016-07-16T23:36:05.010

2

JavaScript (ES6), 61 bytes, 57 Bytes

s=>s.replace(/./g,c=>l=c!=l&/(.)\1/i.test(l+c)?'':c,l='')

Thanks to Neil for saving 5 bytes.

c.P.u1

Posted 2016-07-16T12:01:20.127

Reputation: 1 049

1Bad news: you miscounted, and that's actually 62 bytes. Good news: I can save you five bytes! s=>s.replace(/./g,c=>l=c!=l&/(.)\1/i.test(l+c)?'':c,l='') – Neil – 2016-07-18T12:45:20.887

Oh, sorry, I counted using "code".length, didn't realise there was an escape sequence in there. Thanks – c.P.u1 – 2016-07-18T17:19:13.880

Try using (code).toString().length. – Neil – 2016-07-18T17:21:17.700

Yeah, or (code+"").length – c.P.u1 – 2016-07-18T17:23:22.927

2

Java 7, 66 bytes

String c(String i){return i.replaceAll("(.)(?!\\1)(?i)\\1","$1");}

Used Martin Ender's regex from his Retina answer.

Ungolfed & test code:

Try it here.

/**
 * Ungolfed demo of the regex solution: prints the result of {@link #c(String)}
 * for each of the challenge's example inputs, one per line.
 */
class Main{
  /**
   * Removes the second character of every pair in which a character is
   * directly followed by a case-insensitive match of itself that is not
   * the identical character.
   *
   * @param i the input string
   * @return the input with each such pair reduced to its first character
   */
  static String c(String i){
    // (.)     capture one character
    // (?!\1)  the next character must not be identical to it
    // (?i)\1  ...but must equal it when case is ignored
    final String switchedCasePair = "(.)(?!\\1)(?i)\\1";
    final String firstOfPair = "$1";
    return i.replaceAll(switchedCasePair, firstOfPair);
  }

  public static void main(String[] a){
    final String[] samples = {
      "bBaAdD",
      "NniIcCeE",
      "Tt eE Ss tT",
      "sS Ee tT",
      "1!1!1sStT!",
      "nN00bB",
      "(eE.gG.)",
      "Hh3lL|@!",
      "Aaa",
      "aaaaa",
      "aaAaa"
    };
    for (String sample : samples) {
      System.out.println(c(sample));
    }
  }
}

Output:

bad
Nice
T e S t
s E t
1!1!1st!
n00b
(e.g.)
H3l|@!
Aa
aaaaa
aaaa

Kevin Cruijssen

Posted 2016-07-16T12:01:20.127

Reputation: 67 575

1

JavaScript (ES6) 70

(s,p,q)=>s.replace(/./g,c=>p!=c&q===(d=parseInt(c,36))?q='':(q=d,p=c))

/**
 * Removes the second letter of every "duplicated & switched case" pair,
 * scanning left to right (e.g. "bBaAdD" -> "bad", "aaAaa" -> "aaaa").
 *
 * Declared with `const` and using local state so that it also runs in
 * strict mode / ES modules, where the implicit globals of the golfed
 * version (`f`, `d`) throw a ReferenceError.
 *
 * @param {string} s - Input string.
 * @param {string} [p] - Optional seed for the previously kept character;
 *   normally omitted (kept only for signature compatibility).
 * @param {number|string} [q] - Optional seed for the previous base-36
 *   value; normally omitted (kept only for signature compatibility).
 * @returns {string} The input with switched-case duplicates removed.
 */
const f = (s, p, q) => {
  let prevChar = p;
  let prevValue = q;
  return s.replace(/./g, (c) => {
    // In base 36 a letter has the same value in either case (a/A = 10 ...
    // z/Z = 35), digits are 0..9 and every other character yields NaN.
    const value = parseInt(c, 36);
    // A different character with the same base-36 value can only be the
    // same letter in the other case. NaN never equals NaN, so non-letters
    // are never removed.
    if (prevChar !== c && prevValue === value) {
      // '' can never be strictly equal to a number (not even 0), so the
      // character after a removed one is always kept.
      prevValue = '';
      return '';
    }
    prevValue = value;
    prevChar = c;
    return c;
  });
};

// Test harness: logs OK/KO followed by the input and the actual result.
const cases = [
  ['bBaAdD', 'bad'],
  ['NniIcCeE', 'Nice'],
  ['Tt eE Ss tT', 'T e S t'],
  ['sS Ee tT', 's E t'],
  ['1!1!1sStT!', '1!1!1st!'],
  ['nN00bB', 'n00b'],
  ['(eE.gG.)', '(e.g.)'],
  ['Hh3lL|@!', 'H3l|@!'],
  ['Aaa', 'Aa'],
  ['aaaaa', 'aaaaa'],
  ['aaAaa', 'aaaa'],
];

cases.forEach(([input, expected]) => {
  const result = f(input);
  console.log(expected === result ? 'OK' : 'KO', input, result);
});

edc65

Posted 2016-07-16T12:01:20.127

Reputation: 31 086

OK, I'll bite. Why the ===? – Neil – 2016-07-18T12:31:30.443

0=="" but not 0==="" @Neil – edc65 – 2016-07-18T12:48:08.637

1

Convex, 18 bytes

V±V.+'.+'|*\ô{0=}%

Try it online!

Similar approach as @Leaky Nun's Pyth answer. It constructs the array ["aA" "bB" ... "zZ" "Aa" "Bb" ... "Zz" '.], joins by the '| character, and tests the input based on that regex. Then it takes the first character of each match.

GamrCorps

Posted 2016-07-16T12:01:20.127

Reputation: 7 058