Reindent Java/C/C++/etc. code

10

2

Write a program that adds or removes whitespace to format code nicely. Rules for what the code should look like when you're done:

  • No line should contain more than one of { and }.
  • A { should always be the last thing on a line.
  • A } should always be the only thing on a line (besides whitespace that comes before it).
  • The amount of whitespace in front of each line should be a fixed multiple of the current nesting count. (You can use any amount of indentation you want, as long as it doesn't change.)
  • No whitespace should be inserted or removed that doesn't contribute to satisfying one of these rules.

The nesting count for the first line is 0. The nesting count of any other line is the nesting count of the previous line, plus one if the previous line contains a {, minus one if the current line contains a }.

{ and } inside string literals and comments don't count in the above rules. A string literal is text enclosed in single or double quotes, where single or double quotes with an odd number of backslashes immediately before them aren't interpreted as the end of the string literal. A comment is text enclosed in /* and */, or text going from // to the end of the line. In a line with multiple comment start markers, only the first one counts. Comments are not parsed inside string literals.

Examples

    main() {printf("Hello!"); // I don't care about the world...
        }

becomes:

main() {
    printf("Hello!"); // I don't care about the world...
}




int main(){
    puts("a");
        puts("b");
}

becomes:

int main(){
    puts("a");
    puts("b");
}


main()
{ printf("{"); /* }
} */
     printf("}//}"); ///*
    }

becomes:

main()
{
    printf("{"); /* }
} */
    printf("}//}"); ///*
}


int test[] = {1, 2, 3};

becomes:

int test[] = {
    1, 2, 3
}
;

tbodt

Posted 2017-06-27T20:18:43.413

Reputation: 2 176

Comments are not for extended discussion; this conversation has been moved to chat.

– Dennis – 2017-06-30T06:02:07.953

Answers

6

JavaScript (ES6), 376 373 378 393 bytes

This was... quite the challenge...

// Golfed formatter: takes the source text s and returns it re-indented with
// one space per nesting level. Every working variable (F, a, b, l, e, r, q)
// is an implicit global, so this only runs in non-strict code.
let f =

// F(i, l = 1): shift every stored excerpt index that is >= i + e by l, then
// add l to the running offset e. Returns an array, so it is always truthy.
s=>{F=(i,l=1)=>[a=a.map(([q,y])=>[q,y<i+e?y:y+l]),e+=l]
// Repeatedly replace the first comment or string literal with "0", pushing
// [text, index] onto the front of a, until a pass makes no replacement.
// The loop body also resets the nesting level l to 0.
for(a=b=[];b;s=s[r='replace'](/\/\*(?:(?!\/\*|\*\/)[^])*\*\/|\/\/.+|("|')(?:\\.|(?!\1).)*\1/,(q,_,y)=>a.unshift(b=[q,y])&0))b=l=0
// Pass 1 (e = 1): insert a newline after any { or } that is followed by
// another character on the line, and after any character that directly
// precedes a }.
// Pass 2 (e = 0): replace each line's leading whitespace with l spaces,
// where a line containing { indents at the current level and then raises it,
// and a line containing } lowers the level first. F keeps the excerpt
// indices in sync with both passes.
s=s[e=1,r](/[{}](?=.)|.(?=})/g,(c,i)=>F(i)&&c+`
`)[e=0,r](/.+/g,(x,i)=>x[r](/^\s*/,y=>" ".repeat(q=/{/.test(x)?l++:/}/.test(x)?--l:l,F(i,q-y.length))))
// Put each recorded excerpt back in place of its one-character placeholder
// (-~y is y + 1).
a.map(([q,y])=>s=s.slice(0,y)+q+s.slice(-~y))
return s}
<textarea rows=7 cols=50 oninput=O.innerText=f(value)>    main() {printf("Hello!"); // I don't care about the world...
    }</textarea>
<pre id=O></pre>

Let me know if there's anything wrong with the output, though I couldn't find anything more.

Ungolfed version

I golfed the code as I wrote it, so let's see how this goes...

/**
 * Re-indents brace-delimited source code (C, C++, Java, ...).
 *
 * The text is processed in four steps:
 *   1. every comment and string literal is swapped for a one-character
 *      placeholder ("0") so that braces inside them are ignored;
 *   2. newlines are inserted so that each brace ends up on the right line;
 *   3. the leading whitespace of every line is replaced by one tab per
 *      nesting level;
 *   4. the comments and string literals are put back.
 *
 * Known quirk (kept on purpose): a `}` that is already on its own line but
 * preceded by indentation gets an extra whitespace-only line before it.
 *
 * @param {string} code - The source text to format.
 * @returns {string} The formatted source text, indented with tabs.
 */
function prettify(code) {
  // Matches one block comment (with no "/*" inside it), one line comment,
  // or one single-/double-quoted string literal with backslash escapes.
  const excerptPattern = /\/\*(?:(?!\/\*|\*\/)[^])*\*\/|\/\/.+|("|')(?:\\.|(?!\1).)*\1/;

  // [text, index] pairs, most recently extracted first.
  let excerpts = [];
  // Net number of characters added to the text during the current pass.
  let extras = 0;
  // Current nesting depth while indenting.
  let level = 0;

  // Extract comments and strings for easy parsing.
  // The pattern has no `g` flag: one excerpt is replaced per iteration and
  // the search restarts from the beginning, which is what lets comments that
  // contain another "/*" be peeled off from the inside out.
  for (let done = false; !done; ) {
    done = true;
    code = code.replace(excerptPattern, (excerpt, _, index) => {
      excerpts.unshift([excerpt, index]);
      done = false;
      return "0";
    });
  }

  // Update the indices of the excerpts when the code is changed.
  // `index` is a position in the text as it was at the start of the current
  // pass; `extras` converts it to a position in the text as edited so far.
  const adjustIndices = (index, length) => {
    const threshold = index + extras;
    excerpts = excerpts.map(([string, oldIndex]) => [
      string,
      oldIndex >= threshold ? oldIndex + length : oldIndex,
    ]);
    extras += length;
  };

  // Add extra newlines where necessary:
  // - After a { or a } if there isn't one already
  // - Before a } if there isn't one already (note: already-indented '}'s will get an extra newline)
  extras = 0;
  code = code.replace(/[{}](?=.)|.(?=})/g, (char, index) => {
    adjustIndices(index + 1, 1);
    return char + "\n";
  });

  // Remove extra whitespace at the beginning of each line,
  // and at the same time apply the necessary number of tabs.
  extras = 0;
  level = 0;
  code = code.replace(/.+/g, (line, index) =>
    line.replace(/^\s*/, (spaces) => {
      let tabs;
      if (line.includes("{")) {
        // The opening line itself stays at the outer level.
        tabs = level;
        level += 1;
      } else if (line.includes("}")) {
        // Clamp at zero so an unmatched '}' (e.g. half-typed input) does not
        // make String.prototype.repeat throw a RangeError.
        level = Math.max(level - 1, 0);
        tabs = level;
      } else {
        tabs = level;
      }

      adjustIndices(index, tabs - spaces.length);
      return "\t".repeat(tabs);
    })
  );

  // Add back in the excerpts. They are ordered right-to-left, so re-inserting
  // one never shifts the position of those still to come.
  // The loop variables are declared here: assigning to undeclared names would
  // leak globals in sloppy mode and throw in strict mode / ES modules.
  for (const [excerpt, index] of excerpts) {
    code = code.slice(0, index) + excerpt + code.slice(index + 1);
  }

  return code;
}
<textarea rows = 7 cols = 50 oninput = "O.innerText = prettify(this.value)">    main() {printf("Hello!"); // I don't care about the world...
    }</textarea>
<pre id=O></pre>

ETHproductions

Posted 2017-06-27T20:18:43.413

Reputation: 47 880

looks good to me – tbodt – 2017-06-27T22:49:55.990

I golfed the code as I wrote it that means you're a true golfer... – Erik the Outgolfer – 2017-06-28T08:53:41.300

4

JavaScript (ES6), 260 259 bytes

Parses the input character by character. Uses 4-space indentation.

// Golfed single-pass formatter: every character x of s is replaced by the
// text to emit for it, using 4 spaces per nesting level. The state variables
// d, i, l, c and e are implicit globals, reset to 0 by the extra third
// argument to replace. The line breaks inside the backticks are literal
// newline characters and are part of the code.
s=>s.replace(/[^]/g,(x,n)=>(p=s[n-1],a=!l&!c&!e,l|x!='/'?a&x=='*'&p=='/'?c=x:!c&!e&x=='"'?(l^=1,x):x==`
`?(i=e=0,x):a&x=='}'?d--&&i?`
`+x:i=x:a&x=='{'?s[i=!++d,n+1]==`
`?x:x+`
`:i?x:x==' '?'':' '.repeat(!c*d*4,i=1)+x:p==x?e=x:!e&p=='*'?(c=0,x):x),d=i=l=c=e=0)

This is still a WIP and was basically tested only against the provided examples. If you find any bug, please let me know in the comments.

The state of the parser is fully described by the following variables:

  • d → current nesting depth
  • i → flag telling that we're located 'inside' the code (i.e. after the leading spaces of the line)
  • l → string literal flag
  • c → block comment flag
  • e → line comment flag

Obligatory indented version

// Character-by-character formatter: every character x of s (at index n) is
// replaced by the text to emit for it. The state lives in d, i, l, c and e,
// which are reset to 0 by the extra third argument at the bottom.
s => s.replace(
  /[^]/g,
  (x, n) => (
    // p: the previous input character
    p = s[n - 1],
    // a: 1 when outside string literals, block comments and line comments
    a = !l & !c & !e,
    // First branch: inside a string literal, or x is anything other than '/'
    l | x != '/' ?
      // '*' right after '/' in plain code: enter a block comment
      a & x == '*' & p == '/' ?
        c = x
      :
        // double quote outside comments: toggle the string-literal flag
        !c & !e & x == '"' ?
          (l ^= 1, x)
        :
          // newline: clear the 'inside' flag and the line-comment flag
          x == `\n` ?
            (i = e = 0, x)
          :
            // '}' in plain code: decrement the depth; put a newline before it
            // when the depth was non-zero and i is set, otherwise emit it
            // as-is and set i
            // NOTE(review): neither case adds indentation before the '}'
            a & x == '}' ?
              d-- && i ? `\n` + x : i = x
            :
              // '{' in plain code: increment the depth, set i to !d (false
              // once d is non-zero), and append a newline unless the next
              // input character already is one
              a & x == '{' ?
                s[i = !++d, n + 1] == `\n` ? x : x + `\n`
              :
                // past the leading whitespace: copy the character unchanged
                i ?
                  x
                :
                  // leading spaces are dropped; the first other character is
                  // prefixed with 4 spaces per depth level (none inside a
                  // block comment) and sets i
                  x == ' ' ? '' : ' '.repeat(!c * d * 4, i = 1) + x
    :
      // Second branch: x is '/' outside a string literal
      // a second '/' in a row sets the line-comment flag
      p == x ?
        e = x
      :
        // '/' right after '*' outside a line comment: leave the block comment
        !e & p == '*' ? (c = 0, x) : x
  ),
  d = i = l = c = e = 0
)

Test cases

// Test harness: binds the golfed formatter to f and prints its output for the
// four examples given in the challenge.
let f =

s=>s.replace(/[^]/g,(x,n)=>(p=s[n-1],a=!l&!c&!e,l|x!='/'?a&x=='*'&p=='/'?c=x:!c&!e&x=='"'?(l^=1,x):x==`
`?(i=e=0,x):a&x=='}'?d--&&i?`
`+x:i=x:a&x=='{'?s[i=!++d,n+1]==`
`?x:x+`
`:i?x:x==' '?'':' '.repeat(!c*d*4,i=1)+x:p==x?e=x:!e&p=='*'?(c=0,x):x),d=i=l=c=e=0)

// Example 1: code after '{' on the same line, an over-indented '}' and a line comment.
console.log(f(
  `    main() {printf("Hello!"); // I don't care about the world...\n` +
  `        }`
))

// Example 2: inconsistent indentation inside a block.
console.log(f(
  `int main(){\n` +
  `    puts("a");\n` +
  `        puts("b");\n` +
  `}`
))

// Example 3: braces inside string literals, a block comment and a line comment.
console.log(f(
  `main()\n` +
  `{ printf("{"); /* }\n` +
  `} */\n` +
  `     printf("}//}"); ///*\n` +
  `    }`
))

// Example 4: both braces on a single line.
console.log(f(
  `int test[] = {1, 2, 3};`
))

Arnauld

Posted 2017-06-27T20:18:43.413

Reputation: 111 334