Print Real Invisible Text

15

3

My previous challenge, Print invisible text, was quite popular, likely due to how trivial it is.

However, the more observant among you may have noticed that you're not really printing invisible text, because it's impossible to read what was input given only the output.

So I figured how about a real invisible text challenge.

Given a string consisting of only printable ASCII characters (0x20-0x7E), convert each character to a distinct Unicode character (in UTF-8 encoding) that is not one of the 95 printable ASCII characters (any UTF-8 character outside of the 0x20-0x7E range).

Input

A string of printable ASCII characters, either as a string or character array/list

Output

The input string with each character replaced with a distinct non-printable character. Each given character must have a corresponding non-printable character that is not used as the substitute for any other character.

If you are unable to print non-printable characters, you can output the character values instead.

For example if your code replaces all lowercase a's with 0x01, you may not use 0x01 as the substitution for any other characters.

Your code must also be deterministic. This means that if, given the string Hello, all lowercase l's are replaced with 0x03, your code must also replace all lowercase l's with 0x03 given any other string.

Testcases

It's somewhat difficult to write testcases for this challenge, so I'll simply show the output as a list of hexcodes

input     -> output
"Hello"   -> [0x01, 0x02, 0x03, 0x03, 0x04]
"Hi!"     -> [0x01, 0x05, 0x06]
""        -> []
"   H   " -> [0x07, 0x07, 0x07, 0x01, 0x07, 0x07, 0x07]
"yo! "    -> [0x08, 0x04, 0x06, 0x07]

Leaderboard

Here is a Stack Snippet to generate both a regular leaderboard and an overview of winners by language.

/* Configuration */

var QUESTION_ID = 123447; // Obtain this from the url
// It will be like https://XYZ.stackexchange.com/questions/QUESTION_ID/... on any question page
// Values sent as the `filter` query parameter of the answers and comments
// requests respectively.
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";
var COMMENT_FILTER = "!)Q2B_A2kjfAiU78X(md6BoYk";
// Only comments whose owner has this user ID are attached to answers.
var OVERRIDE_USER = 48934; // This should be the user ID of the challenge author.

/* App */

// Shared state for the paged loading of answers and their comments:
//   answers      - every answer item received so far, across all pages
//   answers_hash - lookup from answer ID to answer item, rebuilt for each answers page
//   answer_ids   - IDs of the answers on the current answers page
//   answer_page  - number of the next answers page to request
//   more_answers - set to false once a response reports no further answer pages
//   comment_page - number of the next comments page to request for the current answers page
var answers = [], answers_hash, answer_ids, answer_page = 1, more_answers = true, comment_page;

/**
 * Builds the API URL for one page of answers to the configured question.
 *
 * @param {number} index Page number to request.
 * @returns {string} URL asking for 100 answers per page, newest first,
 *     on the codegolf site, using ANSWER_FILTER.
 */
function answersUrl(index) {
  var endpoint = "https://api.stackexchange.com/2.2/questions/" + QUESTION_ID + "/answers";
  var query = [
    "page=" + index,
    "pagesize=100",
    "order=desc",
    "sort=creation",
    "site=codegolf",
    "filter=" + ANSWER_FILTER
  ];
  return endpoint + "?" + query.join("&");
}

/**
 * Builds the API URL for one page of comments on a batch of answers.
 *
 * @param {number} index Page number to request.
 * @param {Array} answers Answer IDs; they are joined with ';' into the URL path.
 * @returns {string} URL asking for 100 comments per page, newest first,
 *     on the codegolf site, using COMMENT_FILTER.
 */
function commentUrl(index, answers) {
  var idList = answers.join(';');
  var endpoint = "https://api.stackexchange.com/2.2/answers/" + idList + "/comments";
  var query = "?page=" + index +
              "&pagesize=100&order=desc&sort=creation&site=codegolf" +
              "&filter=" + COMMENT_FILTER;
  return endpoint + query;
}

/**
 * Requests the next page of answers and, once it arrives, starts loading the
 * comments for that page.
 *
 * Reads and advances answer_page. The response handler appends the received
 * items to answers, rebuilds answers_hash and answer_ids for this page, clears
 * more_answers when the response reports no further pages, resets
 * comment_page to 1 and calls getComments().
 */
function getAnswers() {
  function onAnswersLoaded(data) {
    var pageItems = data.items;
    answers.push.apply(answers, pageItems);

    // The lookup tables only ever describe the page that was just received.
    answers_hash = [];
    answer_ids = [];
    for (var i = 0; i < pageItems.length; i++) {
      var item = pageItems[i];
      item.comments = [];
      // The first run of digits in the share link is used as the answer ID.
      var id = Number(item.share_link.match(/\d+/));
      answer_ids.push(id);
      answers_hash[id] = item;
    }

    if (!data.has_more) {
      more_answers = false;
    }
    comment_page = 1;
    getComments();
  }

  var request = {
    url: answersUrl(answer_page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: onAnswersLoaded
  };
  jQuery.ajax(request);
}

/**
 * Requests the next page of comments for the answers listed in answer_ids.
 *
 * Reads and advances comment_page. The response handler attaches every
 * comment owned by OVERRIDE_USER to its answer (looked up by post_id in
 * answers_hash), then continues with the next comments page, the next answers
 * page, or process(), in that order of preference.
 */
function getComments() {
  var requestUrl = commentUrl(comment_page++, answer_ids);

  function onCommentsLoaded(data) {
    var received = data.items;
    for (var i = 0; i < received.length; i++) {
      var comment = received[i];
      if (comment.owner.user_id !== OVERRIDE_USER) {
        continue;
      }
      answers_hash[comment.post_id].comments.push(comment);
    }

    if (data.has_more) {
      getComments();
      return;
    }
    if (more_answers) {
      getAnswers();
      return;
    }
    process();
  }

  jQuery.ajax({
    url: requestUrl,
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: onCommentsLoaded
  });
}

// Start loading: request the first page of answers.
getAnswers();

// Matches a heading (<hN>...</hN>) of the form "<language>, ... <number> ...".
// Capture 1 is the text before the first comma (no surrounding whitespace at
// its end); capture 2 is a run of digits. The lookahead only accepts a number
// that is followed, up to the closing heading tag, by no further digits other
// than those inside tags or inside <s>...</s>.
var SCORE_REG = /<h\d>\s*([^\n,]*[^\s,]),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;

// Matches a leading "Override header:" prefix, case-insensitively, including
// the whitespace that follows it.
var OVERRIDE_REG = /^Override\s*header:\s*/i;

/**
 * Returns the display name of an answer's owner.
 *
 * @param {Object} a Answer item with an `owner` object.
 * @returns {*} The value of `a.owner.display_name`.
 */
function getAuthorName(a) {
  var owner = a.owner;
  return owner.display_name;
}

/**
 * Replaces the first "{{KEY}}" placeholder in a template with a value.
 *
 * A replacer function is used instead of a replacement string so that "$"
 * sequences in the value (such as "$&" or "$$") are inserted literally rather
 * than being expanded as replacement patterns.
 *
 * @param {string} template Markup containing the placeholder.
 * @param {string} key Placeholder name without the surrounding braces.
 * @param {*} value Value to insert; it is converted to a string.
 * @returns {string} The template with the first occurrence substituted.
 */
function fillPlaceholder(template, key, value) {
  return template.replace("{{" + key + "}}", function () {
    return String(value);
  });
}

/**
 * Renders the leaderboard and the winners-by-language table from `answers`.
 *
 * For each answer the heading is parsed with SCORE_REG; a comment starting
 * with "Override header:" replaces the answer body for parsing purposes.
 * Answers whose heading does not match are ignored. Matching answers are
 * sorted by ascending size, rendered with the "#answer-template" markup and
 * appended to "#answers"; equal sizes share a place. The first (smallest)
 * entry per language is rendered with "#language-template" and appended to
 * "#languages", ordered by the language markup.
 */
function process() {
  var valid = [];

  answers.forEach(function (a) {
    var body = a.body;
    // If several comments match, the last one processed determines the heading.
    a.comments.forEach(function (c) {
      if (OVERRIDE_REG.test(c.body))
        body = '<h1>' + c.body.replace(OVERRIDE_REG, '') + '</h1>';
    });

    var match = body.match(SCORE_REG);
    if (match)
      valid.push({
        user: getAuthorName(a),
        size: +match[2],
        language: match[1],
        link: a.share_link
      });
  });

  valid.sort(function (a, b) {
    return a.size - b.size;
  });

  // A prototype-less dictionary: language titles such as "constructor" or
  // "toString" must not resolve to members inherited from Object.prototype.
  var languages = Object.create(null);
  var place = 1;
  var lastSize = null;
  var lastPlace = 1;
  valid.forEach(function (a) {
    // Entries with the same size share the place of the first of them.
    if (a.size !== lastSize)
      lastPlace = place;
    lastSize = a.size;
    ++place;

    var answer = jQuery("#answer-template").html();
    answer = fillPlaceholder(answer, "PLACE", lastPlace + ".");
    answer = fillPlaceholder(answer, "NAME", a.user);
    answer = fillPlaceholder(answer, "LANGUAGE", a.language);
    answer = fillPlaceholder(answer, "SIZE", a.size);
    answer = fillPlaceholder(answer, "LINK", a.link);
    answer = jQuery(answer);
    jQuery("#answers").append(answer);

    // Languages written as links are keyed by their text content.
    var lang = a.language;
    if (/<a/.test(lang)) lang = jQuery(lang).text();

    // valid is sorted by size, so the first entry seen per language wins.
    languages[lang] = languages[lang] || {lang: a.language, user: a.user, size: a.size, link: a.link};
  });

  var langs = Object.keys(languages).map(function (key) {
    return languages[key];
  });

  langs.sort(function (a, b) {
    if (a.lang > b.lang) return 1;
    if (a.lang < b.lang) return -1;
    return 0;
  });

  for (var i = 0; i < langs.length; ++i)
  {
    var entry = langs[i];
    var language = jQuery("#language-template").html();
    language = fillPlaceholder(language, "LANGUAGE", entry.lang);
    language = fillPlaceholder(language, "NAME", entry.user);
    language = fillPlaceholder(language, "SIZE", entry.size);
    language = fillPlaceholder(language, "LINK", entry.link);
    language = jQuery(language);
    jQuery("#languages").append(language);
  }

}
body { text-align: left !important}

#answer-list {
  padding: 10px;
  width: 290px;
  float: left;
}

#language-list {
  padding: 10px;
  width: 290px;
  float: left;
}

table thead {
  font-weight: bold;
}

table td {
  padding: 5px;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<div id="answer-list">
  <h2>Leaderboard</h2>
  <table class="answer-list">
    <thead>
      <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr>
    </thead>
    <tbody id="answers">

    </tbody>
  </table>
</div>
<div id="language-list">
  <h2>Winners by Language</h2>
  <table class="language-list">
    <thead>
      <tr><td>Language</td><td>User</td><td>Score</td></tr>
    </thead>
    <tbody id="languages">

    </tbody>
  </table>
</div>
<table style="display: none">
  <tbody id="answer-template">
    <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>
<table style="display: none">
  <tbody id="language-template">
    <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>

Skidsdev

Posted 2017-05-30T09:36:55.267

Reputation: 9 656

9There is no such thing as a UTF-8 character: UTF-8 is a serialisation of Unicode, not a charset. And if "non-printable" makes sense in a Unicode context, it's certainly much narrower than "all but 95 of the hundreds of thousands of allocated codepoints". – Peter Taylor – 2017-05-30T09:49:34.903

11@PeterTaylor Given we're talking in terms of character hexcodes here, I assumed it was clear that when I say UTF-8 character I mean a Unicode character in the UTF-8 encoding. ASCII is also an encoding standard, not a charset, yet people have no issue with the term "ASCII character". Anyway, I'll edit the wording to clarify. – Skidsdev – 2017-05-30T09:53:36.637

3Any particular reason UTF-8 is required? – CalculatorFeline – 2017-05-30T19:30:36.133

Can the input be empty? – Dennis – 2017-05-31T17:18:17.783

@Dennis yes, see the third testcase – Skidsdev – 2017-06-01T07:45:08.433

1+1 for "If you are unable to print non-printable characters" – Robert Fraser – 2017-06-02T06:17:45.307

Answers

14

Whitespace, 39 36 bytes


  
   	  
 
  
 	
	 				  
	
  
 


Try it online!

Explanation

nssn  ; label 'loop'
ssstssn ; push 4 to use as a multiplication operand
sns   ; dup 4 to use as a heap address
sns   ; dup 4 to use as a heap address
tnts  ; getchar and store at address 4
ttt   ; retrieve the value at address 4
tssn  ; multiply the character value by 4
tnss  ; putchar output the new character
nsnn  ; jmp 'loop'

Originally I wanted to multiply by -0 or -1 since they would be the shortest digits possible to declare in Whitespace. TIO does not differentiate between -0 and +0 so that's out. Unfortunately while the tutorial/spec is ambiguous about how to interpret a negative value as a char TIO (rightly) throws an error about the invalid argument so that also isn't an option.

The next shortest working constant is 4 so we end up performing the same basic approach as the Powershell/Pyth solutions.


Whitespace, 56 53 bytes - maps to tag characters


  
   			                 
 
  
 	
	 				   	
  
 


Try it online!

Explanation

Effectively the same approach as the previous version except this uses 0xE0000 as the constant and adds instead of multiplies. This maps the visible ASCII characters to the corresponding Unicode Tag Character (the range U+E0000-U+E007F). The intended use for this range was to indicate the language of the text in a plaintext file however that use is discouraged. This code will output valid labels if you prefix strings with a 0x01 character.

The Unicode Standard says that characters in this range have no visible rendering so I feel this meets the spirit of the challenge better than the previous approach.

Ephphatha

Posted 2017-05-30T09:36:55.267

Reputation: 581

6Using an invisible program to print invisible text. I like. – Mark – 2017-05-30T20:58:25.907

13

Jelly, 3 bytes

O²Ọ

Try it online!

Squares each codepoint.

Erik the Outgolfer

Posted 2017-05-30T09:36:55.267

Reputation: 38 134

Very similar to the Japt solution I came up with, except I cubed the codepoint instead of squaring it - you OK with me posting it? – Shaggy – 2017-05-30T09:53:10.643

2@Shaggy Japt ≠ Jelly, so you can post it. – Erik the Outgolfer – 2017-05-30T09:54:39.643

Clever solution, didn't think of squaring. – Skidsdev – 2017-05-30T09:55:31.553

1Grand, just wanted to be sure, lest you thought I was just ripping off your solution :) – Shaggy – 2017-05-30T10:00:05.330

4@Shaggy there's no guideline against porting a solution to another language; if there were, it would be terrible, since there is usually one optimal algorithm which gives optimal implementations in most languages, and nobody but the first poster would be able to prove they came up with the algorithm on their own. Of course, if you do port someone else's solution, it's only fair play to mention their answer. – Aaron – 2017-05-30T15:47:05.823

@Holger Let us continue this discussion in chat.

– Erik the Outgolfer – 2017-05-31T09:49:36.563

7

Japt, 5 2 bytes

cp

Try it online


Explanation

     :Implicit input of string U
c    :Map over the character codes of the string.
p    :Square them.
     :Implicit output of result.

Shaggy

Posted 2017-05-30T09:36:55.267

Reputation: 24 623

Hmm, after closer examination, it seems that 126 ** 3 == 2000376, which isn't in the range [0..1114111]. You can still square though :) That's because UTF-8 ends there, while UTF-16 continues. – Erik the Outgolfer – 2017-05-30T10:00:33.377

1@EriktheOutgolfer Ehm. UTF-8 has exactly the same range as UTF-16 by definition. (In theory, UTF-8 could store higher codepoints, using 5 or 6 bytes per codepoint, but that is illegal.) – Mr Lister – 2017-05-31T06:49:49.517

5

Brain-Flak, 33 bytes

Includes +1 for -c

{((({}){}){}<>)<>}<>{({}<>)<>}<>

Try it online!

# For each character
{
  # Multiply by 4 and move to the other stack
  ((({}){}){}<>)

# End loop
<>}

# For each character on the other stack
<>{

  # Copy it back (reverse the stack)
  ({}<>)<>

# End loop
}<>

Riley

Posted 2017-05-30T09:36:55.267

Reputation: 11 345

4

Braingolf v0.6, 17 bytes

VRl1-M[R.*>v]R&@

Squares each char value then prints.

-1 byte thanks to Erik the Outgolfer's squaring solution

Braingolf v0.7, 6 bytes [non-competing]

{.*}&@

Also squares each value then prints, but v0.7 has the "foreach" {} loop

Skidsdev

Posted 2017-05-30T09:36:55.267

Reputation: 9 656

4

Mathematica, 48 bytes

FromCharacterCode[4Mod[Hash/@Characters@#,978]]&

Explanation:

                             Characters@#      & - Convert string to array of characters
                       Hash/@                    - Hash them all using default hash
                   Mod[                  ,978]   - apply a modulus which uniquely transforms each potential character's hash into a number
                  4                              - times by 4 to move values out of 0x20-0x7E.
FromCharacterCode[                            ]  - Convert array of numbers back to string

Interestingly of the two modulus options less than 1000 which changed the 96 characters into 96 unique values with modulus 978 the lowest two values were 7 then 33. Luckily times by 4 converts this to 28 and 132 which both just fall outside the visible range. If I used the other modulus of 784 then I needed to multiply by 18 to move the numbers outside the range.

Test case.

Note: extra backslashes in there as escape characters for " and \. Also character 0x7E doesn't seem to want to paste correctly.

Input: "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"

Output: enter image description here

The use of Hash came about as ToCharacterCode is really long. However hashing it was nearly as expensive. The easy mathematica way to do this would be 49 bytes:

FromCharacterCode[4ToCharacterCode@Characters@#]&

Ian Miller

Posted 2017-05-30T09:36:55.267

Reputation: 727

2

PowerShell, 32 31 Bytes

-1 Thanks to neil, 99+ to 4*

[char[]]"$args"|%{[char](4*$_)}

Multiplies each character code by 4 and prints the resulting character.

colsw

Posted 2017-05-30T09:36:55.267

Reputation: 3 195

Out of interest, would multiplying by a small number (4-9) work? – Neil – 2017-05-30T10:23:26.017

the lowest printable times 4 is greater than the largest printable, that's -1 - thanks! – colsw – 2017-05-30T10:47:08.430

2

CJam, 8 5 bytes

l95f+

Try it online!

Adds 95 to each codepoint.

Erik the Outgolfer

Posted 2017-05-30T09:36:55.267

Reputation: 38 134

Isn't there a way to multiply or square instead? – NieDzejkob – 2017-05-31T14:49:51.250

@NieDzejkob No, this is based on the fact that Character + Long = chr(ord(Character) + Long). Character * Long = [Character] * Long. Character # Long = error (# is exponentiation in CJam). – Erik the Outgolfer – 2017-05-31T14:54:12.690

2

Pyth, 6 bytes

smC*4C

Try it here.

Multiplies each codepoint by 4.

Erik the Outgolfer

Posted 2017-05-30T09:36:55.267

Reputation: 38 134

1

05AB1E, 4 bytes

ÇnçJ

Try it online!

Squares each codepoint.

Erik the Outgolfer

Posted 2017-05-30T09:36:55.267

Reputation: 38 134

1

CJam, 4 bytes

lWf^

XORs each code point with -1. CJam's characters are 16 bits wide, so this maps code point n to code point 65535 - n.

Try it online!

Dennis

Posted 2017-05-30T09:36:55.267

Reputation: 196 637

1

Decimal, 37 bytes

91D31030030012255D412D590D543D301291D

Explanation:

91D             ; declare jump 1
    310         ; push user input to stack
    300         ; duplicate
    300         ; duplicate
    12255D      ; push EOF to stack
    412D        ; compare top two values, pop, push result
    5 90D 5     ; if result == true, quit
    43D         ; multiply top two values, pop, push result
    301         ; print
    2           ; pop
91D             ; goto jump 1

Try it online!

MD XF

Posted 2017-05-30T09:36:55.267

Reputation: 11 605

so jumping to zero (90D) terminates? – Skidsdev – 2017-06-02T07:49:41.610

@Mayube Exactly. – MD XF – 2017-06-02T14:20:12.040

1

Google Sheets, 68 bytes

=ArrayFormula(Join("",IfError(Char(Code(Mid(A1,Row(A:A),1))^2),"")))

I wanted to post this to show how awkward it is to do some basic functions in Sheets. Do you want to do an operation to every character in a cell and output the concatenated result? You're at 42 bytes before you even act on those characters.

=ArrayFormula(Join("",Mid(A1,Row(A:A),1)))

Otherwise, this is the same as other solutions: square the code point of each character.

Engineer Toast

Posted 2017-05-30T09:36:55.267

Reputation: 5 769

0

Python 3, 40 38 bytes

print([chr(ord(x)*9)for x in input()])

Try It Online!

Alex

Posted 2017-05-30T09:36:55.267

Reputation: 417

0

C, 42 bytes

c;f(){while(~(c=getchar()))putwchar(c*c);}

Assumes a UTF-8 locale. Input is squared.

Try it online!

MD XF

Posted 2017-05-30T09:36:55.267

Reputation: 11 605

0

Clean, 25 bytes

import StdEnv

map((+)'~')

A partial function literal.

Try it online!

Realistically:

f s = {# c+'~' \\ c <-: s}

Unboxed array comprehension over an unboxed array of the same type ({#Char} -> {#Char}). Clean will be able to determine that the uniqueness is transferrable (!u:{#Char} -> u:{#Char}), and that the size is the same as the input size. This means that if you pass a *String, every character will be destructively updated with the corresponding one in the output, meaning no memory allocation or movement is done and the graph node is fully reused.

Try it online!

Οurous

Posted 2017-05-30T09:36:55.267

Reputation: 7 916