Fix my Basic Orthography

4

Fix my Basic Orthography

While I am excellent at following the English capitalisation rules when writing, when I am typing I have a habit of capitalising almost everything (as you will see from the titles of my challenges). I need a program to fix that, but because I use so many capitals, and capitals are big letters, your program must be as short as possible.

Requirements

  • Your program should take each word, and if it is the first alphanumeric set of characters after a full stop or the beginning of the input, capitalize the first letter. All the rest of the letters until the next full stop will be lowercase. At the full stop, capitalize the first letter and continue.
  • Punctuation must be preserved.
  • In a sentence without a full stop at the end, that should be added.
  • Spaces, where missing, should be inserted. A space is 'missing' if there is nothing between a full stop and the character following it.

(Note for americanish speakers: full stop = period = .)

Test Cases

tHIS SenteNce HAS veRy bAd GrammAr. ==> This sentence has very bad grammar.

-----

PuncTuation must-be PReserved. ==> Punctuation must-be preserved.

-----

full StOps ShoulD Be inserted ==> Full stops should be inserted.

-----

MultiPLe sEntEnceS Are possible. thEY Are, yoU KNOW. ==> Multiple sentences are possible. They are, you know.

-----

spaces. are.inserted. ==> Spaces. Are. Inserted.

Rules

  • Your program should take input as an argument or from STDIN (or the closest alternative in your language).
  • Standard loopholes are forbidden.

Scoring

When code-golf is the tag, it's the least bytes to win!

Submissions

To make sure that your answer shows up, please start your answer with a headline, using the following Markdown template:

# Language Name, N bytes

where N is the size of your submission. If you improve your score, you can keep old scores in the headline, by striking them through. For instance:

# Ruby, <s>104</s> <s>101</s> 96 bytes

If you want to include multiple numbers in your header (e.g. because your score is the sum of two files or you want to list interpreter flag penalties separately), make sure that the actual score is the last number in the header:

# Perl, 43 + 2 (-p flag) = 45 bytes

You can also make the language name a link which will then show up in the leaderboard snippet:

# [><>](http://esolangs.org/wiki/Fish), 121 bytes

Leaderboard

/* Configuration */

var QUESTION_ID = 79842; // Obtain this from the url
// It will be like https://XYZ.stackexchange.com/questions/QUESTION_ID/... on any question page
// Value sent as the `filter` query parameter when requesting answers (see answersUrl).
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";
// Value sent as the `filter` query parameter when requesting comments (see commentUrl).
var COMMENT_FILTER = "!)Q2B_A2kjfAiU78X(md6BoYk";
// Only comments whose owner.user_id equals this value are kept by getComments.
var OVERRIDE_USER = 53406; // This should be the user ID of the challenge author.

/* App */

// Shared state for the fetch loop:
//   answers       - every answer object received so far (all pages)
//   answers_hash  - answers of the current page, indexed by numeric answer id
//   answer_ids    - numeric ids of the current page's answers
//   answer_page   - next answers page to request
//   more_answers  - cleared once the API reports no further answer pages
//   comment_page  - next comments page to request for the current answer page
var answers = [], answers_hash, answer_ids, answer_page = 1, more_answers = true, comment_page;

/**
 * Builds the Stack Exchange API URL for one page of this question's answers.
 *
 * @param {number} index - Page number to request.
 * @returns {string} URL for QUESTION_ID's answers, 100 per page, newest first,
 *     using ANSWER_FILTER as the response filter.
 */
function answersUrl(index) {
  var endpoint = "https://api.stackexchange.com/2.2/questions/" + QUESTION_ID + "/answers";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + ANSWER_FILTER;
  return endpoint + query;
}

/**
 * Builds the Stack Exchange API URL for one page of comments on a set of answers.
 *
 * @param {number} index - Page number to request.
 * @param {Array} answers - Answer ids; they are joined with ';' in the URL path.
 * @returns {string} URL for the comments, 100 per page, newest first,
 *     using COMMENT_FILTER as the response filter.
 */
function commentUrl(index, answers) {
  var idList = answers.join(';');
  var endpoint = "https://api.stackexchange.com/2.2/answers/" + idList + "/comments";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + COMMENT_FILTER;
  return endpoint + query;
}

/**
 * Requests the next page of answers via JSONP and, once it arrives, records
 * the page and starts fetching the comments for it.
 *
 * Side effects: advances answer_page; on success appends to `answers`,
 * rebuilds `answers_hash` / `answer_ids` for the new page, may clear
 * `more_answers`, resets `comment_page` to 1 and calls getComments().
 */
function getAnswers() {
  jQuery.ajax({
    url: answersUrl(answer_page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: storeAnswersPage
  });

  // Success handler: indexes the received answers by their numeric id.
  function storeAnswersPage(data) {
    var pageItems = data.items;
    Array.prototype.push.apply(answers, pageItems);

    // The lookup tables only ever describe the page that was just loaded.
    answers_hash = [];
    answer_ids = [];
    pageItems.forEach(function (answer) {
      answer.comments = [];
      // The id is the first run of digits in the share link.
      var answerId = +answer.share_link.match(/\d+/);
      answer_ids.push(answerId);
      answers_hash[answerId] = answer;
    });

    if (!data.has_more) {
      more_answers = false;
    }
    comment_page = 1;
    getComments();
  }
}

/**
 * Requests the next page of comments for the answers in `answer_ids` via JSONP.
 *
 * On success, every comment written by OVERRIDE_USER is attached to its answer
 * (answers_hash[post_id].comments). Then the chain continues: more comment
 * pages -> getComments(); otherwise more answer pages -> getAnswers();
 * otherwise everything is loaded -> process().
 *
 * Side effects: advances comment_page and mutates the answers in answers_hash.
 */
function getComments() {
  jQuery.ajax({
    url: commentUrl(comment_page++, answer_ids),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: function (data) {
      data.items.forEach(function(c) {
        // Only the challenge author's comments can carry a header override.
        if (c.owner.user_id === OVERRIDE_USER)
          answers_hash[c.post_id].comments.push(c);
      });
      if (data.has_more) getComments();
      else if (more_answers) getAnswers();
      else process();
    }
  });
}

// Start the fetch chain; getAnswers and getComments call each other until
// all pages are loaded, after which getComments invokes process().
getAnswers();

// Parses an answer heading of the form "<hN>Language, ... SCORE ...</hN>".
// Group 1: the heading text before the first comma (the language; may contain HTML).
// Group 2: a run of digits that is followed, up to the closing </hN>, by no
//          further digits except inside <s>...</s> elements or inside tags —
//          i.e. the last number in the heading that is not struck through.
var SCORE_REG = /<h\d>\s*([^\n,]*[^\s,]),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;

// Matches the "Override header:" prefix (case-insensitive) at the start of a comment body.
var OVERRIDE_REG = /^Override\s*header:\s*/i;

/**
 * Returns the display name of an answer's owner.
 *
 * @param {Object} a - Answer object with an `owner.display_name` property.
 * @returns {string} The owner's display name.
 */
function getAuthorName(a) {
  var owner = a.owner;
  return owner.display_name;
}

/**
 * Builds both leaderboard tables from the collected `answers`.
 *
 * Steps:
 *  1. For each answer, take its body (or an "Override header:" comment wrapped
 *     in <h1>) and extract language and score with SCORE_REG; answers without
 *     a matching heading are skipped.
 *  2. Sort the valid entries by ascending size and append one row per entry to
 *     #answers, giving entries with equal size the same place number.
 *  3. Keep the first (smallest) entry seen per language, sort those by
 *     language string and append one row each to #languages.
 *
 * Reads the row markup from #answer-template / #language-template.
 */
function process() {
  // Replaces the first "{{KEY}}" placeholder with `value`. A replacer function
  // is used instead of a replacement string so that "$&", "$'", "$1", ... inside
  // a user name or language header are inserted literally instead of being
  // interpreted as String.prototype.replace substitution patterns.
  function fill(template, key, value) {
    return template.replace("{{" + key + "}}", function () {
      return value;
    });
  }

  var valid = [];

  answers.forEach(function(a) {
    var body = a.body;
    // A matching override comment replaces the body that gets parsed.
    a.comments.forEach(function(c) {
      if(OVERRIDE_REG.test(c.body))
        body = '<h1>' + c.body.replace(OVERRIDE_REG, '') + '</h1>';
    });

    var match = body.match(SCORE_REG);
    if (match)
      valid.push({
        user: getAuthorName(a),
        size: +match[2],
        language: match[1],
        link: a.share_link,
      });
  });

  valid.sort(function (a, b) {
    return a.size - b.size;
  });

  var languages = {};
  var place = 1;
  var lastSize = null;
  var lastPlace = 1;
  valid.forEach(function (a) {
    // Entries tied on size share the place of the first entry of the tie.
    if (a.size !== lastSize)
      lastPlace = place;
    lastSize = a.size;
    ++place;

    var answer = jQuery("#answer-template").html();
    answer = fill(answer, "PLACE", lastPlace + ".");
    answer = fill(answer, "NAME", a.user);
    answer = fill(answer, "LANGUAGE", a.language);
    answer = fill(answer, "SIZE", a.size);
    answer = fill(answer, "LINK", a.link);
    jQuery("#answers").append(jQuery(answer));

    // Languages written as links are keyed by their visible text.
    var lang = a.language;
    if (/<a/.test(lang)) lang = jQuery(lang).text();

    // `valid` is sorted, so the first entry per language is its best score.
    languages[lang] = languages[lang] || {lang: a.language, user: a.user, size: a.size, link: a.link};
  });

  var langs = [];
  for (var key in languages)
    if (languages.hasOwnProperty(key))
      langs.push(languages[key]);

  langs.sort(function (a, b) {
    if (a.lang > b.lang) return 1;
    if (a.lang < b.lang) return -1;
    return 0;
  });

  for (var i = 0; i < langs.length; ++i)
  {
    var entry = langs[i];
    var language = jQuery("#language-template").html();
    language = fill(language, "LANGUAGE", entry.lang);
    language = fill(language, "NAME", entry.user);
    language = fill(language, "SIZE", entry.size);
    language = fill(language, "LINK", entry.link);
    jQuery("#languages").append(jQuery(language));
  }

}
/* Leaderboard snippet styles. */

/* Force left-aligned text; !important gives this declaration priority over normal rules. */
body { text-align: left !important}

/* The two list containers use the same fixed width and both float left. */
#answer-list {
  padding: 10px;
  width: 290px;
  float: left;
}

#language-list {
  padding: 10px;
  width: 290px;
  float: left;
}

/* Header rows are rendered bold. */
table thead {
  font-weight: bold;
}

table td {
  padding: 5px;
}
<!-- jQuery is used by the leaderboard script (jQuery.ajax and jQuery(selector) calls). -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<!-- Overall ranking; process() appends one row per valid answer to #answers. -->
<div id="answer-list">
  <h2>Leaderboard</h2>
  <table class="answer-list">
    <thead>
      <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr>
    </thead>
    <tbody id="answers">

    </tbody>
  </table>
</div>
<!-- Best entry per language; process() appends one row per language to #languages. -->
<div id="language-list">
  <h2>Winners by Language</h2>
  <table class="language-list">
    <thead>
      <tr><td>Language</td><td>User</td><td>Score</td></tr>
    </thead>
    <tbody id="languages">

    </tbody>
  </table>
</div>
<!-- Hidden row template for #answers; process() reads its inner HTML and fills the {{...}} placeholders. -->
<table style="display: none">
  <tbody id="answer-template">
    <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>
<!-- Hidden row template for #languages; filled the same way. -->
<table style="display: none">
  <tbody id="language-template">
    <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>

George Gibson

Posted 2016-05-13T06:43:57.023

Reputation: 2 369

9"and capitals are big letters, your program must be as short as possible." ....... – Bálint – 2016-05-13T06:56:09.270

1We only get 1 sentence? – Bálint – 2016-05-13T06:57:45.933

@Bálint Not strictly so, I will add another test case. – George Gibson – 2016-05-13T07:10:19.340

Do we need to insert a space after full stop? – Bálint – 2016-05-13T07:17:11.500

@Bálint Only if there is a space in the original. – George Gibson – 2016-05-13T07:19:26.103

7Why exclude Matlab (I know I can use Octave), Mathematica and the like if it can be shown that they work? It makes sense in CnR challenges, but I don't like it in code golf. Sorry, but -1 until that rule is changed. – Stewie Griffin – 2016-05-13T07:35:13.220

What about exclamation mark? (!) – Bálint – 2016-05-13T08:06:13.567

1The extra requirements (or even the restating of them) for function vs program are unnecessary, we have defaults for that. – Rɪᴋᴇʀ – 2016-05-13T14:53:54.347

@StewieGriffin Updated. – George Gibson – 2016-05-13T15:14:37.760

@EᴀsᴛᴇʀʟʏIʀᴋ Removed. – George Gibson – 2016-05-13T15:14:57.980

@GeorgeGibson seriously, the extra rules like you must provide a link and your program must not write anything to STDERR are really not important. Actually, the whole "Rules" section could go. – Rɪᴋᴇʀ – 2016-05-13T15:15:52.337

@EᴀsᴛᴇʀʟʏIʀᴋ OK, removed. – George Gibson – 2016-05-13T15:17:24.243

What should I change to make this challenge more clear? – George Gibson – 2016-05-13T15:22:31.973

1) Based on your test cases, it seems like there's more than just letters, spaces and . in the inputs (e.g. - and ,). In that case, is something like Sentence one. "sentence two valid input? (i.e. first non-space after full stop is not a letter) 2) Regarding "if it is directly after a full stop or the beginning of the input, capitalize the first letter", I'm assuming you don't mean directly directly, but possibly with spaces in between the full stop and the next letter. What do we do with inputs with no spaces in between, or more than one space (or are they invalid inputs?) – Sp3000 – 2016-05-13T15:28:48.003

    @Sp3000 Any better? – George Gibson – 2016-05-13T15:35:06.223

    Looks better - just for completeness, I guess if a space is missing then it gets added? e.g. a.b becomes A. B. – Sp3000 – 2016-05-13T15:57:11.047

    (Also, I think these changes invalidate some existing answers, so it might be good to let people know that the challenge has been clarified via comments) – Sp3000 – 2016-05-13T15:58:27.240

    @Sp3000 Good point, yes. – George Gibson – 2016-05-13T15:58:34.977

    @muddyfish Thanks, the test case has been fixed (force of habit). Also, I have. See requirement 2: Punctuation must be preserved. – George Gibson – 2016-05-13T16:42:05.197

    @muddyfish No, the speech marks are punctuation, and are left untouched. The space would be put after them. – George Gibson – 2016-05-13T16:45:26.057

    @muddyfish Give me a minute... – George Gibson – 2016-05-13T16:49:34.670

    @muddyfish OK, I'll change the spec as you have a good point. – George Gibson – 2016-05-13T16:52:40.260

    3None of this question has anything to do with grammar. You also need to fix your basic vocabulary: the word for how things should be written is orthography. – Peter Taylor – 2016-05-13T17:49:28.393

    1Spaces, when missing ... when are spaces missing? It seems that there are many rules that can be inferred only looking at the test cases – edc65 – 2016-05-13T18:09:28.847

    1"In a sentence without a full stop at the end, that should be added." How do we know it's the end of a sentence? – msh210 – 2016-05-13T20:05:50.107

    1Why did you collapse the duplicate space before "Really" in one of your test cases? I see nothing in the rules about that. – msh210 – 2016-05-13T20:06:50.823

    1"Your program should take each word, and if it is directly after a full stop or the beginning of the input, capitalize the first letter." So A sentence. #2 sentence. should remain as is but A sentence. #2sentence. should become A sentence. #2Sentence., right? – msh210 – 2016-05-13T20:09:52.530

    @msh210 Sorry, what I meant was 'at the end of your input'. – George Gibson – 2016-05-14T06:49:47.737

    @edc65 Fixed. Are there any others? – George Gibson – 2016-05-14T06:51:54.780

    @msh210 I will remove that test case, I thought I'd added that in the rules, but it seems otherwise. – George Gibson – 2016-05-14T06:53:27.380

    @msh210 No, I'd just worded that badly. I will edit. – George Gibson – 2016-05-14T06:55:24.203

    @msh210 Done. Hopefully your points have now been resolved. – George Gibson – 2016-05-14T07:01:04.667

    1Ouput for 'A '? (A,space,space) 'A.' or 'A(space) .' or 'A(space)(space).'? – edc65 – 2016-05-14T07:31:23.250

    @edc65 The output would be A(space).. – George Gibson – 2016-05-14T07:36:58.143

    Answers

    2

    Pyke, 16 bytes

    Q\.cFl8l5DI\.+(J
    

    Try it here!

    (15 bytes after a bugfix)

    \.cFl8l5DI\.+(J
    

    Explanation:

    Q\.c             -  input().split(".")
        F            - for i in ^:
         l8l5        -  i = i.lstrip().capitalise()
              I      -  if i:
               \.+   -   i += "."
                   J - " ".join(^)
    

    Blue

    Posted 2016-05-13T06:43:57.023

    Reputation: 26 661

    4

    Vim, 40 39 keystrokes

    VgugUl:s/\. *\(\a\)/\. \U\1/g<cr>:s/\.$<cr>A.
    

    This assumes that input will not take multiple lines.

    Explanation:

    Vgu                                            "Make the whole line lowercase
       gUl                                         "Make the first character uppercase
           :s/\. *\(\a\)/\. \U\1/g<cr>             "Replace a (.) followed by any number of
                                                   "spaces, followed by a character, with
                                                   "with a (.), one space, and that character
                                                   "In lowercase
                                      :s/\.$<cr>   "Remove a dot, if it's at the end of this line
                                                A. "Add a dot to the end of this line.
    

    James

    Posted 2016-05-13T06:43:57.023

    Reputation: 54 537

    2

    JavaScript (ES6), 76 95 bytes

    f=
    s=>s.replace(/ +/g,' ').replace(/(\w)([^.]*)\.?/g,(_,c,r)=>c.toUpperCase()+r.toLowerCase()+'.')
    input{width:100%;
    <input oninput=o.value=f(this.value)><input id=o>

    Neil

    Posted 2016-05-13T06:43:57.023

    Reputation: 95 035

    @Bálint I can do better than that: my function can be tested on this site! – Neil – 2016-05-13T08:20:13.063

    Edit the byte count – Bálint – 2016-05-13T08:26:59.050

    Please update so multiple spaces are replaced with one. – George Gibson – 2016-05-13T16:04:06.707

    2

    Python 3, 86 80 bytes

    print('. '.join(x.strip().capitalize() for x in input().split('.') if x!='')+'.')
    

    This works for all of the current test cases, but not for much beyond that.

    Thanks @muddyfish for 6 bytes.

    https://repl.it/CQ7h/0

    atlasologist

    Posted 2016-05-13T06:43:57.023

    Reputation: 2 945

    1print('. '.join(x.strip().capitalize() for x in input().split('.')if x!='')+'.') is shorter – Blue – 2016-05-13T17:24:55.467

    print('. '.join(x.strip().capitalize()for x in input().split('.')if x)+'.') is shorter still and print'. '.join(x.strip().capitalize()for x in input().split('.')if x)+'.' if you're willing to go python 2 – Blue – 2016-05-13T17:37:57.100

    I think you can also save 4 more by dropping the !=''. – RootTwo – 2016-05-13T21:36:52.887

    Isn't there an useless whitespace before the if statement? – Yytsi – 2016-05-21T17:24:02.397

    1

    PHP, 123 103 102 95 92 bytes

    <?foreach(explode('.',trim(fgets(STDIN)))as$b)echo!$b?!1:ucfirst(strtolower(trim($b))).'. ';
    

    Takes a newline-terminated line from STDIN and prints the fixed version to STDOUT.

    Élektra

    Posted 2016-05-13T06:43:57.023

    Reputation: 284

    1

    Retina, 41 bytes

    S-`\. *
    %(T`L`l
    T`l`L`^.
    )`(?<!\.)$
    .
    ¶
    <space>
    

    Try it online!

    Test suite.

    Leaky Nun

    Posted 2016-05-13T06:43:57.023

    Reputation: 45 011

    Please update to comply with the revised rules. – George Gibson – 2016-05-13T16:02:45.373

    1

    Ruby, 50 bytes

    Basic split/map/join solution.

    ->s{s.split(?.).map{|e|e.strip.capitalize+?.}*' '}
    

    Value Ink

    Posted 2016-05-13T06:43:57.023

    Reputation: 10 608

    1

    V, 25 bytes (non-competing)

    guGgUló® *¨á©/® Õ±
    ó®$
    A.
    

    Try it online!

    This isn't super creative, since it's just a direct port of my vim answer. However, it's a lot shorter, so I'm happy with it.

    Due to an oversight, this is one or two bytes longer than it needs to be. That should be fixed soon.

    James

    Posted 2016-05-13T06:43:57.023

    Reputation: 54 537

    0

    Perl 5, 42 bytes

    say map{$_=ucfirst lc.'. '}split/\.\s*/,<>
    

    Try it online!

    Xcali

    Posted 2016-05-13T06:43:57.023

    Reputation: 7 671

    0

    Perl, 40 + 12 (flags) = 52 bytes

    #!perl -paF\.\s*
    map{s/\s*$/. /;$\.=ucfirst lc}@F}{chop$\
    

    Using:

    echo "foo.baz. bar" | perl -paF'\.\s*' -e 'map{s/\s*$/. /;$\.=ucfirst lc}@F}{chop$\'
    

    Ungolfed:

    while (<>) {
        my @F = split(/\.\s*/, $_);
    # code above added by -paF\.\s*
        # $\ has undef (or '')
        for (@F) {
            $_ =~ s/\s*$/. /;
            $\ .= ucfirst(lc($_));
        }
    } {
        chop($\);
    # code below added by -p
        print;  # prints $_ (undef here) and $\
    }
    

    Ideone.

    Denis Ibaev

    Posted 2016-05-13T06:43:57.023

    Reputation: 876

    0

    Lua, 149 141 bytes

    u=function(l)return l:upper()end print(io.read():lower():gsub("%s+"," "):gsub("%.([^%s])",". %1"):gsub("^(%w)",u):gsub("(%.[^%w]*%w)",u).."")
    

    Lua lacks some common features of regular expressions, and so it had to get a little sloppier than I would have liked. I might try to fix it up later though.

    Blab

    Posted 2016-05-13T06:43:57.023

    Reputation: 451

    0

    Q 78 Bytes

    (76 if anonymous function)

    f:{(". "/:{{@[(" "=*x)_x;0;"c"$-32+"i"$]}'"."\:_-1_x}x),$["."=u:*|x;u;u,"."]}
    

    Test

    f"tHIS SenteNce HAS veRy bAd GrammAr."                  /This sentence has very bad grammar.
    f"PuncTuation must-be PReserved."                       /Punctuation must-be preserved.
    f"full StOps ShoulD Be inserted"                        /Full stops should be inserted.
    f"MultiPLe sEntEnceS Are possible. thEY Are, yoU KNOW." /Multiple sentences are possible. They are, you know.
    f"spaces. are.inserted."                                /Spaces. Are. Inserted.
    

    Q has lower and upper functions (conversion to lowercase and to uppercase), but more primitive sub-language k (concise, used in the example) only has lower (operator _), so part of the text (12 bytes "c"$-32+"i"$) implements upper.

    Split the original string using ". " as separator, processes each line, and join again

    For each line we discard the first character if it is a blank (join inserts a blank after each .), and capitalise the first character.

    Finally, insert a final "." if last char is not "."

    J. Sendra

    Posted 2016-05-13T06:43:57.023

    Reputation: 396