Fix my Basic Orthography

Question

4

Fix my Basic Orthography

While I am excellent at following the English capitalisation rules when writing, when I am typing I have a habit of capitalising almost everything (as you will see from the titles of my challenges). I need a program to fix that, but because I use so many capitals, and capitals are big letters, your program must be as short as possible.

Requirements

Your program should take each word, and if it is the first alphanumeric set of characters after a full stop or the beginning of the input, capitalize the first letter. All the rest of the letters until the next full stop will be lowercase. At the full stop, capitalize the first letter and continue.
Punctuation must be preserved.
In a sentence without a full stop at the end, that should be added.
Spaces, where missing, should be inserted. A space is 'missing' if there is nothing between a full stop and the character following it.

(Note for americanish speakers: full stop = period = .)

Test Cases

tHIS SenteNce HAS veRy bAd GrammAr. ==> This sentence has very bad grammar.

-----

PuncTuation must-be PReserved. ==> Punctuation must-be preserved.

-----

full StOps ShoulD Be inserted ==> Full stops should be inserted.

-----

MultiPLe sEntEnceS Are possible. thEY Are, yoU KNOW. ==> Multiple sentences are possible. They are, you know.

-----

spaces. are.inserted. ==> Spaces. Are. Inserted.

Rules

Your program should take input as an argument or from STDIN (or the closest alternative in your language).
Standard loopholes are forbidden.

Scoring

When code-golf is the tag, it's the least bytes to win!

Submissions

To make sure that your answer shows up, please start your answer with a headline, using the following Markdown template:

# Language Name, N bytes

where N is the size of your submission. If you improve your score, you can keep old scores in the headline, by striking them through. For instance:

# Ruby, <s>104</s> <s>101</s> 96 bytes

If you want to include multiple numbers in your header (e.g. because your score is the sum of two files or you want to list interpreter flag penalties separately), make sure that the actual score is the last number in the header:

# Perl, 43 + 2 (-p flag) = 45 bytes

You can also make the language name a link which will then show up in the leaderboard snippet:

# [><>](http://esolangs.org/wiki/Fish), 121 bytes

Leaderboard

/* Configuration */

// Numeric question ID; obtain this from the url.
// It will be like https://XYZ.stackexchange.com/questions/QUESTION_ID/... on any question page
var QUESTION_ID = 79842;

// Stack Exchange API filter strings passed as the `filter` query parameter
// of the answers and comments requests respectively.
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";
var COMMENT_FILTER = "!)Q2B_A2kjfAiU78X(md6BoYk";

// This should be the user ID of the challenge author.
var OVERRIDE_USER = 53406;

/* App */

// Every answer fetched so far, across all pages.
var answers = [];
// Answers of the page currently being processed, keyed by answer id.
var answers_hash;
// Ids of the answers on the page currently being processed.
var answer_ids;
// Next answers page to request (1-based).
var answer_page = 1;
// Cleared once the API reports there are no further answer pages.
var more_answers = true;
// Next comments page to request for the current batch of answers.
var comment_page;

/**
 * Builds the Stack Exchange API URL for one page of this question's answers.
 *
 * @param {number} index - Page number to request.
 * @returns {string} Request URL (100 answers per page, newest first).
 */
function answersUrl(index) {
  var endpoint = "https://api.stackexchange.com/2.2/questions/" + QUESTION_ID + "/answers";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + ANSWER_FILTER;
  return endpoint + query;
}

/**
 * Builds the Stack Exchange API URL for one page of comments on a set of answers.
 *
 * @param {number} index - Page number to request.
 * @param {Array} answers - Answer ids; they are joined with ';' in the URL path.
 * @returns {string} Request URL (100 comments per page, newest first).
 */
function commentUrl(index, answers) {
  var idList = answers.join(';');
  var endpoint = "https://api.stackexchange.com/2.2/answers/" + idList + "/comments";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + COMMENT_FILTER;
  return endpoint + query;
}

/**
 * Requests the next page of answers and, once it arrives, starts loading the
 * comments for that page.
 *
 * Side effects on the shared state: advances answer_page, appends the new
 * answers to `answers`, rebuilds answers_hash / answer_ids for this page only,
 * clears more_answers when the response has no further pages, and resets
 * comment_page to 1 before calling getComments().
 */
function getAnswers() {
  function onPageLoaded(data) {
    var batch = data.items;
    answers.push.apply(answers, batch);

    // The lookup tables only ever describe the page that was just loaded.
    answers_hash = [];
    answer_ids = [];
    batch.forEach(function (item) {
      item.comments = [];
      // The first run of digits in the share link is used as the answer id.
      var answerId = Number(item.share_link.match(/\d+/));
      answer_ids.push(answerId);
      answers_hash[answerId] = item;
    });

    if (!data.has_more) {
      more_answers = false;
    }
    comment_page = 1;
    getComments();
  }

  jQuery.ajax({
    url: answersUrl(answer_page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: onPageLoaded
  });
}

/**
 * Requests the next page of comments for the answers listed in answer_ids and
 * attaches the challenge author's comments to their answers.
 *
 * Only comments whose owner is OVERRIDE_USER are kept; each is pushed onto the
 * `comments` array of the answer found under its post_id in answers_hash.
 * The chain then continues: another comment page if the response reports
 * more, otherwise the next answer page if more_answers is still set,
 * otherwise process().
 */
function getComments() {
  jQuery.ajax({
    url: commentUrl(comment_page++, answer_ids),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: function (data) {
      data.items.forEach(function (c) {
        if (c.owner.user_id === OVERRIDE_USER) {
          // Keep the whole comment object: process() later inspects its body
          // for an "Override header:" prefix.
          answers_hash[c.post_id].comments.push(c);
        }
      });
      if (data.has_more) getComments();
      else if (more_answers) getAnswers();
      else process();
    }
  });
}

// Kick off the request chain: answers, then their comments, then process().
getAnswers();

// Pulls the language and the score out of an answer's header element.
//   group 1: the text right after the opening <hN> tag (leading whitespace
//            skipped) up to the first comma -- used by process() as the language.
//   group 2: a run of digits that is followed, up to the closing </hN>, only by
//            non-digit text, <s>...</s> spans and other tags -- used by
//            process() as the size, so struck-through old scores are skipped.
var SCORE_REG = /<h\d>\s*([^\n,]*[^\s,]),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;

// Matches an "Override header:" prefix (case-insensitive) at the start of a
// comment body; process() strips it and treats the rest as the header text.
var OVERRIDE_REG = /^Override\s*header:\s*/i;

/**
 * Returns the display name of an answer's author.
 *
 * @param {Object} a - Answer object carrying an `owner` with a `display_name`.
 * @returns {string} The owner's display name.
 */
function getAuthorName(a) {
  var owner = a.owner;
  return owner.display_name;
}

/**
 * Renders both leaderboard tables from the fetched answers.
 *
 * For every entry in the shared `answers` list the header is taken from the
 * answer body, unless one of its collected comments matches OVERRIDE_REG, in
 * which case the remainder of that comment (wrapped in <h1>) is used instead.
 * Answers whose header does not match SCORE_REG are left out.
 *
 * Rows are appended to #answers in ascending size order, with equal sizes
 * sharing a place, and to #languages with one row per language (the first,
 * i.e. smallest, entry seen for it) ordered by the language string.
 */
function process() {
  var valid = [];

  answers.forEach(function (answer) {
    // A later matching override comment replaces an earlier one.
    var header = answer.body;
    answer.comments.forEach(function (comment) {
      if (OVERRIDE_REG.test(comment.body)) {
        header = '<h1>' + comment.body.replace(OVERRIDE_REG, '') + '</h1>';
      }
    });

    var parsed = header.match(SCORE_REG);
    if (!parsed) {
      return;
    }
    valid.push({
      user: getAuthorName(answer),
      size: +parsed[2],
      language: parsed[1],
      link: answer.share_link
    });
  });

  valid.sort(function (left, right) {
    return left.size - right.size;
  });

  var languages = {};
  var previousSize = null;
  var sharedPlace = 1;

  valid.forEach(function (entry, index) {
    // Entries with the same size keep the place of the first one in the run.
    if (entry.size !== previousSize) {
      sharedPlace = index + 1;
    }
    previousSize = entry.size;

    var row = jQuery("#answer-template").html();
    row = row.replace("{{PLACE}}", sharedPlace + ".");
    row = row.replace("{{NAME}}", entry.user);
    row = row.replace("{{LANGUAGE}}", entry.language);
    row = row.replace("{{SIZE}}", entry.size);
    row = row.replace("{{LINK}}", entry.link);
    var rowElement = jQuery(row);
    jQuery("#answers").append(rowElement);

    // Linked language names are keyed by their text, not their markup.
    var key = entry.language;
    if (/<a/.test(key)) {
      key = jQuery(key).text();
    }

    // First entry per language wins; the list is already sorted by size.
    languages[key] = languages[key] || {lang: entry.language, user: entry.user, size: entry.size, link: entry.link};
  });

  var winners = Object.keys(languages).map(function (key) {
    return languages[key];
  });

  winners.sort(function (left, right) {
    return left.lang > right.lang ? 1 : left.lang < right.lang ? -1 : 0;
  });

  winners.forEach(function (winner) {
    var row = jQuery("#language-template").html();
    row = row.replace("{{LANGUAGE}}", winner.lang);
    row = row.replace("{{NAME}}", winner.user);
    row = row.replace("{{SIZE}}", winner.size);
    row = row.replace("{{LINK}}", winner.link);
    var rowElement = jQuery(row);
    jQuery("#languages").append(rowElement);
  });

}

/* Force left-aligned text; !important lets this win over other text-align rules. */
body {
  text-align: left !important;
}

/* The two leaderboard panels share one box: fixed width, floated left. */
#answer-list,
#language-list {
  padding: 10px;
  width: 290px;
  float: left;
}

/* Header rows stand out in bold. */
table thead {
  font-weight: bold;
}

/* Uniform cell spacing. */
table td {
  padding: 5px;
}

<!-- jQuery: the leaderboard script relies on jQuery.ajax and jQuery(...) DOM helpers. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- External stylesheet (protocol-relative URL). -->
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<!-- Overall leaderboard; the script appends one row per valid answer to #answers. -->
<div id="answer-list">
  <h2>Leaderboard</h2>
  <table class="answer-list">
    <thead>
      <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr>
    </thead>
    <tbody id="answers">

    </tbody>
  </table>
</div>
<!-- Per-language winners; the script appends one row per language to #languages. -->
<div id="language-list">
  <h2>Winners by Language</h2>
  <table class="language-list">
    <thead>
      <tr><td>Language</td><td>User</td><td>Score</td></tr>
    </thead>
    <tbody id="languages">

    </tbody>
  </table>
</div>
<!-- Hidden row template for #answers; the script reads its HTML and fills in the {{...}} placeholders. -->
<table style="display: none">
  <tbody id="answer-template">
    <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>
<!-- Hidden row template for #languages; filled in the same way. -->
<table style="display: none">
  <tbody id="language-template">
    <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>

George Gibson

Posted 2016-05-13T06:43:57.023

Reputation: 2 369

9"and capitals are big letters, your program must be as short as possible." ....... – Bálint – 2016-05-13T06:56:09.270

1We only get 1 sentence? – Bálint – 2016-05-13T06:57:45.933

@Bálint Not strictly so, I will add another test case. – George Gibson – 2016-05-13T07:10:19.340

Do we need to insert a space after full stop? – Bálint – 2016-05-13T07:17:11.500

@Bálint Only if there is a space in the original. – George Gibson – 2016-05-13T07:19:26.103

7Why exclude Matlab (I know I can use Octave), Mathematica and the like if it can be shown that they work? It makes sense in CnR challenges, but I don't like it in code golf. Sorry, but -1 until that rule is changed. – Stewie Griffin – 2016-05-13T07:35:13.220

What about exclamation mark? (!) – Bálint – 2016-05-13T08:06:13.567

1The extra requirements (or even the restating of them) for function vs program are unnecessary, we have defaults for that. – Rɪᴋᴇʀ – 2016-05-13T14:53:54.347

@StewieGriffin Updated. – George Gibson – 2016-05-13T15:14:37.760

@EᴀsᴛᴇʀʟʏIʀᴋ Removed. – George Gibson – 2016-05-13T15:14:57.980

@GeorgeGibson seriously, the extra rules like you must provide a link and your program must not write anything to STDERR are really not important. Actually, the whole "Rules" section could go. – Rɪᴋᴇʀ – 2016-05-13T15:15:52.337

@EᴀsᴛᴇʀʟʏIʀᴋ OK, removed. – George Gibson – 2016-05-13T15:17:24.243

What should I change to make this challenge more clear? – George Gibson – 2016-05-13T15:22:31.973

1) Based on your test cases, it seems like there's more than just letters, spaces and . in the inputs (e.g. - and ,). In that case, is something like `Sentence one. "sentence two` valid input? (i.e. first non-space after full stop is not a letter) 2) Regarding "if it is directly after a full stop or the beginning of the input, capitalize the first letter", I'm assuming you don't mean directly directly, but possibly with spaces in between the full stop and the next letter. What do we do with inputs with no spaces in between, or more than one space (or are they invalid inputs?) – Sp3000 – 2016-05-13T15:28:48.003

@Sp3000 Any better? – George Gibson – 2016-05-13T15:35:06.223

Looks better - just for completeness, I guess if a space is missing then it gets added? e.g. a.b becomes A. B. – Sp3000 – 2016-05-13T15:57:11.047

(Also, I think these changes invalidate some existing answers, so it might be good to let people know that the challenge has been clarified via comments) – Sp3000 – 2016-05-13T15:58:27.240

@Sp3000 Good point, yes. – George Gibson – 2016-05-13T15:58:34.977

@muddyfish Thanks, the test case has been fixed (force of habit). Also, I have. See requirement 2: Punctuation must be preserved. – George Gibson – 2016-05-13T16:42:05.197

@muddyfish No, the speech marks are punctuation, and are left untouched. The space would be put after them. – George Gibson – 2016-05-13T16:45:26.057

@muddyfish Give me a minute... – George Gibson – 2016-05-13T16:49:34.670

@muddyfish OK, I'll change the spec as you have a good point. – George Gibson – 2016-05-13T16:52:40.260

3None of this question has anything to do with grammar. You also need to fix your basic vocabulary: the word for how things should be written is orthography. – Peter Taylor – 2016-05-13T17:49:28.393

1Spaces, when missing ... when are spaces missing? It seems that there are many rules that can be inferred only looking at the test cases – edc65 – 2016-05-13T18:09:28.847

1"In a sentence without a full stop at the end, that should be added." How do we know it's the end of a sentence? – msh210 – 2016-05-13T20:05:50.107

1Why did you collapse the duplicate space before "Really" in one of your test cases? I see nothing in the rules about that. – msh210 – 2016-05-13T20:06:50.823

1"Your program should take each word, and if it is directly after a full stop or the beginning of the input, capitalize the first letter." So A sentence. #2 sentence. should remain as is but A sentence. #2sentence. should become A sentence. #2Sentence., right? – msh210 – 2016-05-13T20:09:52.530

@msh210 Sorry, what I meant was 'at the end of your input'. – George Gibson – 2016-05-14T06:49:47.737

@edc65 Fixed. Are there any others? – George Gibson – 2016-05-14T06:51:54.780

@msh210 I will remove that test case, I thought I'd added that in the rules, but it seems otherwise. – George Gibson – 2016-05-14T06:53:27.380

@msh210 No, I'd just worded that badly. I will edit. – George Gibson – 2016-05-14T06:55:24.203

@msh210 Done. Hopefully your points have now been resolved. – George Gibson – 2016-05-14T07:01:04.667

1Output for 'A '? (A,space,space) 'A.' or 'A(space) .' or 'A(space)(space).'? – edc65 – 2016-05-14T07:31:23.250

@edc65 The output would be A(space).. – George Gibson – 2016-05-14T07:36:58.143