4
Fix my Basic Orthography
While I am excellent at following the English capitalisation rules when writing, when I am typing I have a habit of capitalising almost everything (as you will see from the titles of my challenges). I need a program to fix that, but because I use so many capitals, and capitals are big letters, your program must be as short as possible.
Requirements
- Your program should take each word, and if it is the first alphanumeric set of characters after a full stop or the beginning of the input, capitalize the first letter. All the rest of the letters until the next full stop will be lowercase. At the full stop, capitalize the first letter and continue.
- Punctuation must be preserved.
- In a sentence without a full stop at the end, that should be added.
- Spaces, where missing, should be inserted. A space is 'missing' if there is nothing between a full stop and the character following it.
(Note for americanish speakers: full stop = period = `.`)
Test Cases
tHIS SenteNce HAS veRy bAd GrammAr. ==> This sentence has very bad grammar.
-----
PuncTuation must-be PReserved. ==> Punctuation must-be preserved.
-----
full StOps ShoulD Be inserted ==> Full stops should be inserted.
-----
MultiPLe sEntEnceS Are possible. thEY Are, yoU KNOW. ==> Multiple sentences are possible. They are, you know.
-----
spaces. are.inserted. ==> Spaces. Are. Inserted.
Rules
- Your program should take input as an argument or from STDIN (or the closest alternative in your language).
- Standard loopholes are forbidden.
Scoring
When code-golf is the tag, it's the least bytes to win!
Submissions
To make sure that your answer shows up, please start your answer with a headline, using the following Markdown template:
# Language Name, N bytes
where `N` is the size of your submission. If you improve your score, you can keep old scores in the headline, by striking them through. For instance:
# Ruby, <s>104</s> <s>101</s> 96 bytes
If you want to include multiple numbers in your header (e.g. because your score is the sum of two files or you want to list interpreter flag penalties separately), make sure that the actual score is the last number in the header:
# Perl, 43 + 2 (-p flag) = 45 bytes
You can also make the language name a link which will then show up in the leaderboard snippet:
# [><>](http://esolangs.org/wiki/Fish), 121 bytes
Leaderboard
/* Configuration */
// Numeric ID of the question, taken from the question URL. On any question
// page the URL looks like https://XYZ.stackexchange.com/questions/QUESTION_ID/...
var QUESTION_ID = 79842;
// Filter strings appended to the answers and comments API request URLs.
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";
var COMMENT_FILTER = "!)Q2B_A2kjfAiU78X(md6BoYk";
// User ID of the challenge author; only comments owned by this user are kept.
var OVERRIDE_USER = 53406;

/* App */
// Every answer fetched so far, across all pages.
var answers = [];
// Answers of the page currently being processed, indexed by answer ID.
var answers_hash;
// Answer IDs of the page currently being processed.
var answer_ids;
// Number of the next answers page to request.
var answer_page = 1;
// Cleared once the API reports that there are no further answer pages.
var more_answers = true;
// Number of the next comments page to request for the current answers page.
var comment_page;
/**
 * Builds the API request URL for one page of this question's answers.
 *
 * @param {number} index - Value placed in the `page` query parameter.
 * @returns {string} The request URL, including QUESTION_ID and ANSWER_FILTER.
 */
function answersUrl(index) {
  var endpoint = "https://api.stackexchange.com/2.2/questions/" + QUESTION_ID + "/answers";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + ANSWER_FILTER;
  return endpoint + query;
}
/**
 * Builds the API request URL for one page of comments on a set of answers.
 *
 * @param {number} index - Value placed in the `page` query parameter.
 * @param {Array} answers - Answer IDs; they are joined with ';' in the path.
 * @returns {string} The request URL, including COMMENT_FILTER.
 */
function commentUrl(index, answers) {
  var idList = answers.join(';');
  var endpoint = "https://api.stackexchange.com/2.2/answers/" + idList + "/comments";
  var query = "?page=" + index + "&pagesize=100&order=desc&sort=creation&site=codegolf&filter=" + COMMENT_FILTER;
  return endpoint + query;
}
/**
 * Requests the next page of answers and, once it arrives, records it and
 * starts fetching the comments that belong to that page.
 *
 * Reads and advances `answer_page`; the success handler appends to `answers`,
 * rebuilds `answers_hash` / `answer_ids` for the new page, clears
 * `more_answers` when the response has no `has_more`, resets `comment_page`
 * to 1 and calls getComments().
 */
function getAnswers() {
  function onAnswersPage(data) {
    var pageItems = data.items;
    answers.push.apply(answers, pageItems);

    // The per-page lookups are rebuilt from scratch for every page.
    answers_hash = [];
    answer_ids = [];
    pageItems.forEach(function (item) {
      item.comments = [];
      // The first run of digits in the share link is used as the answer ID.
      var answerId = Number(item.share_link.match(/\d+/));
      answer_ids.push(answerId);
      answers_hash[answerId] = item;
    });

    if (!data.has_more) {
      more_answers = false;
    }
    comment_page = 1;
    getComments();
  }

  var request = {
    url: answersUrl(answer_page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: onAnswersPage
  };
  jQuery.ajax(request);
}
/**
 * Requests the next page of comments for the answers listed in `answer_ids`
 * and attaches the challenge author's comments to their answers.
 *
 * Reads and advances `comment_page`. In the success handler, every comment
 * whose owner is OVERRIDE_USER is pushed onto the `comments` array of the
 * answer found in `answers_hash` under the comment's `post_id`. Afterwards the
 * chain continues: more comment pages first, then the next answers page, and
 * finally process() once nothing is left to fetch.
 */
function getComments() {
  jQuery.ajax({
    url: commentUrl(comment_page++, answer_ids),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: function (data) {
      data.items.forEach(function (c) {
        // Comments by anyone other than the challenge author are ignored.
        if (c.owner.user_id === OVERRIDE_USER) {
          answers_hash[c.post_id].comments.push(c);
        }
      });
      if (data.has_more) getComments();
      else if (more_answers) getAnswers();
      else process();
    }
  });
}
// Start the fetch chain by requesting the first page of answers.
getAnswers();
// Parses an answer's header. It must sit inside a heading element ("<h" + digit).
// Capture 1 is the text before the first comma (read as the language by process()).
// Capture 2 is a run of digits (read as the size) that is followed, up to the
// closing heading tag, by no further digits except inside tags or <s>...</s> spans,
// so struck-through old scores are skipped.
var SCORE_REG = /<h\d>\s*([^\n,]*[^\s,]),.*?(\d+)(?=[^\n\d<>]*(?:<(?:s>[^\n<>]*<\/s>|[^\n<>]+>)[^\n\d<>]*)*<\/h\d>)/;
// Matches a leading "Override header:" prefix (case-insensitive, optional whitespace).
var OVERRIDE_REG = /^Override\s*header:\s*/i;
/**
 * Returns the display name of an answer's owner.
 *
 * @param {Object} a - Answer object carrying an `owner` with a `display_name`.
 * @returns {*} The value of `a.owner.display_name`.
 */
function getAuthorName(a) {
  var owner = a.owner;
  return owner.display_name;
}
/**
 * Renders both leaderboard tables from the collected `answers`.
 *
 * Steps:
 *  1. For each answer, take its body (or, if one of its stored comments starts
 *     with the OVERRIDE_REG prefix, an <h1> built from that comment) and parse
 *     it with SCORE_REG. Answers that do not match are skipped.
 *  2. Sort the parsed entries by ascending size and append one row per entry
 *     to #answers; entries of equal size share the same place number.
 *  3. Keep the first (i.e. smallest) entry seen for each language, sort those
 *     by the language string and append one row each to #languages.
 *
 * NOTE(review): values are substituted into the row templates as markup
 * without escaping; presumably the API delivers them HTML-encoded - confirm.
 */
function process() {
  // Substitutes `value` for the first occurrence of `placeholder`. A replacer
  // function is used so that "$" sequences inside the value (such as "$&" or
  // "$1") are inserted literally instead of being treated as replacement
  // patterns by String.prototype.replace.
  function fill(template, placeholder, value) {
    return template.replace(placeholder, function () {
      return String(value);
    });
  }

  var valid = [];
  answers.forEach(function (a) {
    var body = a.body;
    a.comments.forEach(function (c) {
      // An override comment replaces the answer body as the text to parse.
      if (OVERRIDE_REG.test(c.body)) {
        body = '<h1>' + c.body.replace(OVERRIDE_REG, '') + '</h1>';
      }
    });
    var match = body.match(SCORE_REG);
    if (match) {
      valid.push({
        user: getAuthorName(a),
        size: +match[2],
        language: match[1],
        link: a.share_link
      });
    }
  });

  valid.sort(function (a, b) {
    return a.size - b.size;
  });

  // Prototype-less map, so that language names such as "constructor" or
  // "toString" cannot collide with properties inherited from Object.prototype.
  var languages = Object.create(null);
  var place = 1;
  var lastSize = null;
  var lastPlace = 1;
  valid.forEach(function (a) {
    // Entries with the same size as the previous one share its place.
    if (a.size !== lastSize) {
      lastPlace = place;
    }
    lastSize = a.size;
    ++place;

    var answer = jQuery("#answer-template").html();
    answer = fill(answer, "{{PLACE}}", lastPlace + ".");
    answer = fill(answer, "{{NAME}}", a.user);
    answer = fill(answer, "{{LANGUAGE}}", a.language);
    answer = fill(answer, "{{SIZE}}", a.size);
    answer = fill(answer, "{{LINK}}", a.link);
    jQuery("#answers").append(jQuery(answer));

    // Linked language names are keyed by their text so that they share an
    // entry with the unlinked spelling.
    var key = a.language;
    if (/<a/.test(key)) key = jQuery(key).text();
    // `valid` is sorted by size, so the first entry per language is the best.
    if (!languages[key]) {
      languages[key] = {lang: a.language, user: a.user, size: a.size, link: a.link};
    }
  });

  var langs = Object.keys(languages).map(function (key) {
    return languages[key];
  });
  langs.sort(function (a, b) {
    if (a.lang > b.lang) return 1;
    if (a.lang < b.lang) return -1;
    return 0;
  });

  langs.forEach(function (entry) {
    var language = jQuery("#language-template").html();
    language = fill(language, "{{LANGUAGE}}", entry.lang);
    language = fill(language, "{{NAME}}", entry.user);
    language = fill(language, "{{SIZE}}", entry.size);
    language = fill(language, "{{LINK}}", entry.link);
    jQuery("#languages").append(jQuery(language));
  });
}
/* Force left-aligned text, taking precedence over other text-align rules. */
body {
  text-align: left !important;
}

/* Both leaderboard panels use the same fixed-width, left-floated box. */
#answer-list,
#language-list {
  padding: 10px;
  width: 290px;
  float: left;
}

/* Header rows are bold. */
table thead {
  font-weight: bold;
}

/* Cell spacing for every table cell. */
table td {
  padding: 5px;
}
<!-- jQuery: the leaderboard script uses jQuery.ajax and jQuery(...) DOM lookups. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- External stylesheet, loaded through a protocol-relative URL. -->
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<!-- Overall leaderboard: the script appends one row per parsed answer to tbody#answers. -->
<div id="answer-list">
<h2>Leaderboard</h2>
<table class="answer-list">
<thead>
<tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr>
</thead>
<tbody id="answers">
</tbody>
</table>
</div>
<!-- Per-language winners: the script appends one row per language to tbody#languages. -->
<div id="language-list">
<h2>Winners by Language</h2>
<table class="language-list">
<thead>
<tr><td>Language</td><td>User</td><td>Score</td></tr>
</thead>
<tbody id="languages">
</tbody>
</table>
</div>
<!-- Hidden row template for the leaderboard. The script reads the inner HTML of
     #answer-template and substitutes the {{...}} placeholders, so the markup
     inside the tbody must stay exactly as it is. -->
<table style="display: none">
<tbody id="answer-template">
<tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
</tbody>
</table>
<!-- Hidden row template for the per-language table; read from #language-template
     and filled in the same way. -->
<table style="display: none">
<tbody id="language-template">
<tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
</tbody>
</table>
9"and capitals are big letters, your program must be as short as possible." ....... – Bálint – 2016-05-13T06:56:09.270
1We only get 1 sentence? – Bálint – 2016-05-13T06:57:45.933
@Bálint Not strictly so, I will add another test case. – George Gibson – 2016-05-13T07:10:19.340
Do we need to insert a space after full stop? – Bálint – 2016-05-13T07:17:11.500
@Bálint Only if there is a space in the original. – George Gibson – 2016-05-13T07:19:26.103
7Why exclude Matlab (I know I can use Octave), Mathematica and the like if it can be shown that they work? It makes sense in CnR challenges, but I don't like it in code golf. Sorry, but -1 until that rule is changed. – Stewie Griffin – 2016-05-13T07:35:13.220
What about exclamation mark? (!) – Bálint – 2016-05-13T08:06:13.567
1The extra requirements (or even the restating of them) for function vs program are unnecessary, we have defaults for that. – Rɪᴋᴇʀ – 2016-05-13T14:53:54.347
@StewieGriffin Updated. – George Gibson – 2016-05-13T15:14:37.760
@EᴀsᴛᴇʀʟʏIʀᴋ Removed. – George Gibson – 2016-05-13T15:14:57.980
@GeorgeGibson seriously, the extra rules like "you must provide a link" and "your program must not write anything to STDERR" are really not important. Actually, the whole "Rules" section could go. – Rɪᴋᴇʀ – 2016-05-13T15:15:52.337
@EᴀsᴛᴇʀʟʏIʀᴋ OK, removed. – George Gibson – 2016-05-13T15:17:24.243
What should I change to make this challenge more clear? – George Gibson – 2016-05-13T15:22:31.973
1>
.
in the inputs (e.g. `-` and `,`). In that case, is something like `Sentence one. "sentence two` valid input? (i.e. first non-space after full stop is not a letter) 2) Regarding "if it is directly after a full stop or the beginning of the input, capitalize the first letter", I'm assuming you don't mean directly directly, but possibly with spaces in between the full stop and the next letter. What do we do with inputs with no spaces in between, or more than one space (or are they invalid inputs?)
@Sp3000 Any better? – George Gibson – 2016-05-13T15:35:06.223
Looks better - just for completeness, I guess if a space is missing then it gets added? e.g. `a.b` becomes `A. B.` – Sp3000 – 2016-05-13T15:57:11.047
(Also, I think these changes invalidate some existing answers, so it might be good to let people know that the challenge has been clarified via comments) – Sp3000 – 2016-05-13T15:58:27.240
@Sp3000 Good point, yes. – George Gibson – 2016-05-13T15:58:34.977
@muddyfish Thanks, the test case has been fixed (force of habit). Also, I have. See requirement 2: Punctuation must be preserved. – George Gibson – 2016-05-13T16:42:05.197
@muddyfish No, the speech marks are punctuation, and are left untouched. The space would be put after them. – George Gibson – 2016-05-13T16:45:26.057
@muddyfish Give me a minute... – George Gibson – 2016-05-13T16:49:34.670
@muddyfish OK, I'll change the spec as you have a good point. – George Gibson – 2016-05-13T16:52:40.260
3None of this question has anything to do with grammar. You also need to fix your basic vocabulary: the word for how things should be written is orthography. – Peter Taylor – 2016-05-13T17:49:28.393
1"Spaces, when missing ..." When are spaces missing? It seems that there are many rules that can be inferred only by looking at the test cases – edc65 – 2016-05-13T18:09:28.847
1"In a sentence without a full stop at the end, that should be added." How do we know it's the end of a sentence? – msh210 – 2016-05-13T20:05:50.107
1Why did you collapse the duplicate space before "Really" in one of your test cases? I see nothing in the rules about that. – msh210 – 2016-05-13T20:06:50.823
1"Your program should take each word, and if it is directly after a full stop or the beginning of the input, capitalize the first letter." So `A sentence. #2 sentence.` should remain as is but `A sentence. #2sentence.` should become `A sentence. #2Sentence.`, right? – msh210 – 2016-05-13T20:09:52.530
@msh210 Sorry, what I meant was 'at the end of your input'. – George Gibson – 2016-05-14T06:49:47.737
@edc65 Fixed. Are there any others? – George Gibson – 2016-05-14T06:51:54.780
@msh210 I will remove that test case, I thought I'd added that in the rules, but it seems otherwise. – George Gibson – 2016-05-14T06:53:27.380
@msh210 No, I'd just worded that badly. I will edit. – George Gibson – 2016-05-14T06:55:24.203
@msh210 Done. Hopefully your points have now been resolved. – George Gibson – 2016-05-14T07:01:04.667
1Output for 'A '? (A,space,space) 'A.' or 'A(space) .' or 'A(space)(space).'? – edc65 – 2016-05-14T07:31:23.250
@edc65 The output would be `A(space).`. – George Gibson – 2016-05-14T07:36:58.143