Find the Formula

7

2

Introduction

Given an ASCII diagram of a chemical, output its chemical formula.

Rules

Your code must not access the internet and may not use a stored library of molecules. You may have a library of elements.

You must verify that all of the elements included in the formula are real; the full list of valid elements can be found here:

If there are invalid elements in the formula, you must output: Invalid elements in diagram.

Order the formulae using the Hill system: carbon then hydrogen then the other elements alphabetically.

A library/text file of elements does not need to be included in the byte count.

Examples

H2O

H-O-H

or

H   H
 \ /
  O

CO2

O=C=O

H4N

  H
  |
H-N-H
  |
  H

Winning

The winning code is the shortest program in bytes

Leaderboard

/* Configuration */

// Numeric id of the question whose answers are listed. It is the QUESTION_ID
// segment of http://XYZ.stackexchange.com/questions/QUESTION_ID/... on any
// question page.
var QUESTION_ID = 52630;
// Stack Exchange API filter string appended to every answers request.
var ANSWER_FILTER = "!t)IWYnsLAZle2tQ3KqrVveCRJfxcRLe";

/* App */

// Answers collected so far across all fetched result pages.
var answers = [];
// Number of the next result page to request.
var page = 1;

/**
 * Builds the Stack Exchange API URL for one page of this question's answers.
 *
 * The URL uses HTTPS: the response is loaded as a JSONP script, and a script
 * fetched over plain HTTP is blocked as mixed content when the embedding page
 * is served over HTTPS.
 *
 * @param {number} index Page number to request (the caller starts at 1).
 * @returns {string} URL for that page: 100 answers per page, ordered by
 *   creation date, descending, on the codegolf site.
 */
function answersUrl(index) {
  return "https://api.stackexchange.com/2.2/questions/" + QUESTION_ID +
    "/answers?page=" + index +
    "&pagesize=100&order=desc&sort=creation&site=codegolf" +
    // Encoded so a filter containing reserved characters cannot break the query string.
    "&filter=" + encodeURIComponent(ANSWER_FILTER);
}

/**
 * Requests the next page of answers via JSONP and appends the returned items
 * to `answers`. While the response reports `has_more`, the following page is
 * requested; once it does not, `process()` is called.
 */
function getAnswers() {
  var request = {
    url: answersUrl(page++),
    method: "get",
    dataType: "jsonp",
    crossDomain: true,
    success: onPageLoaded
  };

  // Collects one page of results, then either continues paging or finishes.
  function onPageLoaded(data) {
    answers.push.apply(answers, data.items);
    if (!data.has_more) {
      process();
      return;
    }
    getAnswers();
  }

  jQuery.ajax(request);
}

// Start fetching answers. The regular expressions below are assigned after
// this call; they are only read by process() and the helpers it uses.
// NOTE(review): this relies on the ajax success callback running after the
// rest of this script has executed — confirm before reordering.
getAnswers();

// Finds the size in a header line: the first run of digits after which the
// rest of the string contains no further digits and no "&", except inside
// "<...>" markup or "<s>...</s>" (struck-through) spans.
var SIZE_REG = /\d+(?=[^\d&]*(?:<(?:s>[^&]*<\/s>|[^&]+>)[^\d&]*)*$)/;
// Matches the first run of digits in a string.
var NUMBER_REG = /\d+/;
// Captures the language name: after any leading "#" characters and
// whitespace, everything up to (not including) the first comma.
var LANGUAGE_REG = /^#*\s*([^,]+)/;

/**
 * Tells whether an answer's markdown opens with a heading.
 *
 * A heading is either a body starting with "#" or a second line starting
 * with "-" or "=" (an underlined title). When the body has at least two
 * lines, the body must also match LANGUAGE_REG.
 *
 * @param {{body_markdown: string}} a Answer object.
 * @returns {number} 1 when the answer qualifies, 0 otherwise.
 */
function shouldHaveHeading(a) {
  var body = a.body_markdown;
  var rows = body.split("\n");
  var startsWithHash = /^#/.test(body) ? 1 : 0;

  // With no second line there is nothing underlined to inspect; the "#" check
  // alone decides and the language check is not applied.
  if (rows.length < 2) {
    return startsWithHash;
  }

  var isUnderlined = ["-", "="].indexOf(rows[1][0]) > -1;
  return (startsWithHash | isUnderlined) & LANGUAGE_REG.test(body);
}

/**
 * Tells whether the first line of an answer's markdown contains a size
 * that SIZE_REG can find.
 *
 * @param {{body_markdown: string}} a Answer object.
 * @returns {number|boolean} 1 when a size is found, 0 when not, and `false`
 *   when the first line cannot be read at all.
 */
function shouldHaveScore(a) {
  try {
    var firstLine = a.body_markdown.split("\n")[0];
    return SIZE_REG.test(firstLine) ? 1 : 0;
  } catch (ex) {
    // Unreadable answers are simply treated as having no score.
    return false;
  }
}

/**
 * Reads the display name of an answer's owner.
 *
 * @param {{owner: {display_name: string}}} a Answer object.
 * @returns {string} The owner's `display_name`.
 */
function getAuthorName(a) {
  var owner = a.owner;
  return owner.display_name;
}

/**
 * Renders the leaderboard from the collected `answers`.
 *
 * Steps:
 *  1. Keep only answers that pass shouldHaveScore and shouldHaveHeading.
 *  2. Sort them by the size found in their first line, smallest first.
 *  3. Append one row per answer to #answers; answers with the same size
 *     share a place number.
 *  4. Append one row per language to #languages, sorted by language name,
 *     using the first (smallest) answer seen for each language.
 *
 * Reassigns the shared `answers` array and writes to the DOM through jQuery.
 *
 * NOTE(review): values are inserted into the row markup without HTML
 * escaping. Confirm whether the API already entity-encodes display names and
 * markdown before adding escaping here, to avoid double-encoding.
 */
function process() {
  // Fills every {{KEY}} placeholder from `values` in a single pass. A
  // replacer function is used so that "$&", "$1" etc. inside a user or
  // language name are inserted literally, and so inserted text is never
  // re-scanned for later placeholders. Unknown placeholders are left as-is.
  function fillTemplate(template, values) {
    return template.replace(/\{\{([A-Z]+)\}\}/g, function (placeholder, key) {
      return Object.prototype.hasOwnProperty.call(values, key)
        ? values[key]
        : placeholder;
    });
  }

  // Numeric size from the answer's first line; Infinity sorts size-less
  // answers last.
  function sizeOf(answer) {
    var found = answer.body_markdown.split("\n")[0].match(SIZE_REG);
    return found ? +found[0] : Infinity;
  }

  answers = answers.filter(shouldHaveScore)
                   .filter(shouldHaveHeading);
  answers.sort(function (a, b) {
    return sizeOf(a) - sizeOf(b);
  });

  // Both templates are loop-invariant, so read them once.
  var answerTemplate = jQuery("#answer-template").html();
  var languageTemplate = jQuery("#language-template").html();

  // Prototype-less map: a language called "constructor" or "toString" must
  // not collide with properties inherited from Object.prototype.
  var languages = Object.create(null);
  var place = 1;
  var lastSize = null;
  var lastPlace = 1;
  answers.forEach(function (a) {
    var headline = a.body_markdown.split("\n")[0];
    var size = (headline.match(SIZE_REG) || [0])[0];
    var language = headline.match(LANGUAGE_REG)[1];
    var user = getAuthorName(a);

    // Equal sizes keep the place of the first answer with that size.
    if (size !== lastSize) {
      lastPlace = place;
    }
    lastSize = size;
    ++place;

    var answerRow = fillTemplate(answerTemplate, {
      PLACE: lastPlace + ".",
      NAME: user,
      LANGUAGE: language,
      SIZE: size,
      LINK: a.share_link
    });
    jQuery("#answers").append(jQuery(answerRow));

    // Answers arrive smallest first, so the first one seen per language wins.
    if (!languages[language]) {
      languages[language] = {lang: language, user: user, size: size, link: a.share_link};
    }
  });

  var langs = Object.keys(languages).map(function (name) {
    return languages[name];
  });

  langs.sort(function (a, b) {
    if (a.lang > b.lang) return 1;
    if (a.lang < b.lang) return -1;
    return 0;
  });

  langs.forEach(function (entry) {
    var languageRow = fillTemplate(languageTemplate, {
      LANGUAGE: entry.lang,
      NAME: entry.user,
      SIZE: entry.size,
      LINK: entry.link
    });
    jQuery("#languages").append(jQuery(languageRow));
  });
}
/* Leaderboard layout: two floated columns, each half of the page width. */
body { text-align: left !important}

#answer-list {
  padding: 10px;
  width: 50%;
  /* Count the padding inside the 50% so both columns fit on one row. */
  box-sizing: border-box;
  float: left;
}

#language-list {
  padding: 10px;
  /* Was "50%px", which is not a valid length, so the declaration was ignored. */
  width: 50%;
  box-sizing: border-box;
  float: left;
}

table thead {
  font-weight: bold;
}

table td {
  padding: 5px;
}
<!-- jQuery (used by the leaderboard script) and the site stylesheet. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/codegolf/all.css?v=83c949450c8b">
<!-- Overall leaderboard; the script appends one row per answer to #answers. -->
<div id="answer-list">
  <h2>Leaderboard</h2>
  <table class="answer-list">
    <thead>
      <tr><td></td><td>Author</td><td>Language</td><td>Size</td></tr>
    </thead>
    <tbody id="answers">

    </tbody>
  </table>
</div>
<!-- Per-language table; the script appends one row per language to #languages. -->
<div id="language-list">
  <h2>Winners by Language</h2>
  <table class="language-list">
    <thead>
      <tr><td>Language</td><td>User</td><td>Score</td></tr>
    </thead>
    <tbody id="languages">

    </tbody>
  </table>
</div>
<!-- Hidden row templates: the script reads the inner HTML of each tbody and
     substitutes the {{...}} placeholders. Keep comments out of the tbody
     elements, since their content is used verbatim. -->
<table style="display: none">
  <tbody id="answer-template">
    <tr><td>{{PLACE}}</td><td>{{NAME}}</td><td>{{LANGUAGE}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>
<table style="display: none">
  <tbody id="language-template">
    <tr><td>{{LANGUAGE}}</td><td>{{NAME}}</td><td>{{SIZE}}</td><td><a href="{{LINK}}">Link</a></td></tr>
  </tbody>
</table>

Beta Decay

Posted 2015-07-05T00:22:09.913

Reputation: 21 478

@ThomasKwa Carbon then hydrogen then alphabetical – Beta Decay – 2015-07-05T00:34:16.940

Is the library of elements allowed to be in any order (could we have it in Hill system order)? – lirtosiast – 2015-07-05T00:39:37.757

3Are your examples listed as output followed by corresponding input? I can follow it but listing them that way around creates a nagging doubt about whether I have understood the question correctly. – trichoplax – 2015-07-05T00:57:42.490

@ThomasKwa are you asking whether H2C=CH2 should be handled as an input or an output? – trichoplax – 2015-07-05T00:58:56.807

Do we need to handle inputs like H2C=CH2? – lirtosiast – 2015-07-05T01:11:04.140

3 1. Does the library of elements count for the score? 2. The Hill system uses alphabetical order (no special case for H) when there's no C. Do we use the Hill system or the order you described? – Dennis – 2015-07-05T01:44:36.073

    Do we take the periodic table in the question as definitive? It has a minor quirk in that it lists Fl and Lv instead of Uuq and Uuh even though some elements before these are listed in the Uu? form. – Level River St – 2015-07-05T05:55:36.370

    @steveverrill Yes, use the periodic table given – Beta Decay – 2015-07-05T07:09:55.527

    @Dennis 1. No 2. Yes you use my specified order – Beta Decay – 2015-07-05T07:10:35.770

    @ThomasKwa No you do not – Beta Decay – 2015-07-05T07:12:22.573

    Answers

    1

    PHP, 205 bytes

    function($f)use($l){for($a=preg_split('#(?=[A-Z])#',eregi_replace('[^a-z]','',$f));++$i<count($a);)if($l[$a[$i]]++===null)die('Invalid elements in diagram');foreach($l as$k=>$v)echo$v?$k.($v>1?$v:''):'';};
    

    The array of elements is stored in the $l variable in the following form:

    $l = ['C' => 0, 'H' => 0, 'Ac' => 0, …, 'Zr' => 0];
    

    Here is the ungolfed version:

    // Counts every element symbol found in the diagram and prints the resulting formula.
    //   $formula      - the ASCII diagram (element symbols mixed with bond characters).
    //   $elementsList - array of valid element symbol => counter; it is captured by
    //                   value, so the counters are incremented on a private copy.
    //                   Elements are printed in this array's key order.
    // Stops with 'Invalid elements in diagram' as soon as an unknown symbol is met.
    function ($formula) use ($elementsList)
    {
        $linearFormula = preg_replace('#[^a-z]#i', '', $formula);       // Remove everything except the letters in the formula
        $elementsInFormula = preg_split('#(?=[A-Z])#', $linearFormula); // Split the formula before each uppercase letter, with an additional empty element in the first position
    
        // Start at 1 to skip the leading entry produced by the split
        for ($i = 1; $i < count($elementsInFormula); ++$i)
        {
            // Check if the element is valid
            if (!isset($elementsList[$elementsInFormula[$i]])) {
                die('Invalid elements in diagram');
            }
    
            // Increment the associated counter
            ++$elementsList[$elementsInFormula[$i]];
        }
    
        // Print the raw formula: each element that occurs, followed by its count when above 1
        foreach ($elementsList as $element => $number)
        {
            if ($number > 0) {
                echo $element;
                if ($number > 1) {
                    echo $number;
                }
            }
        }
    };
    

    Blackhole

    Posted 2015-07-05T00:22:09.913

    Reputation: 2 362

    @BetaDecay I'm not sure if my manipulation of the (not counted) array of elements is allowed. If it's not, what are the allowed form for this array? – Blackhole – 2015-07-05T10:34:34.727

    @BetaDecay I've given the expected form (the $l variable) in my answer. Basically, the elements are the keys of the array, ordered using the Hill system, and are associated with a 0 value. – Blackhole – 2015-07-05T10:38:50.370

    2Your problem needs to take input via function arguments or STDIN. A variable input is not allowed on PCG as a whole AFAIK – Beta Decay – 2015-07-05T14:08:08.460

    @BetaDecay I've modified my answer accordingly. – Blackhole – 2015-07-05T16:02:46.340

    @BetaDecay It's strange that I write the only answer. I don't know why your question doesn't have more success… – Blackhole – 2015-07-13T17:05:37.280

    Maybe people just couldn't be bothered to try – Beta Decay – 2015-07-13T17:06:41.317