Javascript - 6 languages - high accuracy
Current Languages: Java, C, HTML, PHP, CSS, Javascript
The program works on the principle that whenever the input satisfies a criterion for a language, that language's score is increased, and the results are reported based on those scores.
Features:
- No built-in functions that determine language type used.
- Does not straightaway declare that the input text is in language `x` on seeing a single keyword.
- Suggests other probable languages also.
If any program you try (in one of the languages I support so far) is not detected or gets an incorrect result, please report it and I'd be happy to fix it.
Sample Input 1:
class A{public static void main(String[]a){System.out.println("<?php");}}
Sample Output 1:
My program thinks you have :
Java with a chance of 100%
PHP with a chance of 25%
----------------
Explanation:
This input should have tripped up the program and made it print PHP, but since my program works on the basis of scores, nothing fails: it easily identifies Java first, followed by the other possible results.
Sample Input 2:
class A{public static void main(String[]a){System.out.println("HelloWorld!");}}
Sample Output 2:
Java
----------------
Sample Input 3:
ABCDEFGHIJKLMNOPQRSTUVWXYZ
Sample Output 3:
Language not catched! Sorry.
----------------
The code:
// Helper functions
// Shorthand for String.prototype.match, used by every scoring function.
// Returns whatever match() returns for `condition`: a match array, or
// null when the string does not match.
String.prototype.m = function (condition) {
    var outcome = String.prototype.match.call(this, condition);
    return outcome;
};
// Returns a copy of the string with its first character upper-cased and
// the remaining characters left untouched. An empty string yields "".
String.prototype.capitalize = function(){
    // charAt(0) is "" for an empty string, whereas this[0] would be
    // undefined and calling toUpperCase() on it would throw.
    return this.charAt(0).toUpperCase() + this.slice(1);
};
/**
 * Extracts the declared name of a function from its source text and
 * returns it with the first letter upper-cased (via String#capitalize).
 *
 * @param {Function} func - the function whose name is wanted.
 * @returns {string} the capitalized name, or "" when the source text has
 *     no name after the `function` keyword (e.g. an anonymous function).
 */
function getFuncName(func){
    var source = func.toString();
    // Tolerate any amount of whitespace around the name instead of assuming
    // the text starts with exactly "function " followed by the name.
    var found = /^\s*(?:async\s+)?function\s*\*?\s*([\w$]*)\s*\(/.exec(source);
    var name = found ? found[1] : "";
    // Nothing to capitalize; avoid calling capitalize() on an empty name.
    if (name === "") {
        return "";
    }
    return name.capitalize();
}
// Get input
// prompt() returns null when the dialog is cancelled; fall back to "" so
// the scoring functions always receive a string to run their patterns on.
var lang_input = prompt("Enter programming language") || "";
// Max score of 4 per lang
/**
 * Scores how strongly `input` resembles a Java program.
 * One point is awarded per matching pattern: a class declaration, a
 * `public static void main`, two closing braces at the very end of the
 * text, and a `System.out` reference.
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4.
 */
function java(input){
    var signatures = [
        /class[\s\n]+[\w$]+[\s\n]*\{/,
        /public[\s\n]+static[\s\n]+void[\s\n]+main[\s\n]*/,
        /\}[\s\n]*\}[\s\n]*$/,
        /System[\s\n]*[.][\s\n]*out/
    ];
    var hits = signatures.filter(function (pattern) {
        return input.m(pattern);
    });
    return hits.length;
}
/**
 * Scores how strongly `input` resembles a C program.
 * Points: an #include at the very start, a `main()` / `main(void)`
 * definition, a printf call or `%d`, and an #include anywhere or one of
 * the `%c` / `%f` / `%s` format specifiers.
 *
 * Reads the global `checks` table: checks[0] is the Java entry while the
 * scores are being assigned, and Java is scored before C.
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4 (always 0 when Java scored >= 3).
 */
function c(input){
    var score = 0;
    // if java has passed, do not also report C
    if(checks[0][1] >= 3) return 0;
    score += input.m(/^#include\s+<[\w.]+>\s*\n/) ? 1 : 0;
    score += input.m(/main\s*\(\s*(void)?\s*\)\s*\{/) ? 1 : 0;
    // \s* rather than \s+: `printf(` is normally written without a space,
    // and requiring one meant ordinary printf calls never scored.
    score += input.m(/printf\s*\(/) || input.m(/%d/) ? 1 : 0;
    score += input.m(/#include\s+<[\w.]+>\s*\n/) || input.m(/(%c|%f|%s)/) ? 1 : 0;
    return score;
}
/**
 * Scores how strongly `input` resembles a PHP script.
 * Points: an opening `<?php` tag, a closing `?>` tag, the word `echo`,
 * and a `$variable =` assignment.
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4.
 */
function PHP(input){
    var score = 0;
    score += input.m(/<\?php/) ? 1 : 0;
    score += input.m(/\?>/) ? 1 : 0;
    score += input.m(/echo/) ? 1 : 0;
    // The dollar sign must be escaped: a bare `$` is the end-of-input
    // anchor, which made this pattern impossible to match.
    score += input.m(/\$[\w]+\s*=\s*/) ? 1 : 0;
    return score;
}
/**
 * Scores how strongly `input` resembles an HTML document.
 * Points: a DOCTYPE declaration, an <html>...</html> pair, a
 * <body>...</body> pair and a <head>...</head> pair.
 *
 * Reads the global `checks` table: checks[2] is the PHP entry while the
 * scores are being assigned, and PHP is scored before HTML.
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4 (always 0 when PHP scored >= 2).
 */
function HTML(input){
    var score = 0;
    // if php has passed, do not also report HTML
    if(checks[2][1] >= 2) return 0;
    // Accept any DOCTYPE content up to the closing `>` so that public
    // identifiers containing `-` or `.` (HTML 4 / XHTML) are recognised.
    score += input.m(/<!DOCTYPE\s[^>]*>/i) ? 1 : 0;
    // Opening tags may carry attributes (e.g. <html lang="en">), and tag
    // names are case-insensitive in HTML.
    score += input.m(/<html(\s[^>]*)?>/i) && input.m(/<\/html\s*>/i) ? 1 : 0;
    // The closing body tag is matched by prefix only, as it always was.
    score += input.m(/<body(\s[^>]*)?>/i) && input.m(/<\/body/i) ? 1 : 0;
    score += input.m(/<head(\s[^>]*)?>/i) && input.m(/<\/head\s*>/i) ? 1 : 0;
    return score;
}
/**
 * Scores how strongly `input` resembles JavaScript.
 * Points: a `console.log(` call, a `var` declaration, a named function
 * declaration, and either a `document.` reference or a comment (a block
 * comment, or a line comment terminated by a newline).
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4.
 */
function javascript(input){
    var score = 0;
    // `\s*` before the parenthesis: the former `[\s\n*]` was a character
    // class demanding exactly one whitespace or `*`, so `console.log(`
    // never matched.
    score += input.m(/console\s*[.]\s*log\s*\(/) ? 1 : 0;
    score += input.m(/var\s+/) ? 1 : 0;
    // Whitespace between the function name and `(` is optional.
    score += input.m(/function\s+[\w$]+\s*\(/) ? 1 : 0;
    score += input.m(/document\s*[.]/) ||
        ( input.m(/\/\*/) && input.m(/\*\//) ) ||
        ( input.m(/\/\/.*\n/) ) ? 1 : 0;
    return score;
}
/**
 * Scores how strongly `input` resembles a CSS stylesheet.
 * Points: a `selector { property:` rule, the word `color`, the word
 * `height` or `width`, and either an id/class rule (`#x {` / `.x {`) or
 * a block comment.
 *
 * @param {string} input - the source text to inspect.
 * @returns {number} a score from 0 to 4.
 */
function CSS(input){
    var score = 0;
    // Whitespace (not word characters) may separate `{` from the first
    // property; the former `[\w\n]*` rejected `body { color: ...` as soon
    // as a space or tab followed the brace.
    score += input.m(/[a-zA-Z]+\s*\{\s*[a-zA-Z\-]+\s*:/) ? 1 : 0;
    // since color is more common, I give it a separate place
    score += input.m(/color/) ? 1 : 0;
    score += input.m(/height/) || input.m(/width/) ? 1 : 0;
    score += input.m(/#[a-zA-Z]+\s*\{\s*[a-zA-Z\-]+\s*:/) ||
        input.m(/[.][a-zA-Z]+\s*\{\s*[a-zA-Z\-]+\s*:/) ||
        ( input.m(/\/\*/) && input.m(/\*\//) ) ? 1 : 0;
    return score;
}
// Detector table: one [scoring function, score] pair per language.
// The order matters while scores are filled in: c() consults checks[0]
// (java) and HTML() consults checks[2] (PHP).
var checks = [[java, 0], [c, 0], [PHP, 0], [HTML, 0], [javascript, 0], [CSS, 0]];

// Run every detector on the input, in table order, and store its score.
checks.forEach(function (entry) {
    var scorer = entry[0];
    entry[1] = scorer(lang_input);
});

// Best score first.
checks.sort(function (left, right) { return right[1] - left[1]; });

// True when not a single detector awarded a point.
var all_zero = !checks.some(function (entry) { return entry[1] > 0; });

if (!all_zero) {
    // Keep only the languages that scored, as [display name, score] pairs.
    var scored = [];
    for (var k = 0; k < checks.length; k++) {
        if (checks[k][1] > 0) {
            scored.push([getFuncName(checks[k][0]), checks[k][1]]);
        }
    }
    checks = scored;

    if (checks.length !== 1) {
        // Several candidates: list each with its score as a percentage of 4.
        console.log("My program thinks you have :");
        for (var j = 0; j < checks.length; j++) {
            var percent = checks[j][1] / 4 * 100;
            console.log(checks[j][0] + " with a chance of " + percent + "%");
        }
    } else {
        // A single candidate is reported by name only.
        console.log(checks[0][0]);
    }
} else {
    console.log("Language not catched! Sorry.");
}
console.log("----------------");
That is impossible, because `print("")` can be used in many languages. – Ismael Miguel – 2014-02-18T22:18:15.067
@IsmaelMiguel I never said "unambiguously." Sure, some programs run in many languages, but the goal is to identify as many programs as possible. – Doorknob – 2014-02-18T22:21:50.387
What you are asking is impossible in my opinion. – Ismael Miguel – 2014-02-18T22:22:36.050
1With your edit, now it seems more possible. – Ismael Miguel – 2014-02-18T22:27:49.223
4What about languages that are valid for EVERY input? Like whitespace. This sentence is a valid whitespace program. This whole page is a valid whitespace program. – Ismael Miguel – 2014-02-18T23:40:16.353
@IsmaelMiguel You could try detecting how likely the program is Whitespace (tabs, arrangement of chars, etc.). (That's the whole point of this challenge.) Or you could just not detect whitespace ;) – Doorknob – 2014-02-18T23:44:32.513
To do this properly you'd need a huge test set and train a classifier. – marinus – 2014-02-19T00:10:27.753
@marinus Not necessarily; for example, Python could be easily detected by searching for `:` in the right place. Similarly, PHP/Perl could be detected by `$`s in certain places, XML/CSS by obviousness, Objective-C by its unique syntax, APL by special chars, etc. – Doorknob – 2014-02-19T00:17:10.413
Reasonable job for Amazon Mechanical Turk. – Darren Stone – 2014-02-19T03:30:17.503
1Is the input guaranteed to be a valid program? For example, some input could be `class A{public static void main(String[]a){System.println.out("Hello, World!");}}`, which is invalid. – Gaurang Tandon – 2014-02-19T05:29:58.273
1Or likewise, will HTML input always start with `<!DOCTYPE html>`, followed by the `<html>`, `<body>` and other tags (like `meta`) in their correct order? – Gaurang Tandon – 2014-02-19T11:23:56.580
@Gaurang Yes, but there could be different class name, different doctype, etc. – Doorknob – 2014-02-19T12:17:38.867
@Doorknob And, though I have 5-6 languages ready, I just want to confirm: will the given code always produce output, i.e. have a `print()` statement? – Gaurang Tandon – 2014-02-20T11:12:17.327
Related: http://codegolf.stackexchange.com/questions/15372/write-a-program-in-disguise (try testing your submissions on these) – None – 2014-08-07T08:18:58.063