Javascript - 6 languages - high accuracy
Current Languages: Java, C, HTML, PHP, CSS, Javascript
The program works on the principle that whenever the input satisfies a criterion for a language, that language's score is increased, and the results are reported based on those scores.
Features:
- No built-in functions that determine language type used.
- Does not immediately declare that the input is language X just because it sees a keyword.
- Suggests other probable languages also.
If any of your inputs in the languages supported so far are not detected or produce wrong results, please report them and I'd be happy to fix them.
Sample Input 1:
class A{public static void main(String[]a){System.out.println("<?php");}}
Sample Output 1:
My program thinks you have :
Java with a chance of 100%
PHP with a chance of 25%
----------------
Explanation:
This input is designed to trick the program into printing PHP, but since my program works on the basis of scores, nothing fails: it easily identifies Java in first place, followed by the other possible results.
Sample Input 2:
class A{public static void main(String[]a){System.out.println("HelloWorld!");}}
Sample Output 2:
Java
----------------
Sample Input 3:
ABCDEFGHIJKLMNOPQRSTUVWXYZ
Sample Output 3:
Language not catched! Sorry.
----------------
The code:
// Helper functions
String.prototype.m = function(condition){
  // Shorthand for match(): hands back the match array, or null when nothing matches.
  var outcome = this.match(condition);
  return outcome;
};
/**
 * Returns a copy of this string with its first character upper-cased;
 * the remaining characters are left untouched.
 * An empty string yields "" (indexing its first character would give
 * undefined and calling toUpperCase on that throws).
 */
String.prototype.capitalize = function(){
  if (this.length === 0) return "";
  return this.charAt(0).toUpperCase() + this.slice(1);
};
function getFuncName(func){
  // Work on the function's source text: drop as many leading characters as
  // "function " has, then keep what precedes the first "(".
  var source = func.toString();
  var afterKeyword = source.substring("function ".length);
  var parenAt = afterKeyword.indexOf("(");
  // The extracted name is returned with its first letter upper-cased.
  return afterKeyword.substring(0, parenAt).capitalize();
}
// Get input
// prompt() gives null when the dialog is cancelled; fall back to "" so the
// detectors (which call string methods on the input) score nothing instead of throwing.
var lang_input = prompt("Enter programming language") || "";
// Max score of 4 per lang
function java(input){
  // One point per Java hallmark found in the input (0-4):
  // a class header, the "public static void main" signature,
  // two closing braces at the very end, and a System.out reference.
  var hallmarks = [
    /class[\s\n]+[\w$]+[\s\n]*\{/,
    /public[\s\n]+static[\s\n]+void[\s\n]+main[\s\n]*/,
    /\}[\s\n]*\}[\s\n]*$/,
    /System[\s\n]*[.][\s\n]*out/
  ];
  var points = 0;
  for (var k = 0; k < hallmarks.length; k++) {
    if (input.m(hallmarks[k])) {
      points++;
    }
  }
  return points;
}
/**
 * Scores how strongly `input` looks like C source (0-4), one point per rule.
 * Reads the global `checks` table: if its first entry's score is 3 or more
 * (the java row in the table as declared), the input is treated as Java and 0 is returned.
 */
function c(input){
  var score = 0;
  // if java has passed
  if(checks[0][1] >= 3)return 0;
  // Input starts with an #include line; whitespace before "<" is optional
  // ("#include<stdio.h>") and header names may contain "/" ("sys/types.h").
  score += input.m(/^#include\s*<[\w.\/]+>\s*\n/) ? 1 : 0;
  score += input.m(/main[\s\n]*\([\s\n]*(void)?[\s\n]*\)[\s\n]*\{/) ? 1 : 0;
  // Whitespace between "printf" and "(" is optional, so the usual printf("...") counts.
  score += input.m(/printf[\s\n]*\(/) || input.m(/%d/) ? 1 : 0;
  // An #include anywhere in the input, or another printf-style format specifier.
  score += input.m(/#include\s*<[\w.\/]+>\s*\n/) || input.m(/(%c|%f|%s)/) ? 1 : 0;
  return score;
}
/**
 * Scores how strongly `input` looks like PHP (0-4): one point each for an
 * opening "<?php" tag, a closing "?>" tag, the word "echo", and a
 * "$name =" style variable assignment.
 */
function PHP(input){
  var score = 0;
  score += input.m(/<\?php/) ? 1 : 0;
  score += input.m(/\?>/) ? 1 : 0;
  score += input.m(/echo/) ? 1 : 0;
  // "$" has to be escaped: unescaped it is the end-of-input anchor, and an
  // anchor followed by required characters can never match.
  score += input.m(/\$[\w]+\s*=\s*/) ? 1 : 0;
  return score;
}
/**
 * Scores how strongly `input` looks like an HTML document (0-4): one point
 * each for a doctype declaration and for matched html, body and head tag pairs.
 * Reads the global `checks` table: if its third entry's score is 2 or more
 * (the PHP row in the table as declared), 0 is returned.
 */
function HTML(input){
  var score = 0;
  // if php has passed
  if(checks[2][1] >= 2) return 0;
  // Any doctype counts, in either letter case and including legacy ones
  // whose public/system identifiers contain "-" or ".".
  score += input.m(/<!DOCTYPE\s[^>]*>/i) ? 1 : 0;
  // Opening tags may carry attributes (<html lang="en">); requiring whitespace
  // or ">" right after the name keeps <head from matching <header>.
  score += input.m(/<html[\s>]/) && input.m(/<\/html\s*>/) ? 1 : 0;
  score += input.m(/<body[\s>]/) && input.m(/<\/body\s*>/) ? 1 : 0;
  score += input.m(/<head[\s>]/) && input.m(/<\/head\s*>/) ? 1 : 0;
  return score;
}
/**
 * Scores how strongly `input` looks like JavaScript (0-4): one point each for
 * a console.log call, a "var" keyword, a named function declaration, and
 * either a "document." reference or a comment (block or line style).
 */
function javascript(input){
  var score = 0;
  // Whitespace before "(" is optional; the quantifier used to sit inside the
  // character class, which demanded exactly one extra character after "log".
  score += input.m(/console[\s\n]*[.][\s\n]*log[\s\n]*\(/) ? 1 : 0;
  score += input.m(/[\s\n]*var[\s\n]+/) ? 1 : 0;
  // "function name(" needs no space between the name and "("; names may contain "$".
  score += input.m(/[\s\n]*function[\s\n]+[\w$]+[\s\n]*\(/) ? 1 : 0;
  score += input.m(/document[\s\n]*[.]/) ||
    ( input.m(/\/\*/) && input.m(/\*\//) ) ||
    ( input.m(/\/\/.*\n/) ) ? 1 : 0;
  return score;
}
/**
 * Scores how strongly `input` looks like CSS (0-4): one point each for a
 * "selector { property:" rule, the word "color", the word "height" or
 * "width", and an id/class selector rule or a block comment.
 */
function CSS(input){
  var score = 0;
  // Whitespace (not word characters) may separate "{" from the first property,
  // so "body { margin: 0 }" and indented multi-line rules are recognised.
  score += input.m(/[a-zA-Z]+[\s\n]*\{[\s\n]*[a-zA-Z\-]+[\s\n]*:/) ? 1 : 0;
  // since color is more common, I give it a separate place
  score += input.m(/color/) ? 1 : 0;
  score += input.m(/height/) || input.m(/width/) ? 1 : 0;
  score += input.m(/#[a-zA-Z]+[\s\n]*\{[\s\n]*[a-zA-Z\-]+[\s\n]*:/) ||
    input.m(/[.][a-zA-Z]+[\s\n]*\{[\s\n]*[a-zA-Z\-]+[\s\n]*:/) ||
    ( input.m(/\/\*/) && input.m(/\*\//) ) ? 1 : 0;
  return score;
}
// Detector table: each row is [scoring function, score], scores starting at 0
var checks = [[java, 0], [c, 0], [PHP, 0], [HTML, 0], [javascript, 0], [CSS, 0]];
// Fill in the scores in table order, before any sorting: c() and HTML() read
// checks[0][1] and checks[2][1] while they run.
checks.forEach(function(row){
  row[1] = row[0](lang_input);
});
// Highest score first
checks.sort(function(left, right){ return right[1] - left[1]; });
// Stays true only if no row scored above zero
var all_zero = true;
function check_all_zero(index){
  // Walk the rows from `index` onward until one has a positive score
  for(var pos = index; checks[pos]; pos++){
    if(checks[pos][1] > 0){
      all_zero = false;
      return 0;
    }
  }
}
check_all_zero(0);
if(!all_zero){
  // Keep only the rows that scored, swapping each function for its display name
  var new_arr = checks
    .filter(function(row){ return row[1] > 0; })
    .map(function(row){ return [getFuncName(row[0]), row[1]]; });
  checks = new_arr.slice(0);
  if(checks.length !== 1){
    console.log("My program thinks you have :");
    for(var n = 0; n < checks.length; n++){
      // Scores are out of 4, so score/4 is the fraction shown as a percentage
      var prob = (checks[n][1]/4 * 100);
      console.log(checks[n][0] + " with a chance of " + prob + "%");
    }
  }else{
    // A single candidate is printed on its own, without a percentage
    console.log(checks[0][0]);
  }
}else{
  console.log("Language not catched! Sorry.");
}
console.log("----------------");
That is impossible, because `print("")` can be used in many languages. – Ismael Miguel – 2014-02-18T22:18:15.067
@IsmaelMiguel I never said "unambiguously." Sure, some programs run in many languages, but the goal is to identify as many programs as possible. – Doorknob – 2014-02-18T22:21:50.387
What you are asking is impossible in my opinion. – Ismael Miguel – 2014-02-18T22:22:36.050
1With your edit, now it seems more possible. – Ismael Miguel – 2014-02-18T22:27:49.223
4What about languages that are valid for EVERY input? Like whitespace. This sentence is a valid whitespace program. This whole page is a valid whitespace program. – Ismael Miguel – 2014-02-18T23:40:16.353
@IsmaelMiguel You could try detecting how likely the program is Whitespace (tabs, arrangement of chars, etc.). (That's the whole point of this challenge.) Or you could just not detect whitespace ;) – Doorknob – 2014-02-18T23:44:32.513
To do this properly you'd need a huge test set and train a classifier. – marinus – 2014-02-19T00:10:27.753
@marinus Not necessarily; for example, Python could be easily detected by searching for `:` in the right place. Similarly, PHP/Perl could be detected by `$`s in certain places, XML/CSS by obviousness, Objective-C by its unique syntax, APL by special chars, etc. – Doorknob – 2014-02-19T00:17:10.413
Reasonable job for Amazon Mechanical Turk. – Darren Stone – 2014-02-19T03:30:17.503
1Is the input guaranteed to be a valid program? Like some input could be `class A{public static void main(String[]a){System.println.out("Hello, World!");}}` which is invalid. – Gaurang Tandon – 2014-02-19T05:29:58.273
1Or likewise will HTML input always start with `<!DOCTYPE html>` followed by the `<html>`, `<body>` and other tags (like `meta`) in their correct order? – Gaurang Tandon – 2014-02-19T11:23:56.580
@Gaurang Yes, but there could be different class name, different doctype, etc. – Doorknob – 2014-02-19T12:17:38.867
@Doorknob And, though I have 5-6 languages ready, I just want to confirm: will the given code always produce output, i.e. have a `print()` statement? – Gaurang Tandon – 2014-02-20T11:12:17.327
Related: http://codegolf.stackexchange.com/questions/15372/write-a-program-in-disguise (try testing your submissions on these) – None – 2014-08-07T08:18:58.063