C++ (with libgc)
This isn't code-golf, so I went for speed. This implements counting sort. Rather than updating an array (which isn't allowed), it updates a 4-way trie by reconstructing nodes as necessary. It allocates trie nodes on the stack and copies them to the heap every 1000 characters, yielding a 600% performance boost.
This requires libgc, a garbage collector for C/C++. To work on large files (without stack-overflowing), it also requires the compiler to optimize tail recursion so it won't leak memory and/or stack-overflow.
Despite all the object fiddling and garbage collection, this manages to run about 25-40% as fast as a simple array-based counting sort (see below).
#include <gc/gc_cpp.h>
#include <stdio.h>
template<class T>
struct Node : public gc
{
T a, b, c, d;
Node(T a, T b, T c, T d)
: a(a), b(b), c(c), d(d)
{}
Node(T s)
: a(s), b(s), c(s), d(s)
{}
T get(int n) {
switch (n) {
case 0: return a;
case 1: return b;
case 2: return c;
case 3: return d;
default: return NULL;
}
}
};
typedef Node<int> TrieC;
typedef Node<TrieC*> TrieB;
typedef Node<TrieB*> TrieA;
typedef Node<TrieA*> Trie;
static Trie *initialize();
static Trie *sortInput(Trie *root);
static Trie *sortUsingStack(Trie *orig, Trie *root, int c, int count);
template<class T> static Node<T> *merge(Node<T> *node, Node<T> *base);
static Node<int> *merge(Node<int> *node, Node<int> *base);
static void print(Trie *trie);
static void repeat(int c, int n);
static Trie *initialize()
{
return new Trie(new TrieA(new TrieB(new TrieC(0))));
}
static Trie *sortInput(Trie *root)
{
int c = getchar();
if (c == EOF)
return root;
else
return sortInput(sortUsingStack(root, root, c, 1000));
}
static Trie *sortUsingStack(Trie *orig, Trie *root, int chr, int count)
{
if (chr == EOF)
return merge(root, orig);
int a = (chr >> 6) & 3;
int b = (chr >> 4) & 3;
int c = (chr >> 2) & 3;
int d = chr & 3;
TrieA *na = root->get(a);
TrieB *nb = na->get(b);
TrieC *nc = nb->get(c);
int nd = nc->get(d);
int nd2 = nd + 1;
TrieC nc2(
d == 0 ? nd2 : nc->a,
d == 1 ? nd2 : nc->b,
d == 2 ? nd2 : nc->c,
d == 3 ? nd2 : nc->d);
TrieB nb2(
c == 0 ? &nc2 : nb->a,
c == 1 ? &nc2 : nb->b,
c == 2 ? &nc2 : nb->c,
c == 3 ? &nc2 : nb->d);
TrieA na2(
b == 0 ? &nb2 : na->a,
b == 1 ? &nb2 : na->b,
b == 2 ? &nb2 : na->c,
b == 3 ? &nb2 : na->d);
Trie root2(
a == 0 ? &na2 : root->a,
a == 1 ? &na2 : root->b,
a == 2 ? &na2 : root->c,
a == 3 ? &na2 : root->d);
if (count > 0)
return sortUsingStack(orig, &root2, getchar(), count - 1);
else
return merge(&root2, orig);
}
/* Copy tree, but use nodes from base if they're the same. */
template<class T> static Node<T> *merge(Node<T> *node, Node<T> *base)
{
if (node == base)
return node;
else
return new Node<T>(merge(node->a, base->a),
merge(node->b, base->b),
merge(node->c, base->c),
merge(node->d, base->d));
}
static Node<int> *merge(Node<int> *node, Node<int> *base)
{
(void) base;
return new Node<int>(node->a, node->b, node->c, node->d);
}
static void printC(TrieC *t, int start)
{
repeat(start, t->a);
repeat(start + 1, t->b);
repeat(start + 2, t->c);
repeat(start + 3, t->d);
}
static void printB(TrieB *t, int start)
{
printC(t->a, start);
printC(t->b, start + 4);
printC(t->c, start + 8);
printC(t->d, start + 12);
}
static void printA(TrieA *t, int start)
{
printB(t->a, start);
printB(t->b, start + 16);
printB(t->c, start + 32);
printB(t->d, start + 48);
}
static void print(Trie *t)
{
printA(t->a, 0);
printA(t->b, 64);
printA(t->c, 128);
printA(t->d, 192);
}
/* Print the character c, n times. */
static void repeat(int c, int n)
{
if (n > 0) {
putchar(c);
repeat(c, n-1);
}
}
int main(void)
{
print(sortInput(initialize()));
putchar('\n');
return 0;
}
The array-based counting sort I used for comparison (getchar/putchar is the bottleneck):
#include <stdio.h>
int main(void)
{
int tally[256] = {};
int c, i, j;
while ((c = getchar()) != EOF)
tally[c]++;
for (i = 0; i < 256; i++)
for (j = 0; j < tally[i]; j++)
putchar(i);
putchar('\n');
return 0;
}
1Most dynamic languages have a sort function that doesn’t require any sort of import. – Timwi – 2011-04-12T16:41:50.207
@Tmwi: All kinds of sorting function are forbidden. – FUZxxl – 2011-04-12T16:47:21.477
8So any functional implementation is OK? – Eelvex – 2011-04-12T17:27:00.717
2You know, there are plenty of languages that don't even know the concept of a variable. Furthermore, some languages have control-structure-like constructs that are not syntax, but could be considered library functions (e.g. cmdlets in PowerShell). Are those forbidden, too? – Joey – 2011-04-13T05:37:50.637
Not really possible to do in assembly then. – Skizz – 2011-04-14T12:00:37.513
@Skizz: There is LLVM. AFAIK, it is designed to use only non-mutable variables. They will be allocated to registers by the llvm compiler. – FUZxxl – 2011-04-14T15:55:47.053
3And the analyzer will re-use registers as needed, which points out the silliness of the constraint: every solution is re-using memory, it's just hidden under one or more layers of language abstraction. In the case of my solution, the stack gets overwritten time and time again or if the compiler does tail call elimination it's the return value that gets overwritten. – dmckee --- ex-moderator kitten – 2011-04-15T14:48:59.400