C++ 14 - 0.077 seconds for 52875 words
Code:
#include <thread>
#include <mutex>
#include <string>
#include <map>
#include <vector>
#include <iostream>
#include <fstream>
#include <algorithm>
using namespace std;
void thread_func(mutex& file_lock, mutex& map_lock, map<string, vector<string>>& anagrams, ifstream& file) {
string word;
string sorted_word;
while (true) {
{
lock_guard<mutex> locker(file_lock);
if (file.eof()) {
return;
}
file >> word;
}
sorted_word = word;
sort(sorted_word.begin(), sorted_word.end());
{
lock_guard<mutex> locker(map_lock);
anagrams[sorted_word].push_back(word);
}
}
}
void nothread_func(map<string, vector<string>>& anagrams, ifstream& file) {
string word;
string sorted_word;
while (true) {
if (file.eof()) {
return;
}
file >> word;
sorted_word = word;
sort(sorted_word.begin(), sorted_word.end());
anagrams[sorted_word].push_back(word);
}
}
int main(int argc, char *argv[]) {
string fname = "/usr/share/dict/words";
int number_of_threads = 1;
if (argc > 1) {
fname = argv[1];
}
if (argc > 2) {
number_of_threads = atoi(argv[2]);
}
mutex file_lock;
mutex map_lock;
map<string, vector<string>> anagrams;
cout << "Opening file '" << fname << "'" << endl;
ifstream file(fname);
if (!file) {
cerr << "Could not open file" << endl;
return 1;
}
if (number_of_threads == 1) {
nothread_func(anagrams, file);
}
else {
cout << "Starting " << number_of_threads << " threads" << endl;
vector<thread> threads;
for (int i = 0; i < number_of_threads; ++i) {
thread th(thread_func, ref(file_lock), ref(map_lock), ref(anagrams), ref(file));
threads.push_back(move(th));
}
for (auto& thread : threads) {
thread.join();
}
cout << "Threads done" << endl;
}
int largest_size = max_element(anagrams.begin(), anagrams.end(), [](const auto& a, const auto& b){return a.second.size() < b.second.size();})->second.size();
for (auto p : anagrams) {
if (p.second.size() == largest_size) {
for (auto word : p.second) {
cout << word << " ";
}
cout << endl;
}
}
}
Compile: g++ anagrams.cpp -o anagrams --std=c++14 -pthread -ggdb -O3
CLI: ./anagrams <wordfile, defaults to /usr/share/dict/words> <number of threads, defaults to 1>
Example:
$ wc -l /usr/share/dict/cracklib-small
52875 /usr/share/dict/cracklib-small
$ time ./anagrams /usr/share/dict/cracklib-small
Opening file '/usr/share/dict/cracklib-small'
caret carte cater crate react recta trace
pares parse pears rapes reaps spare spear
real 0m0.077s
user 0m0.073s
sys 0m0.000s
The code does have multithreaded built in, but it slows things down on my system. Might be useful on slower systems / bigger files. If the number of threads is 1
, it disables threads and does everything normally. Algorithm is essentially the same as CarpetPython's, but my code is slightly longer (well, 81 lines).
3Unless you're certain that the problem is sufficiently difficult, "first person to solve the question" is usually a bad winning criteria because it would disadvantage anyone who's asleep right now. – Sp3000 – 2015-01-24T03:09:25.547
Also, what happens if the efficiency is in terms of the length of the words? Because surely you need to traverse the words themselves in order to decide if any two are anagrams of each other... – Sp3000 – 2015-01-24T03:12:17.620
You are both right, I will rephrase the question to make it more generic. Thank you for your input. – ericmarkmartin – 2015-01-24T03:15:56.817
3I think "most efficient" is underspecified. Can one preprocess the dictionary? Does one find anagrams of a single word or a list of words? Is this about actual performance on real dictionaries, or worst-case theoretical behavior? – xnor – 2015-01-24T03:19:16.570
Ok. Should I change it to most creative? – ericmarkmartin – 2015-01-24T03:20:31.857
You need to add an objective measure for answers. I suggest algorithmic complexity (O(logn) > O(n) > O(nlogn) > O(n^2) etc). In the event of a tie, you should pick another measure (eg: fastest single threaded execution, least run-time space, most votes, oldest answer ;-). – Logic Knight – 2015-01-24T07:34:46.657
Also, if you mean to assess answers on performance of the /usr/share/dict file, say so. Not 'Assume you have' but 'You will use'. – Logic Knight – 2015-01-24T07:37:34.183
2You added the [fastest-code] tag (which is for fastest runtime speed), but you mention algorithmic complexity in your challenge. Which one is the actual criterion? (in case of algorithmic complexity, use the [fastest-algorithm] tag) – ProgramFOX – 2015-01-27T18:25:50.850