Optimize the Scralphabet

20

1

Scralphabet

A normal bag of Scrabble tiles contains the following letters (? is a blank tile, which can stand for any other letter):

AAAAAAAAABBCCDDDDEEEEEEEEEEEEFFGGGHHIIIIIIIIIJKLLLLMMNNNNNNOOOOOOOOPPQRRRRRRSSSSTTTTTTUUUUVVWWXYYZ??

The letters have the following value:

{"A": 1,"B": 3,"C": 3,"D": 2,"E": 1,"F": 4,"G": 2,"H": 4,"I": 1,"J": 8,"K": 5,"L": 1,"M": 3,"N": 1,"O": 1,"P": 3,"Q": 10,"R": 1,"S": 1,"T": 1,"U": 1,"V": 4,"W": 4,"X": 8,"Y": 4,"Z": 10,"?": 0}

Given a normal bag of Scrabble tiles, construct the highest-scoring set of non-intersecting words (i.e., individual words, not on a Scrabble board) given the following conditions:

  • The score for each word is sum(letter_values) * length(word).
  • You may only include a maximum of one word starting with each letter of the alphabet (so, a maximum of 26 words).
  • Only valid Scrabble words (from this dictionary) may be included. You may read the dictionary from a file, hard-code it (ugh), or scrape it from the website.
  • You need not use every tile, but all unused tiles form a single word, scored the same way, which subtracts from your score.

If you wish, your code may accept two inputs: the bag contents as a string, and the letter values in some format similar to a python dict (as above); alternatively, you can hard-code the bag contents and letter values. It should output the words in your set, their respective scores, and your total score, in some reasonable format.

Highest scoring set of words wins, with ties going to first posted.

sirpercival

Posted 2015-05-15T12:52:40.657

Reputation: 1 824

1Can the valid Scrabble word dictionary be hardcoded as well? – Lynn – 2015-05-15T13:17:06.770

2Also, what if my program is print"FOO18\nBAR15\nBAZ42\n...\n1523"? – Lynn – 2015-05-15T13:20:29.517

2

@Mauris Hardcoding the output is a banned loophole.

– Geobits – 2015-05-15T13:24:44.283

1@Geobits he says you can though. "alternatively, you can hard-code the values". – Tim – 2015-05-15T13:25:22.327

3@Tim The OP is referring to the input. – Martin Ender – 2015-05-15T13:25:50.747

1@Geobits It's not that simple though. There are many forms of "hardcoding" (like simplifying the search space if you already know what you'll find). Or you could write a really elaborate code to find the optimal solution, and then replace it by a brute force for golfing. – Martin Ender – 2015-05-15T13:26:53.460

@MartinBüttner Yea, my reply was to bare hardcoding, as Mauris was suggesting here.

– Geobits – 2015-05-15T13:28:58.773

@Mauris you can if you want...? I'd rather you read it from a file, though, to make the code more readable. And, yes, hardcoding your output is a banned loophole. – sirpercival – 2015-05-15T13:35:14.843

And if someone posts a golfed brute-force, I hope they'll also post the algorithm that led to their solution. – sirpercival – 2015-05-15T13:45:57.763

As far as I know, the normal rule for [code-challenge] is that the code you post must generate the output you post. If someone posts a brute force that is obviously unfeasible to run within years, along with an optimized output, I think the people here are smart enough to see through that. – Geobits – 2015-05-15T14:07:02.187

@Geobits: Speak for yourself. :P – Alex A. – 2015-05-15T14:39:49.257

3Pro tip: Be careful with how you handle the full Scrabble dictionary. I was working on a solution in R and merging the words with their scores used all available memory and crashed my computer. – Alex A. – 2015-05-15T16:17:58.097

Question about the scoring: "A" scores 1 * 1 = 1, while "ABC" scores (1 + 3 + 3) * 3 = 21? Is this correct? – 2012rcampion – 2015-05-17T17:33:15.183

yes, that's correct. – sirpercival – 2015-05-17T17:45:31.867

Answers

15

C, 2765 (optimal)

Edit

Now all in a single C file. This just finds all the optimal solutions. They all must have 6 words of 15 letters and one 10-letter word consisting of 8 letters of value 1 and two blanks. For that I only need to load a fraction of the dictionary and I don't have to look for 15 letter words with blanks. The code is a simple exhaustive depth-first search.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
struct w {
    struct lc { uint64_t hi,lo; } lc;
    char w[16];
} w15[6000], w10[40000];
int n15,n10;
struct lc pool = { 0x12122464612, 0x8624119232c4229 };
int pts[27] = {0,1,3,3,2,1,4,2,4,1,8,5,1,3,1,1,3,10,1,1,1,1,4,4,8,4,10};
int f[27],fs[26], w15c[27],w15l[27][6000];
int count(struct lc a, int l) { return (l < 16 ? a.lo << 4 : a.hi) >> 4*(l&15) & 15; }
int matches_val(uint64_t a, uint64_t b) {
    uint64_t mask = 0x1111111111111111ll;
    return !((a - b ^ a ^ b) & mask);
}
int matches(struct lc all, struct lc a) { return matches_val(all.hi,a.hi) && matches_val(all.lo,a.lo); }
int picks[10];
void try(struct lc cur, int used, int level) {
    int c, i, must;
    if (level == 6) {
    for (i = 0; i<27; i++) if (count(cur, i) && pts[i]>1) return;
    for (i = 0; i < n10; i++) if(!(used & (1 << (w10[i].w[0] & 31))) && matches(w10[i].lc, cur)) {
        for (c = 0; c<level; c++) printf("%s ",w15[picks[c]].w);
        printf("%s\n",w10[i].w);
    }
    return;
    }
    for (i = 0; i < 26;i++) if (count(cur,fs[i])) break;
    must = fs[i];
    for (c = 0; c < w15c[must]; c++) { i = w15l[must][c]; if(!(used & (1 << (w15[i].w[0] & 31))) && matches(cur, w15[i].lc)) {
    struct lc b = { cur.hi - w15[i].lc.hi, cur.lo - w15[i].lc.lo };
    picks[level] = i;
    try(b, used + (1 << (w15[i].w[0] & 31)), level+1);
    }}
}
int cmpfs(int *a, int *b){return f[*a]-f[*b];}
void ins(struct w*w, char *s, int c) {
    int i;
    strcpy(w->w,s);
    for (;*s;s++)
    if (*s&16) w->lc.hi += 1ll << 4*(*s&15); else w->lc.lo += 1ll << 4*(*s&15) - 4;
    if (c) for (i = 0; i < 27;i++) if (count(w->lc,i)) f[i]++, w15l[i][w15c[i]++] = w-w15;
}
int main() {
    int i;
    char s[20];
    while(scanf("%s ",s)>0) {
    if (strlen(s) == 15) ins(w15 + n15++,s,1);
    if (strlen(s) == 10) ins(w10 + n10++,s,0);
    }
    for (i = 0; i < 26;i++) fs[i] = i+1;
    qsort(fs, 26, sizeof(int), cmpfs);
    try(pool, 0, 0);
}

Usage:

$time ./scrab <sowpods.txt
cc -O3    scrab.c   -o scrab
JUXTAPOSITIONAL DEMISEMIQUAVERS ACKNOWLEDGEABLY WEATHERPROOFING CONVEYORIZATION FEATHERBEDDINGS LAURUSTINE
JUXTAPOSITIONAL DEMISEMIQUAVERS ACKNOWLEDGEABLY WEATHERPROOFING CONVEYORIZATION FEATHERBEDDINGS LUXURIATED
JUXTAPOSITIONAL DEMISEMIQUAVERS ACKNOWLEDGEABLY WEATHERPROOFING CONVEYORIZATION FEATHERBEDDINGS LUXURIATES
JUXTAPOSITIONAL DEMISEMIQUAVERS ACKNOWLEDGEABLY WEATHERPROOFING CONVEYORIZATION FEATHERBEDDINGS ULTRAQUIET
JUXTAPOSITIONAL DEMISEMIQUAVERS ACKNOWLEDGEABLY WEATHERPROOFING CONVEYORIZATION FEATHERBEDDINGS UTRICULATE
JUXTAPOSITIONAL DEMISEMIQUAVERS WEATHERPROOFING ACKNOWLEDGEABLY CONVEYORIZATION FEATHERBEDDINGS LAURUSTINE
JUXTAPOSITIONAL DEMISEMIQUAVERS WEATHERPROOFING ACKNOWLEDGEABLY CONVEYORIZATION FEATHERBEDDINGS LUXURIATED
JUXTAPOSITIONAL DEMISEMIQUAVERS WEATHERPROOFING ACKNOWLEDGEABLY CONVEYORIZATION FEATHERBEDDINGS LUXURIATES
JUXTAPOSITIONAL DEMISEMIQUAVERS WEATHERPROOFING ACKNOWLEDGEABLY CONVEYORIZATION FEATHERBEDDINGS ULTRAQUIET
JUXTAPOSITIONAL DEMISEMIQUAVERS WEATHERPROOFING ACKNOWLEDGEABLY CONVEYORIZATION FEATHERBEDDINGS UTRICULATE
OVERADJUSTMENTS QUODLIBETARIANS ACKNOWLEDGEABLY WEATHERPROOFING EXEMPLIFICATIVE HYDROGENIZATION RUBIACEOUS
OVERADJUSTMENTS QUODLIBETARIANS WEATHERPROOFING ACKNOWLEDGEABLY EXEMPLIFICATIVE HYDROGENIZATION RUBIACEOUS

real    0m1.754s
user    0m1.753s
sys 0m0.000s

Note every solution is printed twice because when adding a 15-letter 'W' word 2 orders are created because there are 2 'W' tiles.

The first solution found with the point breakdown:

JUXTAPOSITIONAL 465
DEMISEMIQUAVERS 480
ACKNOWLEDGEABLY 465
WEATHERPROOFING 405
CONVEYORIZATION 480
FEATHERBEDDINGS 390
LAURUSTINE (LAURU?TI?E) 80
no tiles left

Edit: explanation

What makes searching the entire space possible? While adding a new word I only take into account words that have the rarest remaining letter. This letter need to be in some word anyway (and a 15 letter word since this will be a non 1-value letter, though I don't check for that). So I start with words containing J, Q, W, W, X, Z which have counts around 50, 100, 100, 100, 200, 500. At lower levels I get more cutoff because some words are eliminated by the lack of letters. Breadth of the search tree at each level:

0: 1
1: 49
2: 3046
3: 102560
4: 724040
5: 803959
6: 3469

Of course a lot of cutoff is gained by not checking non-optimal solutions (blanks in 15 letter words or shorter words). So it is lucky that 2765 solution can be achieved with this dictionary (but it was close, only 2 combinations of 15 letter words give a reasonable leftover). On the other hand it is easy to modify the code to find lower scoring combinations where not all the leftover 10 letters are 1-valued, though it would be harder to prove this would be an optimal solution.

Also the code shows classic case of premature optimization. This version of matches function makes the code only 30% slower:

int matches(struct lc all, struct lc a) {
    int i;
    for (i = 1; i < 27; i++) if (count(a, i) > count(all, i)) return 0;
    return 1;
}

I even figured out how to make the bit magic parallel comparison even shorter than in my original code (highest nibble cannot be used in this case, but this is not a problem, as I only need 26 out of 32 nibbles):

int matches_val(uint64_t a, uint64_t b) {
    uint64_t mask = 0x1111111111111111ll;
    return !((a - b ^ a ^ b) & mask);
}

But it gives zero advantage.

Edit

Writing the explanation above I realized that most of the time is spent on scanning the list of words for those containing a specific letter not in the matches function. Calculating the lists upfront gave 10x speedup.

nutki

Posted 2015-05-15T12:52:40.657

Reputation: 3 634

Nice! Question, how do you know that "[The optimal solutions] all must have 6 words of 15 letters and one 10-letter word consisting of 8 letters of value 1 and two blanks."? – Claudiu – 2015-05-18T15:59:06.950

@Claudiu, Each letter should score maximum possible multiplier which is 15 as this is the maximum word length. It is not possible to have all words of that length since the total letter count is not divisible by 15. So some letters will have a smaller multiplier. It is better to make them the cheapest letters (which are blanks and ones). The last thing is why having exactly one word shorter than 15 and not for example (5x15, 14, 11) is better, but this can also be proven to always score lower. – nutki – 2015-05-19T07:59:33.493

alrighty, since this is the highest possible score, posted first, i'm going to accept this. nice work (everyone)! – sirpercival – 2015-05-19T12:08:18.597

6

Python 2, score: 1840 2162

This program first finds the best scoring word available with the given tiles (without using wildcards), then makes 10000 attempts to include random words that meet the constraints of unique first character and having available tiles. With the current constants, the program takes 27 seconds to run on my machine. Using larger constants would probably find a higher scoring combination of words.

UPDATE: Now uses a 2 stage selection algorithm, so it finds the best of 50 words at each select stage. The penalty score is now used in the evaluation algorithm too.

from random import *

tilelist = ('AAAAAAAAABBCCDDDDEEEEEEEEEEEEFFGGGHHIIIIIIIIIJKLLLLMM'
        'NNNNNNOOOOOOOOPPQRRRRRRSSSSTTTTTTUUUUVVWWXYYZ??')
maintiles = dict((t, tilelist.count(t)) for t in set(tilelist))
value = {"A": 1,"B": 3,"C": 3,"D": 2,"E": 1,"F": 4,"G": 2,"H": 4,"I": 1,
        "J": 8,"K": 5,"L": 1,"M": 3,"N": 1,"O": 1,"P": 3,"Q": 10,"R": 1,
        "S": 1,"T": 1,"U": 1,"V": 4,"W": 4,"X": 8,"Y": 4,"Z": 10,"?": 0}
words = open('words.txt', 'rt').read().split()

def sumpoints(word):
    return len(word) * sum(value[c] for c in word)

ranked = sorted((sumpoints(w),w) for w in words)[::-1]
for points,word in ranked:
    if all(word.count(ch) <= maintiles[ch] for ch in word):
        firstword = word
        break

def findwordset(first):
    tiles = maintiles.copy()
    startletter = set(tilelist) - {'?'}
    startletter.discard(first[0])
    result = [ (first, sumpoints(first)) ]
    thistotal = sumpoints(first)
    for ch in first:
        tiles[ch] -= 1
    for i in range(30):
        best = None
        for word in sample(words, 50):
            if word[0] in startletter:
                if all(word.count(ch) <= tiles[ch] for ch in word):
                    points = sumpoints(word)
                    if best == None or points > best:
                        best, bestword = points, word
        if best:
            thistotal += best
            result.append( (bestword,best) )
            startletter.discard(bestword[0])
            for ch in bestword:
                tiles[ch] -= 1
    penaltyword = ''.join(c*n for c,n in tiles.items())
    penalty = sumpoints(penaltyword)
    return thistotal - penalty, result, tiles

best = None
for attempt in range(10000):
    wordset = findwordset(firstword)
    if best == None or wordset > best:
        best = wordset

total, result, tiles = best
penaltyword = ''.join(c*n for c,n in tiles.items())
penalty = sumpoints(penaltyword)
for word,points in result:
    print '%20s%6s' % (word, points)
print 'Remaining word "%s" penalty = %s' % (penaltyword, -penalty)
print 'Total score = %s' % total

I include here the best of a few runs:

$ python s.py 
 OXYPHENBUTAZONE   615
   LIQUEFACTIONS   351
  DETERMINATIVES   280
   FAMILIARISERS   234
     JUNKETEERED   253
      WOODPIGEON   170
           GAYAL    45
         CLAUGHT    91
       BRIARWOOD   135
Remaining word "V??" penalty = -12
Total score = 2162

Note that I do not use the wild cards and pay a larger penalty (due to word length). A future enhancement may include wildcard use.

Logic Knight

Posted 2015-05-15T12:52:40.657

Reputation: 6 622

1The length of the words are important: "The score for each word is sum(letter_values) * length(word)". – Logic Knight – 2015-05-15T20:38:55.517

It's not using Scrabble scoring? Aargh. – Peter Taylor – 2015-05-15T20:43:03.233

Wow. The best mine has found so far is a score of 11371. If you multiply your score by the length (97) you get 209714. – Tim – 2015-05-15T21:29:32.333

@Tim it's on a word-by-word basis, not the total. – sirpercival – 2015-05-15T22:11:55.340

@sirpercival yes, but 615* 15 + 351*13... is the same isn't it? – Tim – 2015-05-16T08:56:06.640

I'm probably missing something, but why can't you make a final word from "V??"? There are quite a few that fit the bill... – Kieran Hunt – 2015-05-19T09:20:26.660

My algorithm does not consider blanks "?" at all, so the remaining word will always have a "??" in it. – Logic Knight – 2015-05-19T10:07:31.430

6

Simulated annealing (score 2246)

180     ADDITIVELY
338     ERYTHROPHOBIA
345     FLAGELLOMANIACS
435     INTERSUBJECTIVE
171     KOWTOWERS
390     QUADRINGENARIES
250     WEAPONIZED
200     XENOGAMOUS
-9      for blank used as S
-9      for blank used as W
-18     FTU unused
Total score: 2246

Unfortunately this is non-deterministic. I'll try to fix that and find a deterministic seed which gives a better value.

import java.io.*;
import java.util.*;

public class PPCG50219 {
    // Plus two wildcards
    private static String CHAR_FREQ  = "9224<232911426821646422121";
    private static String CHAR_VALUE = "1332142418513113:11114484:";

    private static List<List<String>> WORDS;

    public static void main(String[] args) {
        init();

        Random rnd = new Random(1);
        FeasibleSolution initial = new FeasibleSolution();
        List<List<String>> shuffledByLetter = new ArrayList<List<String>>(WORDS);
        Collections.shuffle(shuffledByLetter,  rnd);
        for (List<String> list : shuffledByLetter) {
            Collections.shuffle(list, rnd);
            for (String word : list) {
                if (initial.canUse(word)) {
                    initial.use(word);
                    break;
                }
            }
        }

        FeasibleSolution best = anneal(initial, rnd);
        System.out.println(best.toStringDetailed());
    }

    private static void init() {
        try {
            WORDS = new ArrayList<List<String>>(26);
            for (int i = 0; i < 26; i++) WORDS.add(new ArrayList<String>());

            // Take dictionary from stdin with fallback to hard-coded path.
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "ISO-8859-1"));
            if (!br.ready()) {
                br.close();
                br = new BufferedReader(new InputStreamReader(new FileInputStream("/home/pjt33/notes/dict/sowpods"), "ISO-8859-1"));
            }

            String line;
            FeasibleSolution soln = new FeasibleSolution();
            while ((line = br.readLine()) != null) WORDS.get(line.charAt(0) - 'A').add(line);
            br.close();
        }
        catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }

    public static FeasibleSolution anneal(FeasibleSolution feasibleSolution, Random rnd)
    {
        //Random rnd = new Random();

        FeasibleSolution best = feasibleSolution;
        int bestScore = best.score();
        double temperature = bestScore / 10;

        FeasibleSolution current = best;
        int currentScore = bestScore;

        for (int i = 0; i < 1024; i++)
        {
            // Try out some random changes, and then adjust the temperature according to the results.
            FeasibleSolution bestAtT = current;
            for (int j = 0; j < 256; j++)
            {
                FeasibleSolution neighbour = current.randomNeighbour(rnd);
                int score = neighbour.score();

                // Use a simple threshold rather than a Boltzmann probability
                if (score >= currentScore - temperature)
                {
                    current = neighbour;
                    currentScore = score;
                    temperature *= 0.95;
                }
                if (score > bestScore)
                {
                    best = neighbour;
                    bestScore = score;
                }
            }

            if (current == bestAtT) temperature *= 1.01;
            if (temperature < 1E-6 * bestScore) break;
        }

        return best;
    }

    static class FeasibleSolution {
        private final String[] words;
        private int blanksUsed;
        private final int[] counts;

        private FeasibleSolution(String[] words) {
            this.words = words;

            counts = new int[26];
            for (String word : words) {
                if (word == null) continue;
                for (char ch : word.toCharArray()) counts[ch - 'A']++;
            }
            for (int i = 0; i < 26; i++) {
                int limit = CHAR_FREQ.charAt(i) - '0';
                if (counts[i] > limit) blanksUsed += counts[i] - limit;
            }
            if (blanksUsed > 2) throw new IllegalArgumentException("Required " + blanksUsed + " blanks");
        }

        public FeasibleSolution() {
            this(new String[26]);
        }

        public FeasibleSolution(FeasibleSolution copy) {
            this(copy.words.clone());
        }

        public boolean canUse(String word) {
            int offset = word.charAt(0) - 'A';

            String current = clear(offset);
            boolean rv = set(offset, word);

            clear(offset);
            if (!set(offset, current)) throw new IllegalStateException();

            return rv;
        }

        public void use(String word) {
            int offset = word.charAt(0) - 'A';
            clear(offset);
            if (!set(offset, word)) throw new IllegalArgumentException();
        }

        private boolean set(int offset, String word) {
            if (words[offset] != null) throw new IllegalStateException();

            if (word != null) {
                for (char ch : word.toCharArray()) {
                    int limit = CHAR_FREQ.charAt(ch - 'A') - '0';
                    counts[ch - 'A']++;
                    if (counts[ch - 'A'] > limit) blanksUsed++;
                }
            }

            words[offset] = word;
            return blanksUsed <= 2;
        }

        private String clear(int offset) {
            String word = words[offset];
            if (word != null) {
                for (char ch : word.toCharArray()) {
                    int limit = CHAR_FREQ.charAt(ch - 'A') - '0';
                    if (counts[ch - 'A'] > limit) blanksUsed--;
                    counts[ch - 'A']--;
                }
            }

            words[offset] = null;
            return word;
        }

        public int score() {
            int score = 0;

            List<List<Integer>> lengths = new ArrayList<List<Integer>>();
            for (int i = 0; i < 26; i++) lengths.add(new ArrayList<Integer>());
            int unused = 100;

            for (String word : words) {
                if (word == null) continue;
                for (char ch : word.toCharArray()) lengths.get(ch - 'A').add(word.length());
                unused -= word.length();
            }

            for (int i = 0; i < 26; i++) {
                int limit = CHAR_FREQ.charAt(i) - '0';
                int off = 0;
                List<Integer> l = lengths.get(i);
                if (l.size() > limit) {
                    off = l.size() - limit;
                    Collections.sort(l);
                }
                else if (l.size() < limit) {
                    int surplus = limit - l.size();
                    l.add(-surplus * unused);
                }
                for (; off < l.size(); off++) score += l.get(off) * (CHAR_VALUE.charAt(i) - '0');
            }
            return score;
        }

        public FeasibleSolution randomNeighbour(Random rnd) {
            FeasibleSolution soln = new FeasibleSolution(this);

            // Shake things up.
            List<Integer> used = new ArrayList<Integer>();
            int totalCount = 0;
            for (int i = 0; i < words.length; i++) {
                if (words[i] == null) continue;
                used.add(i);
                totalCount += words[i].length();
            }
            if (totalCount > 50) {
                int offset = used.get(rnd.nextInt(used.size()));
                soln.clear(offset);
            }

            // TODO We can probably get better results by biasing the shuffle.
            List<List<String>> shuffledByLetter = new ArrayList<List<String>>(WORDS);
            String theBitThatMatters = Arrays.toString(counts);
            Collections.shuffle(shuffledByLetter,  rnd);
            for (List<String> list : shuffledByLetter) {
                Collections.shuffle(list, rnd);
                for (String word : list) {
                    if (word.equals(soln.words[word.charAt(0) - 'A'])) continue;
                    if (soln.canUse(word)) {
                        soln.use(word);
                        if (!theBitThatMatters.equals(Arrays.toString(soln.counts))) return soln;
                    }
                }

                // To avoid getting trapped in a local oscillation.
                int off = list.get(0).charAt(0) - 'A';
                if (soln.words[off] != null) {
                    soln.clear(off);
                    return soln;
                }
            }

            throw new RuntimeException("This really shouldn't be reachable");
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (String word : words) {
                if (word == null) continue;
                if (sb.length() > 0) sb.append(", ");
                sb.append(word);
            }

            return sb.toString();
        }

        private static int wordScore(String word) {
            int wordScore = 0;
            for (char ch : word.toCharArray()) {
                if (ch != '?') wordScore += (CHAR_VALUE.charAt(ch - 'A') - '0');
            }
            return wordScore * word.length();
        }

        public String toStringDetailed() {
            int score = 0;

            List<List<Integer>> lengths = new ArrayList<List<Integer>>();
            for (int i = 0; i < 26; i++) lengths.add(new ArrayList<Integer>());
            int unused = 100;

            StringBuilder surplusChars = new StringBuilder();
            int unusedBlanks = 2;

            StringBuilder sb = new StringBuilder();
            for (String word : words) {
                if (word == null) continue;
                for (char ch : word.toCharArray()) lengths.get(ch - 'A').add(word.length());
                unused -= word.length();
                sb.append(wordScore(word)).append("\t").append(word).append("\n");
            }

            for (int i = 0; i < 26; i++) {
                int limit = CHAR_FREQ.charAt(i) - '0';
                int off = 0;
                List<Integer> l = lengths.get(i);
                if (l.size() > limit) {
                    off = l.size() - limit;
                    unusedBlanks -= off;
                    Collections.sort(l);
                }
                while (l.size() < limit) {
                    surplusChars.append((char)('A' + i));
                    l.add(-unused);
                }
                for (int j = 0; j < off; j++) sb.append(-l.get(j)).append("\tfor blank used as ").append((char)('A' + i)).append("\n");
                for (; off < l.size(); off++) score += l.get(off) * (CHAR_VALUE.charAt(i) - '0');
            }

            while (unusedBlanks > 0) {
                surplusChars.append('?');
                unusedBlanks--;
            }
            sb.append(-wordScore(surplusChars.toString())).append("\t").append(surplusChars).append(" unused\n");

            sb.append("Total score: ").append(score());
            return sb.toString();
        }
    }
}

Peter Taylor

Posted 2015-05-15T12:52:40.657

Reputation: 41 901

4

Python, score 2638 2675 2676 2689 2699 2717

Result:

OXYPHENBUTAZONE for 615
MICROEARTHQUAKE for 525
FLAVOURDYNAMICS for 435
ADJUSTABILITIES for 375
PREINTERVIEWING for 360
WATERFLOODINGS for 308
EAGLE?OOD? for 100
Left-over word: E
2717

Code:

import time
from multiprocessing import Pool

start_tiles = "AAAAAAAAABBCCDDDDEEEEEEEEEEEEFFGGGHHIIIIIIIIIJKLLLLMMNNNNNNOOOOOOOOPPQRRRRRRSSSSTTTTTTUUUUVVWWXYYZ??"
start_tiles = {l: start_tiles.count(l) for l in set(start_tiles)}
values = {"A": 1,"B": 3,"C": 3,"D": 2,"E": 1,"F": 4,"G": 2,"H": 4,"I": 1,"J": 8,"K": 5,"L": 1,"M": 3,"N": 1,"O": 1,"P": 3,"Q": 10,"R": 1,"S": 1,"T": 1,"U": 1,"V": 4,"W": 4,"X": 8,"Y": 4,"Z": 10,"?": 0}
with open("sowpods.txt") as f:
    full_dictionary = list(l.strip() for l in f if l.strip())

def num_wilds_needed(word, tiles):
    return sum(max(0, word.count(l) - tiles[l]) for l in word)

def word_is_possible(word, tiles):
    # never replace 1st letter with wild, for simplicity
    if tiles[word[0]] <= 0:
        return False

    return num_wilds_needed(word, tiles) <= tiles['?']

def word_score(word):
    return sum(values[c] for c in word) * len(word)

def final_score(words, tiles_left, print_leftover=False):
    left_over_word = ""
    for tile, counts in tiles_left.iteritems():
        left_over_word += tile * counts
    if print_leftover:
        print "Left-over word: %s" % (left_over_word,)
    return sum(word_score(word) for word in words) - word_score(left_over_word)

def filter_dictionary(dictionary, tiles_left, start_letters):
    return [word for word in dictionary
            if word[0] in start_letters and word_is_possible(word, tiles_left)]

def pick_word(next_word, start_letters, tiles_left, dictionary):
    if not word_is_possible(next_word, tiles_left):
        raise ValueError("Using word that is impossible: %s" % (next_word,))

    next_letters = set(start_letters)
    next_letters.remove(next_word[0])
    next_tiles = dict(tiles_left)
    for c in next_word:
        next_tiles[c] -= 1

    next_dictionary = filter_dictionary(dictionary, next_tiles, next_letters)

    return next_letters, next_tiles, next_dictionary

class FakeResult:
    def __init__(self, value):
        self.value = value
    def get(self, timeout=None):
        return self.value

class FakePool:
    def apply_async(self, f, args):
        res = f(*args)
        return FakeResult(res)

def proc_next_word(next_word,
                   start_letters, tiles_left, filtered_sorted_dictionary,
                   depth, picks, prefix):
    score = word_score(next_word)
    next_letters, next_tiles, next_dictionary = pick_word(
        next_word, start_letters, tiles_left, filtered_sorted_dictionary)

    if len(prefix) / 2 < 5:
        print "%sDepth %d: ?, %s for %d, %d possible words left" % (
            prefix, len(prefix) / 2, next_word, score, len(filtered_sorted_dictionary))

    next_words, next_score = search(FakePool(), next_letters, next_tiles, next_dictionary,
                                    depth-1, picks, prefix + "  ")

    if len(prefix) / 2 < 5:
        print "%sDepth %d: %d, %s for %d" % (
            prefix, len(prefix) / 2, score + next_score, next_word, score)

    return [next_word] + next_words, score + next_score

def wildify(word, tiles_left):
    # replace missing letters with wilds
    while True:
        for c in word:
            if tiles_left[c] < word.count(c):
                word = word[0] + word[1:].replace(c, '?',  word.count(c) - tiles_left[c])
                break
        else:
            break

    return word

def search(pool, start_letters, tiles_left, filtered_sorted_dictionary, depth, picks, prefix=""):
    if not filtered_sorted_dictionary:
        # no words left - penalize for tiles left
        return [], final_score([], tiles_left)

    if depth == 0:
        raise ValueError("Hit depth 0")

    if tiles_left['?'] > 0:
        # proc top few and re-calculate score based on wildcarding
        best_word_candidates = [wildify(w, tiles_left) for w in filtered_sorted_dictionary[:10000]]
        best_word_candidates.sort(key=word_score, reverse=True)
    else:
        # no wildification needed
        best_word_candidates = filtered_sorted_dictionary

    best_words = best_word_candidates[:picks]
    if depth == 1:
        # only look at 1 word since depth 0 will do nothing
        best_words = [best_words[0]]

    results = [pool.apply_async(proc_next_word, (next_word,
                                                 start_letters, tiles_left, filtered_sorted_dictionary,
                                                 depth, picks, prefix))
               for next_word in best_words]
    results = [result.get() for result in results]

    return max(results, key=lambda (words, result): result)

if __name__ == "__main__":
    start_letters = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    tiles_left = dict(start_tiles)
    print "Preparing word list..."
    dictionary = filter_dictionary(full_dictionary, tiles_left, start_letters)
    dictionary.sort(key=word_score, reverse=True)
    print "Starting search..."
    pool = Pool(8)
    words, _ = search(pool, start_letters, tiles_left, dictionary, 666, 5)

    for word in words:
        for c in word:
            if tiles_left[c] <= 0:
                raise ValueError("Invalid word list")
            tiles_left[c] -= 1

    print
    print "\n".join(("%s for %s" % (word, word_score(word)) for word in words))
    print final_score(words, tiles_left, True)

Explanation:

Depth-first search which searches the entire tree, picking between the top picks best words at each stage.

I sort the entire word list once by score at the beginning. After picking each word, for the next iteration I filter out all the words that are now no longer possible, preserving the order, so I don't have to sort the list at each step. To deal with wildcards, if there's a possibility that a wild card is needed, I pick the top 10000 candidates, replace missing letters with wildcards if needed, and re-sort based on the new (lower) scores.

This output is for picks=5 and took 8m01s to run on my 8-core machine.

Claudiu

Posted 2015-05-15T12:52:40.657

Reputation: 3 870

3

Java 8, score 2641 2681

The program starts by taking the 40 best words. For each word, it finds the 40 best words to go along. Of the 1600 combinations, the program takes the best 40. For each combination, the 40 best words are found, and the cycle repeats.

When just a few tiles are left, the remaining letters are combined with the two blanks for the final word.

Update

I increased the threshold to the 50 best words. Also, each combination only adds words that are less than those already present. This prevents multiple permutations of the same group.

OXYPHENBUTAZONE: 615
MICROEARTHQUAKE: 525
INTERSUBJECTIVE: 435
DAFFADOWNDILLY: 406
PREINTERVIEWING: 360
AUTOALLOGAMIES: 238
GOODSIRES: 99
?A?: 3             (ZAX)
---
Total: 2681

The program:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class Scrabble {

    static final int[] scores = new int[]{1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10, 0};
    static final int[] freqs = new int[]{9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1, 2};

    static final int MAX = 50;

    public static void main(String[] args) throws IOException {
        List<String> words = Files.readAllLines(new File("C:/Users/Ypnypn/scrabble.txt").toPath());
        words.sort((s, t) -> score(t) - score(s));
        words.removeIf(w -> !works(w));

        List<List<String>> last = words.stream().map(Arrays::asList).collect(Collectors.toList()),
                finalList = new ArrayList();

        for (int i = 0; i < 10; i++) {
            List<List<String>> next = new ArrayList();
            for (int j = 0; j < MAX && j < last.size(); j++) {
                List<String> group = last.get(j);
                Object[] thirds = words.stream().filter(word -> workTogether(group, word) && least(word, group)).toArray();
                for (int k = 0; k < MAX && k < thirds.length; k++) {
                    List<String> newList = new ArrayList(group);
                    newList.add((String) thirds[k]);
                    next.add(newList);
                }
            }
            last = next;
            last.sort((s, t) -> t.stream().mapToInt(Scrabble::score).sum()
                                - s.stream().mapToInt(Scrabble::score).sum());
            for (List<String> l : last) {
                if (l.stream().mapToInt(String::length).sum() > 90) {
                    finalList.add(l);
                }
            }
        }
        finalList.forEach(g -> {
            List<Integer> chars = new ArrayList();
            int[] counts = new int[26];
            g.forEach(str -> str.chars().forEach(c -> counts[c - 'A']++));
            for (int i = 0; i < 26; i++) {
                while (counts[i] < freqs[i]) {
                    chars.add('A' + i);
                    counts[i]++;
                }
            }
            String end = words.stream()
                    .filter(w -> w.length() == chars.size() + 2)
                    .filter(w -> g.stream().noneMatch(s -> s.charAt(0) == w.charAt(0)))
                    .filter(w -> {
                        List<Integer> copy = new ArrayList(chars);
                        int _1 = 0, _2 = 0;
                        for (char c : w.toCharArray()) {
                            if (copy.contains((int) c)) {
                                copy.remove((Integer) (int) c);
                            } else if (_1 == 0) {
                                _1 = c;
                            } else if (_2 == 0) {
                                _2 = c;
                            } else {
                                return false;
                            }
                        }
                        return true;
                    }).findAny().orElse("");
            if (end.equals("")) {
                return;
            }
            char[] arr = end.toCharArray();
            for (int i = 0; i < arr.length; i++) {
                if (chars.contains((int) arr[i])) {
                    chars.remove((Integer) (int) arr[i]);
                } else {
                    arr[i] = '?';
                }
            }
            g.add(new String(arr));
        });

        finalList.removeIf(g -> g.stream().mapToInt(String::length).sum() < 100);
        finalList.sort((s, t) -> t.stream().mapToInt(Scrabble::score).sum()
                                 - s.stream().mapToInt(Scrabble::score).sum());
        List<String> answer = finalList.get(0);
        for (String str : answer) {
            System.out.print(str + ": " + score(str));
            if (str.contains("?")) {
                String actual = words.stream().filter(s -> s.matches("^" + str.replace('?', '.') + "$")).findAny().get();
                System.out.print("             (" + actual + ")");
            }
            System.out.println();
        }
        System.out.println("---");
        System.out.println("Total: " + answer.stream().mapToInt(Scrabble::score).sum());
    }

    static boolean works(String str) {
        int[] counts = new int[26];
        for (char c : str.toCharArray()) {
            counts[c - 'A']++;
        }
        for (int i = 0; i < 26; i++) {
            if (counts[i] > freqs[i]) {
                return false;
            }
        }
        return true;
    }

    static boolean workTogether(List<String> strs, String str) {
        if (strs.stream().anyMatch(s -> s.charAt(0) == str.charAt(0))) {
            return false;
        }
        int[] counts = new int[26];
        strs.stream().forEach(s -> s.chars().forEach(c -> counts[c - 'A']++));
        str.chars().forEach(c -> counts[c - 'A']++);
        for (int i = 0; i < 26; i++) {
            if (counts[i] > freqs[i]) {
                return false;
            }
        }
        return true;
    }

    static boolean least(String str, List<String> strs) {
        int score = score(str);
        return strs.stream().allMatch(s -> score(s) >= score);
    }

    static int score(String word) {
        int score = 0;
        for (char c : word.toCharArray()) {
            if (c != '?') {
                score += scores[c - 'A'];
            }
        }
        return score * word.length();
    }
}

Ypnypn

Posted 2015-05-15T12:52:40.657

Reputation: 10 485

2

Perl, score: 2655 2630

#!perl -l
%p = qw{A 1 B 3 C 3 D 2 E 1 F 4 G 2 H 4 I 1 J 8 K 5 L 1 M 3 N 1 O 1 P 3 Q 10 R 1 S 1 T 1 U 1 V 4 W 4 X 8 Y 4 Z 10 x 0};
/[A-Z]+/,push @R,$& for <>;
@R = sort{length($b)<=>length($a)}@R;
for(@R) {push@W,$_;push@W,"$`x$'" while /./g;push@O,($_)x(1+length)}
$l = "AAAAAAAAABBCCDDDDEEEEEEEEEEEEFFGGGHHIIIIIIIIIJKLLLLMMNNNNNNOOOOOOOOPPQRRRRRRSSSSTTTTTTUUUUVVWWXYYZxx";
@S = map {$r='';for$x(A..Z,'x'){$r.="${x}{".(1*s/$x//g).",}"};"^$r\$"} map{"$_"}@W;
sub score{$r=0;$r+=$p{$_}for split'',$rr=pop;$r*length$rr}
@X = map{score$_}@W;
$f = 'x';
for(;;) {
$best = -1; $besti = 0;
for ($i=0;$i<@S;$i++) {
    next if $X[$i] < $best;
    if ($O[$i]!~/^[$f]/ && $l=~$S[$i]) {
    $best = $X[$i];
    $besti = $i;
    }
}
if($best < 0){
    $ls = score$l;
    print "left: $l (-$ls)";
    $tot -= $ls;
    print "total: $tot";
    exit;
}
$l=~s/$_// for $W[$besti]=~/./g;
$O[$besti]=~/./; $f .= $&;
print "$W[$besti]/$O[$besti] ($X[$besti])";
$tot += $X[$besti];
}

Use:

$ perl ./scrab.pl <~/sowpods.txt
OXYPHENBUTAZONE/OXYPHENBUTAZONE (615)
MICROEARTHQUAKE/MICROEARTHQUAKE (525)
NONOBJECTIVISMS/NONOBJECTIVISMS (465)
DAFFADOWNDILLY/DAFFADOWNDILLY (406)
PREINTERVIEWIxG/PREINTERVIEWING (345)
LITURGIOLOGxSTS/LITURGIOLOGISTS (240)
URDEE/URDEE (30)
EE/EE (4)
AA/AA (4)
left: AA (-4)
total: 2630

Use of blanks actually does not give that much while significantly slows down the execution:

$ perl ./scrab.pl <~/sowpods.txt 
OXYPHENBUTAZONE/OXYPHENBUTAZONE (615)
MICROEARTHQUAKE/MICROEARTHQUAKE (525)
NONOBJECTIVISMS/NONOBJECTIVISMS (465)
DAFFADOWNDILLY/DAFFADOWNDILLY (406)
PREINTERVIEWED/PREINTERVIEWED (322)
LITURGIOLOGISTS/LITURGIOLOGISTS (255)
RUGAE/RUGAE (30)
EE/EE (4)
AA/AA (4)
left: Axx (-3)
total: 2623

After adding some heuristics:

$ time perl ./scrab.pl <~/sowpods.txt 
OXYPHENBUTAZONE/OXYPHENBUTAZONE (615)
MICROEARTHQUAKE/MICROEARTHQUAKE (525)
NONOBJECTIVISMS/NONOBJECTIVISMS (465)
PREFIGURATIVELY/PREFIGURATIVELY (405)
DOWNREGULATIONS/DOWNREGULATIONS (300)
FORGEAxILITIES/FORGEABILITIES (238)
ADWAxDED/ADWARDED (104)
EA/EA (4)
left: L (-1)
total: 2655

real    3m58.517s
user    3m57.832s
sys 0m0.512s

nutki

Posted 2015-05-15T12:52:40.657

Reputation: 3 634

1

Python 3, score 2735

(The optimal score of 2765, "6 words of 15 letters and one 10-letter word consisting of 8 letters of value 1 and two blanks" has been achieved by nutki.)

I used a greedy approach similar to others':

I start with one-element lists containing the top scoring words containing Q's.

At every step for every list element I create k = 800 new lists with the best legal words for the list. From the aggregated list of lists I keep the k top scoring lists and repeat the process 10 times.

Note that you can get the top k elements of an n-long list in O(n + k*log n) which is O(n) if k<<n as in our case (k = 800, n ~= 250000) with a heap queue. I guess this method isn't used in other submission s hence the smaller k values.

I use wildcards along the way if needed for the words.

Runtime is a couple minutes for k = 800. Greater values and other optimizations haven't yielded better results yet.

Result:

DEMISEMIQUAVERS for 480
OXYPHENBUTAZONE for 615
ACKNOWLEDGEABLY for 465
FLASHFORWARDING for 435
INTERJACULATING for 375
COOPERATIVITIES for 330
METEOROID for 108
? is C for -45
? is M for -27
Left U for -1
Total score of 2735

I experimented with the Descartes product of the best words containing Q,J and X as these letter barely share words. My best score with this startegy was 2723 (DEMISEMIQUAVERS OXYPHENBUTAZONE INTERSUBJECTIVE FLASHFORWARDING KNOWLEDGABILITY RADIOPROTECTION ANALOGUE EA).

The unnecessary complicated spaghetti-code (with traces of experimentation with other methods):

import sys,heapq as hq

def score(s):
    r=0
    for c in s:
        r+=ord('1332142418513113:11114484:0'[ord(c)-65])-48
    return r*len(s)

def score_wl(rwl):
    ac=a[:] 
    ssd=0    
    for rwle in rwl:
        ac,sd=decr(ac,rwle)
        ssd+=sd
    return sum([score(rw) for rw in rwl])-ssd

def decr(av,nw):
    nav=av[:]
    scd=0
    for c in nw:
        if nav[ord(c)-65]>0:
            nav[ord(c)-65]-=1
        else:
            nav[ord('[')-65]-=1
            scd+=(ord('1332142418513113:11114484:'[ord(c)-65])-48)*len(nw)
    return (nav,scd)

def bestwordlist(w,ac,sw,count):
    stl=[swe[0] for swe in sw]
    sl=[]
    for we in w:
        if we[0] not in stl:
            acn,sd=decr(ac,we)
            if min(acn)>=0:
                sl+=[(we,score(we)-sd)]
    mw=hq.nlargest(count,sl,key=lambda p:p[1])
    res=[mwe[0] for mwe in mw]
    return res

def bestword(w,ac,sw):
    ms=0
    mw=''
    stl=[swe[0] for swe in sw]
    for we in w:
        if we[0] not in stl:
            acn,sd=decr(ac,we)
            if min(acn)>=0 and score(we)-sd>ms:
                ms=score(we)-sd
                mw=we
    return mw

def search(t,lev,av,sw):
    if lev>=len(t) and min(av)>=0:
        return [sw]
    if min(av)<0:
        return []
    r=[]
    stl=[swe[0] for swe in sw]
    if av[ord(tl[lev])-65]>0:
        for i in range(1,maxch):
            nw=t[lev][-i][0]
            if nw[0] not in stl:
                nav=decr(av,nw)[0]
                swn=sw[:]+[nw]       
                r+=search(t,lev+1,nav,swn)
    else:
        r+=[sw]
    if len(r)<10000:
        return r
    else:
        return hq.nlargest(10000,r,key=score_wl)

args=sys.argv
maxch=300#int(args[1])
maxch2=800#int(args[2])

w=[]
with open('scr_words.txt','r') as f:
    for l in f:
        w+=[l[:-1]]

a=[ord(c)-48  for c in '9224<2329114268216464221212']
                       #ABCDEFGHIJKLMNOPQRSTUVWXYZ?

t=[]
tl='Q'
for c in tl:
    tp=[]
    wl=[(x,score_wl([x])) for x in w if c in x]
    wl=sorted(wl,key=lambda p:p[1])
    t+=[wl]

r=search(t,0,a,[])

rt=sorted(r,key=score_wl)[-maxch2:]

ms=0
res='-'

for i in range(10-len(tl)):
    rtn=[]
    for sw in rt:

        ac=a[:] 
        for swe in sw:        
            ac,sd=decr(ac,swe)

        bwl=bestwordlist(w,ac,sw,maxch2)

        if not bwl:
            rtn+=[sw]
        for bwle in bwl:
            rtn+=[sw+[bwle]]

    rt=sorted(rtn,key=score_wl)[-maxch2:]
    print(rt[-1],score_wl(rt[-1]),'- left')
    sys.stdout.flush()

ms=-1000000
for sw in rt:    
    ssd=0
    ac=a[:]
    for swe in sw:
        ac,sd=decr(ac,swe)
        ssd+=sd
    sc=sum([score(swe) for swe in sw])-ssd
    left=''.join([chr(i+65)*v for i,v in enumerate(ac)])
    sc-=score(left)
    if sc>ms:
        ms=sc
        res=(sw,left)

fsw,left=res        
print('\n\nres =',ms,' '.join(fsw),'left',left)
    v

randomra

Posted 2015-05-15T12:52:40.657

Reputation: 19 909