OCaml, 1588 (n = 36)
This solution uses the usual bit pattern approach to represent vectors of -1s and 1s. The scalar product is as usual calculated by taking the xor of two bit vectors and subtracting n/2. Vectors are orthogonal iff their xor has exactly n/2 bits set.
Lyndon words are not per se useful as a normalized representation for this, as they exclude any pattern that is a rotation of itself. They are also relatively expensive to compute. Therefore, this code uses a somewhat simpler normal form, which requires that the longest consecutive sequence of zeroes after rotation (or one of them, if there are multiples) must occupy the most significant bits. It follows that the least significant bit is always 1.
Observe also that any candidate vector must have at least n/4 ones (and at most 3n/4). We therefore only consider vectors with n/4 ... n/2 bits set, since we can derive others via complement and rotation (in practice, all such vectors seem to have between n/2-2 and n/2+2 ones, but that seems to also be difficult to prove).
We build these normal forms from the least significant bit up, observing the constraint that any remaining runs of zeroes (called "gaps" in the code) must follow our normal form requirement. In particular, as long as at least one more 1-bit has to be placed, there must be room for the current gap and another than that is at least as big as the current gap or any other gap observed so far.
We also observe that the list of results is small. Therefore, we do not try to avoid duplicates during the discovery process, but simply record the results in per-worker sets and calculate the union of these sets at the end.
It is worth noting that the runtime cost of the algorithm still grows exponentially and at a rate that is comparable to that of the brute-force version; what this buys us is essentially a reduction by a constant factor, and comes at the cost of an algorithm that is more difficult to parallelize than the brute-force version.
Output for n up to 40:
4: 12
8: 40
12: 144
16: 128
20: 80
24: 192
28: 560
32: 0
36: 432
40: 640
The program is written in OCaml, to be compiled with:
ocamlopt -inline 100 -nodynlink -o orthcirc unix.cmxa bigarray.cmxa orthcirc.ml
Run ./orthcirc -help
to see what options the program supports.
On architectures that support it, -fno-PIC
may offer some small additional performance gain.
This is written for OCaml 4.02.3, but may also work with older versions (as long as they aren't too old).
UPDATE: This new version offers better parallelization. Note that it uses p * (n/4 + 1)
worker threads per instance of the problem, and some of them will still run considerably shorter than others. The value of p
must be a power of 2. The speedup on 4-8 cores is minimal (perhaps around 10%), but it scales better to a large number of cores for large n
.
let max_n = ref 40
let min_n = ref 4
let seq_mode = ref false
let show_res = ref false
let fanout = ref 8
let bitcount16 n =
let b2 n = match n land 3 with 0 -> 0 | 1 | 2 -> 1 | _ -> 2 in
let b4 n = (b2 n) + (b2 (n lsr 2)) in
let b8 n = (b4 n) + (b4 (n lsr 4)) in
(b8 n) + (b8 (n lsr 8))
let bitcount_data =
let open Bigarray in
let tmp = Array1.create int8_signed c_layout 65536 in
for i = 0 to 65535 do
Array1.set tmp i (bitcount16 i)
done;
tmp
let bitcount n =
let open Bigarray in
let bc n = Array1.unsafe_get bitcount_data (n land 65535) in
(bc n) + (bc (n lsr 16)) + (bc (n lsr 32)) + (bc (n lsr 48))
module IntSet = Set.Make (struct
type t = int
let compare = Pervasives.compare
end)
let worker_results = ref IntSet.empty
let test_row vec row mask n =
bitcount ((vec lxor (vec lsr row) lxor (vec lsl (n-row))) land mask) * 2 = n
let record vec len n =
let m = (1 lsl n) - 1 in
let rec test_orth_circ ?(row=2) vec m n =
if 2 * row >= n then true
else if not (test_row vec row m n) then false
else test_orth_circ ~row:(row+1) vec m n
in if test_row vec 1 m n &&
test_orth_circ vec m n then
begin
for i = 0 to n - 1 do
let v = ((vec lsr i) lor (vec lsl (n - i))) land m in
worker_results := IntSet.add v !worker_results;
worker_results := IntSet.add (v lxor m) !worker_results
done
end
let show vec n =
for i = 0 to n / 2 - 1 do
let vec' = (vec lsr i) lor (vec lsl (n - i)) in
for j = 0 to n-1 do
match (vec' lsr (n-j)) land 1 with
| 0 -> Printf.printf " 1"
| _ -> Printf.printf " -1"
done; Printf.printf "\n"
done; Printf.printf "\n"; flush stdout
let rec build_normalized ~prefix ~plen ~gap ~maxgap ~maxlen ~bits ~fn =
if bits = 0 then
fn prefix plen maxlen
else begin
let room = maxlen - gap - plen - bits in
if room >= gap && room >= maxgap then begin
build_normalized
~prefix:(prefix lor (1 lsl (plen + gap)))
~plen:(plen + gap + 1)
~gap:0
~maxgap:(if gap > maxgap then gap else maxgap)
~maxlen
~bits:(bits - 1)
~fn;
if room > gap + 1 && room > maxgap then
build_normalized ~prefix ~plen ~gap:(gap + 1) ~maxgap ~maxlen ~bits ~fn
end
end
let rec log2 = function
| 0 -> -1
| n -> 1 + (log2 (n lsr 1))
let rec test_gap n pat =
if n land pat = 0 then true
else if pat land 1 = 0 then test_gap n (pat lsr 1)
else false
let rec test_gaps n maxlen len =
let fill k = (1 lsl k) -1 in
if len = 0 then []
else if test_gap n ((fill maxlen) lxor (fill (maxlen-len))) then
len :: (test_gaps n maxlen (len-1))
else test_gaps n maxlen (len-1)
let rec longest_gap n len =
List.fold_left max 0 (test_gaps n len len)
let start_search low lowbits maxlen bits fn =
let bits = bits - (bitcount low) in
let plen = log2 low + 1 in
let gap = lowbits - plen in
let maxgap = longest_gap low lowbits in
worker_results := IntSet.empty;
if bits >= 0 then
build_normalized ~prefix:low ~plen ~gap ~maxgap ~maxlen ~bits ~fn;
!worker_results
let spawn f x =
let open Unix in
let safe_fork () = try fork() with _ -> -1 in
let input, output = pipe () in
let pid = if !seq_mode then -1 else safe_fork() in
match pid with
| -1 -> (* seq_mode selected or fork() failed *)
close input; close output; (fun () -> f x)
| 0 -> (* child process *)
close input;
let to_parent = out_channel_of_descr output in
Marshal.to_channel to_parent (f x) [];
close_out to_parent; exit 0
| pid -> (* parent process *)
close output;
let from_child = in_channel_of_descr input in
(fun () ->
ignore (waitpid [] pid);
let result = Marshal.from_channel from_child in
close_in from_child; result)
let worker1 (n, k) =
start_search 1 1 n k record
let worker2 (n, k, p) =
start_search (p * 2 + 1) (log2 !fanout + 1) n k record
let spawn_workers n =
let queue = Queue.create () in
if n = 4 || n = 8 then begin
for i = n / 4 to n / 2 do
Queue.add (spawn worker1 (n, i)) queue
done
end else begin
for i = n / 2 downto n / 4 do
for p = 0 to !fanout - 1 do
Queue.add (spawn worker2 (n, i, p)) queue
done
done
end;
Queue.fold (fun acc w -> IntSet.union acc (w())) IntSet.empty queue
let main () =
if !max_n > 60 then begin
print_endline "error: cannot handle n > 60";
exit 1
end;
min_n := max !min_n 4;
if bitcount !fanout <> 1 then begin
print_endline "error: number of threads must be a power of 2";
exit 1;
end;
for n = !min_n to !max_n do
if n mod 4 = 0 then
let result = spawn_workers n in
Printf.printf "%2d: %d\n" n (IntSet.cardinal result);
if !show_res then
IntSet.iter (fun v -> show v n) result;
flush stdout
done
let () =
let args =[("-m", Arg.Set_int min_n, "min size of the n by n/2 matrix");
("-n", Arg.Set_int max_n, "max size of the n by n/2 matrix");
("-p", Arg.Set_int fanout, "parallel fanout");
("-seq", Arg.Set seq_mode, "run in single-threaded mode");
("-show", Arg.Set show_res, "display list of results") ] in
let usage = ("Usage: " ^
(Filename.basename Sys.argv.(0)) ^
" [-n size] [-seq] [-show]") in
let error _ = Arg.usage args usage; exit 1 in
Arg.parse args error usage;
main ()
Can you actually find one for every
n
that is a multiple of four? – flawr – 2016-01-11T21:43:26.430@flawr Now that's a great question! I have no idea but I would love to know. – None – 2016-01-11T21:50:52.680
From what I've seen now (if my script is correct) they seem to be pretty rare. As far as I know the circulant hadamard matrices (square matrices with entries in {-1,1}) are conjectured to exist only for n=1 and 4. – flawr – 2016-01-11T21:52:40.420
@flawr Yes on both counts (I wanted something that was rare to make the challenge interesting). But I am unaware of anything at all being known about n/2 by n circulant matrices. – None – 2016-01-11T21:56:05.997
Why are libraries for primality testing and factoring prohibited? – feersum – 2016-01-12T02:33:11.973
@feersum That was a copy and paste error! Fixed now. – None – 2016-01-12T07:23:32.297
1I have a solution using bitmasking that I think will work for n=32. – lirtosiast – 2016-01-15T18:18:31.360
@ThomasKwa That sounds great. If it finishes n=28 in time that would already be amazing. However...Unfortunately there are no solutions for
n=32
:) I feel the Lyndon words idea should work and that would be fast enough to get somewhere withn=36
too. – None – 2016-01-15T19:38:06.393@Lembik Really? That's too bad. n=28 finishes in about 10 seconds, and should be further optimizable, so I should be able to get through a small portion of n=36. I don't know if Lyndon words can be generared in less than ~1000 cycles each, which would be about the breakeven point. – lirtosiast – 2016-01-15T23:39:51.673