The GOLF CPU Golfing Challenge: Sides of a Cuboid

5

This challenge is the second of a series of problems that should be written in the GOLF CPU. You can find the first here

For this problem, you are given the volume of a cuboid (A rectangular cube). Each of the side lengths are integers. You must return the number of distinct cuboids that have that volume. This sequence matches A034836 (thanks Tylio)

Input:Output [List] [of] [cuboids]
1:1 [1,1,1]
5:1 [1,1,5]
8:3 [1,1,8] [1,2,4] [2,2,2]
24:6 [1,1,24] [1,2,12] [1,3,8] [1,4,6] [2,2,6] [2,3,4]
100:8 [1,1,100] [1,2,50] [1,4,25] [1,5,20] [1,10,10] [2,2,25] [2,5,10] [4,5,5]
1080:52
468719: 1
468720: 678
468721: 2

Your program should be written in GOLF CPU. For input/output you can either use STDIO or the registers. Hardcoding primes or answers isn't allowed.

This is a problem, so the answer that solves the examples above with the fewest amount of cycles wins!

Nathan Merrill

Posted 2015-07-09T03:28:11.483

Reputation: 13 591

2Your example for 100 is wrong, it should be 100:8 [1,1,100] [1,2,50] [1,4,25] [1,5,20] [1,10,10] [2,2,25] [2,5,10] [4,5,5]. – Tyilo – 2015-07-09T05:50:48.823

What are the restrictions on the size of the input? – 2012rcampion – 2016-04-09T09:42:00.980

Answers

7

14,483 cycles

I use the fact that the output depends only on the signature (exponents in the prime factorization) of the input. I used the formula from https://oeis.org/A034836. The sieve of Eratosthenes is used to find primes to factorize the input by trial division.

# Input in register n
# Outputs in register r and exit code

# the signature of n (exponents in its prime factorization) are stored in
# memory starting at address 0x1000000000000000.  They are byte-sized
# since the largest exponent we can have is 64 (2**64)

# mov z, 0x1000000000000000 (implicit)
mov y, z

# pull out factors of 2
# number of factors is stored in k
# mov k, 0 (implicit)
factor_two:
  and c, n, 1
  snz c, 3
    shr n, n, 1
    inc k
    jmp factor_two

# store factors of two if nonzero
sz k, 2
  sb z, k
  inc z

# Sieve of Eratosthenes
# we only check odd primes, since we already removed factors of 2
# we store COMPOSITE(i) at address i
# we also check n for divisibility by every prime we find
# note we only have to check for prime factors up to sqrt(n)!

mov i, 1
sieve_loop:
  add i, i, 2 # go to the next odd number
  lb c, i # check if COMPOSITE(i)
  jnz sieve_loop, c # if so, continue

  mulu j, k, i, i # j = i**2
  geu c, j, n              # if i**2 > n:
  jnz end_sieve_loop, c #   break

  # otherwise, i is prime; check if n is divisible by i
  mov k, 0
  factor_i:
    divu q, r, n, i # r = n % i
    snz r, 3
      mov n, q      # n /= i
      inc k
      jmp factor_i

  # store factors of i if nonzero
  sz k, 2
    sb z, k
    inc z

  # now cross off all composites that are odd multiples of i,
  # but only up to sqrt(n)
  shl d, i, 2 # d = 2 * i
  # we already have j = i**2, we can start marking composites here:
  # since every lesser multiple of i must have a smaller  prime factor and
  # therefore must have already been marked

  sieve_inner_loop:
    sb j, 1
    add j, j, d # j += 2*i
    mulu c, k, j, j  # c = j**2
    leu c, c, n             # if j**2 < n:
    jnz sieve_inner_loop, c #   continue

  jmp sieve_loop

end_sieve_loop:

# if n != 1, n is a prime and needs to be added to the signature
cmp c, n, 1
snz c, 2 # skip if n==1
  sb z, 1

#mov z, 0x1000000000000000

# according to https://oeis.org/A034836, we have:
# a(n) = (np + 3*nsq + 2*ntp)/6
# where np = Times@@Binomial[2+s,2],
# ntp = Times@@Floor[Floor[s,3]/s],
# nsq = Times@@(Floor[s/2] + 1),
# and s is the signature

# Binomial[2+s,2] == (1+s)(2+s)/2
# Floor[Floor[s,3]/s] == 1 if s divides 3, 0 otherwise

# I tested this formula up to n=10,000,000 

mov t, 0 # <= BUG? (doesn't work without this line)
mov d, 1 # np
mov e, 1 # ntp
mov f, 1 # nsq

product_loop:
  lb s, y
  jz finish, s
  inc y

  divu q, r, s, 3 # r = s % 3
  sz r, 1
    mov e, 0

  inc s
  mulu d, x, d, s # d *= s + 1

  inc s
  mulu d, x, d, s # d *= s + 2
  shr d, d, 1     # d /= 2

  shr s, s, 1     # s /= 2 (s == (s+2)/2 == floor(s/2) + 1)
  mulu f, x, f, s # f *= floor(s/2) + 1

  jmp product_loop

finish:

add r, e, f # r = e + f
shl r, r, 1 # r *= 2
add r, r, f # r += f
add r, r, d # r += d
            # r == d + 3*f + 2*e

divu r, s, r, 6 # r /= 6

halt r

Test cases:

robert@unity:~/golf-cpu$ ./assemble.py cube.golf
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=1
Execution terminated after 42 cycles with exit code 1.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=5
Execution terminated after 77 cycles with exit code 1.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=8
Execution terminated after 91 cycles with exit code 3.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=24
Execution terminated after 126 cycles with exit code 6.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=100
Execution terminated after 218 cycles with exit code 8.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=1080
Execution terminated after 236 cycles with exit code 52.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=468719
Execution terminated after 9710 cycles with exit code 1.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=468720
Execution terminated after 422 cycles with exit code 678.
robert@unity:~/golf-cpu$ ./golf.py cube.bin n=468721
Execution terminated after 3561 cycles with exit code 2.
robert@unity:~/golf-cpu$ 

2012rcampion

Posted 2015-07-09T03:28:11.483

Reputation: 1 319

2

Total cycles for examples: 29,566,640

It cubes the input number in the algorithm, so the input can't be larger than around 2^21.

Cycle count for example input:

Input    Cycles
1        153
5        75
8        773
24       1,528
100      3,185
1080     27,608
468719   9,676,846
468720   9,764,814
468721   10,091,658

I'm using an algorithm I found on OEIS, which does the following:

cuboids(n):
    if is_prime(n):
        return 1
    res = 0
    for d in divisors(n):
        if d^3 <= n:
            for d0 in divisors(n/d):
                if d0^2 <= n/d:
                    res += 1
                if d0 < d:
                    res -= 1

    return res

This might actually be slower than just trying every divisor combination and checking if d1 <= d2 <= d3, however the bottle-neck is finding the prime anyway.

Divisors are found by using the prime factorization of the number. I'm using the same method to build an array of all primes below the input number as I used in the previous problem.

Run with:

python3 assemble.py 52489-cuboids.golf
python3 golf.py -d 52489-cuboids.bin x=<INPUT>

Output will be in the t register.

Example run:

$ python3 golf.py -d 52489-cuboids.bin x=1080 | grep 'Execution\|t: '
Execution terminated after 27608 cycles with exit code 0. Register file at exit:
t: 52                   0x34

Code:

    call build_primes

    mov q, x
    call is_prime
    jnz prime, a

    shl m, x, 3
    add m, m, 8
    call divisors

    mov y, x
    mov k, l

    mov t, 0

    mov p, m

outer_loop:
    cmp a, p, k
    jnz done, a

    lw d, p

    add p, p, 8

    mulu a, r, d, d
    mulu a, r, a, d
    lequ a, a, y
    jz outer_loop, a

    divu x, r, y, d
    mov m, k
    call divisors

    mov q, m

inner_loop:
    cmp a, q, l
    jnz outer_loop, a

    lw o, q

    add q, q, 8

    mulu a, r, o, o
    lequ a, a, x
    jz skip1, a

    inc t

skip1:
    leu a, o, d
    jz inner_loop, a

    dec t

    jmp inner_loop

done:
    halt 0

prime:
    mov t, 1
    halt 0

# Input: x, m
# Output: l
# x: input number
# m: memory location to store divisors
# l: memory location where last divisor is stored + 8
divisors:
    sw m, 1
    add l, m, 8

    mov p, 2

divisors_outer:
    mov q, 1
    push z, 0

divisors_inner:
    divu y, r, x, p
    jnz divisors_not_divisible, r

    mulu q, r, q, p
    push z, q

    mov x, y

    jmp divisors_inner

divisors_not_divisible:
    mov w, l

divisors_not_divisible_outer:
    mov v, m

    pop t, z
    jz divisors_not_divisible_last, t

divisors_not_divisible_inner:
    cmp a, v, w
    jnz divisors_not_divisible_outer, a

    lw s, v
    mulu a, r, s, t
    sw l, a
    add l, l, 8

    add v, v, 8

    jmp divisors_not_divisible_inner

divisors_not_divisible_last:
    shl i, p, 3
    lw p, i

    cmp a, x, 1
    jz divisors_outer, a

    ret l

# Input: x
# Memory layout: [-1, -1, 3, 5, -1, 7, -1, 11, ...]
# x: max integer
# p: current prime
# y: pointer to last found prime
# i: current integer
build_primes:
    sw 0, -1
    sw 8, -1
    sw 16, 1
    mov y, 16

    mov p, 2

build_primes_outer:
    mulu i, r, p, p
    jnz build_primes_final, r

    geu a, i, x
    jnz build_primes_final, a

build_primes_inner:
    shl m, i, 3
    sw m, -1

    add i, i, p

    geu a, i, x
    jz build_primes_inner, a

build_primes_next:
    inc p
    shl m, p, 3
    lw a, m
    jnz build_primes_next, a

    sw y, p
    mov y, m
    sw y, 1

    jmp build_primes_outer

build_primes_final:
    inc p
    geu a, p, x
    jnz build_primes_ret, a

    shl m, p, 3
    lw a, m
    jnz build_primes_final, a

    sw y, p
    mov y, m
    sw y, 1

    jmp build_primes_final

build_primes_ret:
    ret

# Input: q
# Output: a
is_prime:
    shl m, q, 3
    lw a, m
    neq a, a, -1
    ret a

Tyilo

Posted 2015-07-09T03:28:11.483

Reputation: 1 372

The cycle counts listed don't add up to the total cycle count you posted. – Nathan Merrill – 2015-07-09T12:16:41.370

@NathanMerrill Should be fixed now. – Tyilo – 2015-07-09T14:04:16.147