Thursday 8 December 2011

Beating the GCHQ challenge (part 1)

GCHQ has issued a challenge to recruit new staff. The web page canyoucrackit.co.uk was launched with the following cryptic challenge.



The site asks for a keyword, with very little else as a clue. It doesn't take much to see that the bytes are x86 machine code - run them through ndisasm and you get this:

jmp short 0x6   ;EB04            
scasd    ;AF              
ret 0xa3bf   ;C2BFA3          
sub esp,0x100   ;81EC00010000    
xor ecx,ecx   ;31C9            
mov [esp+ecx],cl  ;880C0C          
inc cl    ;FEC1            
jnz 0xe    ;75F9            
xor eax,eax   ;31C0            
mov edx,0xdeadbeef  ;BAEFBEADDE      
add al,[esp+ecx]  ;02040C          
add al,dl   ;00D0            
ror edx,0x8   ;C1CA08          
mov bl,[esp+ecx]  ;8A1C0C          
mov bh,[esp+eax]  ;8A3C04          
mov [esp+eax],bl  ;881C04          
mov [esp+ecx],bh  ;883C0C          
inc cl    ;FEC1            
jnz 0x1c   ;75E8            
jmp dword 0x95   ;E95C000000      
mov ebx,esp   ;89E3            
add ebx,0x4   ;81C304000000    
pop esp    ;5C              
pop eax    ;58              
cmp eax,0x41414141  ;3D41414141      
jnz 0x8d   ;7543            
pop eax    ;58              
cmp eax,0x42424242  ;3D42424242      
jnz 0x8d   ;753B            
pop edx    ;5A              
mov ecx,edx   ;89D1            
mov esi,esp   ;89E6            
mov edi,ebx   ;89DF            
sub edi,ecx   ;29CF            
rep movsb   ;F3A4            
mov esi,ebx   ;89DE            
mov ecx,edx   ;89D1            
mov edi,ebx   ;89DF            
sub edi,ecx   ;29CF            
xor eax,eax   ;31C0            
xor ebx,ebx   ;31DB            
xor edx,edx   ;31D2            
inc al    ;FEC0            
add bl,[esi+eax]  ;021C06          
mov dl,[esi+eax]  ;8A1406          
mov dh,[esi+ebx]  ;8A341E          
mov [esi+eax],dh  ;883406          
mov [esi+ebx],dl  ;88141E          
add dl,dh   ;00F2            
xor dh,dh   ;30F6            
mov bl,[esi+edx]  ;8A1C16          
mov dl,[edi]   ;8A17            
xor dl,bl   ;30DA            
mov [edi],dl   ;8817            
inc edi    ;47              
dec ecx    ;49              
jnz 0x6b   ;75DE            
xor ebx,ebx   ;31DB            
mov eax,ebx   ;89D8            
inc al    ;FEC0            
int 0x80   ;CD80            
nop    ;90              
nop    ;90              
call dword 0x39   ;E89DFFFFFF      
inc ecx    ;41              
inc ecx    ;41              
inc ecx    ;41              
inc ecx    ;41              
 

All rather promising, but there are a few things to note:
  • The first instruction jumps over the next two. These jumped instructions look pretty suspicious so perhaps they aren't code. If they are data, then this can be recoded as the DWORD 0xa3bfc2af. This isn't relevant now though ;)
  • Almost at the end of the code there's a relative 'call', but no matching 'ret' (at least in the reachable code). The following instructions seem like nonsense, so it seems likely that the code uses an old trick to embed data within code. The 'call' puts the address of the following instruction on the stack, then branches the execution. The code later checks that a value popped from the stack equals 0x41414141 (the four 'inc ecx' bytes, 0x41 each, that follow the call), so it seems that everything after the call is in fact data, not code. Later, it also checks this area of memory for the value 0x42424242 but that is nowhere to be seen! 
It was strange that the code was presented as an image, not as text, or as a binary. This might just be a way to annoy you by making you type the information in before using it, but I guessed a little steganography was involved. Opening up the file in a hex editor gives us this:


Notice the iTXt section? A quick look at the libpng site shows that this section should hold text, not the garbled junk that's actually there. The text looks hard to recognise, but the two '=' signs at the end are a dead giveaway for Base64 encoding. Sure enough, after a little ipython work the decoded output was saved to a file. Running it through ndisasm gives a load of junk so it's not more code, but the first four bytes are the ASCII character 'B', or 0x42 in hex! That means that this is what's missing from the code - maybe not actual code but embedded data.

Adding this to the code (and fixing the relative offsets) gives us:
[BITS 32]                               ; 32-bit code throughout
section .text
    global main                         ; export the entry symbol for the linker

;-----------------------------------------------------------------------
; main: harness around the challenge code.
; Calls code_fragment. Every path through code_fragment visible in this
; file ends in its own `int 0x80` at bail_out, so the exit sequence
; below looks like a safety net that is not normally reached.
; On 32-bit Linux, eax = 1 / ebx = 0 with int 0x80 is exit(0).
;-----------------------------------------------------------------------
main:
        call    code_fragment
        xor     ebx, ebx                ; exit status 0
        mov     eax, 1                  ; syscall number 1
        int     0x80

; code_fragment: first stage of the challenge code.
; Reserves 256 bytes on the stack, fills them with 0..255, then shuffles
; that table using the four bytes of 0xdeadbeef as a repeating key
; (j += S[i] + key byte; swap S[i], S[j] - the same shape as the RC4
; key-scheduling step). Finishes by jumping to nop_start.
; Register roles: cl = i, al = j, dl = current key byte, bl/bh = swap temps.
; The hex after each instruction is its encoding in the original ndisasm
; listing; a fresh assembly of this source may pick other encodings.
code_fragment:
    jmp short setup_stack ;EB04 - hop over the next four bytes
    ; AF C2 BF A3 are skipped by the jump above; they disassemble as two
    ; instructions but are probably data (the dword 0xa3bfc2af).
    scasd   ;AF
    ret 0xa3bf  ;C2BFA3
setup_stack:
    sub esp,0x100  ;81EC00010000 - 256-byte table S at [esp]
    xor ecx,ecx  ;31C9 - i = 0 (upper bits of ecx stay zero)
next_byte:
    mov [esp+ecx],cl ;880C0C - S[i] = i
    inc cl   ;FEC1
    jnz next_byte  ;75F9 - loop until cl wraps back to 0 (256 passes)
    xor eax,eax  ;31C0 - j = 0
    mov edx,0xdeadbeef ;BAEFBEADDE - key; dl is the byte currently in use
do_stuff:
    add al,[esp+ecx] ;02040C - j += S[i]
    add al,dl  ;00D0 - j += key byte
    ror edx,0x8  ;C1CA08 - rotate the next key byte into dl (EF, BE, AD, DE, ...)
    mov bl,[esp+ecx] ;8A1C0C - bl = S[i]
    mov bh,[esp+eax] ;8A3C04 - bh = S[j]
    mov [esp+eax],bl ;881C04 - S[j] = old S[i]
    mov [esp+ecx],bh ;883C0C - S[i] = old S[j]
    inc cl   ;FEC1
    jnz do_stuff  ;75E8 - 256 passes, then cl is 0 again
    jmp nop_start  ;E95C000000 - on to the call that locates the embedded data
; back_in: second stage, entered through `call back_in` in nop_start.
; On entry [esp] is the return address, i.e. the address of the bytes
; placed right after that call, and the 256-byte table built by
; code_fragment sits at esp+4.
; Steps:
;   1. Point esp at the embedded data and pop three dwords from it:
;      0x41414141 marker, 0x42424242 marker, payload length.
;      A marker mismatch jumps to bail_out.
;   2. Copy the payload to the stack area just below the table.
;   3. XOR each copied byte with a keystream byte derived from the table
;      (RC4-style loop), leaving the result in place.
; After the loop execution falls through into bail_out, so the decoded
; bytes are only observable in memory (e.g. from a debugger).
; NOTE(review): bl holds the index j but is overwritten with the keystream
; byte each round, so the next round starts from that byte; this differs
; from the textbook RC4 output loop.
back_in:
    mov ebx,esp  ;89E3
    add ebx,0x4  ;81C304000000 - ebx = base of the 256-byte table
    pop esp   ;5C - esp = return address; pops now read the embedded data
    pop eax   ;58 - first dword after the call
    cmp eax,0x41414141 ;3D41414141 - the four `inc ecx` (0x41) bytes
    jnz bail_out  ;7543
    pop eax   ;58 - second dword
    cmp eax,0x42424242 ;3D42424242 - marker at the start of decoded_code
    jnz bail_out  ;753B
    pop edx   ;5A - edx = payload length in bytes
    mov ecx,edx  ;89D1 - copy count
    mov esi,esp  ;89E6 - source: first payload byte
    mov edi,ebx  ;89DF
    sub edi,ecx  ;29CF - destination: table base minus length
    rep movsb  ;F3A4 - copy payload (assumes direction flag is clear - TODO confirm)
    mov esi,ebx  ;89DE - esi = table base S
    mov ecx,edx  ;89D1 - bytes left to decode
    mov edi,ebx  ;89DF
    sub edi,ecx  ;29CF - edi = start of the copied payload
    xor eax,eax  ;31C0 - al = i
    xor ebx,ebx  ;31DB - bl = j
    xor edx,edx  ;31D2 - dl/dh = scratch
go_again:
    inc al   ;FEC0 - i++
    add bl,[esi+eax] ;021C06 - j += S[i]
    mov dl,[esi+eax] ;8A1406 - dl = S[i]
    mov dh,[esi+ebx] ;8A341E - dh = S[j]
    mov [esi+eax],dh ;883406 - S[i] = old S[j]
    mov [esi+ebx],dl ;88141E - S[j] = old S[i]
    add dl,dh  ;00F2 - dl = S[i] + S[j] (mod 256)
    xor dh,dh  ;30F6 - clear dh so edx is a plain byte index
    mov bl,[esi+edx] ;8A1C16 - keystream byte (replaces j in bl)
    mov dl,[edi]  ;8A17 - next payload byte
    xor dl,bl  ;30DA
    mov [edi],dl  ;8817 - store decoded byte in place
    inc edi   ;47
    dec ecx   ;49
    jnz go_again  ;75DE - repeat for every payload byte
; bail_out: common exit point. Reached by the jnz branches in back_in when a
; marker check fails, and by fall-through when the go_again loop finishes.
; Leaves ebx = 0 and eax = 1 before int 0x80; on 32-bit Linux that is
; the exit system call with status 0.
bail_out:
    xor ebx,ebx  ;31DB - ebx = 0
    mov eax,ebx  ;89D8 - eax = 0
    inc al   ;FEC0 - eax = 1
    int 0x80  ;CD80
; nop_start: call/pop trick for locating data embedded in the code.
; `call back_in` pushes the address of the byte that follows it; back_in
; loads that address into esp and reads the bytes as data.
; The four `inc ecx` instructions are therefore not executed on any path
; visible in this file: their encodings (0x41 each) form the dword
; 0x41414141 that back_in checks first. decoded_code must follow them
; immediately, because back_in pops consecutive dwords.
nop_start:
    nop   ;90
    nop   ;90
    call back_in  ;E89DFFFFFF - pushes the address of the next byte
    inc ecx   ;41 - data: marker byte 1 of 4
    inc ecx   ;41 - data: marker byte 2 of 4
    inc ecx   ;41 - data: marker byte 3 of 4
    inc ecx   ;41 - data: marker byte 4 of 4
; decoded_code: data block read by back_in through the stack pointer.
; Layout (little-endian dwords, then raw bytes):
;   +0  0x42424242  marker compared by back_in
;   +4  0x00000032  payload length in bytes (50)
;   +8  50 payload bytes, copied to the stack and XOR-decoded at run time
; Despite the label name, the bytes stored here are the still-encoded
; payload. The block must directly follow the four 0x41 bytes after
; `call back_in`.
decoded_code:
    dd 0x42424242
    dd 0x00000032
    db 0x91, 0xd8, 0xf1, 0x6d, 0x70, 0x20, 0x3a, 0xab
    db 0x67, 0x9a, 0x0b, 0xc4, 0x91, 0xfb, 0xc7, 0x66
    db 0x0f, 0xfc, 0xcd, 0xcc, 0xb4, 0x02, 0xfa, 0xd7
    db 0x77, 0xb4, 0x54, 0x38, 0xab, 0x1f, 0x0e, 0xe3
    db 0x8e, 0xd3, 0x0d, 0xeb, 0x99, 0xc3, 0x93, 0xfe
    db 0xd1, 0x2b, 0x1b, 0x11, 0xc6, 0x11, 0xef, 0xc8
    db 0xca, 0x2f
 


Forgive the silly code labels - I wasn't going for production code here! Running the resulting binary through a debugger shows that the code copies the data after 0x42424242 and does a lot of bit fiddling with it. Follow the code and watch the memory it's messing with and you will end up with:


Now at this point I was expecting the whole solution of the challenge, so an HTTP GET request was not very rewarding! Nevertheless, the resulting JavaScript file was now available and it was time to move on to stage 2.



1 comment: