NCTF 2025

NCTF 2025 Writeup

Posted Apr 4, 2025 Updated May 13, 2025

By Shiori

26 min read

NCTF 2025

For official writeup, here.

gogo

Input 40 bytes and create two coroutine (coroutVM), processing 20Bytes respectively

What we need: hash table & bytecodes

  
struct main_coroutVM // sizeof=0x158
{
    _16_uint32 reg;
    _256_uint8 mem;
    _chan_left_chan__4_uint8 instr;
    chan_bool checkres;
    map_uint8_main_handler instrSet;
};

Method 1 - Static Analysis

Find bytecode and hash table, then write a script to disassemble

Bytecode

Extract hash table

Each pair is set by:

runtime_mapassign(..., key_x[i], ...) + if(isGC)... + *variable = &handler[j];

Extract the hash table

{
    0x11: "main_LDR",
    0x12: "main_LDRI",
    0x15: "main_STR",
    0x16: "main_STRI",
    0x2A: "main_MOV",
    0x41: "main_ADD",
    0x42: "main_SUB",
    0x47: "main_MUL",
    0x71: "main_LSL",
    0x73: "main_LSR",
    0x7A: "main_XOR",
    0x7B: "main_AND",
    0xFE: "main_RET",
    0xFF: "main_HLT"
}

{
    0x13: main_LDR,
    0x14: main_LDRI,
    0x17: main_STR,
    0x18: main_STRI,
    0x2B: main_MOV,
    0x91: main_ADD,
    0x92: main_SUB,
    0x97: main_MUL,
    0xC1: main_LSL,
    0xC3: main_LSR,
    0xCA: main_XOR,
    0xCB: main_AND,
    0xFE: main_RET,
    0xFF: main_HLT
}

Disassembly

Based on the handler function for each opcode, we know:

Both VM emulate ARM32 instruction set
Both use the same set of handler functions
The bytecodes of two VM are mixed together, only the opcodes that match the respective VM will be executed

Info on hand:

Hash tables for different VM
Instruction structure: Opcode + Oprand * 3, total 4Bytes
Dumped bytecode (DumpedByteCodes.bin)

Instruction	Operands	Operand 1 (Byte `a10`)	Operand 2 (Byte `a11`)	Operand 3 (Byte `a12`)
ADD/SUB/MUL/XOR/AND	3	Rd (reg index)	Rn (reg index)	Rm (reg index)
LSL/LSR	3	Rd (reg index)	Rn (value reg)	Rm (shift amt reg)
MOV	2	Rd (reg index)	Immediate val (WORD little endian)
STRI/LDRI	2	Rd (reg index)	Must be `0x00`	Immediate val (addr)
LDR/STR	2	Rd (reg index)	Rn (addr)	Must be `0x00`

Based on the hash table & instruction set, write a script to read file DumpedByteCodes.bin and disassemble bytecode for two VM respectively, outputting to two .asm files.

movzx edx, [rsp+10h+oprand_3] in STRI. Zero-extend this byte into the 32-bit edx register. The upper 24 bits of edx are set to 0.
Note that this implicitly zeros the upper 32 bits of rdx as well in x86-64). Let’s call the byte value val3. rdx becomes 0x00000000000000XX where XX is val3.

Disassembly script:

  
import struct
import os

# --- Configuration ---

# Define Opcode Mappings (Using simplified mnemonics for clarity)
VM1_OPS = {
    0x11: "LDR",   0x12: "LDRI",  0x15: "STR",   0x16: "STRI",
    0x2A: "MOV",   0x41: "ADD",   0x42: "SUB",   0x47: "MUL",
    0x71: "LSL",   0x73: "LSR",   0x7A: "XOR",   0x7B: "AND",
    0xFE: "RET",   0xFF: "HLT"
}

VM2_OPS = {
    0x13: "LDR",   0x14: "LDRI",  0x17: "STR",   0x18: "STRI",
    0x2B: "MOV",   0x91: "ADD",   0x92: "SUB",   0x97: "MUL",
    0xC1: "LSL",   0xC3: "LSR",   0xCA: "XOR",   0xCB: "AND",
    0xFE: "RET",   0xFF: "HLT"
}

INPUT_FILE = "DumpedByteCodes.bin"
OUTPUT_VM1_FILE = "vm1_disassembly.asm"
OUTPUT_VM2_FILE = "vm2_disassembly.asm"

INSTRUCTION_SIZE = 4 # bytes

# --- Operand Parsing Logic ---

def parse_operands(mnemonic, b1, b2, b3):
    """
    Parses operand bytes based on the instruction mnemonic.
    Returns a formatted string representation of the operands.
    """
    try:
        if mnemonic in ["ADD", "SUB", "MUL", "XOR", "AND", "LSL", "LSR"]:
            return f"R{b1}, R{b2}, R{b3}"
        elif mnemonic == "MOV":
            imm = (b3 << 8) | b2
            return f"R{b1}, #{imm:#06x}"
        elif mnemonic in ["LDRI", "STRI"]:
            if b2 != 0x00:
                print(f"Warning: Expected 0x00 for operand 2 in {mnemonic} (Opcode byte 2), got {b2:#04x}")
            return f"R{b1}, [#{b3:#04x}]"
        elif mnemonic in ["LDR", "STR"]:
            if b3 != 0x00:
                 print(f"Warning: Expected 0x00 for operand 3 in {mnemonic} (Opcode byte 3), got {b3:#04x}")
            return f"R{b1}, [R{b2}]"
        elif mnemonic in ["RET", "HLT"]:
            return ""
        else:
            print(f"Warning: Unknown mnemonic '{mnemonic}' during operand parsing.")
            return f"?? DB 0x{b1:02X}, 0x{b2:02X}, 0x{b3:02X}"
    except Exception as e:
        print(f"Error parsing operands for {mnemonic} with bytes {b1}, {b2}, {b3}: {e}")
        return f"!! PARSE ERROR !!"

# --- Main Disassembly Function ---

def disassemble():
    """
    Reads the bytecode file and performs disassembly for both VMs,
    including the raw bytes in the output.
    """
    if not os.path.exists(INPUT_FILE):
        print(f"Error: Input file '{INPUT_FILE}' not found.")
        return

    processed_instructions = 0
    unknown_opcodes = 0

    try:
        with open(INPUT_FILE, 'rb') as f_in, \\
             open(OUTPUT_VM1_FILE, 'w') as f_out1, \\
             open(OUTPUT_VM2_FILE, 'w') as f_out2:

            print(f"Starting disassembly of '{INPUT_FILE}'...")
            # Add header explaining the format
            header_format = "; Format: Offset   Bytes        Instruction\\n"
            f_out1.write(f"; --- VM1 Disassembly from {INPUT_FILE} ---\\n")
            f_out1.write(header_format)
            f_out2.write(f"; --- VM2 Disassembly from {INPUT_FILE} ---\\n")
            f_out2.write(header_format)

            offset = 0
            while True:
                chunk = f_in.read(INSTRUCTION_SIZE)
                if not chunk:
                    break

                if len(chunk) < INSTRUCTION_SIZE:
                    print(f"Warning: Trailing {len(chunk)} byte(s) at offset 0x{offset:08X}, ignored.")
                    break

                try:
                    b0, b1, b2, b3 = struct.unpack('<BBBB', chunk)
                except struct.error:
                     print(f"Error: Could not unpack 4 bytes at offset 0x{offset:08X}.")
                     break

                offset_str = f"0x{offset:08X}:"
                # Format the raw bytes as hex strings, separated by spaces
                byte_str = ' '.join(f"{byte:02X}" for byte in chunk)
                # Pad the byte string for alignment (4 bytes * 2 hex chars + 3 spaces = 11 chars)
                # Use padding of 12 for a bit extra space.
                padded_byte_str = f"{byte_str:<12}"

                vm1_mnemonic = VM1_OPS.get(b0)
                vm2_mnemonic = VM2_OPS.get(b0)

                instruction_part = "" # The mnemonic and operands part
                target_files = []

                if vm1_mnemonic and vm2_mnemonic: # Shared opcode
                    mnemonic = vm1_mnemonic
                    operands_str = parse_operands(mnemonic, b1, b2, b3)
                    instruction_part = f"{mnemonic:<5} {operands_str}".strip()
                    target_files.extend([f_out1, f_out2])
                elif vm1_mnemonic: # VM1 specific
                    mnemonic = vm1_mnemonic
                    operands_str = parse_operands(mnemonic, b1, b2, b3)
                    instruction_part = f"{mnemonic:<5} {operands_str}".strip()
                    target_files.append(f_out1)
                elif vm2_mnemonic: # VM2 specific
                    mnemonic = vm2_mnemonic
                    operands_str = parse_operands(mnemonic, b1, b2, b3)
                    instruction_part = f"{mnemonic:<5} {operands_str}".strip()
                    target_files.append(f_out2)
                else: # Opcode not found in either map
                    print(f"Warning: Unknown opcode 0x{b0:02X} at offset {offset_str}")
                    unknown_opcodes += 1
                    # Represent unknown opcodes clearly in the output
                    instruction_part = f"; --- UNKNOWN OPCODE {b0:#04x} ---"
                    # Write unknown lines to both files for completeness
                    target_files.extend([f_out1, f_out2])

                # Construct the final disassembled line including bytes
                disassembled_line = f"{offset_str}  {padded_byte_str} {instruction_part}"

                # Write the line to the determined target file(s)
                for f_out in target_files:
                    f_out.write(disassembled_line + "\\n")

                processed_instructions += 1
                offset += INSTRUCTION_SIZE

        print("-" * 30)
        print(f"Disassembly complete.")
        print(f"Processed {processed_instructions} instructions ({processed_instructions * INSTRUCTION_SIZE} bytes).")
        if unknown_opcodes > 0:
            print(f"Encountered {unknown_opcodes} unknown opcodes.")
        print(f"VM1 output saved to '{OUTPUT_VM1_FILE}'")
        print(f"VM2 output saved to '{OUTPUT_VM2_FILE}'")

    except IOError as e:
        print(f"Error handling files: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# --- Run the Disassembler ---
if __name__ == "__main__":
    # Run the main function
    disassemble()

Key takeaways of the two disasm code:

; --- VM1 Disassembly from DumpedByteCodes.bin ---
; Offset           Bytes        Instruction
...
0x00000020:  16 01 00 1C  STRI  R1, [#0x1c] ; 0x9e3779b9
0x00000044:  12 02 00 04  LDRI  R2, [#0x04] ; y
0x0000004C:  12 03 00 10  LDRI  R3, [#0x10] ; z
0x00000064:  71 04 02 00  LSL   R4, R2, R0  ; y << 2
0x0000006C:  73 05 03 0F  LSR   R5, R3, R15 ; z >> 5
0x00000070:  7A 06 04 05  XOR   R6, R4, R5  ; (z >> 5) ^ (y << 2)
0x00000088:  73 04 02 00  LSR   R4, R2, R0  ; y >> 3
0x00000098:  71 05 03 0F  LSL   R5, R3, R15 ; z << 4
0x0000009C:  7A 07 04 05  XOR   R7, R4, R5  ; (y >> 3) ^ (z << 4)
0x000000A0:  41 08 06 07  ADD   R8, R6, R7  ; ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4))
0x000000CC:  16 05 00 28  STRI  R5, [#0x28] ; 0xa78c0b4f
0x000000DC:  73 09 01 0F  LSR   R9, R1, R15 ; sum >> 2
0x000000E8:  7B 0D 09 00  AND   R13, R9, R0 ; (sum >> 2) & 3
0x000000EC:  7A 04 0D 0E  XOR   R4, R13, R14; p ^ ( (sum >> 2) & 3 )
0x000000F4:  7B 04 04 00  AND   R4, R4, R0  ; p ^ ( (sum >> 2) & 3 ) & 3
0x00000104:  47 04 04 0C  MUL   R4, R4, R12 ; idx * 4
0x00000110:  41 0A 04 0F  ADD   R10, R4, R15; (idx * 4) + 0x20
0x0000011C:  11 04 0A 00  LDR   R4, [R10]   ; key[p ^ ( (sum >> 2) & 3 ) & 3]
0x00000120:  7A 06 01 02  XOR   R6, R1, R2  ; sum ^ y
0x00000128:  7A 07 03 04  XOR   R7, R3, R4  ; z ^ key[p]
0x0000012C:  41 09 06 07  ADD   R9, R6, R7  ; (sum ^ y) + (z ^ key[p ^ e & 3])
0x0000013C:  7A 0A 08 09  XOR   R10, R8, R9 ; ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z))
0x00000168:  16 05 00 20  STRI  R5, [#0x20] ; 0x6e637466
0x000001D8:  16 05 00 24  STRI  R5, [#0x24] ; 0x062ef0ed
0x000001DC:  12 02 00 08  LDRI  R2, [#0x08] ; y
0x000001E0:  12 03 00 00  LDRI  R3, [#0x00] ; z
0x000001F8:  71 04 02 00  LSL   R4, R2, R0  ; << 2
0x00000208:  73 05 03 0F  LSR   R5, R3, R15 ; >> 5
0x00000268:  16 05 00 2C  STRI  R5, [#0x2c] ; 0x32303234

; --- VM2 Disassembly from DumpedByteCodes.bin ---
; Offset           Bytes        Instruction
0x0000002C:  91 01 01 03  ADD   R1, R1, R3  ;0x9e3779b9
0x00000048:  2B 0E 00 00  MOV   R14, #0x0000; p = 0
0x00000054:  14 02 00 04  LDRI  R2, [#0x04] ; y
0x00000058:  14 03 00 10  LDRI  R3, [#0x10] ; z
0x00000074:  C3 04 02 00  LSR   R4, R2, R0  ; y >> 2
0x00000078:  C1 05 03 0F  LSL   R5, R3, R15 ; z << 5
0x0000007C:  CA 06 04 05  XOR   R6, R4, R5  ; (z << 5) ^ (y >> 2)
0x00000094:  C1 04 02 00  LSL   R4, R2, R0  ; y << 3
0x000000A4:  C3 05 03 0F  LSR   R5, R3, R15 ; z >> 4
0x000000B0:  CA 07 04 05  XOR   R7, R4, R5  ; (y << 3)(z >> 4)
0x000000B4:  91 08 06 07  ADD   R8, R6, R7  ; (z << 5) ^ (y >> 2) + (y << 3) ^ (z >> 4)
0x000000E4:  18 05 00 28  STRI  R5, [#0x28] ; 0x9f1cf72e
0x000000F8:  C3 09 01 0F  LSR   R9, R1, R15 ; sum >> 2
0x000000FC:  CB 0D 09 00  AND   R13, R9, R0 ; (sum >> 2) & 3
0x0000010C:  CA 04 0D 0E  XOR   R4, R13, R14; (sum >> 2) & 3 ^ p
0x00000114:  CB 04 04 00  AND   R4, R4, R0  ; idx = (p & 3) ^ ( (sum >> 2) & 3 )
0x00000124:  97 04 04 0C  MUL   R4, R4, R12 ; idx * 4
0x00000134:  91 0A 04 0F  ADD   R10, R4, R15; 0x20 + idx * 4
0x00000138:  13 04 0A 00  LDR   R4, [R10]   ; mem[0x20 + idx * 4]
0x00000140:  CA 06 01 02  XOR   R6, R1, R2  ; sum ^ y
0x00000150:  CA 07 03 04  XOR   R7, R3, R4  ; mem[0x20 + idx * 4] ^ z
0x00000154:  91 09 06 07  ADD   R9, R6, R7  ; (sum ^ y) + (mem[0x20 + idx * 4] ^ z)
0x00000158:  CA 0A 08 09  XOR   R10, R8, R9 ; (z << 5) ^ (y >> 2) + (y << 3) ^ (z >> 4) + (sum ^ y) + (mem[0x20 + idx * 4] ^ z)
0x00000184:  18 05 00 20  STRI  R5, [#0x20] ; 0x32303234
0x000001D0:  18 05 00 24  STRI  R5, [#0x24] ; 0xd6eb12c3
0x00000270:  18 05 00 2C  STRI  R5, [#0x2c] ; 0x4e435446

Memory layout of the two VM:

Decrypt script:

  
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define DELTA 0x9e3779b9

void XXTEA_decrypt1(uint32_t *v, int n, const uint32_t key[4]) {
    if (n < 2) return;
    uint32_t y, z, sum, e;
    int p, q = 6 + 52 / n;
    sum = q * DELTA;
    y = v[0];
    do {
        e = (sum >> 2) & 3;
        for (p = n - 1; p > 0; p--) {
            z = v[p - 1];
            v[p] -= ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
            y = v[p];
        }
        z = v[n - 1];
        v[0] -= ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
        y = v[0];
        sum -= DELTA;
    } while (--q > 0);
}

void XXTEA_decrypt2(uint32_t *v, int n, const uint32_t key[4]) {
    if (n < 2) return;
    uint32_t y, z, sum, e;
    int p, q = 6 + 52 / n;
    sum = q * DELTA;
    y = v[0];
    do {
        e = (sum >> 2) & 3;
        for (p = n - 1; p > 0; p--) {
            z = v[p - 1];
            v[p] -= ((z << 5) ^ (y >> 2)) + ((y << 3) ^ (z >> 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
            y = v[p];
        }
        z = v[n - 1];
        v[0] -= ((z << 5) ^ (y >> 2)) + ((y << 3) ^ (z >> 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
        y = v[0];
        sum -= DELTA;
    } while (--q > 0);
}

int main()
{
    uint32_t key1[] = {0x6e637466, 0x062ef0ed, 0xa78c0b4f, 0x32303234};
    unsigned char cipher1[] =
    {
      0x5D, 0x45, 0xD5, 0xB9, 0x8C, 0x95, 0x9C, 0x38, 0x3B, 0xB1,
      0x3E, 0x1E, 0x5F, 0xC8, 0xE8, 0xBB, 0x64, 0x38, 0x48, 0x69
    };

    size_t cipher_len_bytes = sizeof(cipher1);
    int n = cipher_len_bytes / sizeof(uint32_t);

    XXTEA_decrypt1((uint32_t *)cipher1, n, (const uint32_t *)key1);

    // output
    printf("\\nDecrypted data (as chars, %zu bytes):\\n", cipher_len_bytes);
    for (size_t i = 0; i < cipher_len_bytes; i++)
    {
        if (cipher1[i] >= 32 && cipher1[i] <= 126) {
             printf("%c", cipher1[i]);
        } else {
             printf(".");
        }
    }

    uint32_t key2[] = {0x32303234, 0xd6eb12c3, 0x9f1cf72e, 0x4e435446};
    unsigned char cipher2[] =
    {
      0xDE, 0x81, 0xD8, 0xAD, 0xC2, 0xC4, 0xA6, 0x32, 0x1C, 0xAB,
      0x61, 0x3E, 0xCB, 0xFF, 0xEF, 0xF1, 0x27, 0x30, 0x7A, 0x16
    };

    XXTEA_decrypt2((uint32_t *)cipher2, n, (const uint32_t *)key2);

    // output
    printf("\\nDecrypted data (as chars, %zu bytes):\\n", cipher_len_bytes);
    for (size_t i = 0; i < cipher_len_bytes; i++)
    {
        if (cipher2[i] >= 32 && cipher2[i] <= 126) {
             printf("%c", cipher2[i]);
        } else {
             printf(".");
        }
    }

    return 0;
}

/*
std TEA
void XXTEA_encrypt(uint32_t *v, int n, const uint32_t key[4]) {
    if (n < 2) return;  // At least two elements
    uint32_t y, z, sum = 0, e;
    int p, q = 6 + 52 / n;  // Rounds
    z = v[n - 1];   // |  z(>>5  <<4)  |  v[p]  |  y(>>3  <<2) |
    do {
        sum += DELTA;
        e = (sum >> 2) & 3;
        for (p = 0; p < n - 1; p++) {
            y = v[p + 1];
            v[p] += ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
            z = v[p];
        }
        y = v[0];
        v[n - 1] += ((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z));
        z = v[n - 1];
    } while (--q > 0);
}
*/

NCTF{H4rd_VM_with_Gor0ut1n3_5fc4b0be7ad}

Method 2 - IDAPython

Instruction set:

Each handler function’s oprands are stack passing:

Oprand 1	`^0.1`
Oprand 2	`^1.1`
Oprand 3	`^2.1`

Write IDA python script to set “trace” breakpoint and hook registers:

  
import idaapi
import idc

def set_python_bpt(ea, cond):
    ''' 设置跟踪断点：触发时执行 Python 代码并继续执行 '''
    # 检查地址是否有效
    if not idaapi.is_mapped(ea):
        print(f"[错误] 地址 {ea:x} 不在当前加载的模块中")
        return

    # 添加断点（类型：软件断点，默认行为）
    if not idaapi.add_bpt(ea, 0, idaapi.BPT_SOFT):
        print(f"[错误] 无法在地址 {ea:x} 添加断点")
        return

    # 配置断点属性
    bpt = idaapi.bpt_t()
    if not idaapi.get_bpt(ea, bpt):
        print(f"[错误] 无法获取地址 {ea:x} 的断点信息")
        return

    bpt.elang = 'Python'  # 使用 Python 条件
    bpt.condition = cond  # 条件表达式
    bpt.flags |= idaapi.BPT_ENABLED  # 启用断点
    bpt.flags &= ~idaapi.BPT_BRK  # 触发后不暂停程序（关键修改）

    if not idaapi.update_bpt(bpt):
        print(f"[错误] 无法更新地址 {ea:x} 的断点")
        return
    print(f"[成功] 在地址 {ea:x} 设置跟踪断点")

# ------------------------- 钩子函数定义 -------------------------
def hook_shr():
    rbx = idc.get_reg_value("rbx")
    cl = idc.get_reg_value("cl")
    val1 = rbx
    val2 = cl
    print(f"{val1:x} >> {val2:x} = {(val1 >> val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_shl():
    rbx = idc.get_reg_value("rbx")
    cl = idc.get_reg_value("cl")
    val1 = rbx
    val2 = cl
    print(f"{val1:x} << {val2:x} = {(val1 << val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_add():
    rdx = idc.get_reg_value("rdx")
    rax = idc.get_reg_value("rax")
    rbx = idc.get_reg_value("rbx")
    val1 = rdx
    val2 = idc.get_qword(rax + rbx*4)
    print(f"{val1:x} + {val2:x} = {(val1 + val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_xor():
    rdx = idc.get_reg_value("rdx")
    rax = idc.get_reg_value("rax")
    rbx = idc.get_reg_value("rbx")
    val1 = rdx
    val2 = idc.get_qword(rax + rbx*4)
    print(f"{val1:x} ^ {val2:x} = {(val1 ^ val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_sub():
    rdx = idc.get_reg_value("rdx")
    rax = idc.get_reg_value("rax")
    rbx = idc.get_reg_value("rbx")
    val1 = rdx
    val2 = idc.get_qword(rax + rbx*4)
    print(f"{val1:x} - {val2:x} = {(val1 - val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_mul():
    rdx = idc.get_reg_value("rdx")
    rbx = idc.get_reg_value("rbx")
    val1 = rdx
    val2 = rbx
    print(f"{val1:x} * {val2:x} = {(val1 * val2) & 0xFFFFFFFFFFFFFFFF:x}")

def hook_and():
    rdx = idc.get_reg_value("rdx")
    rax = idc.get_reg_value("rax")
    rbx = idc.get_reg_value("rbx")
    val1 = rdx
    val2 = idc.get_qword(rax + rbx*4)
    print(f"{val1:x} & {val2:x} = {(val1 & val2) & 0xFFFFFFFFFFFFFFFF:x}")

# ------------------------- 设置断点 -------------------------
# 设置跟踪断点列表（地址需根据实际情况调整）
trace_bpts = [
    (0x0000000000848860, 'hook_shr()'),
    (0x0000000000848820, 'hook_shl()'),
    (0x000000000084871D, 'hook_add()'),
    (0x00000000008487DD, 'hook_xor()'),
    (0x000000000084875D, 'hook_sub()'),
    (0x00000000008487A0, 'hook_mul()'),
    (0x000000000084889D, 'hook_and()')
]

for ea, cond in trace_bpts:
    set_python_bpt(ea, cond)

ezDOS

MS-DOS on Intel 8086. old but classic architecture

Directly examine the assembly code, a heavily modified RC4 and junk code are inspected. Don’t care about its internal implementation, the final XOR step remains unmodified. Hence extract the XOR Key in the proccess.

Junk code:

Debug:

Input buffer structure in 8086 dos: 0x40(maxLength) + actualLength+ data

# 0x26 'a' chars: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa，XORed
53 1C 38 1B 92 6C D2 1A 05 ED 8A 49 A5 C5 31 51 C1 8C 46 0B 82 17 08 6D BB 49 99 69 DB C7 76 5F 73 38 24 67 2F 90

# Key
32 7d 59 7a f3 0d b3 7b 64 8c eb 28 c4 a4 50 30 a0 ed 27 6a e3 76 69 0c da 28 f8 08 ba a6 17 3e 12 59 45 06 4e f1

# Cipher
7C 3E 0D 3C 88 54 83 0E 3B B8 99 1B 9B E5 23 43 C5 80 45 5B 9A 29 24 38 A9 5C CB 7A E5 93 73 0E 70 6D 7C 31 2B 8C

Cyberchef

NCTF{Y0u_4r3_Assemb1y_M4st3r_5d0b497e}

SafeProgram

The program monitors everything you do. So it reminds you: “DONOT DEBUG ME!”. But really?

The main function implements SM4 encryption without modifying S-Box. Note that one dummy thread is continuously spawned / terminated, which triggers TLS each time.

TLS then triggers CRC self-validation whenever a thread is created. By using a hardware breakpoint to bypass this validation, we can obtaining the hacked S-Box.

If we set software breakpoint, then 0xE0000001 exception occurred:

No(discard), F8 step over:

Now the callback functions were found:

DLL_PROCESS_ATTACH (1): Process initialization
DLL_THREAD_ATTACH (2): Thread creation
DLL_THREAD_DETACH (3): Thread exit
DLL_PROCESS_DETACH (0): Process termination

Divide by 0 exception:

The decrypted keySource array orchestrates a sequential substitution on SBox.

Extract necessary data, and write a decrypt script:

  
import struct

# 系统参数
FK = [0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC]

# 固定参数CK
CK = [
    0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
    0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
    0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
    0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
    0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
    0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
    0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
    0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
]

# 自定义S盒
SBox = [
    0xD1, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6,
    0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76,
    0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86,
    0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A,
    0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3,
    0x17, 0xA9, 0x1C, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA,
    0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0x4F, 0xF3, 0x73,
    0x71, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0xD6, 0xA8,
    0x68, 0x6B, 0x81, 0xB2, 0xFC, 0x64, 0xDA, 0x8B, 0xF8, 0xEB,
    0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24, 0x0E, 0x78,
    0x63, 0x58, 0x9F, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21,
    0xC9, 0x87, 0xD4, 0x00, 0x46, 0x57, 0x5E, 0xD3, 0x27, 0x52,
    0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, 0xEA, 0xBF,
    0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
    0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34,
    0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
    0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29,
    0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45,
    0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C,
    0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F,
    0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
    0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12,
    0xB8, 0xE5, 0xB4, 0xB0, 0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96,
    0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
    0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE,
    0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
]

def left_rotate(n, b):
    return ((n << b) | (n >> (32 - b))) & 0xFFFFFFFF

def tau(b):
    a = b.to_bytes(4, 'big')
    a = [SBox[x] for x in a]
    return int.from_bytes(bytes(a), 'big')

def L(b):
    return b ^ left_rotate(b, 2) ^ left_rotate(b, 10) ^ left_rotate(b, 18) ^ left_rotate(b, 24)

def T(b):
    return L(tau(b))

def L_prime(b):
    return b ^ left_rotate(b, 13) ^ left_rotate(b, 23)

def T_prime(b):
    return L_prime(tau(b))

def sm4_key_expansion(key):
    MK = struct.unpack('>4I', key)
    K = [0] * 36
    K[0] = MK[0] ^ FK[0]
    K[1] = MK[1] ^ FK[1]
    K[2] = MK[2] ^ FK[2]
    K[3] = MK[3] ^ FK[3]
    rk = [0] * 32
    for i in range(32):
        tmp = K[i+1] ^ K[i+2] ^ K[i+3] ^ CK[i]
        tmp = T_prime(tmp)
        K[i+4] = K[i] ^ tmp
        rk[i] = K[i+4]
    return rk

def sm4_decrypt(ciphertext, rk):
    x = list(struct.unpack('>4I', ciphertext))
    for i in range(32):
        rk_i = rk[i]
        tmp = x[i+1] ^ x[i+2] ^ x[i+3] ^ rk_i
        tmp = T(tmp)
        x.append(x[i] ^ tmp)
    return struct.pack('>4I', x[35], x[34], x[33], x[32])

# 输入数据
key = bytes([0x4E, 0x43, 0x54, 0x46, 0x32, 0x34, 0x6E, 0x63, 0x74, 0x66,
  0x4E, 0x43, 0x54, 0x46, 0x32, 0x34])
cipher = bytes([0xFB, 0x97, 0x3C, 0x3B, 0xF1, 0x99, 0x12, 0xDF, 0x13, 0x30,
  0xF7, 0xD8, 0x7F, 0xEB, 0xA0, 0x6C, 0x14, 0x5B, 0xA6, 0x2A,
  0xA8, 0x05, 0xA5, 0xF3, 0x76, 0xBE, 0xC9, 0x01, 0xF9, 0x36,
  0x7B, 0x46])

# 生成轮密钥并反转
rk = sm4_key_expansion(key)
dec_rk = rk[::-1]

# 分块解密
block1 = cipher[:16]
block2 = cipher[16:]
plain1 = sm4_decrypt(block1, dec_rk)
plain2 = sm4_decrypt(block2, dec_rk)
plain = plain1 + plain2

print("Decrypted:", plain)

NCTF{58cb925e0cd823c0d0b54fd06b820b7e}

Appendix

Standard SBox for SM4：

0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
00 | D6 90 E9 FE CC E1 3D B7 16 B6 14 C2 28 FB 2C 05
10 | 2B 67 9A 76 2A BE 04 C3 AA 44 13 26 49 86 06 99
20 | 9C 42 50 F4 91 EF 98 7A 33 54 0B 43 ED CF AC 62
30 | E4 B3 1C A9 C9 08 E8 95 80 DF 94 FA 75 8F 3F A6
40 | 47 07 A7 FC F3 73 17 BA 83 59 3C 19 E6 85 4F A8
50 | 68 6B 81 B2 71 64 DA 8B F8 EB 0F 4B 70 56 9D 35
60 | 1E 24 0E 5E 63 58 D1 A2 25 22 7C 3B 01 21 78 87
70 | D4 00 46 57 9F D3 27 52 4C 36 02 E7 A0 C4 C8 9E
80 | EA BF 8A D2 40 C7 38 B5 A3 F7 F2 CE F9 61 15 A1
90 | E0 AE 5D A4 9B 34 1A 55 AD 93 32 30 F5 8C B1 E3
A0 | 1D F6 E2 2E 82 66 CA 60 C0 29 23 AB 0D 53 4E 6F
B0 | D5 DB 37 45 DE FD 8E 2F 03 FF 6A 72 6D 6C 5B 51
C0 | 8D 1B AF 92 BB DD BC 7F 11 D9 5C 41 1F 10 5A D8
D0 | 0A C1 31 88 A5 CD 7B BD 2D 74 D0 12 B8 E5 B4 B0
E0 | 89 69 97 4A 0C 96 77 7E 65 B9 F1 09 C5 6E C6 84
F0 | 18 F0 7D EC 3A DC 4D 20 79 EE 5F 3E D7 CB 39 48

Structure I used

  
typedef struct _EXCEPTION_POINTERS {
  PEXCEPTION_RECORD ExceptionRecord;
  PCONTEXT ContextRecord;
} EXCEPTION_POINTERS, *PEXCEPTION_POINTERS;

  
void NTAPI TlsCallback(
    PVOID DllHandle,
    DWORD Reason,
    PVOID Reserved
);

ctf

vm go

This post is licensed under CC BY 4.0 by the author.