mipsevm,contracts: fix onchain read/write memory proof, improve unicorn...

mipsevm,contracts: fix onchain read/write memory proof, improve unicorn tooling mips steps and update tests

mipsevm,contracts: fix onchain read/write memory proof, improve unicorn...
mipsevm,contracts: fix onchain read/write memory proof, improve unicorn tooling mips steps and update tests
1771860d · protolambda · c868c051 · 1771860d · 1771860d · 1771860d
Commit 1771860d authored Apr 26, 2023 by protolambda
7 changed files
--- a/contracts/src/MIPS.sol
+++ b/contracts/src/MIPS.sol
@@ -220,13 +220,12 @@ contract MIPS {
  }
  function proofOffset(uint8 proofIndex) internal returns (uint256 offset) {
-    require(proofIndex & 3 == 0, "addr must be aligned to 4 bytes");
    // A proof of 32-bit memory, with 32-byte leaf values, is (32-5)=27 bytes32 sibling entries.
    // The leaf value itself is encoded as well, so each proof is 28*32 bytes. And proof.offset == 390
-    offset = 390 + proofIndex + (28*32);
+    offset = 390 + (uint256(proofIndex) * (28*32));
    uint256 s = 0;
    assembly { s := calldatasize() }
-    require(s > (offset + 28*32), "check that there is enough calldata");
+    require(s >= (offset + 28*32), "check that there is enough calldata");
    return offset;
  }
@@ -246,11 +245,9 @@ contract MIPS {
      for { let i := 0 } lt(i, 27) { i := add(i, 1) } {
        let sibling := calldataload(offset)
        offset := add(offset, 32)
-        if and(shr(i, path), 1) {
+        switch and(shr(i, path), 1)
-          node := hashPair(sibling, node)
+        case 0 { node := hashPair(node, sibling) }
-          continue
+        case 1 { node := hashPair(sibling, node) }
-        }
-        node := hashPair(node, sibling)
      }
      let memRoot := mload(0x80) // load memRoot, first field of state
      if iszero(eq(node, memRoot)) { // verify the root matches
@@ -284,11 +281,9 @@ contract MIPS {
      for { let i := 0 } lt(i, 27) { i := add(i, 1) } {
        let sibling := calldataload(offset)
        offset := add(offset, 32)
-        if and(shr(i, path), 1) {
+        switch and(shr(i, path), 1)
-          node := hashPair(sibling, node)
+        case 0 { node := hashPair(node, sibling) }
-          continue
+        case 1 { node := hashPair(sibling, node) }
-        }
-        node := hashPair(node, sibling)
      }
      mstore(0x80, node) // store new memRoot, first field of state
    }
@@ -404,7 +399,7 @@ contract MIPS {
    }
    // ALU
-    uint32 val = execute(insn, rs, rt, mem);
+    uint32 val = execute(insn, rs, rt, mem) & 0xffFFffFF; // swr outputs more than 4 bytes without the mask
    uint32 func = insn & 0x3f; // 6-bits
    if (opcode == 0 && func >= 8 && func < 0x1c) {
@@ -438,7 +433,7 @@ contract MIPS {
    // write memory
    if (storeAddr != 0xFF_FF_FF_FF) {
-      writeMem(storeAddr, 1, mem);
+      writeMem(storeAddr, 1, val);
    }
    // write back the value to destination register

--- a/mipsevm/evm_test.go
+++ b/mipsevm/evm_test.go
@@ -3,7 +3,6 @@ package mipsevm
 import (
 	"bytes"
 	"encoding/binary"
-	"fmt"
 	"math/big"
 	"os"
 	"path"
@@ -14,13 +13,9 @@ import (
 	"github.com/ethereum/go-ethereum/core/vm"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/stretchr/testify/require"
-	uc "github.com/unicorn-engine/unicorn/bindings/go/unicorn"
 )
 func TestEVM(t *testing.T) {
-	t.Skip("work in progress memory proof")
 	testFiles, err := os.ReadDir("test/bin")
 	require.NoError(t, err)
@@ -63,32 +58,20 @@ func TestEVM(t *testing.T) {
 			require.NoError(t, mu.MemMap(baseAddrStart, ((baseAddrEnd-baseAddrStart)&^pageAddrMask)+pageSize))
 			require.NoError(t, mu.MemMap(endAddr&^pageAddrMask, pageSize))
-			al := &AccessList{mem: state.Memory}
 			err = LoadUnicorn(state, mu)
 			require.NoError(t, err, "load state into unicorn")
-			err = HookUnicorn(state, mu, os.Stdout, os.Stderr, al)
+			us, err := NewUnicornState(mu, state, os.Stdout, os.Stderr)
 			require.NoError(t, err, "hook unicorn to state")
-			var stateData []byte
+			for i := 0; i < 1000; i++ {
-			var insn uint32
+				if us.state.PC == endAddr {
-			var pc uint32
+					break
-			var post []byte
-			preCode := func() {
-				insn = state.Memory.GetMemory(state.PC)
-				pc = state.PC
-				fmt.Printf("PRE - pc: %08x insn: %08x\n", pc, insn)
-				// remember the pre-state, to repeat it in the EVM during the post processing step
-				stateData = state.EncodeWitness()
-				if post != nil {
-					require.Equal(t, hexutil.Bytes(stateData).String(), hexutil.Bytes(post).String(),
-						"unicorn produced different state than EVM")
-				}
 				}
-			postCode := func() {
+				insn := state.Memory.GetMemory(state.PC)
-				fmt.Printf("POST - pc: %08x insn: %08x\n", pc, insn)
+				t.Logf("step: %4d pc: 0x%08x insn: 0x%08x", state.Step, state.PC, insn)
-				proofData := append([]byte(nil), al.proofData...)
+				stateData, proofData := us.Step(true)
 				stateHash := crypto.Keccak256Hash(stateData)
 				var input []byte
@@ -112,29 +95,19 @@ func TestEVM(t *testing.T) {
 				postHash := common.Hash(*(*[32]byte)(ret))
 				logs := evmState.Logs()
 				require.Equal(t, 1, len(logs), "expecting a log with post-state")
-				post = logs[0].Data
+				evmPost := logs[0].Data
-				require.Equal(t, crypto.Keccak256Hash(post), postHash, "logged state must be accurate")
+				require.Equal(t, crypto.Keccak256Hash(evmPost), postHash, "logged state must be accurate")
 				env.StateDB.RevertToSnapshot(snap)
 				t.Logf("EVM step took %d gas, and returned stateHash %s", startingGas-leftOverGas, postHash)
-			}
-			firstStep := true
+				// verify the post-state matches.
-			_, err = mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
+				// TODO: it may be more readable to decode the evmPost state and compare it attribute by attribute.
-				if state.PC == endAddr {
+				uniPost := us.state.EncodeWitness()
-					require.NoError(t, mu.Stop(), "stop test when returned")
+				require.Equal(t, hexutil.Bytes(uniPost).String(), hexutil.Bytes(evmPost).String(),
-				}
+					"unicorn produced different state than EVM")
-				if !firstStep {
-					postCode()
 			}
-				preCode()
+			require.Equal(t, uint32(endAddr), state.PC, "must reach end")
-				firstStep = false
-			}, 0, ^uint64(0))
-			require.NoError(t, err, "hook code")
-			err = RunUnicorn(mu, state.PC, 1000)
-			require.NoError(t, err, "must run steps without error")
 			// inspect test result
 			done, result := state.Memory.GetMemory(baseAddrEnd+4), state.Memory.GetMemory(baseAddrEnd+8)
 			require.Equal(t, done, uint32(1), "must be done")

--- a/mipsevm/page.go
+++ b/mipsevm/page.go
@@ -56,6 +56,7 @@ func (p *CachedPage) MerkleRoot() [32]byte {
 			continue
 		}
 		p.Cache[j] = crypto.Keccak256Hash(p.Data[i : i+64])
+		//fmt.Printf("0x%x 0x%x -> 0x%x\n", p.Data[i:i+32], p.Data[i+32:i+64], p.Cache[j])
 		p.Ok[j] = true
 	}

--- a/mipsevm/state_test.go
+++ b/mipsevm/state_test.go
@@ -9,8 +9,6 @@ import (
 	"testing"
 	"github.com/stretchr/testify/require"
-	uc "github.com/unicorn-engine/unicorn/bindings/go/unicorn"
 )
 // baseAddrStart - baseAddrEnd is used in tests to write the results to
@@ -60,19 +58,16 @@ func TestState(t *testing.T) {
 			err = LoadUnicorn(state, mu)
 			require.NoError(t, err, "load state into unicorn")
-			err = HookUnicorn(state, mu, os.Stdout, os.Stderr, NoOpTracer{})
+			us, err := NewUnicornState(mu, state, os.Stdout, os.Stderr)
 			require.NoError(t, err, "hook unicorn to state")
-			// Add hook to stop unicorn once we reached the end of the test (i.e. "ate food")
+			for i := 0; i < 1000; i++ {
-			_, err = mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
+				if us.state.PC == endAddr {
-				if state.PC == endAddr {
+					break
-					require.NoError(t, mu.Stop(), "stop test when returned")
 				}
-			}, 0, ^uint64(0))
+				us.Step(false)
-			require.NoError(t, err, "hook code")
+			}
+			require.Equal(t, uint32(endAddr), us.state.PC, "must reach end")
-			err = RunUnicorn(mu, state.PC, 1000)
-			require.NoError(t, err, "must run steps without error")
 			// inspect test result
 			done, result := state.Memory.GetMemory(baseAddrEnd+4), state.Memory.GetMemory(baseAddrEnd+8)
 			require.Equal(t, done, uint32(1), "must be done")
@@ -97,11 +92,15 @@ func TestMinimal(t *testing.T) {
 	err = LoadUnicorn(state, mu)
 	require.NoError(t, err, "load state into unicorn")
 	var stdOutBuf, stdErrBuf bytes.Buffer
-	err = HookUnicorn(state, mu, io.MultiWriter(&stdOutBuf, os.Stdout), io.MultiWriter(&stdErrBuf, os.Stderr), NoOpTracer{})
+	us, err := NewUnicornState(mu, state, io.MultiWriter(&stdOutBuf, os.Stdout), io.MultiWriter(&stdErrBuf, os.Stderr))
 	require.NoError(t, err, "hook unicorn to state")
-	err = RunUnicorn(mu, state.PC, 400_000)
+	for i := 0; i < 400_000; i++ {
-	require.NoError(t, err, "must run steps without error")
+		if us.state.Exited {
+			break
+		}
+		us.Step(false)
+	}
 	require.True(t, state.Exited, "must complete program")
 	require.Equal(t, uint8(0), state.ExitCode, "exit with 0")

--- a/mipsevm/tracer.go
+++ b/mipsevm/tracer.go
-package mipsevm
-import "fmt"
-type MemEntry struct {
-	EffAddr  uint32
-	PreValue uint32
-}
-type AccessList struct {
-	mem *Memory
-	memAccessAddr uint32
-	proofData []byte
-}
-func (al *AccessList) Reset() {
-	al.memAccessAddr = ^uint32(0)
-	al.proofData = nil
-}
-func (al *AccessList) OnRead(effAddr uint32) {
-	if al.memAccessAddr == effAddr {
-		return
-	}
-	if al.memAccessAddr != ^uint32(0) {
-		panic(fmt.Errorf("bad read of %08x, already have %08x", effAddr, al.memAccessAddr))
-	}
-	al.memAccessAddr = effAddr
-	proof := al.mem.MerkleProof(effAddr)
-	al.proofData = append(al.proofData, proof[:]...)
-}
-func (al *AccessList) OnWrite(effAddr uint32) {
-	if al.memAccessAddr == effAddr {
-		return
-	}
-	if al.memAccessAddr != ^uint32(0) {
-		panic(fmt.Errorf("bad write of %08x, already have %08x", effAddr, al.memAccessAddr))
-	}
-	proof := al.mem.MerkleProof(effAddr)
-	al.proofData = append(al.proofData, proof[:]...)
-}
-func (al *AccessList) PreInstruction(pc uint32) {
-	proof := al.mem.MerkleProof(pc)
-	al.proofData = append(al.proofData, proof[:]...)
-}
-var _ Tracer = (*AccessList)(nil)
-type Tracer interface {
-	// OnRead remembers reads from the given effAddr.
-	// Warning: the addr is an effective-addr, i.e. always aligned.
-	// But unicorn may fire it multiple times, for each byte that was changed within the effective addr boundaries.
-	OnRead(effAddr uint32)
-	// OnWrite remembers writes to the given effAddr.
-	// Warning: the addr is an effective-addr, i.e. always aligned.
-	// But unicorn may fire it multiple times, for each byte that was changed within the effective addr boundaries.
-	OnWrite(effAddr uint32)
-	PreInstruction(pc uint32)
-}
-type NoOpTracer struct{}
-func (n NoOpTracer) OnRead(effAddr uint32) {}
-func (n NoOpTracer) OnWrite(effAddr uint32) {}
-func (n NoOpTracer) PreInstruction(pc uint32) {}
-var _ Tracer = NoOpTracer{}
--- a/mipsevm/unicorn.go
+++ b/mipsevm/unicorn.go
@@ -5,45 +5,44 @@ import (
 	"io"
 	"log"
 	"math"
+	"sync"
 	uc "github.com/unicorn-engine/unicorn/bindings/go/unicorn"
 )
-func NewUnicorn() (uc.Unicorn, error) {
+type UnicornState struct {
-	return uc.NewUnicorn(uc.ARCH_MIPS, uc.MODE_32|uc.MODE_BIG_ENDIAN)
+	sync.Mutex
+	mu uc.Unicorn
+	state *State
+	stdOut io.Writer
+	stdErr io.Writer
+	lastMemAccess   uint32
+	memProofEnabled bool
+	memProof        [28 * 32]byte
+	onStep func()
 }
-func LoadUnicorn(st *State, mu uc.Unicorn) error {
+// TODO add pre-image oracle
-	// mmap and write each page of memory state into unicorn
+func NewUnicornState(mu uc.Unicorn, state *State, stdOut, stdErr io.Writer) (*UnicornState, error) {
-	for pageIndex, page := range st.Memory.Pages {
+	m := &UnicornState{
-		addr := uint64(pageIndex) << pageAddrSize
+		mu:     mu,
-		if err := mu.MemMap(addr, pageSize); err != nil {
+		state:  state,
-			return fmt.Errorf("failed to mmap page at addr 0x%x: %w", addr, err)
+		stdOut: stdOut,
-		}
+		stdErr: stdErr,
-		if err := mu.MemWrite(addr, page.Data[:]); err != nil {
-			return fmt.Errorf("failed to write page at addr 0x%x: %w", addr, err)
-		}
-	}
-	// write all registers into unicorn, including PC, LO, HI
-	regValues := make([]uint64, 32+3)
-	// TODO: do we have to sign-extend registers before writing them to unicorn, or are the trailing bits unused?
-	for i, v := range st.Registers {
-		regValues[i] = uint64(v)
 	}
-	regValues[32] = uint64(st.PC)
+	st := m.state
-	regValues[33] = uint64(st.LO)
-	regValues[34] = uint64(st.HI)
-	if err := mu.RegWriteBatch(regBatchKeys(), regValues); err != nil {
-		return fmt.Errorf("failed to write registers: %w", err)
-	}
-	return nil
-}
-func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer) error {
+	var err error
-	_, err := mu.HookAdd(uc.HOOK_INTR, func(mu uc.Unicorn, intno uint32) {
+	_, err = mu.HookAdd(uc.HOOK_INTR, func(mu uc.Unicorn, intno uint32) {
 		if intno != 17 {
 			log.Fatal("invalid interrupt ", intno, " at step ", st.Step)
 		}
 		syscallNum, _ := mu.RegRead(uc.MIPS_REG_V0)
 		fmt.Printf("syscall: %d\n", syscallNum)
@@ -96,7 +95,7 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
 		mu.RegWrite(uc.MIPS_REG_A3, 0)
 	}, 0, ^uint64(0))
 	if err != nil {
-		return fmt.Errorf("failed to set up interrupt/syscall hook: %w", err)
+		return nil, fmt.Errorf("failed to set up interrupt/syscall hook: %w", err)
 	}
 	// Shout if Go mmap calls didn't allocate the memory properly
@@ -105,15 +104,21 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
 		return false
 	}, 0, ^uint64(0))
 	if err != nil {
-		return fmt.Errorf("failed to set up unmapped-mem-write hook: %w", err)
+		return nil, fmt.Errorf("failed to set up unmapped-mem-write hook: %w", err)
 	}
 	_, err = mu.HookAdd(uc.HOOK_MEM_READ, func(mu uc.Unicorn, access int, addr64 uint64, size int, value int64) {
 		effAddr := uint32(addr64 & 0xFFFFFFFC) // pass effective addr to tracer
-		tr.OnRead(effAddr)
+		if m.memProofEnabled && m.lastMemAccess != effAddr {
+			if m.lastMemAccess != ^uint32(0) {
+				panic(fmt.Errorf("unexpected different mem access at %08x, already have access at %08x buffered", effAddr, m.lastMemAccess))
+			}
+			m.lastMemAccess = effAddr
+			m.memProof = m.state.Memory.MerkleProof(effAddr)
+		}
 	}, 0, ^uint64(0))
 	if err != nil {
-		return fmt.Errorf("failed to set up mem-write hook: %w", err)
+		return nil, fmt.Errorf("failed to set up mem-write hook: %w", err)
 	}
 	_, err = mu.HookAdd(uc.HOOK_MEM_WRITE, func(mu uc.Unicorn, access int, addr64 uint64, size int, value int64) {
@@ -124,68 +129,74 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
 			panic("invalid mem size")
 		}
 		effAddr := uint32(addr64 & 0xFFFFFFFC)
-		tr.OnWrite(effAddr)
+		pre := st.Memory.GetMemory(effAddr)
+		var post uint32
 		rt := value
 		rs := addr64 & 3
 		if size == 1 {
-			mem := st.Memory.GetMemory(effAddr)
 			val := uint32((rt & 0xFF) << (24 - (rs&3)*8))
 			mask := 0xFFFFFFFF ^ uint32(0xFF<<(24-(rs&3)*8))
-			st.Memory.SetMemory(effAddr, (mem&mask)|val)
+			post = (pre & mask) | val
 		} else if size == 2 {
-			mem := st.Memory.GetMemory(effAddr)
 			val := uint32((rt & 0xFFFF) << (16 - (rs&2)*8))
 			mask := 0xFFFFFFFF ^ uint32(0xFFFF<<(16-(rs&2)*8))
-			st.Memory.SetMemory(effAddr, (mem&mask)|val)
+			post = (pre & mask) | val
 		} else if size == 4 {
-			st.Memory.SetMemory(effAddr, uint32(rt))
+			post = uint32(rt)
 		} else {
 			log.Fatal("bad size write to ram")
 		}
+		if m.memProofEnabled && m.lastMemAccess != effAddr {
+			if m.lastMemAccess != ^uint32(0) {
+				panic(fmt.Errorf("unexpected different mem access at %08x, already have access at %08x buffered", effAddr, m.lastMemAccess))
+			}
+			m.lastMemAccess = effAddr
+			m.memProof = m.state.Memory.MerkleProof(effAddr)
+		}
+		// only set memory after making the proof: we need the pre-state
+		st.Memory.SetMemory(effAddr, post)
 	}, 0, ^uint64(0))
 	if err != nil {
-		return fmt.Errorf("failed to set up mem-write hook: %w", err)
+		return nil, fmt.Errorf("failed to set up mem-write hook: %w", err)
 	}
-	regBatch := regBatchKeys()
+	return m, nil
-	_, err = mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
+}
-		st.Step += 1
-		batch, err := mu.RegReadBatch(regBatch)
+func (m *UnicornState) Step(proof bool) (stateWitness []byte, memProof []byte) {
-		if err != nil {
+	m.memProofEnabled = proof
-			panic(fmt.Errorf("failed to read register batch: %w", err))
+	m.lastMemAccess = ^uint32(0)
-		}
-		for i := 0; i < 32; i++ {
+	if proof {
-			st.Registers[i] = uint32(batch[i])
+		stateWitness = m.state.EncodeWitness()
+		insnProof := m.state.Memory.MerkleProof(m.state.PC)
+		memProof = append(memProof, insnProof[:]...)
 	}
-		prevPC := st.PC
-		st.PC = uint32(batch[32])
-		// We detect if we are potentially in a delay-slot.
+	insn := m.state.Memory.GetMemory(m.state.PC)
-		// If we may be (i.e. last PC is 1 instruction before current),
+	oldNextPC := m.state.NextPC
-		// then parse the last instruction to determine what the next PC would be.
+	newNextPC := oldNextPC + 4
-		// This reflects the handleBranch / handleJump behavior that schedules next-PC.
-		if st.PC == prevPC+4 {
-			st.NextPC = prevPC + 8
-			prevInsn := st.Memory.GetMemory(prevPC)
+	opcode := insn >> 26
-			opcode := prevInsn >> 26
 	switch opcode {
 	case 2, 3: // J/JAL
-				st.NextPC = signExtend(prevInsn&0x03FFFFFF, 25) << 2
+		newNextPC = signExtend(insn&0x03FFFFFF, 25) << 2
 	case 1, 4, 5, 6, 7: // branching
-				rs := st.Registers[(prevInsn>>21)&0x1F]
+		rs := m.state.Registers[(insn>>21)&0x1F]
 		shouldBranch := false
 		switch opcode {
 		case 4, 5:
-					rt := st.Registers[(prevInsn>>16)&0x1F]
+			rt := m.state.Registers[(insn>>16)&0x1F]
 			shouldBranch = (rs == rt && opcode == 4) || (rs != rt && opcode == 5)
 		case 6:
 			shouldBranch = int32(rs) <= 0 // blez
 		case 7:
 			shouldBranch = int32(rs) > 0 // bgtz
 		case 1:
-					rtv := (prevInsn >> 16) & 0x1F
+			rtv := (insn >> 16) & 0x1F
 			if rtv == 0 {
 				shouldBranch = int32(rs) < 0
 			} // bltz
@@ -194,25 +205,83 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
 			} // bgez
 		}
 		if shouldBranch {
-					st.NextPC = prevPC + 4 + (signExtend(prevInsn&0xFFFF, 15) << 2)
+			newNextPC = m.state.PC + 4 + (signExtend(insn&0xFFFF, 15) << 2)
 		}
 	case 0:
-				if funcv := prevInsn & 0x3f; funcv == 8 || funcv == 9 { // JR/JALR
+		if funcv := insn & 0x3f; funcv == 8 || funcv == 9 { // JR/JALR
-					rs := st.Registers[(prevInsn>>21)&0x1F]
+			rs := m.state.Registers[(insn>>21)&0x1F]
-					st.NextPC = rs
+			newNextPC = rs
 		}
 	}
-		} else {
-			st.NextPC = st.PC + 4
+	// Execute only a single instruction.
+	// The memory and syscall hooks will update the state with any of the dynamic changes.
+	err := m.mu.StartWithOptions(uint64(m.state.PC), uint64(m.state.NextPC), &uc.UcOptions{
+		Timeout: 0, // 0 to disable, value is in ms.
+		Count:   1,
+	})
+	if proof {
+		memProof = append(memProof, m.memProof[:]...)
 	}
-		st.LO = uint32(batch[33])
+	// count it
-		st.HI = uint32(batch[34])
+	m.state.Step += 1
-	}, 0, ^uint64(0))
+	// Now do post-processing to keep our state in sync:
+	// 1) match the registers post-state
+	batch, err := m.mu.RegReadBatch(regBatchKeys)
+	if err != nil {
+		panic(fmt.Errorf("failed to read register batch: %w", err))
+	}
+	for i := 0; i < 32; i++ {
+		m.state.Registers[i] = uint32(batch[i])
+	}
+	_ = uint32(batch[32]) // ignore the PC, we follow oldNextPC instead, to emulate delay-slot behavior
+	m.state.LO = uint32(batch[33])
+	m.state.HI = uint32(batch[34])
+	// 2) adopt the old nextPC as new PC.
+	// This effectively implements delay-slots, even though unicorn immediately loses
+	// delay-slot information when only executing a single instruction.
+	m.state.PC = oldNextPC
+	err = m.mu.RegWrite(uc.MIPS_REG_PC, uint64(oldNextPC))
 	if err != nil {
-		return fmt.Errorf("failed to set up instruction hook: %w", err)
+		panic("failed to write PC register")
 	}
+	m.state.NextPC = newNextPC
+	return
+}
+func NewUnicorn() (uc.Unicorn, error) {
+	return uc.NewUnicorn(uc.ARCH_MIPS, uc.MODE_32|uc.MODE_BIG_ENDIAN)
+}
+func LoadUnicorn(st *State, mu uc.Unicorn) error {
+	// mmap and write each page of memory state into unicorn
+	for pageIndex, page := range st.Memory.Pages {
+		addr := uint64(pageIndex) << pageAddrSize
+		if err := mu.MemMap(addr, pageSize); err != nil {
+			return fmt.Errorf("failed to mmap page at addr 0x%x: %w", addr, err)
+		}
+		if err := mu.MemWrite(addr, page.Data[:]); err != nil {
+			return fmt.Errorf("failed to write page at addr 0x%x: %w", addr, err)
+		}
+	}
+	// write all registers into unicorn, including PC, LO, HI
+	regValues := make([]uint64, 32+3)
+	// TODO: do we have to sign-extend registers before writing them to unicorn, or are the trailing bits unused?
+	for i, v := range st.Registers {
+		regValues[i] = uint64(v)
+	}
+	regValues[32] = uint64(st.PC)
+	regValues[33] = uint64(st.LO)
+	regValues[34] = uint64(st.HI)
+	if err := mu.RegWriteBatch(regBatchKeys, regValues); err != nil {
+		return fmt.Errorf("failed to write registers: %w", err)
+	}
 	return nil
 }
@@ -225,18 +294,11 @@ func signExtend(v uint32, i uint32) uint32 {
 	}
 }
-func RunUnicorn(mu uc.Unicorn, entrypoint uint32, steps uint64) error {
+var regBatchKeys = func() []int {
-	return mu.StartWithOptions(uint64(entrypoint), ^uint64(0), &uc.UcOptions{
-		Timeout: 0, // 0 to disable, value is in ms.
-		Count:   steps,
-	})
-}
-func regBatchKeys() []int {
 	var batch []int
 	for i := 0; i < 32; i++ {
 		batch = append(batch, uc.MIPS_REG_ZERO+i)
 	}
 	batch = append(batch, uc.MIPS_REG_PC, uc.MIPS_REG_LO, uc.MIPS_REG_HI)
 	return batch
-}
+}()
--- a/mipsevm/unicorn_test.go
+++ b/mipsevm/unicorn_test.go
@@ -8,18 +8,55 @@ import (
 	uc "github.com/unicorn-engine/unicorn/bindings/go/unicorn"
 )
-// TestUnicorn test that unicorn works
+// TestUnicornDelaySlot tests that unicorn works, and determines exactly how delay slots behave.
-func TestUnicorn(t *testing.T) {
+func TestUnicornDelaySlot(t *testing.T) {
 	mu, err := NewUnicorn()
 	require.NoError(t, err)
 	defer mu.Close()
 	require.NoError(t, mu.MemMap(0, 4096))
 	require.NoError(t, mu.RegWrite(uc.MIPS_REG_RA, 420), "set RA to addr that is multiple of 4")
-	require.NoError(t, mu.MemWrite(0, []byte{0x03, 0xe0, 0x00, 0x08}), "jmp $ra")
+	require.NoError(t, mu.MemWrite(0, []byte{0x03, 0xe0, 0x00, 0x08}), "jr $ra")
+	require.NoError(t, mu.MemWrite(4, []byte{0x20, 0x09, 0x0a, 0xFF}), "addi $t1 $r0 0x0aff")
+	require.NoError(t, mu.MemWrite(32, []byte{0x20, 0x09, 0x0b, 0xFF}), "addi $t1 $r0 0x0bff")
+	mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
+		t.Logf("addr: %08x", addr)
+	}, uint64(0), ^uint64(0))
+	// stop at instruction in addr=4, the delay slot
+	require.NoError(t, mu.StartWithOptions(uint64(0), uint64(4), &uc.UcOptions{
+		Timeout: 0, // 0 to disable, value is in ms.
+		Count:   2,
+	}))
+	t1, err := mu.RegRead(uc.MIPS_REG_T1)
+	require.NoError(t, err)
+	require.NotEqual(t, uint64(0x0aff), t1, "delay slot should not execute")
-	require.NoError(t, RunUnicorn(mu, 0, 1))
 	pc, err := mu.RegRead(uc.MIPS_REG_PC)
 	require.NoError(t, err)
-	require.Equal(t, uint64(420), pc, "jumped")
+	// unicorn is weird here: when entering a delay slot, it does not update the PC register by itself.
+	require.Equal(t, uint64(0), pc, "delay slot, no jump yet")
+	// now restart, but run two instructions, to include the delay slot
+	require.NoError(t, mu.StartWithOptions(uint64(0), ^uint64(0), &uc.UcOptions{
+		Timeout: 0, // 0 to disable, value is in ms.
+		Count:   2,
+	}))
+	pc, err = mu.RegRead(uc.MIPS_REG_PC)
+	require.NoError(t, err)
+	require.Equal(t, uint64(420), pc, "jumped after NOP delay slot")
+	t1, err = mu.RegRead(uc.MIPS_REG_T1)
+	require.NoError(t, err)
+	require.Equal(t, uint64(0x0aff), t1, "delay slot should execute")
+	require.NoError(t, mu.StartWithOptions(uint64(32), uint64(32+4), &uc.UcOptions{
+		Timeout: 0, // 0 to disable, value is in ms.
+		Count:   1,
+	}))
+	t1, err = mu.RegRead(uc.MIPS_REG_T1)
+	require.NoError(t, err)
+	require.Equal(t, uint64(0x0bff), t1, "regular instruction should work fine")
 }