Commit 82fadf6a authored by protolambda

contracts,mipsevm: state format update progress, bugfixes, EVM MIPS-tests passing

parent 788522a9
// SPDX-License-Identifier: MIT
pragma solidity ^0.7.3;
pragma experimental ABIEncoderV2;
pragma solidity ^0.7.6;
// https://inst.eecs.berkeley.edu/~cs61c/resources/MIPS_Green_Sheet.pdf
// https://uweb.engr.arizona.edu/~ece369/Resources/spim/MIPSReference.pdf
......@@ -9,32 +8,31 @@ pragma experimental ABIEncoderV2;
// https://www.cs.cmu.edu/afs/cs/academic/class/15740-f97/public/doc/mips-isa.pdf
// page A-177
// This is a separate contract from the challenge contract
// Anyone can use it to validate a MIPS state transition
// First, to prepare, you call AddMerkleState, which adds valid state nodes in the stateHash.
// If you are using the Preimage oracle, you call AddPreimage
// Then, you call Step. Step will revert if state is missing. If all state is present, it will return the next hash
// This MIPS contract emulates a single MIPS instruction.
//
// Note that delay slots are isolated instructions:
// the nextPC in the state pre-schedules where the VM jumps next.
//
// The Step input is a packed VM state, with binary-merkle-tree witness data for memory reads/writes.
// The Step outputs a keccak256 hash of the packed VM State, and logs the resulting state for offchain usage.
contract MIPS {
struct State {
bytes32 memRoot;
bytes32 preimageKey;
uint32 preimageOffset;
uint32[32] registers;
uint32 pc;
uint32 nextPC; // State is executing a branch/jump delay slot if nextPC != pc+4
uint32 lr;
uint32 lo;
uint32 hi;
uint32 heap;
uint8 exitCode;
bool exited;
uint64 step;
uint32[32] registers;
}
// total State size: 32+32+4+32*4+5*4+1+1+8 = 226 bytes
// total State size: 32+32+6*4+1+1+8+32*4 = 226 bytes
uint32 constant public HEAP_START = 0x20000000;
uint32 constant public BRK_START = 0x40000000;
......@@ -51,12 +49,35 @@ contract MIPS {
return uint32(dat&mask | (isSigned ? signed : 0));
}
function outputState() internal returns (bytes32) {
State memory state;
function outputState() internal returns (bytes32 out) {
assembly {
state := 0x80
// copies 'size' bytes, right-aligned in word at 'from', to 'to', incl. trailing data
function copyMem(from, to, size) -> fromOut, toOut {
mstore(to, mload(add(from, sub(32, size))))
fromOut := add(from, 32)
toOut := add(to, size)
}
let from := 0x80 // state
let start := mload(0x40) // free mem ptr
let to := start
from, to := copyMem(from, to, 32) // memRoot
from, to := copyMem(from, to, 32) // preimageKey
from, to := copyMem(from, to, 4) // preimageOffset
from, to := copyMem(from, to, 4) // pc
from, to := copyMem(from, to, 4) // nextPC
from, to := copyMem(from, to, 4) // lo
from, to := copyMem(from, to, 4) // hi
from, to := copyMem(from, to, 4) // heap
from, to := copyMem(from, to, 1) // exitCode
from, to := copyMem(from, to, 1) // exited
from, to := copyMem(from, to, 8) // step
from := add(from, 32) // offset to registers
for { let i := 0 } lt(i, 32) { i := add(i, 1) } { from, to := copyMem(from, to, 4) } // registers
mstore(to, 0) // clean up end
log0(start, sub(to, start)) // log the resulting MIPS state, for debugging
out := keccak256(start, sub(to, start))
}
return keccak256(abi.encode(state));
return out;
}
function handleSyscall() internal returns (bytes32) {
......@@ -131,7 +152,7 @@ contract MIPS {
return outputState();
}
function handleHiLo(uint32 func, uint32 rt, uint32 rs, uint32 storeReg) internal returns (bytes32) {
function handleHiLo(uint32 func, uint32 rs, uint32 rt, uint32 storeReg) internal returns (bytes32) {
State memory state;
assembly {
state := 0x80
......@@ -167,7 +188,7 @@ contract MIPS {
return outputState();
}
function handleJump(bool andLink, uint32 dest) internal returns (bytes32) {
function handleJump(uint32 linkReg, uint32 dest) internal returns (bytes32) {
State memory state;
assembly {
state := 0x80
......@@ -175,8 +196,8 @@ contract MIPS {
uint32 prevPC = state.pc;
state.pc = state.nextPC;
state.nextPC = dest;
if (andLink) {
state.lr = prevPC+8; // set the link-register to the instr after the delay slot instruction.
if (linkReg != 0) {
state.registers[linkReg] = prevPC+8; // set the link-register to the instr after the delay slot instruction.
}
return outputState();
}
......@@ -186,6 +207,7 @@ contract MIPS {
assembly {
state := 0x80
}
require(storeReg < 32, "valid register");
// never write to reg 0, and it can be conditional (movz, movn)
if (storeReg != 0 && conditional) {
state.registers[storeReg] = val;
......@@ -199,8 +221,42 @@ contract MIPS {
// will revert if any required input state is missing
function Step(bytes32 stateHash, bytes calldata stateData, bytes calldata proof) public returns (bytes32) {
require(stateHash == keccak256(stateData), "stateHash must match input");
State memory state = abi.decode(stateData, (State)); // TODO not efficient, need to write a "decodePacked" for State
State memory state;
// packed data is ~6 times smaller
assembly {
if iszero(eq(state, 0x80)) { // expected state mem offset check
revert(0,0)
}
if iszero(eq(mload(0x40), mul(32, 48))) { // expected memory check
revert(0,0)
}
if iszero(eq(stateData.offset, add(mul(32, 4), 4))) { // expected state data offset
revert(0,0)
}
function putField(callOffset, memOffset, size) -> callOffsetOut, memOffsetOut {
// calldata is packed, thus starting left-aligned, shift-right to pad and right-align
let w := shr(shl(3, sub(32, size)), calldataload(callOffset))
mstore(memOffset, w)
callOffsetOut := add(callOffset, size)
memOffsetOut := add(memOffset, 32)
}
let c := stateData.offset // calldata offset
let m := 0x80 // mem offset
c, m := putField(c, m, 32) // memRoot
c, m := putField(c, m, 32) // preimageKey
c, m := putField(c, m, 4) // preimageOffset
c, m := putField(c, m, 4) // pc
c, m := putField(c, m, 4) // nextPC
c, m := putField(c, m, 4) // lo
c, m := putField(c, m, 4) // hi
c, m := putField(c, m, 4) // heap
c, m := putField(c, m, 1) // exitCode
c, m := putField(c, m, 1) // exited
c, m := putField(c, m, 8) // step
mstore(m, add(m, 32)) // offset to registers
m := add(m, 32)
for { let i := 0 } lt(i, 32) { i := add(i, 1) } { c, m := putField(c, m, 4) } // registers
}
if(state.exited) { // don't change state once exited
return stateHash;
}
......@@ -209,28 +265,33 @@ contract MIPS {
// instruction fetch
uint32 insn; // TODO proof the memory read against memRoot
assembly {
insn := shr(sub(256, 32), calldataload(add(proof.offset, 0x20)))
if iszero(eq(proof.offset, 390)) {
revert(0,0)
}
insn := shr(sub(256, 32), calldataload(proof.offset))
}
uint32 opcode = insn >> 26; // 6-bits
// j-type j/jal
if (opcode == 2 || opcode == 3) {
return handleJump(opcode == 3, SE(insn&0x03FFFFFF, 26) << 2);
// TODO likely bug in original code: MIPS spec says this should be in the "current" region;
// a 256 MB aligned region (i.e. use top 4 bits of branch delay slot (pc+4))
return handleJump(opcode == 2 ? 0 : 31, SE(insn&0x03FFFFFF, 26) << 2);
}
// register fetch
uint32 rs; // source register 1
uint32 rt; // source register 2 / temp
uint32 rtReg = ((insn >> 14) & 0x7C);
uint32 rs; // source register 1 value
uint32 rt; // source register 2 / temp value
uint32 rtReg = (insn >> 16) & 0x1F;
// R-type or I-type (stores rt)
rs = state.registers[(insn >> 19) & 0x7C];
uint32 rd = (insn >> 14) & 0x7C;
rs = state.registers[(insn >> 21) & 0x1F];
uint32 rdReg = rtReg;
if (opcode == 0 || opcode == 0x1c) {
// R-type (stores rd)
rt = state.registers[rtReg];
rd = (insn >> 9) & 0x7C;
rdReg = (insn >> 11) & 0x1F;
} else if (opcode < 0x20) {
// rt is SignExtImm
// don't sign extend for andi, ori, xori
......@@ -246,7 +307,7 @@ contract MIPS {
rt = state.registers[rtReg];
// store actual rt with lwl and lwr
rd = rtReg;
rdReg = rtReg;
}
if ((opcode >= 4 && opcode < 8) || opcode == 1) {
......@@ -263,11 +324,13 @@ contract MIPS {
uint32 addr = rs & 0xFFFFFFFC;
// TODO proof memory read at addr
assembly {
mem := and(shr(sub(256, 64), calldataload(add(proof.offset, 0x20))), 0xFFFFFFFF)
mem := shr(sub(256, 32), calldataload(add(proof.offset, 4)))
}
if (opcode >= 0x28 && opcode != 0x30) {
// store
storeAddr = addr;
// store opcodes don't write back to a register
rdReg = 0;
}
}
......@@ -277,14 +340,14 @@ contract MIPS {
uint32 func = insn & 0x3f; // 6-bits
if (opcode == 0 && func >= 8 && func < 0x1c) {
if (func == 8 || func == 9) { // jr/jalr
return handleJump(func == 9, rs);
return handleJump(func == 8 ? 0 : rdReg, rs);
}
if (func == 0xa) { // movz
return handleRd(rd, rs, rt == 0);
return handleRd(rdReg, rs, rt == 0);
}
if (func == 0xb) { // movn
return handleRd(rd, rs, rt != 0);
return handleRd(rdReg, rs, rt != 0);
}
// syscall (can read and write)
......@@ -295,7 +358,7 @@ contract MIPS {
// lo and hi registers
// can write back
if (func >= 0x10 && func < 0x1c) {
return handleHiLo(func, rs, rt, rd);
return handleHiLo(func, rs, rt, rdReg);
}
}
......@@ -314,7 +377,7 @@ contract MIPS {
}
// write back the value to destination register
return handleRd(rd, val, true);
return handleRd(rdReg, val, true);
}
function execute(uint32 insn, uint32 rs, uint32 rt, uint32 mem) internal pure returns (uint32) {
......
......@@ -80,7 +80,7 @@ type Addresses struct {
Challenge common.Address
}
func NewEVMEnv(contracts *Contracts, addrs *Addresses) *vm.EVM {
func NewEVMEnv(contracts *Contracts, addrs *Addresses) (*vm.EVM, *state.StateDB) {
chainCfg := params.MainnetChainConfig
bc := &testChain{}
header := bc.GetHeader(common.Hash{}, 100)
......@@ -100,7 +100,7 @@ func NewEVMEnv(contracts *Contracts, addrs *Addresses) *vm.EVM {
env.StateDB.SetCode(addrs.MIPSMemory, contracts.MIPSMemory.DeployedBytecode.Object)
env.StateDB.SetCode(addrs.Challenge, contracts.Challenge.DeployedBytecode.Object)
// TODO: any state to set, or immutables to replace, to link the contracts together?
return env
return env, state
}
type testChain struct {
......
......@@ -3,12 +3,14 @@ package main
import (
"bytes"
"encoding/binary"
"fmt"
"math/big"
"os"
"path"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/core/vm"
"github.com/ethereum/go-ethereum/crypto"
"github.com/stretchr/testify/require"
......@@ -17,7 +19,6 @@ import (
)
func TestEVM(t *testing.T) {
t.Skip("work in progress!")
testFiles, err := os.ReadDir("test/bin")
require.NoError(t, err)
......@@ -26,7 +27,7 @@ func TestEVM(t *testing.T) {
require.NoError(t, err)
// the first unlisted source seems to be the ABIDecoderV2 code that the compiler inserts
mipsSrcMap, err := contracts.MIPS.SourceMap([]string{"~ABIDecoderV2?", "~compiler?", "../contracts/src/MIPS.sol"})
mipsSrcMap, err := contracts.MIPS.SourceMap([]string{"../contracts/src/MIPS.sol", "~compiler?", "../contracts/src/MIPS.sol"})
require.NoError(t, err)
addrs := &Addresses{
......@@ -42,14 +43,14 @@ func TestEVM(t *testing.T) {
t.Skip("oracle test needs to be updated to use syscall pre-image oracle")
}
env := NewEVMEnv(contracts, addrs)
env.Config.Debug = true
env, evmState := NewEVMEnv(contracts, addrs)
env.Config.Debug = false
//env.Config.Tracer = logger.NewMarkdownLogger(&logger.Config{}, os.Stdout)
env.Config.Tracer = mipsSrcMap.Tracer(os.Stdout)
fn := path.Join("test/bin", f.Name())
programMem, err := os.ReadFile(fn)
state := &State{PC: 0, Memory: make(map[uint32]*Page)}
state := &State{PC: 0, NextPC: 4, Memory: make(map[uint32]*Page)}
err = state.SetMemoryRange(0, bytes.NewReader(programMem))
require.NoError(t, err, "load program into state")
......@@ -70,52 +71,37 @@ func TestEVM(t *testing.T) {
err = HookUnicorn(state, mu, os.Stdout, os.Stderr, al)
require.NoError(t, err, "hook unicorn to state")
// Add hook to stop unicorn once we reached the end of the test (i.e. "ate food")
_, err = mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
if state.PC == endAddr {
require.NoError(t, mu.Stop(), "stop test when returned")
}
}, 0, ^uint64(0))
require.NoError(t, err, "")
so := NewStateCache()
for i := 0; i < 1000; i++ {
insn := state.GetMemory(state.PC)
al.Reset() // reset
require.NoError(t, RunUnicorn(mu, state.PC, 1))
require.LessOrEqual(t, len(al.memReads)+len(al.memWrites), 1, "expecting at most a single mem read or write")
proofData := make([]byte, 0, 32*2)
proofData = append(proofData, uint32ToBytes32(32)...) // length in bytes
var tmp [32]byte
binary.BigEndian.PutUint32(tmp[0:4], insn) // instruction
if len(al.memReads) > 0 {
binary.BigEndian.PutUint32(tmp[4:8], state.GetMemory(al.memReads[0]))
var stateData []byte
var insn uint32
var pc uint32
var post []byte
preCode := func() {
insn = state.GetMemory(state.PC)
pc = state.PC
fmt.Printf("PRE - pc: %08x insn: %08x\n", pc, insn)
// remember the pre-state, to repeat it in the EVM during the post processing step
stateData = state.EncodeWitness(so)
if post != nil {
require.Equal(t, hexutil.Bytes(stateData).String(), hexutil.Bytes(post).String(),
"unicorn produced different state than EVM")
}
if len(al.memWrites) > 0 {
binary.BigEndian.PutUint32(tmp[4:8], state.GetMemory(al.memWrites[0]))
}
proofData = append(proofData, tmp[:]...)
memRoot := state.MerkleizeMemory(so)
al.Reset() // reset access list
}
postCode := func() {
fmt.Printf("POST - pc: %08x insn: %08x\n", pc, insn)
stateData := make([]byte, 0, 44*32)
stateData = append(stateData, memRoot[:]...)
stateData = append(stateData, make([]byte, 32)...) // TODO preimageKey
stateData = append(stateData, make([]byte, 32)...) // TODO preimageOffset
for i := 0; i < 32; i++ {
stateData = append(stateData, uint32ToBytes32(state.Registers[i])...)
var proofData []byte
proofData = binary.BigEndian.AppendUint32(proofData, insn)
if len(al.memReads) > 0 {
proofData = binary.BigEndian.AppendUint32(proofData, al.memReads[0].PreValue)
} else if len(al.memWrites) > 0 {
proofData = binary.BigEndian.AppendUint32(proofData, al.memWrites[0].PreValue)
} else {
proofData = append(proofData, make([]byte, 4)...)
}
stateData = append(stateData, uint32ToBytes32(state.PC)...)
stateData = append(stateData, uint32ToBytes32(state.NextPC)...)
stateData = append(stateData, uint32ToBytes32(state.LR)...)
stateData = append(stateData, uint32ToBytes32(state.LO)...)
stateData = append(stateData, uint32ToBytes32(state.HI)...)
stateData = append(stateData, uint32ToBytes32(state.Heap)...)
stateData = append(stateData, uint8ToBytes32(state.ExitCode)...)
stateData = append(stateData, boolToBytes32(state.Exited)...)
stateData = append(stateData, uint64ToBytes32(state.Step)...)
proofData = append(proofData, make([]byte, 32-4-4)...)
stateHash := crypto.Keccak256Hash(stateData)
var input []byte
......@@ -129,14 +115,39 @@ func TestEVM(t *testing.T) {
input = append(input, uint32ToBytes32(uint32(len(proofData)))...) // proof data length in bytes
input = append(input, proofData[:]...)
startingGas := uint64(30_000_000)
// we take a snapshot so we can clean up the state, and isolate the logs of this instruction run.
snap := env.StateDB.Snapshot()
ret, leftOverGas, err := env.Call(vm.AccountRef(sender), addrs.MIPS, input, startingGas, big.NewInt(0))
require.NoError(t, err, "evm should not fail")
t.Logf("step took %d gas", startingGas-leftOverGas)
t.Logf("output (state hash): %x", ret)
// TODO compare output against unicorn (need to reconstruct state and memory hash)
require.Len(t, ret, 32, "expecting 32-byte state hash")
// remember state hash, to check it against state
postHash := common.Hash(*(*[32]byte)(ret))
logs := evmState.Logs()
require.Equal(t, 1, len(logs), "expecting a log with post-state")
post = logs[0].Data
require.Equal(t, crypto.Keccak256Hash(post), postHash, "logged state must be accurate")
env.StateDB.RevertToSnapshot(snap)
t.Logf("EVM step took %d gas, and returned stateHash %s", startingGas-leftOverGas, postHash)
}
firstStep := true
_, err = mu.HookAdd(uc.HOOK_CODE, func(mu uc.Unicorn, addr uint64, size uint32) {
if state.PC == endAddr {
require.NoError(t, mu.Stop(), "stop test when returned")
}
if !firstStep {
postCode()
}
preCode()
firstStep = false
}, 0, ^uint64(0))
require.NoError(t, err, "hook code")
err = RunUnicorn(mu, state.PC, 1000)
require.NoError(t, err, "must run steps without error")
// inspect test result
done, result := state.GetMemory(baseAddrEnd+4), state.GetMemory(baseAddrEnd+8)
require.Equal(t, done, uint32(1), "must be done")
......@@ -145,28 +156,8 @@ func TestEVM(t *testing.T) {
}
}
// uint64ToBytes32 encodes v as a 32-byte word: big-endian in the last 8 bytes,
// with the leading 24 bytes left as zero.
func uint64ToBytes32(v uint64) []byte {
	word := make([]byte, 32)
	binary.BigEndian.PutUint64(word[24:], v)
	return word
}
// uint32ToBytes32 encodes v as a 32-byte word: big-endian in the last 4 bytes,
// with the leading 28 bytes left as zero.
func uint32ToBytes32(v uint32) []byte {
	word := make([]byte, 32)
	binary.BigEndian.PutUint32(word[28:], v)
	return word
}
// uint8ToBytes32 encodes v as a 32-byte word with v in the final byte and
// zeros everywhere else.
func uint8ToBytes32(v uint8) []byte {
	word := make([]byte, 32)
	word[len(word)-1] = v
	return word
}
// boolToBytes32 encodes v as a 32-byte word: the final byte is 1 for true and
// 0 for false, and all other bytes are zero.
func boolToBytes32(v bool) []byte {
	word := make([]byte, 32)
	if !v {
		return word
	}
	word[31] = 1
	return word
}
......@@ -11,6 +11,7 @@ import (
func LoadELF(f *elf.File) (*State, error) {
s := &State{
PC: uint32(f.Entry),
NextPC: uint32(f.Entry + 4),
HI: 0,
LO: 0,
Heap: 0x20000000,
......
......@@ -82,7 +82,7 @@ type SourceMap struct {
func (s *SourceMap) Info(pc uint64) (source string, line uint32, col uint32) {
instr := s.Instr[pc]
if instr.F < 0 {
return
return "generated", 0, 0
}
if instr.F >= int32(len(s.Sources)) {
source = "unknown"
......@@ -103,7 +103,7 @@ func (s *SourceMap) Info(pc uint64) (source string, line uint32, col uint32) {
func (s *SourceMap) FormattedInfo(pc uint64) string {
f, l, c := s.Info(pc)
return fmt.Sprintf("%s:%d:%d %v", f, l, c, s.Instr[pc])
return fmt.Sprintf("%s:%d:%d", f, l, c)
}
// ParseSourceMap parses a solidity sourcemap: mapping bytecode indices to source references.
......@@ -200,11 +200,21 @@ func (s *SourceMapTracer) CaptureEnter(typ vm.OpCode, from common.Address, to co
func (s *SourceMapTracer) CaptureExit(output []byte, gasUsed uint64, err error) {}
func (s *SourceMapTracer) CaptureState(pc uint64, op vm.OpCode, gas, cost uint64, scope *vm.ScopeContext, rData []byte, depth int, err error) {
fmt.Fprintf(s.out, "%s: pc %x opcode %s map %v\n", s.srcMap.FormattedInfo(pc), pc, op.String(), s.srcMap.Instr[pc])
fmt.Fprintf(s.out, "%-40s : pc %x opcode %s\n", s.srcMap.FormattedInfo(pc), pc, op.String())
}
func (s *SourceMapTracer) CaptureFault(pc uint64, op vm.OpCode, gas, cost uint64, scope *vm.ScopeContext, depth int, err error) {
fmt.Fprintf(s.out, "%s: FAULT %v\n", s.srcMap.FormattedInfo(pc), err)
fmt.Fprintf(s.out, "%-40s: pc %x opcode %s FAULT %v\n", s.srcMap.FormattedInfo(pc), pc, op.String(), err)
fmt.Println("----")
fmt.Fprintf(s.out, "calldata: %x\n", scope.Contract.Input)
fmt.Println("----")
fmt.Fprintf(s.out, "memory: %x\n", scope.Memory.Data())
fmt.Println("----")
fmt.Fprintf(s.out, "stack:\n")
stack := scope.Stack.Data()
for i := range stack {
fmt.Fprintf(s.out, "%3d: %x\n", -i, stack[len(stack)-1-i].Bytes32())
}
}
var _ vm.EVMLogger = (*SourceMapTracer)(nil)
......@@ -5,6 +5,8 @@ import (
"encoding/hex"
"fmt"
"io"
"github.com/ethereum/go-ethereum/common"
)
const (
......@@ -36,27 +38,47 @@ func (p *Page) UnmarshalText(dat []byte) error {
type State struct {
Memory map[uint32]*Page `json:"memory"`
Registers [32]uint32 `json:"registers"`
PreimageKey common.Hash `json:"preimageKey"`
PreimageOffset uint32 `json:"preimageOffset"`
PC uint32 `json:"pc"`
NextPC uint32 `json:"nextPC"`
LR uint32 `json:"lr"`
HI uint32 `json:"hi"`
LO uint32 `json:"lo"`
HI uint32 `json:"hi"`
Heap uint32 `json:"heap"` // to handle mmap growth
ExitCode uint8 `json:"exit"`
Exited bool `json:"exited"`
Step uint64 `json:"step"`
Registers [32]uint32 `json:"registers"`
}
// TODO: VM state pre-image:
// PC, HI, LO, Heap = 4 * 32/8 = 16 bytes
// Registers = 32 * 32/8 = 256 bytes
// Memory tree root = 32 bytes
// Misc exit/step data = TBD
// + proof(s) for memory leaf nodes
// EncodeWitness serializes the state into a packed, big-endian byte string.
//
// Layout, in order: memory root (32), preimage key (32), preimage offset (4),
// PC (4), NextPC (4), LO (4), HI (4), heap (4), exit code (1), exited flag (1),
// step (8), then the 32 registers (4 bytes each), for 226 bytes in total.
//
// The memory merkle root is still computed via MerkleizeMemory, but the value
// is currently replaced by a fixed placeholder (see the TODO below).
func (s *State) EncodeWitness(so StateOracle) []byte {
	memRoot := s.MerkleizeMemory(so)
	memRoot = common.Hash{31: 42} // TODO need contract to actually write memory

	// The exited flag is encoded as a single 0/1 byte.
	var exitedFlag byte
	if s.Exited {
		exitedFlag = 1
	}

	enc := append([]byte{}, memRoot[:]...)
	enc = append(enc, s.PreimageKey[:]...)

	// Fixed-width 32-bit scalars, in encoding order.
	for _, word := range [...]uint32{s.PreimageOffset, s.PC, s.NextPC, s.LO, s.HI, s.Heap} {
		enc = binary.BigEndian.AppendUint32(enc, word)
	}

	enc = append(enc, s.ExitCode, exitedFlag)
	enc = binary.BigEndian.AppendUint64(enc, s.Step)

	for i := range s.Registers {
		enc = binary.BigEndian.AppendUint32(enc, s.Registers[i])
	}
	return enc
}
func (s *State) MerkleizeMemory(so StateOracle) [32]byte {
// empty parts of the tree are all zero. Precompute the hash of each full-zero range sub-tree level.
......@@ -120,20 +142,21 @@ func (s *State) MerkleizeMemory(so StateOracle) [32]byte {
return merkleizeMemory(1, 0)
}
func (s *State) SetMemory(addr uint32, size uint32, v uint32) {
for i := size; i > 0; i-- {
pageIndex := addr >> pageAddrSize
pageAddr := addr & pageAddrMask
p, ok := s.Memory[pageIndex]
if !ok {
// allocate the page if we have not already.
// Go may mmap relatively large ranges, but we only allocate the pages just in time.
p = &Page{}
s.Memory[pageIndex] = p
}
p[pageAddr] = uint8(v >> (i - 1))
addr += 1
func (s *State) SetMemory(addr uint32, v uint32) {
// addr must be aligned to 4 bytes
if addr&0x3 != 0 {
panic(fmt.Errorf("unaligned memory access: %x", addr))
}
pageIndex := addr >> pageAddrSize
pageAddr := addr & pageAddrMask
p, ok := s.Memory[pageIndex]
if !ok {
// allocate the page if we have not already.
// Go may mmap relatively large ranges, but we only allocate the pages just in time.
p = &Page{}
s.Memory[pageIndex] = p
}
binary.BigEndian.PutUint32(p[pageAddr:pageAddr+4], v)
}
func (s *State) GetMemory(addr uint32) uint32 {
......
......@@ -37,7 +37,7 @@ func TestState(t *testing.T) {
//state, err := LoadELF(elfProgram)
//require.NoError(t, err, "must load ELF into state")
programMem, err := os.ReadFile(fn)
state := &State{PC: 0, Memory: make(map[uint32]*Page)}
state := &State{PC: 0, NextPC: 4, Memory: make(map[uint32]*Page)}
err = state.SetMemoryRange(0, bytes.NewReader(programMem))
require.NoError(t, err, "load program into state")
......@@ -69,7 +69,7 @@ func TestState(t *testing.T) {
require.NoError(t, mu.Stop(), "stop test when returned")
}
}, 0, ^uint64(0))
require.NoError(t, err, "")
require.NoError(t, err, "hook code")
err = RunUnicorn(mu, state.PC, 1000)
require.NoError(t, err, "must run steps without error")
......
package main
// MemEntry is one memory access recorded in an AccessList.
type MemEntry struct {
// EffAddr is the effective address of the access. The unicorn hooks pass the
// accessed address masked with 0xFFFFFFFC, i.e. aligned down to 4 bytes.
EffAddr uint32
// PreValue is the 32-bit word supplied together with EffAddr. The hooks read it
// from the state's memory at EffAddr before applying a write, so it is
// presumably the pre-access content of that word (confirm for reads).
PreValue uint32
}
type AccessList struct {
memReads []uint32
memWrites []uint32
memReads []MemEntry
memWrites []MemEntry
}
func (al *AccessList) Reset() {
......@@ -10,39 +15,39 @@ func (al *AccessList) Reset() {
al.memWrites = al.memWrites[:0]
}
func (al *AccessList) OnRead(addr uint32) {
func (al *AccessList) OnRead(effAddr uint32, preValue uint32) {
// if it matches the last, it's a duplicate; this happens because of multiple callbacks for the same effective addr.
if len(al.memReads) > 0 && al.memReads[len(al.memReads)-1] == addr {
if len(al.memReads) > 0 && al.memReads[len(al.memReads)-1].EffAddr == effAddr {
return
}
al.memReads = append(al.memReads, addr)
al.memReads = append(al.memReads, MemEntry{EffAddr: effAddr, PreValue: preValue})
}
func (al *AccessList) OnWrite(addr uint32) {
func (al *AccessList) OnWrite(effAddr uint32, preValue uint32) {
// if it matches the last, it's a duplicate; this happens because of multiple callbacks for the same effective addr.
if len(al.memWrites) > 0 && al.memWrites[len(al.memWrites)-1] == addr {
if len(al.memWrites) > 0 && al.memWrites[len(al.memWrites)-1].EffAddr == effAddr {
return
}
al.memWrites = append(al.memWrites, addr)
al.memWrites = append(al.memWrites, MemEntry{EffAddr: effAddr, PreValue: preValue})
}
var _ Tracer = (*AccessList)(nil)
type Tracer interface {
// OnRead remembers reads from the given addr.
// OnRead remembers reads from the given effAddr.
// Warning: the addr is an effective-addr, i.e. always aligned.
// But unicorn will fire it multiple times, for each byte that was changed within the effective addr boundaries.
OnRead(addr uint32)
// OnWrite remembers writes to the given addr.
OnRead(effAddr uint32, value uint32)
// OnWrite remembers writes to the given effAddr.
// Warning: the addr is an effective-addr, i.e. always aligned.
// But unicorn will fire it multiple times, for each byte that was changed within the effective addr boundaries.
OnWrite(addr uint32)
OnWrite(effAddr uint32, value uint32)
}
type NoOpTracer struct{}
func (n NoOpTracer) OnRead(addr uint32) {}
func (n NoOpTracer) OnRead(effAddr uint32, value uint32) {}
func (n NoOpTracer) OnWrite(addr uint32) {}
func (n NoOpTracer) OnWrite(effAddr uint32, value uint32) {}
var _ Tracer = NoOpTracer{}
......@@ -109,8 +109,8 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
}
_, err = mu.HookAdd(uc.HOOK_MEM_READ, func(mu uc.Unicorn, access int, addr64 uint64, size int, value int64) {
addr := uint32(addr64 & 0xFFFFFFFC) // pass effective addr to tracer
tr.OnRead(addr)
effAddr := uint32(addr64 & 0xFFFFFFFC) // pass effective addr to tracer
tr.OnRead(effAddr, st.GetMemory(effAddr))
}, 0, ^uint64(0))
if err != nil {
return fmt.Errorf("failed to set up mem-write hook: %w", err)
......@@ -123,9 +123,26 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
if size < 0 || size > 4 {
panic("invalid mem size")
}
st.SetMemory(uint32(addr64), uint32(size), uint32(value))
addr := uint32(addr64 & 0xFFFFFFFC) // pass effective addr to tracer
tr.OnWrite(addr)
effAddr := uint32(addr64 & 0xFFFFFFFC)
tr.OnWrite(effAddr, st.GetMemory(effAddr))
rt := value
rs := addr64 & 3
if size == 1 {
mem := st.GetMemory(effAddr)
val := uint32((rt & 0xFF) << (24 - (rs&3)*8))
mask := 0xFFFFFFFF ^ uint32(0xFF<<(24-(rs&3)*8))
st.SetMemory(effAddr, (mem&mask)|val)
} else if size == 2 {
mem := st.GetMemory(effAddr)
val := uint32((rt & 0xFFFF) << (16 - (rs&2)*8))
mask := 0xFFFFFFFF ^ uint32(0xFFFF<<(16-(rs&2)*8))
st.SetMemory(effAddr, (mem&mask)|val)
} else if size == 4 {
st.SetMemory(effAddr, uint32(rt))
} else {
log.Fatal("bad size write to ram")
}
}, 0, ^uint64(0))
if err != nil {
return fmt.Errorf("failed to set up mem-write hook: %w", err)
......@@ -141,9 +158,57 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
for i := 0; i < 32; i++ {
st.Registers[i] = uint32(batch[i])
}
prevPC := st.PC
st.PC = uint32(batch[32])
// We detect if we are potentially in a delay-slot.
// If we may be (i.e. last PC is 1 instruction before current),
// then parse the last instruction to determine what the next PC would be.
// This reflects the handleBranch / handleJump behavior that schedules next-PC.
if st.PC == prevPC+4 {
st.NextPC = prevPC + 8
prevInsn := st.GetMemory(prevPC)
opcode := prevInsn >> 26
switch opcode {
case 2, 3: // J/JAL
st.NextPC = signExtend(prevInsn&0x03FFFFFF, 25) << 2
case 1, 4, 5, 6, 7: // branching
rs := st.Registers[(prevInsn>>21)&0x1F]
shouldBranch := false
switch opcode {
case 4, 5:
rt := st.Registers[(prevInsn>>16)&0x1F]
shouldBranch = (rs == rt && opcode == 4) || (rs != rt && opcode == 5)
case 6:
shouldBranch = int32(rs) <= 0 // blez
case 7:
shouldBranch = int32(rs) > 0 // bgtz
case 1:
rtv := (prevInsn >> 16) & 0x1F
if rtv == 0 {
shouldBranch = int32(rs) < 0
} // bltz
if rtv == 1 {
shouldBranch = int32(rs) >= 0
} // bgez
}
if shouldBranch {
st.NextPC = prevPC + 4 + (signExtend(prevInsn&0xFFFF, 15) << 2)
}
case 0:
if funcv := prevInsn & 0x3f; funcv == 8 || funcv == 9 { // JR/JALR
rs := st.Registers[(prevInsn>>21)&0x1F]
st.NextPC = rs
}
}
} else {
st.NextPC = st.PC + 4
}
st.LO = uint32(batch[33])
st.HI = uint32(batch[34])
fmt.Printf("pc: 0x%08x\n", st.PC)
}, 0, ^uint64(0))
if err != nil {
return fmt.Errorf("failed to set up instruction hook: %w", err)
......@@ -152,6 +217,15 @@ func HookUnicorn(st *State, mu uc.Unicorn, stdOut, stdErr io.Writer, tr Tracer)
return nil
}
// signExtend sign-extends v, treating bit index i (0 = least significant) as
// the sign bit: when that bit is set, it and all higher bits become 1;
// otherwise it and all higher bits are cleared.
func signExtend(v uint32, i uint32) uint32 {
	signBit := uint32(1) << i
	upper := ^(signBit - 1) // bit i and every bit above it
	if v&signBit == 0 {
		return v &^ upper
	}
	return v | upper
}
func RunUnicorn(mu uc.Unicorn, entrypoint uint32, steps uint64) error {
return mu.StartWithOptions(uint64(entrypoint), ^uint64(0), &uc.UcOptions{
Timeout: 0, // 0 to disable, value is in ms.
......
......@@ -4,6 +4,7 @@ import (
"testing"
"github.com/stretchr/testify/require"
uc "github.com/unicorn-engine/unicorn/bindings/go/unicorn"
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment