Commit 721a24d3 authored by angel-ding-cb, committed by GitHub

4844: blob encoding version 2 (#8827)

* blob encoding version 2

* - cosmetic cleanup
- use testify/require for testing
- improve big blob test case
- some performance improvements

* op-service: blob encoding nits (#10)

---------
Co-authored-by: Roberto Bayardo <roberto.bayardo@coinbase.com>
Co-authored-by: protolambda <proto@protolambda.com>
parent 579d6ffe
......@@ -9,6 +9,7 @@ fuzz:
go test -run NOTAREALTEST -v -fuzztime 10s -fuzz FuzzExecutionPayloadMarshalUnmarshalV1 ./eth
go test -run NOTAREALTEST -v -fuzztime 10s -fuzz FuzzExecutionPayloadMarshalUnmarshalV2 ./eth
go test -run NOTAREALTEST -v -fuzztime 10s -fuzz FuzzOBP01 ./eth
go test -run NOTAREALTEST -v -fuzztime 10s -fuzz FuzzEncodeDecodeBlob ./eth
.PHONY: \
test \
......
......@@ -2,7 +2,7 @@ package eth
import (
"crypto/sha256"
"encoding/binary"
"errors"
"fmt"
"reflect"
......@@ -14,7 +14,17 @@ import (
const (
	BlobSize = 4096 * 32
	// MaxBlobDataSize is the maximum number of bytes the round-based encoding can fit into one
	// blob: 1024 rounds of (4*31 + 3) payload bytes, minus the 1-byte version and 3-byte length
	// header carried in the first field element.
	MaxBlobDataSize = (4*31+3)*1024 - 4
	EncodingVersion = 0
	VersionOffset   = 1    // offset of the version byte in the blob encoding
	Rounds          = 1024 // number of encode/decode rounds
)

var (
	ErrBlobInvalidFieldElement    = errors.New("invalid field element")
	ErrBlobInvalidEncodingVersion = errors.New("invalid encoding version")
	ErrBlobInvalidLength          = errors.New("invalid length for blob")
	ErrBlobInputTooLarge          = errors.New("too much data to encode in one blob")
)

type Blob [BlobSize]byte
......@@ -70,60 +80,192 @@ func VerifyBlobProof(blob *Blob, commitment kzg4844.Commitment, proof kzg4844.Pr
// FromData encodes the given input data into this blob. The encoding scheme is as follows:
//
// First, field elements are encoded as big-endian uint256 in BLS modulus range. To avoid modulus
// overflow, we can't use the full 32 bytes, so we write data only to the topmost 31 bytes of each.
// TODO: we can optimize this to get a bit more data from the blobs by using the top byte
// partially.
//
// The first field element encodes the length of input data as a little endian uint32 in its
// topmost 4 (out of 31) bytes, and the first 27 bytes of the input data in its remaining 27
// bytes.
// In each round we perform 7 reads of input of lengths (31,1,31,1,31,1,31) bytes respectively for
// a total of 127 bytes. This data is encoded into the next 4 field elements of the output by
// placing each of the 4x31 byte chunks into bytes [1:32] of its respective field element. The
// three single byte chunks (24 bits) are split into 4x6-bit chunks, each of which is written into
// the top most byte of its respective field element, leaving the top 2 bits of each field element
// empty to avoid modulus overflow. This process is repeated for up to 1024 rounds until all data
// is encoded.
//
// The remaining field elements each encode 31 bytes of the remaining input data, up until the end
// of the input.
//
// TODO: version the encoding format to allow for future encoding changes
// For only the very first output field, bytes [1:5] are used to encode the version and the length
// of the data.
func (b *Blob) FromData(data Data) error {
if len(data) > MaxBlobDataSize {
return fmt.Errorf("data is too large for blob. len=%v", len(data))
return fmt.Errorf("%w: len=%v", ErrBlobInputTooLarge, data)
}
b.Clear()
// encode 4-byte little-endian length value into topmost 4 bytes (out of 31) of first field
// element
binary.LittleEndian.PutUint32(b[1:5], uint32(len(data)))
// encode first 27 bytes of input data into remaining bytes of first field element
offset := copy(b[5:32], data)
// encode (up to) 31 bytes of remaining input data at a time into the subsequent field element
for i := 1; i < 4096; i++ {
offset += copy(b[i*32+1:i*32+32], data[offset:])
if offset == len(data) {
break
readOffset := 0
// read 1 byte of input, 0 if there is no input left
read1 := func() byte {
if readOffset >= len(data) {
return 0
}
out := data[readOffset]
readOffset += 1
return out
}
writeOffset := 0
var buf31 [31]byte
var zero31 [31]byte
// Read up to 31 bytes of input (left-aligned), into buf31.
read31 := func() {
if readOffset >= len(data) {
copy(buf31[:], zero31[:])
return
}
n := copy(buf31[:], data[readOffset:]) // copy as much data as we can
copy(buf31[n:], zero31[:]) // pad with zeroes (since there might not be enough data)
readOffset += n
}
// Write a byte, updates the write-offset.
// Asserts that the write-offset matches encoding-algorithm expectations.
// Asserts that the value is 6 bits.
write1 := func(v byte) {
if writeOffset%32 != 0 {
panic(fmt.Errorf("blob encoding: invalid byte write offset: %d", writeOffset))
}
if v&0b1100_0000 != 0 {
panic(fmt.Errorf("blob encoding: invalid 6 bit value: 0b%b", v))
}
b[writeOffset] = v
writeOffset += 1
}
// Write buf31 to the blob, updates the write-offset.
// Asserts that the write-offset matches encoding-algorithm expectations.
write31 := func() {
if writeOffset%32 != 1 {
panic(fmt.Errorf("blob encoding: invalid bytes31 write offset: %d", writeOffset))
}
copy(b[writeOffset:], buf31[:])
writeOffset += 31
}
for round := 0; round < Rounds && readOffset < len(data); round++ {
// The first field element encodes the version and the length of the data in [1:5].
// This is a manual substitute for read31(), preparing the buf31.
if round == 0 {
buf31[0] = EncodingVersion
// Encode the length as big-endian uint24.
// The length check at the start above ensures we can always fit the length value into only 3 bytes.
ilen := uint32(len(data))
buf31[1] = byte(ilen >> 16)
buf31[2] = byte(ilen >> 8)
buf31[3] = byte(ilen)
readOffset += copy(buf31[4:], data[:])
} else {
read31()
}
x := read1()
A := x & 0b0011_1111
write1(A)
write31()
read31()
y := read1()
B := (y & 0b0000_1111) | ((x & 0b1100_0000) >> 2)
write1(B)
write31()
read31()
z := read1()
C := z & 0b0011_1111
write1(C)
write31()
read31()
D := ((z & 0b1100_0000) >> 2) | ((y & 0b1111_0000) >> 4)
write1(D)
write31()
}
if offset < len(data) {
return fmt.Errorf("failed to fit all data into blob. bytes remaining: %v", len(data)-offset)
if readOffset < len(data) {
panic(fmt.Errorf("expected to fit data but failed, read offset: %d, data: %d", readOffset, len(data)))
}
return nil
}
// ToData decodes the blob into raw byte data. See FromData above for details on the encoding
// format.
// format. If error is returned it will be one of InvalidFieldElementError,
// InvalidEncodingVersionError and InvalidLengthError.
func (b *Blob) ToData() (Data, error) {
data := make(Data, 4096*32)
for i := 0; i < 4096; i++ {
if b[i*32] != 0 {
return nil, fmt.Errorf("invalid blob, found non-zero high order byte %x of field element %d", b[i*32], i)
// check the version
if b[VersionOffset] != EncodingVersion {
return nil, fmt.Errorf(
"%w: expected version %d, got %d", ErrBlobInvalidEncodingVersion, EncodingVersion, b[VersionOffset])
}
// decode the 3-byte big-endian length value into a 4-byte integer
outputLen := uint32(b[2])<<16 | uint32(b[3])<<8 | uint32(b[4])
if outputLen > MaxBlobDataSize {
return nil, fmt.Errorf("%w: got %d", ErrBlobInvalidLength, outputLen)
}
// round 0 is special cased to copy only the remaining 27 bytes of the first field element into
// the output due to version/length encoding already occupying its first 5 bytes.
output := make(Data, MaxBlobDataSize)
copy(output[0:27], b[5:])
// now process remaining 3 field elements to complete round 0
opos := 28 // current position into output buffer
ipos := 32 // current position into the input blob
var err error
encodedByte := make([]byte, 4) // buffer for the 4 6-bit chunks
encodedByte[0] = b[0]
for i := 1; i < 4; i++ {
encodedByte[i], opos, ipos, err = b.decodeFieldElement(opos, ipos, output)
if err != nil {
return nil, err
}
copy(data[i*31:i*31+31], b[i*32+1:i*32+32])
}
// extract the length prefix & trim the output accordingly
dataLen := binary.LittleEndian.Uint32(data[:4])
data = data[4:]
if dataLen > uint32(len(data)) {
return nil, fmt.Errorf("invalid blob, length prefix out of range: %d", dataLen)
opos = reassembleBytes(opos, encodedByte, output)
// in each remaining round we decode 4 field elements (128 bytes) of the input into 127 bytes
// of output
for i := 1; i < Rounds && opos < int(outputLen); i++ {
for j := 0; j < 4; j++ {
// save the first byte of each field element for later re-assembly
encodedByte[j], opos, ipos, err = b.decodeFieldElement(opos, ipos, output)
if err != nil {
return nil, err
}
}
opos = reassembleBytes(opos, encodedByte, output)
}
data = data[:dataLen]
return data, nil
output = output[:outputLen]
return output, nil
}
// decodeFieldElement decodes the next input field element by copying its lower 31 bytes into
// their appropriate place in the output buffer, after validating its high-order byte. Returns
// the high-order byte (which carries a 6-bit chunk), the advanced output and input positions,
// and a wrapped ErrBlobInvalidFieldElement if either of the two high-order bits is set.
func (b *Blob) decodeFieldElement(opos, ipos int, output []byte) (byte, int, int, error) {
	hi := b[ipos]
	// the two most significant bits of every field element must be clear so the value stays
	// below the BLS modulus
	if hi&0b1100_0000 != 0 {
		return 0, 0, 0, fmt.Errorf("%w: field element: %d", ErrBlobInvalidFieldElement, ipos)
	}
	// the remaining 31 bytes of the field element are raw output data
	copy(output[opos:], b[ipos+1:ipos+32])
	return hi, opos + 32, ipos + 32, nil
}
// reassembleBytes merges the four 6-bit chunks collected in encodedByte back into the 3 bytes
// of payload they encode, stores them at their reserved slots in output, and returns the
// corrected output position.
func reassembleBytes(opos int, encodedByte []byte, output []byte) int {
	e0, e1, e2, e3 := encodedByte[0], encodedByte[1], encodedByte[2], encodedByte[3]
	// each round emits 127 payload bytes, not 128, so step the output position back by one
	pos := opos - 1
	first := e0&0b0011_1111 | (e1&0b0011_0000)<<2
	second := e1&0b0000_1111 | (e3&0b0000_1111)<<4
	third := e2&0b0011_1111 | (e3&0b0011_0000)<<2
	// place the re-assembled bytes at the slot reserved ahead of each field element's 31 bytes
	output[pos-(32*3)] = first
	output[pos-(32*2)] = second
	output[pos-32] = third
	return pos
}
func (b *Blob) Clear() {
......
package eth
import (
"math/rand"
"testing"
"github.com/stretchr/testify/require"
)
func TestBlobEncodeDecode(t *testing.T) {
......@@ -19,32 +22,69 @@ func TestBlobEncodeDecode(t *testing.T) {
var b Blob
for _, c := range cases {
data := Data(c)
if err := b.FromData(data); err != nil {
t.Fatalf("failed to encode bytes: %v", err)
}
err := b.FromData(data)
require.NoError(t, err)
decoded, err := b.ToData()
if err != nil {
t.Fatalf("failed to decode blob: %v", err)
}
if string(decoded) != c {
t.Errorf("decoded != input. got: %v, want: %v", decoded, Data(c))
}
require.NoError(t, err)
require.Equal(t, c, string(decoded))
}
}
// TestSmallBlobEncoding exercises round-trip encoding on inputs sized around the
// boundaries of the first encoding round.
func TestSmallBlobEncoding(t *testing.T) {
	roundTrip := func(input Data) {
		var b Blob
		require.NoError(t, b.FromData(input))
		out, err := b.ToData()
		require.NoError(t, err)
		require.Equal(t, input, out)
	}

	// the first field element is filled and no data remains
	input := Data(make([]byte, 128))
	input[127] = 0xFF
	roundTrip(input)

	// also mark an early byte (data stays 128 bytes long)
	input[9] = 0xFF
	roundTrip(input)

	// no 3 bytes of extra data left to encode after the first 4 field elements
	input = Data(make([]byte, 27+31*3))
	input[27+31*3-1] = 0xFF
	roundTrip(input)
}
func TestBigBlobEncoding(t *testing.T) {
r := rand.New(rand.NewSource(99))
bigData := Data(make([]byte, MaxBlobDataSize))
bigData[MaxBlobDataSize-1] = 0xFF
var b Blob
if err := b.FromData(bigData); err != nil {
t.Fatalf("failed to encode bytes: %v", err)
for i := range bigData {
bigData[i] = byte(r.Intn(256))
}
var b Blob
// test the maximum size of data that can be encoded
err := b.FromData(bigData)
require.NoError(t, err)
decoded, err := b.ToData()
if err != nil {
t.Fatalf("failed to decode blob: %v", err)
}
if string(decoded) != string(bigData) {
t.Errorf("decoded blob != big blob input")
require.NoError(t, err)
require.Equal(t, bigData, decoded)
// perform encode/decode test on progressively smaller inputs to exercise boundary conditions
// pertaining to length of the input data
for i := 1; i < 256; i++ {
tempBigData := bigData[i:]
err := b.FromData(tempBigData)
require.NoError(t, err)
decoded, err := b.ToData()
require.NoError(t, err)
require.Equal(t, len(tempBigData), len(decoded))
require.Equal(t, tempBigData, decoded)
}
}
......@@ -54,16 +94,20 @@ func TestInvalidBlobDecoding(t *testing.T) {
if err := b.FromData(data); err != nil {
t.Fatalf("failed to encode bytes: %v", err)
}
b[32] = 0x80 // field elements should never have their highest order bit set
if _, err := b.ToData(); err == nil {
t.Errorf("expected error, got none")
}
_, err := b.ToData()
require.ErrorIs(t, err, ErrBlobInvalidFieldElement)
b[32] = 0x0
b[32] = 0x00
b[4] = 0xFF // encode an invalid (much too long) length prefix
if _, err := b.ToData(); err == nil {
t.Errorf("expected error, got none")
}
b[VersionOffset] = 0x01 // invalid encoding version
_, err = b.ToData()
require.ErrorIs(t, err, ErrBlobInvalidEncodingVersion)
b[VersionOffset] = EncodingVersion
b[2] = 0xFF // encode an invalid (much too long) length prefix
_, err = b.ToData()
require.ErrorIs(t, err, ErrBlobInvalidLength)
}
func TestTooLongDataEncoding(t *testing.T) {
......@@ -72,7 +116,21 @@ func TestTooLongDataEncoding(t *testing.T) {
data := Data(make([]byte, BlobSize))
var b Blob
err := b.FromData(data)
if err == nil {
t.Errorf("expected error, got none")
}
require.ErrorIs(t, err, ErrBlobInputTooLarge)
}
// FuzzEncodeDecodeBlob round-trips arbitrary fuzzer-generated data through
// FromData/ToData and requires the decoded bytes to match the input.
func FuzzEncodeDecodeBlob(f *testing.F) {
	var b Blob
	f.Fuzz(func(t *testing.T, d []byte) {
		// Cap the input at the maximum encodable size: otherwise FromData legitimately
		// returns ErrBlobInputTooLarge and the fuzzer reports a spurious failure.
		// Rejection of oversized input is covered by TestTooLongDataEncoding.
		if len(d) > MaxBlobDataSize {
			d = d[:MaxBlobDataSize]
		}
		b.Clear()
		data := Data(d)
		err := b.FromData(data)
		require.NoError(t, err)
		decoded, err := b.ToData()
		require.NoError(t, err)
		require.Equal(t, data, decoded)
	})
}
// TODO(optimism#8872): Create test vectors to implement one-way tests confirming that specific inputs yield
// desired outputs.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment