design: New TAPE design

codec: Add missing WriteByte function
codec: Add codec package to handle encoding and decoding ints, etc
2025-05-30 21:34:31 -04:00 · 2025-05-30 07:08:43 -04:00 · 2025-05-30 07:05:55 -04:00
4 changed files with 262 additions and 76 deletions
--- a/codec/decode.go
+++ b/codec/decode.go
@@ -0,0 +1,100 @@
 package codec
 import "io"
 // Decoder wraps an [io.Reader] and decodes data from it. The reader is
 // embedded, so its Read method is available directly on the Decoder. The
 // fixed-width integer methods declared on Decoder interpret the bytes they
 // read as big-endian.
 type Decoder struct {
 	// Reader is the underlying source that all decoding methods read from.
 	io.Reader
 }
 // ReadFull fills buffer completely from the underlying reader, behaving exactly
 // like [io.ReadFull]. It returns the number of bytes read along with any error
 // reported while reading.
 func (this *Decoder) ReadFull(buffer []byte) (n int, err error) {
 	// io.ReadFull is defined as ReadAtLeast with min set to len(buffer).
 	wanted := len(buffer)
 	n, err = io.ReadAtLeast(this, buffer, wanted)
 	return n, err
 }
 // ReadByte decodes a single byte from the input reader. It returns the byte,
 // the number of bytes read, and any error encountered.
 func (this *Decoder) ReadByte() (value byte, n int, err error) {
 	// byte is an alias of uint8, so the result can be forwarded unchanged.
 	value, n, err = this.ReadUint8()
 	return value, n, err
 }
 // ReadInt8 decodes an 8-bit signed integer from the input reader. The byte is
 // read as unsigned and then reinterpreted as signed.
 func (this *Decoder) ReadInt8() (value int8, n int, err error) {
 	var raw uint8
 	raw, n, err = this.ReadUint8()
 	return int8(raw), n, err
 }
 // ReadUint8 decodes an 8-bit unsigned integer from the input reader. It returns
 // the value, the number of bytes read, and any error from ReadFull.
 func (this *Decoder) ReadUint8() (value uint8, n int, err error) {
 	var octet [1]byte
 	n, err = this.ReadFull(octet[:])
 	value = octet[0]
 	return value, n, err
 }
 // ReadInt16 decodes a 16-bit signed integer from the input reader. The bytes
 // are read as an unsigned value and then reinterpreted as signed.
 func (this *Decoder) ReadInt16() (value int16, n int, err error) {
 	var raw uint16
 	raw, n, err = this.ReadUint16()
 	return int16(raw), n, err
 }
 // ReadUint16 decodes a 16-bit unsigned integer from the input reader. The two
 // bytes are interpreted as big-endian (most significant byte first).
 func (this *Decoder) ReadUint16() (value uint16, n int, err error) {
 	var buffer [2]byte
 	n, err = this.ReadFull(buffer[:])
 	// Fold the bytes in from most to least significant.
 	for _, octet := range buffer {
 		value = value<<8 | uint16(octet)
 	}
 	return value, n, err
 }
 // ReadInt32 decodes a 32-bit signed integer from the input reader. The bytes
 // are read as an unsigned value and then reinterpreted as signed.
 func (this *Decoder) ReadInt32() (value int32, n int, err error) {
 	var raw uint32
 	raw, n, err = this.ReadUint32()
 	return int32(raw), n, err
 }
 // ReadUint32 decodes a 32-bit unsigned integer from the input reader. The four
 // bytes are interpreted as big-endian (most significant byte first).
 func (this *Decoder) ReadUint32() (value uint32, n int, err error) {
 	var buffer [4]byte
 	n, err = this.ReadFull(buffer[:])
 	// Fold the bytes in from most to least significant.
 	for _, octet := range buffer {
 		value = value<<8 | uint32(octet)
 	}
 	return value, n, err
 }
 // ReadInt64 decodes a 64-bit signed integer from the input reader. The bytes
 // are read as an unsigned value and then reinterpreted as signed.
 func (this *Decoder) ReadInt64() (value int64, n int, err error) {
 	var raw uint64
 	raw, n, err = this.ReadUint64()
 	return int64(raw), n, err
 }
 // ReadUint64 decodes a 64-bit unsigned integer from the input reader. The eight
 // bytes are interpreted as big-endian (most significant byte first). It returns
 // the value, the number of bytes read, and any error from ReadFull.
 func (this *Decoder) ReadUint64() (value uint64, n int, err error) {
 	var buffer [8]byte
 	n, err = this.ReadFull(buffer[:])
 	// Fold the bytes in from most to least significant. Doing this in a
 	// loop gives every byte its own distinct shift (56, 48, 40, ... 0), so
 	// no two bytes can end up in the same bit position.
 	for _, octet := range buffer {
 		value = value<<8 | uint64(octet)
 	}
 	return value, n, err
 }
 // ReadGBEU decodes a growing unsigned integer of up to 64 bits from the input
 // reader. Each byte contributes its low seven bits, most significant chunk
 // first; a byte whose top bit is zero ends the integer. If a read fails, the
 // returned value is zero and n is the number of bytes consumed before the
 // failure. Chunks beyond 64 bits are not detected and shift earlier bits out.
 func (this *Decoder) ReadGBEU() (value uint64, n int, err error) {
 	for {
 		octet, count, readErr := this.ReadByte()
 		if readErr != nil {
 			return 0, n, readErr
 		}
 		n += count
 		// Make room for seven more bits and append this chunk's payload.
 		value = value<<7 | uint64(octet&0x7F)
 		// A clear top bit marks the final chunk.
 		if octet&0x80 == 0 {
 			return value, n, nil
 		}
 	}
 }
--- a/codec/encode.go
+++ b/codec/encode.go
@@ -0,0 +1,95 @@
 package codec
 import "io"
 // Encoder wraps an [io.Writer] and encodes data to it. The writer is embedded,
 // so its Write method is available directly on the Encoder. The fixed-width
 // integer methods declared on Encoder emit bytes in big-endian order.
 type Encoder struct {
 	// Writer is the underlying sink that all encoding methods write to.
 	io.Writer
 }
 // WriteByte encodes a single byte to the output writer. It returns the number
 // of bytes written and any error from the underlying writer.
 func (this *Encoder) WriteByte(value byte) (n int, err error) {
 	// Delegate to WriteUint8; calling WriteByte here would recurse forever.
 	return this.WriteUint8(uint8(value))
 }
 // WriteInt8 encodes an 8-bit signed integer to the output writer. The value is
 // reinterpreted as unsigned and written as a single byte.
 func (this *Encoder) WriteInt8(value int8) (n int, err error) {
 	raw := uint8(value)
 	n, err = this.WriteUint8(raw)
 	return n, err
 }
 // WriteUint8 encodes an 8-bit unsigned integer to the output writer as a single
 // byte. It returns the result of the underlying Write call.
 func (this *Encoder) WriteUint8(value uint8) (n int, err error) {
 	octet := make([]byte, 1)
 	octet[0] = value
 	return this.Write(octet)
 }
 // WriteInt16 encodes a 16-bit signed integer to the output writer. The value is
 // reinterpreted as unsigned and written by WriteUint16.
 func (this *Encoder) WriteInt16(value int16) (n int, err error) {
 	raw := uint16(value)
 	n, err = this.WriteUint16(raw)
 	return n, err
 }
 // WriteUint16 encodes a 16-bit unsigned integer to the output writer as two
 // bytes in big-endian order, using a single Write call.
 func (this *Encoder) WriteUint16(value uint16) (n int, err error) {
 	buffer := make([]byte, 2)
 	// Fill from the least significant end, shifting the value down as we go.
 	for index := len(buffer) - 1; index >= 0; index-- {
 		buffer[index] = byte(value)
 		value >>= 8
 	}
 	return this.Write(buffer)
 }
 // WriteInt32 encodes a 32-bit signed integer to the output writer. The value is
 // reinterpreted as unsigned and written by WriteUint32.
 func (this *Encoder) WriteInt32(value int32) (n int, err error) {
 	raw := uint32(value)
 	n, err = this.WriteUint32(raw)
 	return n, err
 }
 // WriteUint32 encodes a 32-bit unsigned integer to the output writer as four
 // bytes in big-endian order, using a single Write call.
 func (this *Encoder) WriteUint32(value uint32) (n int, err error) {
 	buffer := make([]byte, 4)
 	// Fill from the least significant end, shifting the value down as we go.
 	for index := len(buffer) - 1; index >= 0; index-- {
 		buffer[index] = byte(value)
 		value >>= 8
 	}
 	return this.Write(buffer)
 }
 // WriteInt64 encodes a 64-bit signed integer to the output writer. The value is
 // reinterpreted as unsigned and written by WriteUint64.
 func (this *Encoder) WriteInt64(value int64) (n int, err error) {
 	raw := uint64(value)
 	n, err = this.WriteUint64(raw)
 	return n, err
 }
 // WriteUint64 encodes a 64-bit unsigned integer to the output writer as eight
 // bytes in big-endian order, using a single Write call.
 func (this *Encoder) WriteUint64(value uint64) (n int, err error) {
 	buffer := make([]byte, 8)
 	// Fill from the least significant end, shifting the value down as we go.
 	for index := len(buffer) - 1; index >= 0; index-- {
 		buffer[index] = byte(value)
 		value >>= 8
 	}
 	return this.Write(buffer)
 }
 // EncodeGBEU encodes a growing unsigned integer of up to 64 bits to the output
 // writer. The value is split into 7-bit chunks, most significant first; every
 // chunk except the last has its top bit set to signal that more chunks follow.
 // Exactly GBEUSize(value) bytes are passed to a single Write call, and the
 // result of that call is returned.
 func (this *Encoder) EncodeGBEU(value uint64) (n int, err error) {
 	// increase if go somehow gets support for over 64 bit integers. we
 	// could also make an expanding int type in goutil to use here, or maybe
 	// there is one in the stdlib. keep this int64 version as well though
 	// because its ergonomic.
 	buffer := [16]byte{}

 	// window is the bit offset of the chunk currently being emitted,
 	// starting at the most significant chunk and stepping down to zero.
 	window := (GBEUSize(value) - 1) * 7
 	index := 0
 	for window >= 0 {
 		chunk := uint8(value>>window) & 0x7F
 		if window > 0 {
 			// Not the last chunk: set the continuation bit.
 			chunk |= 0x80
 		}
 		buffer[index] = chunk
 		index++
 		window -= 7
 	}

 	// Only the chunks actually produced belong on the wire; writing the
 	// whole scratch buffer would append stray zero bytes after the integer.
 	return this.Write(buffer[:index])
 }
--- a/codec/measure.go
+++ b/codec/measure.go
@@ -0,0 +1,11 @@
 package codec
 // GBEUSize returns the size (in octets) of a GBEU integer, where each octet
 // carries seven bits of the value. The result is always at least one, so zero
 // still occupies a single octet.
 func GBEUSize(value uint64) int {
 	octets := 1
 	// Count one extra octet for every further group of seven bits in use.
 	for remaining := value >> 7; remaining != 0; remaining >>= 7 {
 		octets++
 	}
 	return octets
 }
--- a/design/protocol.md
+++ b/design/protocol.md
@@ -40,92 +40,73 @@ designed to allow applications to be presented with data they are not equipped
 to handle while continuing to function normally. This enables backwards
 compatible application protocol changes.
-The length of a TAPE structure is assumed to be given by the surrounding
+TAPE expresses types using tags. A tag is 8 bits in size, and is divided into
-protocol, which is usually METADAPT-A or B. The root of a TAPE structure can be
+two parts: the Type Number (TN), and the Configuration Number (CN). The TN is 3
-any data value, but is usually a table, which can contain several values that
+bits, and the CN is 5 bits. Both are interpreted as unsigned integers. Both
-each have a numeric key. Values can also be nested. Both sides of the connection
+sides of the connection must agree on the semantic meaning of the values and
-must agree on what data type should be the root value, the data type of each
+their arrangement.
-known table value, etc.
+
 TAPE is based on an encoding method previously developed by silt.
 ### Data Value Types
-The table below lists all data value types supported by TAPE.
+The table below lists all data value types supported by TAPE. They are discussed
 in detail in the following sections.
-| Name        | Size            | Description                 | Encoding Method
+| TN | Bits | Name | Description
-| ----------- | --------------: | --------------------------- | ---------------
+| -: | ---: | ---- | -----------
-| I8          |               1 | A signed 8-bit integer      | BETC
+|  0 |  000 | SI   | Small integer
-| I16         |               2 | A signed 16-bit integer     | BETC
+|  1 |  001 | LI   | Large integer
-| I32         |               4 | A signed 32-bit integer     | BETC
+|  2 |  010 | FP   | Floating point
-| I64         |               8 | A signed 64-bit integer     | BETC
+|  3 |  011 | SBA  | Small byte array
-| U8          |               1 | An unsigned 8-bit integer   | BEU
+|  4 |  100 | LBA  | Large byte array
-| U16         |               2 | An unsigned 16-bit integer  | BEU
+|  5 |  101 | OTA  | One-tag array
-| U32         |               4 | An unsigned 32-bit integer  | BEU
+|  6 |  110 | KTV  | Key-tag-value table
-| U64         |               8 | An unsigned 64-bit integer  | BEU
+|  7 |  111 | N/A  | Reserved
 | Array[^1]   |                 | An array of any above type  | PASTA
 | String      |                 | A UTF-8 string              | UTF-8
 | StringArray |                 | An array of the String type | VILA
 | Table       |                 | A table of any type         | TTLV
-[^1]: Array types are written as <E>Array, where <E> is the element type. For
+#### No Value (NIL)
-example, an array of I32 would be written as I32Array. StringArray still follows
+NIL is used to encode the absence of a value where there would otherwise be one.
-this rule, even though it is encoded differently from other arrays.
+The CN of a NIL is ignored. It has no payload.
-[^2]: SOP (sum of parts) refers to the sum of the size of every item in a data
+#### Small Integer (SI)
-structure.
+SI encodes an integer of up to 5 bits, which are stored in the CN. It has no
 payload. Whether the bits are interpreted as unsigned or as signed two's
 complement is semantic information and must be agreed upon by both sides of the
 connection. Thus, the value may range from 0 to 31 if unsigned, and from -16 to
 15 if signed.
-### Encoding Methods
+#### Large Integer (LI)
-Below are all encoding methods supported by TAPE.
+LI encodes an integer of up to 256 bits, which are stored in the payload. The CN
 determines the length of the payload in bytes. The integer is big-endian. Whether
 the payload is interpreted as unsigned or as signed two's complement is semantic
 information and must be agreed upon by both sides of the connection. Thus, for a
 payload of n bytes, the value may range from 0 to 2^(8n) - 1 if unsigned, and
 from -2^(8n - 1) to 2^(8n - 1) - 1 if signed.
-#### BETC
+#### Floating Point (FP)
-Big-Endian, Two's Complement signed integer. The size is defined as the least
+FP encodes an IEEE 754 floating point number of up to 256 bits, which are stored
-amount of whole octets which can fit all bits in the integer, regardless if the
+in the payload. The CN determines the length of the payload in bytes, and it may
-bits are on or off. Therefore, the size cannot change at runtime.
+only be one of these values: 16, 32, 64, 128, or 256.
-#### BEU
+#### Small Byte Array (SBA)
-Big-Endian, Unsigned integer. The size is defined as the least amount of whole
+SBA encodes an array of up to 32 bytes, which are stored in the payload. The
-octets which can fit all bits in the integer, regardless if the bits are on or
+CN determines the length of the payload in bytes.
 off. Therefore, the size cannot change at runtime.
-#### GBEU
+#### Large Byte Array (LBA)
-Growing Big-Endian, Unsigned integer. The integer is broken up into 8-bit
+LBA encodes an array of up to 2^256 bytes, which are stored in the second part
-chunks, where the first bit of each chunk is a CCB. The chunk with its CCB set
+of the payload, directly after the length. The length of the data length field
-to zero instead of one is the last chunk in the integer. Chunks are ordered from
+in bytes is determined by the CN.
 most significant to least significant (big endian). The size is defined as the
 least amount of whole octets which can fit all chunks of the integer. The size
 of this type is not fixed and may change at runtime, so this needs to be
 accounted for during use.
-#### PASTA
+#### One-Tag Array (OTA)
-Packed Single-Type Array. The size is defined as the size of an individual item
+OTA encodes an array of up to 2^256 items, which are stored in the payload after
-times the number of items. Items are placed one after the other with no gaps
+the length field and the item tag, where the length field comes first. Each item
-in-between them, except as required to align the start of each item to the
+must be the same length, as they all share the same tag. The length of the data
-nearest whole octet. Items should be of the same type and must be of the same
+length field in bytes is determined by the CN.
 size.
-#### UTF-8
+#### Key-Tag-Value Table (KTV)
-UTF-8 string. The size is defined as the least amount of whole octets which can
+KTV encodes a table of up to 2^256 key/value pairs, which are stored in the
-fit all bits in the string, regardless if the bits are on or off. The size of
+payload after the length field. The pairs themselves consist of a 16-bit
-this type is not fixed and may change at runtime, so this needs to be accounted
+unsigned big-endian key followed by a tag and then the payload. Pair values can
-for during use.
+be of different types and sizes. The order of the pairs is not significant and
-
+should never be treated as such.
 #### VILA
 Variable Item Length Array. The size is defined as the least amount of whole
 octets which can fit each item plus one GBEU per item describing that item's
 size. The size of this type is not fixed and may change at runtime, so this
 needs to be accounted for during use. The amount of items must be greater than
 zero. Items are each prefixed by their size (in octets) encoded as a GBEU, and
 they are placed one after the other with no gaps in-between them, except as
 required to align the start of each item to the nearest whole octet. Items
 should be of the same type but do not need to be of the same size.
 #### TTLV
 TAPE Tag Length Value. The size is defined as the least amount of whole octets
 which can fit each item plus one U16 and one GBEU per item, where the latter of
 which describes that item's size. The size of this type is not fixed and may
 change at runtime, so this needs to be accounted for during use. Items are each
 prefixed by their numerical tag encoded as a U16, and their size (in octets)
 encoded as a GBEU. Items are placed one after the other with no gaps in-between
 them, except as required to align the start of each item to the nearest whole
 octet. Items need not be of the same type nor the same size.
 ## Transports
 A transport is a protocol that HOPP connections can run on top of. HOPP
@@ -176,7 +157,6 @@ sun will have expanded to swallow earth by then. Your connection will not last
 that long.
 #### Message Chunking
 The most significant bit of the payload size field of an MMB is called the Chunk
 Control Bit (CCB). If the CCB of a given MMB is zero, the represented message is
 interpreted as being self-contained and the data is processed immediately. If
Author	SHA1	Message	Date
Sasha Koshka	c4a985f622	design: New TAPE design	2025-05-30 21:34:31 -04:00
Sasha Koshka	58514f6afe	codec: Add missing WriteByte function	2025-05-30 07:08:43 -04:00
Sasha Koshka	83fa77ba13	codec: Add codec package to handle encoding and decoding ints, etc	2025-05-30 07:05:55 -04:00