Compare commits
3 Commits
717754644c
...
c4a985f622
Author | SHA1 | Date | |
---|---|---|---|
c4a985f622 | |||
58514f6afe | |||
83fa77ba13 |
100
codec/decode.go
Normal file
100
codec/decode.go
Normal file
@ -0,0 +1,100 @@
|
||||
package codec
|
||||
|
||||
import "io"
|
||||
|
||||
// Decoder wraps an [io.Reader] and decodes data from it.
|
||||
type Decoder struct {
|
||||
io.Reader
|
||||
}
|
||||
|
||||
// ReadFull calls [io.ReadFull] on the reader.
|
||||
func (this *Decoder) ReadFull(buffer []byte) (n int, err error) {
|
||||
return io.ReadFull(this, buffer)
|
||||
}
|
||||
|
||||
// ReadByte decodes a single byte from the input reader.
|
||||
func (this *Decoder) ReadByte() (value byte, n int, err error) {
|
||||
uncasted, n, err := this.ReadUint8()
|
||||
return byte(uncasted), n, err
|
||||
}
|
||||
|
||||
// ReadInt8 decodes an 8-bit signed integer from the input reader.
|
||||
func (this *Decoder) ReadInt8() (value int8, n int, err error) {
|
||||
uncasted, n, err := this.ReadUint8()
|
||||
return int8(uncasted), n, err
|
||||
}
|
||||
|
||||
// ReadUint8 decodes an 8-bit unsigned integer from the input reader.
|
||||
func (this *Decoder) ReadUint8() (value uint8, n int, err error) {
|
||||
buffer := [1]byte { }
|
||||
n, err = this.ReadFull(buffer[:])
|
||||
return uint8(buffer[0]), n, err
|
||||
}
|
||||
|
||||
// ReadInt16 decodes an 16-bit signed integer from the input reader.
|
||||
func (this *Decoder) ReadInt16() (value int16, n int, err error) {
|
||||
uncasted, n, err := this.ReadUint16()
|
||||
return int16(uncasted), n, err
|
||||
}
|
||||
|
||||
// ReadUint16 decodes an 16-bit unsigned integer from the input reader.
|
||||
func (this *Decoder) ReadUint16() (value uint16, n int, err error) {
|
||||
buffer := [2]byte { }
|
||||
n, err = this.ReadFull(buffer[:])
|
||||
return uint16(buffer[0]) << 8 |
|
||||
uint16(buffer[1]), n, err
|
||||
}
|
||||
|
||||
// ReadInt32 decodes an 32-bit signed integer from the input reader.
|
||||
func (this *Decoder) ReadInt32() (value int32, n int, err error) {
|
||||
uncasted, n, err := this.ReadUint32()
|
||||
return int32(uncasted), n, err
|
||||
}
|
||||
|
||||
// ReadUint32 decodes an 32-bit unsigned integer from the input reader.
|
||||
func (this *Decoder) ReadUint32() (value uint32, n int, err error) {
|
||||
buffer := [4]byte { }
|
||||
n, err = this.ReadFull(buffer[:])
|
||||
return uint32(buffer[0]) << 24 |
|
||||
uint32(buffer[1]) << 16 |
|
||||
uint32(buffer[2]) << 8 |
|
||||
uint32(buffer[3]), n, err
|
||||
}
|
||||
|
||||
// ReadInt64 decodes an 64-bit signed integer from the input reader.
|
||||
func (this *Decoder) ReadInt64() (value int64, n int, err error) {
|
||||
uncasted, n, err := this.ReadUint64()
|
||||
return int64(uncasted), n, err
|
||||
}
|
||||
|
||||
// ReadUint64 decodes an 64-bit unsigned integer from the input reader.
|
||||
func (this *Decoder) ReadUint64() (value uint64, n int, err error) {
|
||||
buffer := [8]byte { }
|
||||
n, err = this.ReadFull(buffer[:])
|
||||
return uint64(buffer[0]) << 56 |
|
||||
uint64(buffer[1]) << 48 |
|
||||
uint64(buffer[2]) << 48 |
|
||||
uint64(buffer[3]) << 32 |
|
||||
uint64(buffer[4]) << 24 |
|
||||
uint64(buffer[5]) << 16 |
|
||||
uint64(buffer[6]) << 8 |
|
||||
uint64(buffer[7]), n, err
|
||||
}
|
||||
|
||||
// ReadGBEU decodes a growing unsigned integer of up to 64 bits from the input
|
||||
// reader.
|
||||
func (this *Decoder) ReadGBEU() (value uint64, n int, err error) {
|
||||
var fullValue uint64
|
||||
for {
|
||||
chunk, nn, err := this.ReadByte()
|
||||
if err != nil { return 0, n, err }
|
||||
n += nn
|
||||
|
||||
fullValue *= 0x80
|
||||
fullValue += uint64(chunk & 0x7F)
|
||||
ccb := chunk >> 7
|
||||
if ccb == 0 {
|
||||
return fullValue, n, nil
|
||||
}
|
||||
}
|
||||
}
|
95
codec/encode.go
Normal file
95
codec/encode.go
Normal file
@ -0,0 +1,95 @@
|
||||
package codec
|
||||
|
||||
import "io"
|
||||
|
||||
// Encoder wraps an [io.Writer] and encodes data to it.
|
||||
type Encoder struct {
|
||||
io.Writer
|
||||
}
|
||||
|
||||
// WriteByte encodes a single byte to the output writer.
|
||||
func (this *Encoder) WriteByte(value byte) (n int, err error) {
|
||||
return this.WriteByte(uint8(value))
|
||||
}
|
||||
|
||||
// WriteInt8 encodes an 8-bit signed integer to the output writer.
|
||||
func (this *Encoder) WriteInt8(value int8) (n int, err error) {
|
||||
return this.WriteUint8(uint8(value))
|
||||
}
|
||||
|
||||
// WriteUint8 encodes an 8-bit unsigned integer to the output writer.
|
||||
func (this *Encoder) WriteUint8(value uint8) (n int, err error) {
|
||||
return this.Write([]byte { byte(value) })
|
||||
}
|
||||
|
||||
// WriteInt16 encodes an 16-bit signed integer to the output writer.
|
||||
func (this *Encoder) WriteInt16(value int16) (n int, err error) {
|
||||
return this.WriteUint16(uint16(value))
|
||||
}
|
||||
|
||||
// WriteUint16 encodes an 16-bit unsigned integer to the output writer.
|
||||
func (this *Encoder) WriteUint16(value uint16) (n int, err error) {
|
||||
return this.Write([]byte {
|
||||
byte(value >> 8),
|
||||
byte(value),
|
||||
})
|
||||
}
|
||||
|
||||
// WriteInt32 encodes an 32-bit signed integer to the output writer.
|
||||
func (this *Encoder) WriteInt32(value int32) (n int, err error) {
|
||||
return this.WriteUint32(uint32(value))
|
||||
}
|
||||
|
||||
// WriteUint32 encodes an 32-bit unsigned integer to the output writer.
|
||||
func (this *Encoder) WriteUint32(value uint32) (n int, err error) {
|
||||
return this.Write([]byte {
|
||||
byte(value >> 24),
|
||||
byte(value >> 16),
|
||||
byte(value >> 8),
|
||||
byte(value),
|
||||
})
|
||||
}
|
||||
|
||||
// WriteInt64 encodes an 64-bit signed integer to the output writer.
|
||||
func (this *Encoder) WriteInt64(value int64) (n int, err error) {
|
||||
return this.WriteUint64(uint64(value))
|
||||
}
|
||||
|
||||
// WriteUint64 encodes an 64-bit unsigned integer to the output writer.
|
||||
func (this *Encoder) WriteUint64(value uint64) (n int, err error) {
|
||||
return this.Write([]byte {
|
||||
byte(value >> 56),
|
||||
byte(value >> 48),
|
||||
byte(value >> 40),
|
||||
byte(value >> 32),
|
||||
byte(value >> 24),
|
||||
byte(value >> 16),
|
||||
byte(value >> 8),
|
||||
byte(value),
|
||||
})
|
||||
}
|
||||
|
||||
// EncodeGBEU encodes a growing unsigned integer of up to 64 bits to the output
|
||||
// writer.
|
||||
func (this *Encoder) EncodeGBEU(value uint64) (n int, err error) {
|
||||
// increase if go somehow gets support for over 64 bit integers. we
|
||||
// could also make an expanding int type in goutil to use here, or maybe
|
||||
// there is one in the stdlib. keep this int64 version as well though
|
||||
// because its ergonomic.
|
||||
buffer := [16]byte { }
|
||||
|
||||
window := (GBEUSize(value) - 1) * 7
|
||||
index := 0
|
||||
for window >= 0 {
|
||||
chunk := uint8(value >> window) & 0x7F
|
||||
if window > 0 {
|
||||
chunk |= 0x80
|
||||
}
|
||||
buffer[index] = chunk
|
||||
|
||||
index += 1
|
||||
window -= 7
|
||||
}
|
||||
|
||||
return this.Write(buffer[:])
|
||||
}
|
11
codec/measure.go
Normal file
11
codec/measure.go
Normal file
@ -0,0 +1,11 @@
|
||||
package codec
|
||||
|
||||
// GBEUSize returns the size (in octets) of value when encoded as a GBEU
// integer. Each octet carries 7 bits of the value, and at least one octet is
// always needed, so the result is 1 for values 0 through 127 and at most 10
// for a 64-bit value.
func GBEUSize(value uint64) int {
	// The first octet is always present; every further group of 7 bits that
	// is still non-zero after shifting costs one more octet.
	length := 1
	for value >>= 7; value != 0; value >>= 7 {
		length++
	}
	return length
}
|
@ -40,92 +40,73 @@ designed to allow applications to be presented with data they are not equipped
|
||||
to handle while continuing to function normally. This enables backwards
|
||||
compatible application protocol changes.
|
||||
|
||||
The length of a TAPE structure is assumed to be given by the surrounding
|
||||
protocol, which is usually METADAPT-A or B. The root of a TAPE structure can be
|
||||
any data value, but is usually a table, which can contain several values that
|
||||
each have a numeric key. Values can also be nested. Both sides of the connection
|
||||
must agree on what data type should be the root value, the data type of each
|
||||
known table value, etc.
|
||||
TAPE expresses types using tags. A tag is 8 bits in size, and is divided into
|
||||
two parts: the Type Number (TN), and the Configuration Number (CN). The TN is 3
|
||||
bits, and the CN is 5 bits. Both are interpreted as unsigned integers. Both
|
||||
sides of the connection must agree on the semantic meaning of the values and
|
||||
their arrangement.
|
||||
|
||||
TAPE is based on an encoding method previously developed by silt.
|
||||
|
||||
### Data Value Types
|
||||
The table below lists all data value types supported by TAPE.
|
||||
The table below lists all data value types supported by TAPE. They are discussed
|
||||
in detail in the following sections.
|
||||
|
||||
| Name | Size | Description | Encoding Method
|
||||
| ----------- | --------------: | --------------------------- | ---------------
|
||||
| I8 | 1 | A signed 8-bit integer | BETC
|
||||
| I16 | 2 | A signed 16-bit integer | BETC
|
||||
| I32 | 4 | A signed 32-bit integer | BETC
|
||||
| I64 | 8 | A signed 64-bit integer | BETC
|
||||
| U8 | 1 | An unsigned 8-bit integer | BEU
|
||||
| U16 | 2 | An unsigned 16-bit integer | BEU
|
||||
| U32 | 4 | An unsigned 32-bit integer | BEU
|
||||
| U64 | 8 | An unsigned 64-bit integer | BEU
|
||||
| Array[^1] | | An array of any above type | PASTA
|
||||
| String | | A UTF-8 string | UTF-8
|
||||
| StringArray | | An array of the String type | VILA
|
||||
| Table | | A table of any type | TTLV
|
||||
| TN | Bits | Name | Description
|
||||
| -: | ---: | ---- | -----------
|
||||
| 0 | 000 | SI | Small integer
|
||||
| 1 | 001 | LI | Large integer
|
||||
| 2 | 010 | FP | Floating point
|
||||
| 3 | 011 | SBA | Small byte array
|
||||
| 4 | 100 | LBA | Large byte array
|
||||
| 5 | 101 | OTA | One-tag array
|
||||
| 6 | 110 | KTV | Key-tag-value table
|
||||
| 7 | 111 | N/A | Reserved
|
||||
|
||||
[^1]: Array types are written as <E>Array, where <E> is the element type. For
|
||||
example, an array of I32 would be written as I32Array. StringArray still follows
|
||||
this rule, even though it is encoded differently from other arrays.
|
||||
#### No Value (NIL)
|
||||
NIL is used to encode the absence of a value where there would otherwise be one.
|
||||
The CN of a NIL is ignored. It has no payload.
|
||||
|
||||
[^2]: SOP (sum of parts) refers to the sum of the size of every item in a data
|
||||
structure.
|
||||
#### Small Integer (SI)
|
||||
SI encodes an integer of up to 5 bits, which are stored in the CN. It has no
|
||||
payload. Whether the bits are interpreted as unsigned or as signed two's
|
||||
complement is semantic information and must be agreed upon by both sides of the
|
||||
connection. Thus, the value may range from 0 to 31 if unsigned, and from -16 to
|
||||
15 if signed.
|
||||
|
||||
### Encoding Methods
|
||||
Below are all encoding methods supported by TAPE.
|
||||
#### Large Integer (LI)
|
||||
LI encodes an integer of up to 256 bits, which are stored in the payload. The CN
|
||||
determines the length of the payload in bytes. The integer is big-endian. Whether
|
||||
the payload is interpreted as unsigned or as signed two's complement is semantic
|
||||
information and must be agreed upon by both sides of the connection. Thus, the
|
||||
value may range from 0 to 2^(8n) - 1 if unsigned, and from -2^(8n - 1) to 2^(8n - 1) - 1 if signed, where n is the length of the payload in bytes.
|
||||
|
||||
#### BETC
|
||||
Big-Endian, Two's Complement signed integer. The size is defined as the least
|
||||
amount of whole octets which can fit all bits in the integer, regardless if the
|
||||
bits are on or off. Therefore, the size cannot change at runtime.
|
||||
#### Floating Point (FP)
|
||||
FP encodes an IEEE 754 floating point number of up to 256 bits, which are stored
|
||||
in the payload. The CN determines the length of the payload in bytes, and the
payload may only be one of these sizes in bits: 16, 32, 64, 128, or 256 (that is, 2, 4, 8, 16, or 32 bytes).
|
||||
|
||||
#### BEU
|
||||
Big-Endian, Unsigned integer. The size is defined as the least amount of whole
|
||||
octets which can fit all bits in the integer, regardless if the bits are on or
|
||||
off. Therefore, the size cannot change at runtime.
|
||||
#### Small Byte Array (SBA)
|
||||
SBA encodes an array of up to 32 bytes, which are stored in the payload. The
|
||||
CN determines the length of the payload in bytes.
|
||||
|
||||
#### GBEU
|
||||
Growing Big-Endian, Unsigned integer. The integer is broken up into 8-bit
|
||||
chunks, where the first bit of each chunk is a Chunk Control Bit (CCB). The chunk with its CCB set
|
||||
to zero instead of one is the last chunk in the integer. Chunks are ordered from
|
||||
most significant to least significant (big endian). The size is defined as the
|
||||
least amount of whole octets which can fit all chunks of the integer. The size
|
||||
of this type is not fixed and may change at runtime, so this needs to be
|
||||
accounted for during use.
|
||||
#### Large Byte Array (LBA)
|
||||
LBA encodes an array of up to 2^256 bytes, which are stored in the second part
|
||||
of the payload, directly after the length. The length of the data length field
|
||||
in bytes is determined by the CN.
|
||||
|
||||
#### PASTA
|
||||
Packed Single-Type Array. The size is defined as the size of an individual item
|
||||
times the number of items. Items are placed one after the other with no gaps
|
||||
in-between them, except as required to align the start of each item to the
|
||||
nearest whole octet. Items should be of the same type and must be of the same
|
||||
size.
|
||||
#### One-Tag Array (OTA)
|
||||
OTA encodes an array of up to 2^256 items, which are stored in the payload after
|
||||
the length field and the item tag, where the length field comes first. Each item
|
||||
must be the same length, as they all share the same tag. The length of the data
|
||||
length field in bytes is determined by the CN.
|
||||
|
||||
#### UTF-8
|
||||
UTF-8 string. The size is defined as the least amount of whole octets which can
|
||||
fit all bits in the string, regardless if the bits are on or off. The size of
|
||||
this type is not fixed and may change at runtime, so this needs to be accounted
|
||||
for during use.
|
||||
|
||||
#### VILA
|
||||
Variable Item Length Array. The size is defined as the least amount of whole
|
||||
octets which can fit each item plus one GBEU per item describing that item's
|
||||
size. The size of this type is not fixed and may change at runtime, so this
|
||||
needs to be accounted for during use. The amount of items must be greater than
|
||||
zero. Items are each prefixed by their size (in octets) encoded as a GBEU, and
|
||||
they are placed one after the other with no gaps in-between them, except as
|
||||
required to align the start of each item to the nearest whole octet. Items
|
||||
should be of the same type but do not need to be of the same size.
|
||||
|
||||
#### TTLV
|
||||
TAPE Tag Length Value. The size is defined as the least amount of whole octets
|
||||
which can fit each item plus one U16 and one GBEU per item, the latter of
|
||||
which describes that item's size. The size of this type is not fixed and may
|
||||
change at runtime, so this needs to be accounted for during use. Items are each
|
||||
prefixed by their numerical tag encoded as a U16, and their size (in octets)
|
||||
encoded as a GBEU. Items are placed one after the other with no gaps in-between
|
||||
them, except as required to align the start of each item to the nearest whole
|
||||
octet. Items need not be of the same type nor the same size.
|
||||
#### Key-Tag-Value Table (KTV)
|
||||
KTV encodes a table of up to 2^256 key/value pairs, which are stored in the
|
||||
payload after the length field. The pairs themselves consist of a 16-bit
|
||||
unsigned big-endian key followed by a tag and then the payload. Pair values can
|
||||
be of different types and sizes. The order of the pairs is not significant and
|
||||
should never be treated as such.
|
||||
|
||||
## Transports
|
||||
A transport is a protocol that HOPP connections can run on top of. HOPP
|
||||
@ -176,7 +157,6 @@ sun will have expanded to swallow earth by then. Your connection will not last
|
||||
that long.
|
||||
|
||||
#### Message Chunking
|
||||
|
||||
The most significant bit of the payload size field of an MMB is called the Chunk
|
||||
Control Bit (CCB). If the CCB of a given MMB is zero, the represented message is
|
||||
interpreted as being self-contained and the data is processed immediately. If
|
||||
|
Loading…
Reference in New Issue
Block a user