tape: Add functions to encode and decode float16
This commit is contained in:
parent
f009a970cd
commit
5d84636b55
@ -112,6 +112,13 @@ func (this *Decoder) ReadUintN(bytes int) (value uint64, n int, err error) {
|
|||||||
return value, n, nil
|
return value, n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadFloat16 decodes a 16-bit floating point value from the input reader.
// The raw 16 bits are obtained with ReadUint16 and widened to a float32 by
// f16bitsToF32bits. n is the count reported by ReadUint16 (presumably the
// number of bytes consumed; confirm against ReadUint16). If the read fails,
// value is 0 and the error is passed through unchanged.
|
||||||
|
func (this *Decoder) ReadFloat16() (value float32, n int, err error) {
|
||||||
|
bits, nn, err := this.ReadUint16()
|
||||||
|
// Account for whatever was read before checking the error, so n is
// reported even when the read fails.
n += nn; if err != nil { return 0, n, err }
|
||||||
|
return math.Float32frombits(f16bitsToF32bits(bits)), n, nil
|
||||||
|
}
|
||||||
|
|
||||||
// ReadFloat32 decodes a 32-bit floating point value from the input reader.
|
// ReadFloat32 decodes a 32-bit floating point value from the input reader.
|
||||||
func (this *Decoder) ReadFloat32() (value float32, n int, err error) {
|
func (this *Decoder) ReadFloat32() (value float32, n int, err error) {
|
||||||
bits, nn, err := this.ReadUint32()
|
bits, nn, err := this.ReadUint32()
|
||||||
@ -132,3 +139,54 @@ func (this *Decoder) ReadTag() (value Tag, n int, err error) {
|
|||||||
n += nn; if err != nil { return 0, n, err }
|
n += nn; if err != nil { return 0, n, err }
|
||||||
return Tag(uncasted), n, nil
|
return Tag(uncasted), n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// f16bitsToF32bits returns uint32 (float32 bits) converted from specified uint16.
|
||||||
|
// Taken from https://github.com/x448/float16/blob/v0.8.4/float16.go
|
||||||
|
//
|
||||||
|
// MIT License
|
||||||
|
//
|
||||||
|
// Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in all
|
||||||
|
// copies or substantial portions of the Software.
|
||||||
|
// f16bitsToF32bits widens a half-precision bit pattern to the single-precision
// bit pattern of the same value. The sign bit moves from bit 15 to bit 31, the
// 5-bit exponent is re-biased from 0xf to 0x7f, and the 10-bit significand is
// left-aligned in the 23-bit float32 fraction field.
func f16bitsToF32bits(in uint16) uint32 {
|
||||||
|
// All 65536 conversions with this were confirmed to be correct
|
||||||
|
// by Montgomery Edwards⁴⁴⁸ (github.com/x448).
|
||||||
|
|
||||||
|
sign := uint32(in&0x8000) << 16 // sign for 32-bit
|
||||||
|
exp := uint32(in&0x7c00) >> 10 // exponent for 16-bit
|
||||||
|
coef := uint32(in&0x03ff) << 13 // significand for 32-bit
|
||||||
|
|
||||||
|
// An all-ones exponent encodes infinity (zero significand) or NaN.
if exp == 0x1f {
|
||||||
|
if coef == 0 {
|
||||||
|
// infinity
|
||||||
|
return sign | 0x7f800000 | coef
|
||||||
|
}
|
||||||
|
// NaN
|
||||||
|
// 0x7fc00000 sets the all-ones exponent plus the top fraction bit; the
// original payload bits are kept by OR-ing in coef.
return sign | 0x7fc00000 | coef
|
||||||
|
}
|
||||||
|
|
||||||
|
// A zero exponent encodes signed zero or a subnormal number.
if exp == 0 {
|
||||||
|
if coef == 0 {
|
||||||
|
// zero
|
||||||
|
return sign
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalize subnormal numbers
|
||||||
|
// Shift the significand left until its leading one reaches bit 23 (the
// lowest bit covered by the 0x7f800000 mask), lowering the exponent once
// per shift. exp is unsigned and may wrap below zero here; the re-bias
// addition in the final return wraps it back into range.
exp++
|
||||||
|
for coef&0x7f800000 == 0 {
|
||||||
|
coef <<= 1
|
||||||
|
exp--
|
||||||
|
}
|
||||||
|
// Drop the leading one, which is implicit in a normalized float32.
coef &= 0x007fffff
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-bias the exponent (0x7f - 0xf) and assemble the float32 fields.
return sign | ((exp + (0x7f - 0xf)) << 23) | coef
|
||||||
|
}
|
||||||
|
@ -102,6 +102,11 @@ func (this *Encoder) WriteUintN(value uint64, bytes int) (n int, err error) {
|
|||||||
return n, nil
|
return n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WriteFloat16 encodes a 16-bit floating point value to the output writer.
// The float32 argument is narrowed to half precision by f32bitsToF16bits and
// the resulting 16 bits are written with WriteUint16, whose n and err are
// returned as-is. Narrowing is lossy: per f32bitsToF16bits, magnitudes too
// large for half precision become infinity and magnitudes too small become
// signed zero.
|
||||||
|
func (this *Encoder) WriteFloat16(value float32) (n int, err error) {
|
||||||
|
return this.WriteUint16(f32bitsToF16bits(math.Float32bits(value)))
|
||||||
|
}
|
||||||
|
|
||||||
// WriteFloat32 encodes a 32-bit floating point value to the output writer.
|
// WriteFloat32 encodes a 32-bit floating point value to the output writer.
|
||||||
func (this *Encoder) WriteFloat32(value float32) (n int, err error) {
|
func (this *Encoder) WriteFloat32(value float32) (n int, err error) {
|
||||||
return this.WriteUint32(math.Float32bits(value))
|
return this.WriteUint32(math.Float32bits(value))
|
||||||
@ -116,3 +121,69 @@ func (this *Encoder) WriteFloat64(value float64) (n int, err error) {
|
|||||||
func (this *Encoder) WriteTag(value Tag) (n int, err error) {
|
func (this *Encoder) WriteTag(value Tag) (n int, err error) {
|
||||||
return this.WriteUint8(uint8(value))
|
return this.WriteUint8(uint8(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// f32bitsToF16bits returns uint16 (Float16 bits) converted from the specified float32.
|
||||||
|
// Conversion rounds to the nearest representable value, with ties to even.
|
||||||
|
// Taken from https://github.com/x448/float16/blob/v0.8.4/float16.go
|
||||||
|
//
|
||||||
|
// MIT License
|
||||||
|
//
|
||||||
|
// Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in all
|
||||||
|
// copies or substantial portions of the Software.
|
||||||
|
// f32bitsToF16bits narrows a single-precision bit pattern to a half-precision
// bit pattern. NaN stays NaN, infinity stays infinity, magnitudes too large
// for half precision become signed infinity, and magnitudes too small become
// signed zero. Everything else is rounded to nearest with ties to even.
func f32bitsToF16bits(u32 uint32) uint16 {
|
||||||
|
// Translated from Rust to Go by Montgomery Edwards⁴⁴⁸ (github.com/x448).
|
||||||
|
// All 4294967296 conversions with this were confirmed to be correct by x448.
|
||||||
|
// Original Rust implementation is by Kathryn Long (github.com/starkat99) with MIT license.
|
||||||
|
|
||||||
|
// The three float32 fields are masked out but left in their original bit
// positions.
sign := u32 & 0x80000000
|
||||||
|
exp := u32 & 0x7f800000
|
||||||
|
coef := u32 & 0x007fffff
|
||||||
|
|
||||||
|
if exp == 0x7f800000 {
|
||||||
|
// NaN or Infinity
|
||||||
|
// For NaN, force significand bit 0x0200 on so the result stays a NaN even
// when the payload's top 10 bits (all that survive coef >> 13) are zero.
nanBit := uint32(0)
|
||||||
|
if coef != 0 {
|
||||||
|
nanBit = uint32(0x0200)
|
||||||
|
}
|
||||||
|
return uint16((sign >> 16) | uint32(0x7c00) | nanBit | (coef >> 13))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move the sign from bit 31 to bit 15.
halfSign := sign >> 16
|
||||||
|
|
||||||
|
// Remove the float32 bias (127) and apply the half-precision bias (15).
unbiasedExp := int32(exp>>23) - 127
|
||||||
|
halfExp := unbiasedExp + 15
|
||||||
|
|
||||||
|
// Exponent too large for half precision: return signed infinity.
if halfExp >= 0x1f {
|
||||||
|
return uint16(halfSign | uint32(0x7c00))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exponent at or below the half-precision minimum: the result is a
// subnormal or zero.
if halfExp <= 0 {
|
||||||
|
// A shift of more than 24 would discard every bit, including the implicit
// leading one, so the result is signed zero.
if 14-halfExp > 24 {
|
||||||
|
return uint16(halfSign)
|
||||||
|
}
|
||||||
|
// Shadow coef with a copy that has the implicit leading one made explicit.
coef := coef | uint32(0x00800000)
|
||||||
|
halfCoef := coef >> uint32(14-halfExp)
|
||||||
|
// roundBit is the highest bit discarded by the shift above.
roundBit := uint32(1) << uint32(13-halfExp)
|
||||||
|
// Round up when the round bit is set and either a lower discarded bit or
// the lowest kept bit is set (3*roundBit-1 masks exactly those bits);
// this is round-half-to-even.
if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 {
|
||||||
|
halfCoef++
|
||||||
|
}
|
||||||
|
return uint16(halfSign | halfCoef)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normal range: place the exponent above the 10-bit significand and keep
// the top 10 fraction bits.
uHalfExp := uint32(halfExp) << 10
|
||||||
|
halfCoef := coef >> 13
|
||||||
|
// 0x1000 is the highest of the 13 discarded fraction bits.
roundBit := uint32(0x00001000)
|
||||||
|
if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 {
|
||||||
|
// Adding 1 to the assembled pattern lets a significand overflow carry
// into the exponent field.
return uint16((halfSign | uHalfExp | halfCoef) + 1)
|
||||||
|
}
|
||||||
|
return uint16(halfSign | uHalfExp | halfCoef)
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user