From 5d84636b55ad0237f3da8389420a061b64a5386a Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Mon, 21 Jul 2025 15:58:32 -0400
Subject: [PATCH] tape: Add functions to encode and decode float16

---
 tape/decode.go | 58 +++++++++++++++++++++++++++++++++++++++++
 tape/encode.go | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+)

diff --git a/tape/decode.go b/tape/decode.go
index 4dcfbd4..d754e85 100644
--- a/tape/decode.go
+++ b/tape/decode.go
@@ -112,6 +112,13 @@ func (this *Decoder) ReadUintN(bytes int) (value uint64, n int, err error) {
 	return value, n, nil
 }
 
+// ReadFloat16 decodes a 16-bit floating point value from the input reader and returns it widened to float32.
+func (this *Decoder) ReadFloat16() (value float32, n int, err error) {
+	bits, nn, err := this.ReadUint16() // raw 16-bit pattern of the encoded value
+	n += nn; if err != nil { return 0, n, err }
+	return math.Float32frombits(f16bitsToF32bits(bits)), n, nil // widen the bit pattern, then reinterpret it as a float32
+}
+
 // ReadFloat32 decldes a 32-bit floating point value from the input reader.
 func (this *Decoder) ReadFloat32() (value float32, n int, err error) {
 	bits, nn, err := this.ReadUint32()
@@ -132,3 +139,54 @@ func (this *Decoder) ReadTag() (value Tag, n int, err error) {
 	n += nn; if err != nil { return 0, n, err }
 	return Tag(uncasted), n, nil
 }
+
+// f16bitsToF32bits returns the float32 bit pattern (uint32) converted from the specified float16 bit pattern (uint16).
+// Taken from https://github.com/x448/float16/blob/v0.8.4/float16.go
+//
+// MIT License
+//
+// Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+func f16bitsToF32bits(in uint16) uint32 {
+	// All 65536 conversions with this were confirmed to be correct
+	// by Montgomery Edwards⁴⁴⁸ (github.com/x448).
+
+	sign := uint32(in&0x8000) << 16 // sign for 32-bit
+	exp := uint32(in&0x7c00) >> 10  // exponent for 16-bit
+	coef := uint32(in&0x03ff) << 13 // significand for 32-bit
+
+	if exp == 0x1f { // all exponent bits set: infinity or NaN
+		if coef == 0 {
+			// infinity
+			return sign | 0x7f800000 | coef
+		}
+		// NaN: keep the payload bits and force the top significand bit on
+		return sign | 0x7fc00000 | coef
+	}
+
+	if exp == 0 { // zero or subnormal
+		if coef == 0 {
+			// zero (the sign bit is preserved)
+			return sign
+		}
+
+		// normalize subnormal numbers: shift until the leading 1 reaches bit 23
+		exp++
+		for coef&0x7f800000 == 0 {
+			coef <<= 1
+			exp-- // may wrap below zero; the unsigned bias addition in the final return wraps it back
+		}
+		coef &= 0x007fffff // clear the leading 1 at bit 23, leaving only the 23 significand bits
+	}
+
+	return sign | ((exp + (0x7f - 0xf)) << 23) | coef // rebias the exponent from 15 to 127
+}
diff --git a/tape/encode.go b/tape/encode.go
index 24f12e4..efce7a0 100644
--- a/tape/encode.go
+++ b/tape/encode.go
@@ -102,6 +102,11 @@ func (this *Encoder) WriteUintN(value uint64, bytes int) (n int, err error) {
 	return n, nil
 }
 
+// WriteFloat16 converts value to a 16-bit floating point bit pattern and encodes it to the output writer.
+func (this *Encoder) WriteFloat16(value float32) (n int, err error) {
+	return this.WriteUint16(f32bitsToF16bits(math.Float32bits(value)))
+}
+
 // WriteFloat32 encodes a 32-bit floating point value to the output writer.
 func (this *Encoder) WriteFloat32(value float32) (n int, err error) {
 	return this.WriteUint32(math.Float32bits(value))
@@ -116,3 +121,69 @@ func (this *Encoder) WriteFloat64(value float64) (n int, err error) {
 func (this *Encoder) WriteTag(value Tag) (n int, err error) {
 	return this.WriteUint8(uint8(value))
 }
+
+// f32bitsToF16bits returns uint16 (Float16 bits) converted from the specified float32 bits.
+// Conversion rounds to the nearest representable value, with ties to even.
+// Taken from https://github.com/x448/float16/blob/v0.8.4/float16.go
+//
+// MIT License
+//
+// Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+func f32bitsToF16bits(u32 uint32) uint16 {
+	// Translated from Rust to Go by Montgomery Edwards⁴⁴⁸ (github.com/x448).
+	// All 4294967296 conversions with this were confirmed to be correct by x448.
+	// Original Rust implementation is by Kathryn Long (github.com/starkat99) with MIT license.
+
+	sign := u32 & 0x80000000 // sign bit, still in its 32-bit position
+	exp := u32 & 0x7f800000  // biased exponent field, not yet shifted down
+	coef := u32 & 0x007fffff // 23-bit significand field
+
+	if exp == 0x7f800000 {
+		// NaN or Infinity
+		nanBit := uint32(0)
+		if coef != 0 {
+			nanBit = uint32(0x0200) // keeps the 16-bit significand non-zero (NaN) even if the shift below drops the payload
+		}
+		return uint16((sign >> 16) | uint32(0x7c00) | nanBit | (coef >> 13))
+	}
+
+	halfSign := sign >> 16
+
+	unbiasedExp := int32(exp>>23) - 127
+	halfExp := unbiasedExp + 15 // exponent rebiased for 16-bit
+
+	if halfExp >= 0x1f { // exponent too large for 16-bit: overflow to signed infinity
+		return uint16(halfSign | uint32(0x7c00))
+	}
+
+	if halfExp <= 0 { // result is subnormal or zero in 16-bit
+		if 14-halfExp > 24 { // too small even to round up to the smallest subnormal: signed zero
+			return uint16(halfSign)
+		}
+		coef := coef | uint32(0x00800000) // shadows the outer coef; makes the implicit leading 1 explicit
+		halfCoef := coef >> uint32(14-halfExp)
+		roundBit := uint32(1) << uint32(13-halfExp) // highest bit discarded by the shift above
+		if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 { // round bit set and (lower bits set or result odd): ties to even
+			halfCoef++
+		}
+		return uint16(halfSign | halfCoef)
+	}
+
+	uHalfExp := uint32(halfExp) << 10
+	halfCoef := coef >> 13
+	roundBit := uint32(0x00001000) // highest of the 13 discarded bits
+	if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 { // round bit set and (lower bits set or result odd): ties to even
+		return uint16((halfSign | uHalfExp | halfCoef) + 1) // a carry out of the significand increments the exponent field
+	}
+	return uint16(halfSign | uHalfExp | halfCoef)
+}