tape: Add functions to encode and decode float16

2025-07-21 15:58:32 -04:00
parent f009a970cd
commit 5d84636b55
2 changed files with 129 additions and 0 deletions
--- a/tape/encode.go
+++ b/tape/encode.go
@@ -102,6 +102,11 @@ func (this *Encoder) WriteUintN(value uint64, bytes int) (n int, err error) {
 	return n, nil
 }

+// WriteFloat16 narrows value to a 16-bit floating point number and encodes it to the output writer; n and err are those returned by WriteUint16.
+func (this *Encoder) WriteFloat16(value float32) (n int, err error) {
+	return this.WriteUint16(f32bitsToF16bits(math.Float32bits(value))) // float32 bit pattern -> float16 bit pattern -> 16-bit write
+}
+
 // WriteFloat32 encodes a 32-bit floating point value to the output writer.
 func (this *Encoder) WriteFloat32(value float32) (n int, err error) {
 	return this.WriteUint32(math.Float32bits(value))
@@ -116,3 +121,69 @@ func (this *Encoder) WriteFloat64(value float64) (n int, err error) {
 func (this *Encoder) WriteTag(value Tag) (n int, err error) {
 	return this.WriteUint8(uint8(value))
 }
+
+// f32bitsToF16bits converts u32, the bit pattern of a float32, into the bit pattern of the closest float16 and returns it as a uint16.
+// Conversion rounds to the nearest representable float16 value, with ties to even; values too large become Infinity and values too small become zero.
+// Taken from https://github.com/x448/float16/blob/v0.8.4/float16.go
+//
+// MIT License
+//
+// Copyright (c) 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+func f32bitsToF16bits(u32 uint32) uint16 {
+	// Translated from Rust to Go by Montgomery Edwards⁴⁴⁸ (github.com/x448).
+	// All 4294967296 conversions with this were confirmed to be correct by x448.
+	// Original Rust implementation is by Kathryn Long (github.com/starkat99) with MIT license.
+
+	sign := u32 & 0x80000000 // sign bit, still at bit 31
+	exp := u32 & 0x7f800000  // 8 exponent bits, still at bits 30-23
+	coef := u32 & 0x007fffff // 23 fraction bits
+
+	if exp == 0x7f800000 {
+		// NaN or Infinity (float32 exponent all ones); a non-zero fraction means NaN.
+		nanBit := uint32(0)
+		if coef != 0 {
+			nanBit = uint32(0x0200) // forces a non-zero float16 fraction so a NaN never collapses to Infinity
+		}
+		return uint16((sign >> 16) | uint32(0x7c00) | nanBit | (coef >> 13)) // 0x7c00 sets all five float16 exponent bits; coef>>13 keeps the top 10 fraction bits
+	}
+
+	halfSign := sign >> 16 // sign bit moved to bit 15
+
+	unbiasedExp := int32(exp>>23) - 127 // remove the float32 exponent bias
+	halfExp := unbiasedExp + 15         // apply the float16 exponent bias
+
+	if halfExp >= 0x1f { // exponent does not fit in float16: overflow to signed Infinity
+		return uint16(halfSign | uint32(0x7c00))
+	}
+
+	if halfExp <= 0 { // below the float16 normal range: result is a subnormal or zero
+		if 14-halfExp > 24 {
+			return uint16(halfSign) // all 24 significand bits and the round bit would be shifted out: signed zero
+		}
+		coef := coef | uint32(0x00800000)           // restore the implicit leading 1 (shadows the outer coef)
+		halfCoef := coef >> uint32(14-halfExp)      // shift = 13 (fraction narrowing) + 1 - halfExp (denormalization)
+		roundBit := uint32(1) << uint32(13-halfExp) // highest bit that was shifted out
+		if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 {
+			halfCoef++ // round up: round bit set and either a lower bit or the result's lowest bit is set (ties to even)
+		}
+		return uint16(halfSign | halfCoef) // exponent field is zero unless rounding carried into bit 10 (smallest normal)
+	}
+
+	uHalfExp := uint32(halfExp) << 10 // exponent placed at bits 14-10
+	halfCoef := coef >> 13            // keep the top 10 of the 23 fraction bits
+	roundBit := uint32(0x00001000)    // bit 12: highest discarded fraction bit
+	if (coef&roundBit) != 0 && (coef&(3*roundBit-1)) != 0 {
+		return uint16((halfSign | uHalfExp | halfCoef) + 1) // round up (ties to even); a carry out of the fraction increments the exponent
+	}
+	return uint16(halfSign | uHalfExp | halfCoef)
+}