You copied the Doc URL to your clipboard.

## Shared Functions.Float Pseudocode

```// BFAdd()
// =======
// Single-precision add following BFloat16 computation behaviors.

bits(32) result;

(type1,sign1,value1) = BFUnpack(op1);
(type2,sign2,value2) = BFUnpack(op2);
if type1 == FPType_QNaN || type2 == FPType_QNaN then
result = FPDefaultNaN();
else
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
if inf1 && inf2 && sign1 == NOT(sign2) then
result = FPDefaultNaN();
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == sign2 then
result = FPZero(sign1);
else
result_value = value1 + value2;
if result_value == 0.0 then
result = FPZero('0');    // Positive sign when Round to Odd
else
result = BFRound(result_value);

return result;```

```// BFMatMulAdd()
// =============
// BFloat16 matrix multiply and add to single-precision matrix
// result[2, 2] = addend[2, 2] + (op1[2, 4] * op2[4, 2])

assert N == 128;

bits(N) result;
bits(32) sum, prod0, prod1;

for i = 0 to 1
for j = 0 to 1
sum = Elem[addend, 2*i + j, 32];
for k = 0 to 1
prod0 = BFMul(Elem[op1, 4*i + 2*k + 0, 16], Elem[op2, 4*j + 2*k + 0, 16]);
prod1 = BFMul(Elem[op1, 4*i + 2*k + 1, 16], Elem[op2, 4*j + 2*k + 1, 16]);
Elem[result, 2*i + j, 32] = sum;

return result;```

### Library pseudocode for shared/functions/float/bfloat/BFMul

```// BFMul()
// =======
// BFloat16 widening multiply to single-precision following BFloat16
// computation behaviors.

bits(32) BFMul(bits(16) op1, bits(16) op2)

bits(32) result;

(type1,sign1,value1) = BFUnpack(op1);
(type2,sign2,value2) = BFUnpack(op2);
if type1 == FPType_QNaN || type2 == FPType_QNaN then
result = FPDefaultNaN();
else
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
if (inf1 && zero2) || (zero1 && inf2) then
result = FPDefaultNaN();
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = BFRound(value1*value2);

return result;```

### Library pseudocode for shared/functions/float/bfloat/BFRound

```// BFRound()
// =========
// Converts a real number OP into a single-precision value using the
// Round to Odd rounding mode and following BFloat16 computation behaviors.

bits(32) BFRound(real op)

assert op != 0.0;
bits(32) result;

// Format parameters - minimum exponent, numbers of exponent and fraction bits.
minimum_exp = -126;  E = 8;  F = 23;

// Split value into sign, unrounded mantissa and exponent.
if op < 0.0 then
sign = '1';  mantissa = -op;
else
sign = '0';  mantissa = op;
exponent = 0;
while mantissa < 1.0 do
mantissa = mantissa * 2.0;  exponent = exponent - 1;
while mantissa >= 2.0 do
mantissa = mantissa / 2.0;  exponent = exponent + 1;

// Fixed Flush-to-zero.
if exponent < minimum_exp then
return FPZero(sign);

// Start creating the exponent value for the result. Start by biasing the actual exponent
// so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow).
biased_exp = Max(exponent - minimum_exp + 1, 0);
if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent);

// Get the unrounded mantissa as an integer, and the "units in last place" rounding error.
int_mant = RoundDown(mantissa * 2.0^F);  // < 2.0^F if biased_exp == 0, >= 2.0^F if not
error = mantissa * 2.0^F - Real(int_mant);

// Round to Odd
if error != 0.0 then
int_mant<0> = '1';

// Deal with overflow and generate result.
if biased_exp >= 2^E - 1 then
result = FPInfinity(sign);      // Overflows generate appropriately-signed Infinity
else
result = sign : biased_exp<30-F:0> : int_mant<F-1:0>;

return result;```

### Library pseudocode for shared/functions/float/bfloat/BFUnpack

```// BFUnpack()
// ==========
// Unpacks a BFloat16 or single-precision value into its type,
// sign bit and real number that it represents.
// The real number result has the correct sign for numbers and infinities,
// is very large in magnitude for infinities, and is 0.0 for NaNs.
// (These values are chosen to simplify the description of
// comparisons and conversions.)

(FPType, bit, real) BFUnpack(bits(N) fpval)

assert N IN {16,32};

if N == 16 then
sign   = fpval<15>;
exp    = fpval<14:7>;
frac   = fpval<6:0> : Zeros(16);
else  // N == 32
sign   = fpval<31>;
exp    = fpval<30:23>;
frac   = fpval<22:0>;

if IsZero(exp) then
fptype = FPType_Zero;  value = 0.0;    // Fixed Flush to Zero
elsif IsOnes(exp) then
if IsZero(frac) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else    // no SNaN for BF16 arithmetic
fptype = FPType_QNaN; value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp)-127) * (1.0 + Real(UInt(frac)) * 2.0^-23);

if sign == '1' then value = -value;

return (fptype, sign, value);```

### Library pseudocode for shared/functions/float/bfloat/FPConvertBF

```// FPConvertBF()
// =============
// Converts a single-precision OP to BFloat16 value with rounding controlled by FPCR/FPSCR.

bits(16) FPConvertBF(bits(32) op, FPCRType fpcr, FPRounding rounding)

bits(32) result;    // BF16 value in top 16 bits
// Unpack floating-point operand optionally with flush-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
if fpcr.DN == '1' then
result = FPDefaultNaN();
else
result = FPConvertNaN(op);
if fptype == FPType_SNaN then
FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
result = FPRoundCVBF(value, fpcr, rounding);

// Returns correctly rounded BF16 value from top 16 bits
return result<31:16>;

// FPConvertBF()
// =============
// Converts a single-precision operand to BFloat16 value.

bits(16) FPConvertBF(bits(32) op, FPCRType fpcr)
return FPConvertBF(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/bfloat/FPRoundCVBF

```// FPRoundCVBF()
// =============
// Converts a real number OP into a BFloat16 value using the supplied rounding mode RMODE.

bits(32) FPRoundCVBF(real op, FPCRType fpcr, FPRounding rounding)
boolean isbfloat16 = TRUE;
return FPRoundBase(op, fpcr, rounding, isbfloat16);```

### Library pseudocode for shared/functions/float/fixedtofp/FixedToFP

```// FixedToFP()
// ===========

// Convert M-bit fixed point OP with FBITS fractional bits to
// N-bit precision floating point, controlled by UNSIGNED and ROUNDING.

bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)

assert N IN {16,32,64};
assert M IN {16,32,64};
bits(N) result;
assert fbits >= 0;
assert rounding != FPRounding_ODD;

// Correct signed-ness
int_operand = Int(op, unsigned);

// Scale by fractional bits and generate a real value
real_operand = Real(int_operand) / 2.0^fbits;

if real_operand == 0.0 then
result = FPZero('0');
else
result = FPRound(real_operand, fpcr, rounding);

return result;```

### Library pseudocode for shared/functions/float/fpabs/FPAbs

```// FPAbs()
// =======

bits(N) FPAbs(bits(N) op)

assert N IN {16,32,64};

return '0' : op<N-2:0>;```

```// FPAdd()
// =======

bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1  = (type1 == FPType_Infinity);  inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);      zero2 = (type2 == FPType_Zero);
if inf1 && inf2 && sign1 == NOT(sign2) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == sign2 then
result = FPZero(sign1);
else
result_value = value1 + value2;
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr, rounding);

return result;```

### Library pseudocode for shared/functions/float/fpcommon/IsDenormalizedValue

```// IsDenormalizedValue()
// =====================
// Checks either a single-precision or a double-precision floating-point
// value is denormalized.

boolean IsDenormalizedValue(bits(N) fpval)
assert N IN {32,64};

case N of
when 32
exp32      = fpval<30:23>;
frac32     = fpval<22:0>;
isDenormal = IsZero(exp32) && !IsZero(frac32);
when 64
exp64      = fpval<62:52>;
frac64     = fpval<51:0>;
isDenormal = IsZero(exp64) && !IsZero(frac64);

return isDenormal;```

### Library pseudocode for shared/functions/float/fpcompare/FPCompare

```// FPCompare()
// ===========

bits(4) FPCompare(bits(N) op1, bits(N) op2, boolean signal_nans, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};

if op1_nan || op2_nan then
result = '0011';
if type1 == FPType_SNaN || type2 == FPType_SNaN || signal_nans then
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
if value1 == value2 then
result = '0110';
elsif value1 < value2 then
result = '1000';
else  // value1 > value2
result = '0010';

return result;```

### Library pseudocode for shared/functions/float/fpcompareeq/FPCompareEQ

```// FPCompareEQ()
// =============

boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};

if op1_nan || op2_nan then
result = FALSE;
if type1 == FPType_SNaN || type2 == FPType_SNaN then
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 == value2);

return result;```

### Library pseudocode for shared/functions/float/fpcomparege/FPCompareGE

```// FPCompareGE()
// =============

boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};

if op1_nan || op2_nan then
result = FALSE;
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 >= value2);

return result;```

### Library pseudocode for shared/functions/float/fpcomparegt/FPCompareGT

```// FPCompareGT()
// =============

boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};

if op1_nan || op2_nan then
result = FALSE;
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 > value2);

return result;```

### Library pseudocode for shared/functions/float/fpconvert/FPConvert

```// FPConvert()
// ===========

// Convert floating point OP with N-bit precision to M-bit precision,
// with rounding controlled by ROUNDING.
// This is used by the FP-to-FP conversion instructions and so for
// half-precision data ignores FZ16, but observes AHP.

bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding)

assert M IN {16,32,64};
assert N IN {16,32,64};
bits(M) result;

// Unpack floating-point operand optionally with flush-to-zero.
(fptype,sign,value) = FPUnpackCV(op, fpcr);
alt_hp = (M == 16) && (fpcr.AHP == '1');

if fptype == FPType_SNaN || fptype == FPType_QNaN then
if alt_hp then
result = FPZero(sign);
elsif fpcr.DN == '1' then
result = FPDefaultNaN();
else
result = FPConvertNaN(op);
if fptype == FPType_SNaN || alt_hp then
FPProcessException(FPExc_InvalidOp,fpcr);
elsif fptype == FPType_Infinity then
if alt_hp then
result = sign:Ones(M-1);
FPProcessException(FPExc_InvalidOp, fpcr);
else
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
result = FPRoundCV(value, fpcr, rounding);

return result;

// FPConvert()
// ===========

bits(M) FPConvert(bits(N) op, FPCRType fpcr)
return FPConvert(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/fpconvertnan/FPConvertNaN

```// FPConvertNaN()
// ==============
// Converts a NaN of one floating-point type to another

bits(M) FPConvertNaN(bits(N) op)

assert N IN {16,32,64};
assert M IN {16,32,64};
bits(M) result;
bits(51) frac;

sign = op<N-1>;

// Unpack payload from input NaN
case N of
when 64 frac = op<50:0>;
when 32 frac = op<21:0>:Zeros(29);
when 16 frac = op<8:0>:Zeros(42);

// Repack payload into output NaN, while
// converting an SNaN to a QNaN.
case M of
when 64 result = sign:Ones(M-52):frac;
when 32 result = sign:Ones(M-23):frac<50:29>;
when 16 result = sign:Ones(M-10):frac<50:42>;

return result;```

### Library pseudocode for shared/functions/float/fpcrtype/FPCRType

`type FPCRType;`

### Library pseudocode for shared/functions/float/fpdecoderm/FPDecodeRM

```// FPDecodeRM()
// ============

// Decode most common AArch32 floating-point rounding encoding.

FPRounding FPDecodeRM(bits(2) rm)

case rm of
when '00' result = FPRounding_TIEAWAY; // A
when '01' result = FPRounding_TIEEVEN; // N
when '10' result = FPRounding_POSINF;  // P
when '11' result = FPRounding_NEGINF;  // M

return result;```

### Library pseudocode for shared/functions/float/fpdecoderounding/FPDecodeRounding

```// FPDecodeRounding()
// ==================

// Decode floating-point rounding mode and common AArch64 encoding.

FPRounding FPDecodeRounding(bits(2) rmode)
case rmode of
when '00' return FPRounding_TIEEVEN; // N
when '01' return FPRounding_POSINF;  // P
when '10' return FPRounding_NEGINF;  // M
when '11' return FPRounding_ZERO;    // Z```

### Library pseudocode for shared/functions/float/fpdefaultnan/FPDefaultNaN

```// FPDefaultNaN()
// ==============

bits(N) FPDefaultNaN()

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
sign = '0';

bits(E) exp  = Ones(E);
bits(F) frac = '1':Zeros(F-1);

return sign : exp : frac;```

### Library pseudocode for shared/functions/float/fpdiv/FPDiv

```// FPDiv()
// =======

bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);

if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
invalidop = (inf1 && inf2) || (zero1 && zero2);
if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif inf1 || zero2 then
result = FPInfinity(sign1 EOR sign2);
if !inf1 then FPProcessException(FPExc_DivideByZero, fpcr);
elsif zero1 || inf2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1/value2, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpexc/FPExc

```enumeration FPExc       {FPExc_InvalidOp, FPExc_DivideByZero, FPExc_Overflow,
FPExc_Underflow, FPExc_Inexact, FPExc_InputDenorm};```

### Library pseudocode for shared/functions/float/fpinfinity/FPInfinity

```// FPInfinity()
// ============

bits(N) FPInfinity(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
bits(E) exp  = Ones(E);
bits(F) frac = Zeros(F);

return sign : exp : frac;```

```// FPMatMulAdd()
// =============
//
// Floating point matrix multiply and add to same precision matrix
// result[2, 2] = addend[2, 2] + (op1[2, 2] * op2[2, 2])

assert N == esize * 2 * 2;
bits(N)  result;
bits(esize) prod0, prod1, sum;

for i = 0 to 1
for j = 0 to 1
sum   = Elem[addend, 2*i + j, esize];
prod0 = FPMul(Elem[op1, 2*i + 0, esize],
Elem[op2, 2*j + 0, esize], fpcr);
prod1 = FPMul(Elem[op1, 2*i + 1, esize],
Elem[op2, 2*j + 1, esize], fpcr);
Elem[result, 2*i + j, esize] = sum;

return result;```

### Library pseudocode for shared/functions/float/fpmax/FPMax

```// FPMax()
// =======

bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
if value1 > value2 then
(fptype,sign,value) = (type1,sign1,value1);
else
(fptype,sign,value) = (type2,sign2,value2);
if fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
sign = sign1 AND sign2; // Use most positive sign
result = FPZero(sign);
else
// The use of FPRound() covers the case where there is a trapped underflow exception
// for a denormalized number even though the result is exact.
result = FPRound(value, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpmaxnormal/FPMaxNormal

```// FPMaxNormal()
// =============

bits(N) FPMaxNormal(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = Ones(E-1):'0';
frac = Ones(F);

return sign : exp : frac;```

### Library pseudocode for shared/functions/float/fpmaxnum/FPMaxNum

```// FPMaxNum()
// ==========

bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,-,-) = FPUnpack(op1, fpcr);
(type2,-,-) = FPUnpack(op2, fpcr);

// treat a single quiet-NaN as -Infinity
if type1 == FPType_QNaN && type2 != FPType_QNaN then
op1 = FPInfinity('1');
elsif type1 != FPType_QNaN && type2 == FPType_QNaN then
op2 = FPInfinity('1');

result = FPMax(op1, op2, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpmin/FPMin

```// FPMin()
// =======

bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
if value1 < value2 then
(fptype,sign,value) = (type1,sign1,value1);
else
(fptype,sign,value) = (type2,sign2,value2);
if fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
sign = sign1 OR sign2; // Use most negative sign
result = FPZero(sign);
else
// The use of FPRound() covers the case where there is a trapped underflow exception
// for a denormalized number even though the result is exact.
result = FPRound(value, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpminnum/FPMinNum

```// FPMinNum()
// ==========

bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,-,-) = FPUnpack(op1, fpcr);
(type2,-,-) = FPUnpack(op2, fpcr);

// Treat a single quiet-NaN as +Infinity
if type1 == FPType_QNaN && type2 != FPType_QNaN then
op1 = FPInfinity('0');
elsif type1 != FPType_QNaN && type2 == FPType_QNaN then
op2 = FPInfinity('0');

result = FPMin(op1, op2, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpmul/FPMul

```// FPMul()
// =======

bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
invalidop = (inf1 && zero2) || (zero1 && inf2);
if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1*value2, fpcr);

return result;```

```// FPMulAdd()
// ==========
//
// Calculates addend + op1*op2 with a single rounding. The 'fpcr' argument
// supplies the FPCR control bits.

assert N IN {16,32,64};

rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero);
inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero);

(done,result) = FPProcessNaNs3(typeA, type1, type2,

if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

if !done then
infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero);

// Determine sign and type product will have if it does not cause an
// Invalid Operation.
signP = sign1 EOR sign2;
infP  = inf1 || inf2;
zeroP = zero1 || zero2;

// Non SNaN-generated Invalid Operation cases are multiplies of zero
// by infinity and additions of opposite-signed infinities.
invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP);

if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
// Other cases involving infinities produce an infinity of the same sign.
elsif (infA && signA == '0') || (infP && signP == '0') then
result = FPInfinity('0');
elsif (infA && signA == '1') || (infP && signP == '1') then
result = FPInfinity('1');

// Cases where the result is exactly zero and its sign is not determined by the
// rounding mode are additions of same-signed zeros.
elsif zeroA && zeroP && signA == signP then
result = FPZero(signA);

// Otherwise calculate numerical result and round it.
else
result_value = valueA + (value1 * value2);
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr);

return result;```

```// FPMulAddH()
// ===========

bits(N) FPMulAddH(bits(N) addend, bits(N DIV 2) op1, bits(N DIV 2) op2, FPCRType fpcr)
assert N IN {32,64};
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero);
inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero);
(done,result) = FPProcessNaNs3H(typeA, type1, type2, addend, op1, op2, fpcr);

if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

if !done then
infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero);

// Determine sign and type product will have if it does not cause an
// Invalid Operation.
signP = sign1 EOR sign2;
infP = inf1 || inf2;
zeroP = zero1 || zero2;

// Non SNaN-generated Invalid Operation cases are multiplies of zero by infinity and
invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP);

if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

// Other cases involving infinities produce an infinity of the same sign.
elsif (infA && signA == '0') || (infP && signP == '0') then
result = FPInfinity('0');
elsif (infA && signA == '1') || (infP && signP == '1') then
result = FPInfinity('1');

// Cases where the result is exactly zero and its sign is not determined by the
// rounding mode are additions of same-signed zeros.
elsif zeroA && zeroP && signA == signP then
result = FPZero(signA);

// Otherwise calculate numerical result and round it.
else
result_value = valueA + (value1 * value2);
if result_value == 0.0 then // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr);

return result;```

```// FPProcessNaNs3H()
// =================

(boolean, bits(N)) FPProcessNaNs3H(FPType type1, FPType type2, FPType type3,
bits(N) op1, bits(N DIV 2) op2, bits(N DIV 2) op3,
FPCRType fpcr)

assert N IN {32,64};

bits(N) result;
if type1 == FPType_SNaN then
done = TRUE; result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_SNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr));
elsif type3 == FPType_SNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr));
elsif type1 == FPType_QNaN then
done = TRUE; result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_QNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr));
elsif type3 == FPType_QNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr));
else
done = FALSE; result = Zeros(); // 'Don't care' result

return (done, result);```

### Library pseudocode for shared/functions/float/fpmulx/FPMulX

```// FPMulX()
// ========

bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
bits(N) result;
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);

if (inf1 && zero2) || (zero1 && inf2) then
result = FPTwo(sign1 EOR sign2);
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1*value2, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpneg/FPNeg

```// FPNeg()
// =======

bits(N) FPNeg(bits(N) op)

assert N IN {16,32,64};

return NOT(op<N-1>) : op<N-2:0>;```

### Library pseudocode for shared/functions/float/fponepointfive/FPOnePointFive

```// FPOnePointFive()
// ================

bits(N) FPOnePointFive(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '0':Ones(E-1);
frac = '1':Zeros(F-1);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/fpprocessexception/FPProcessException

```// FPProcessException()
// ====================
//
// The 'fpcr' argument supplies FPCR control bits. Status information is
// updated directly in the FPSR where appropriate.

FPProcessException(FPExc exception, FPCRType fpcr)

// Determine the cumulative exception bit number
case exception of
when FPExc_InvalidOp     cumul = 0;
when FPExc_DivideByZero  cumul = 1;
when FPExc_Overflow      cumul = 2;
when FPExc_Underflow     cumul = 3;
when FPExc_Inexact       cumul = 4;
when FPExc_InputDenorm   cumul = 7;
enable = cumul + 8;
if fpcr<enable> == '1' then
// Trapping of the exception enabled.
// It is IMPLEMENTATION DEFINED whether the enable bit may be set at all, and
// if so then how exceptions may be accumulated before calling FPTrappedException()
IMPLEMENTATION_DEFINED "floating-point trap handling";
elsif UsingAArch32() then
// Set the cumulative exception bit
FPSCR<cumul> = '1';
else
// Set the cumulative exception bit
FPSR<cumul> = '1';

return;```

### Library pseudocode for shared/functions/float/fpprocessnan/FPProcessNaN

```// FPProcessNaN()
// ==============

bits(N) FPProcessNaN(FPType fptype, bits(N) op, FPCRType fpcr)

assert N IN {16,32,64};
assert fptype IN {FPType_QNaN, FPType_SNaN};

case N of
when 16 topfrac =  9;
when 32 topfrac = 22;
when 64 topfrac = 51;

result = op;
if fptype == FPType_SNaN then
result<topfrac> = '1';
FPProcessException(FPExc_InvalidOp, fpcr);
if fpcr.DN == '1' then  // DefaultNaN requested
result = FPDefaultNaN();

return result;```

### Library pseudocode for shared/functions/float/fpprocessnans/FPProcessNaNs

```// FPProcessNaNs()
// ===============
//
// The boolean part of the return value says whether a NaN has been found and
// processed. The bits(N) part is only relevant if it has and supplies the
// result of the operation.
//
// The 'fpcr' argument supplies FPCR control bits. Status information is
// updated directly in the FPSR where appropriate.

(boolean, bits(N)) FPProcessNaNs(FPType type1, FPType type2,
bits(N) op1, bits(N) op2,
FPCRType fpcr)
assert N IN {16,32,64};
if type1 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr);
elsif type1 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr);
else
done = FALSE;  result = Zeros();  // 'Don't care' result

return (done, result);```

### Library pseudocode for shared/functions/float/fpprocessnans3/FPProcessNaNs3

```// FPProcessNaNs3()
// ================
//
// The boolean part of the return value says whether a NaN has been found and
// processed. The bits(N) part is only relevant if it has and supplies the
// result of the operation.
//
// The 'fpcr' argument supplies FPCR control bits. Status information is
// updated directly in the FPSR where appropriate.

(boolean, bits(N)) FPProcessNaNs3(FPType type1, FPType type2, FPType type3,
bits(N) op1, bits(N) op2, bits(N) op3,
FPCRType fpcr)
assert N IN {16,32,64};

if type1 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr);
elsif type3 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type3, op3, fpcr);
elsif type1 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr);
elsif type2 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr);
elsif type3 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type3, op3, fpcr);
else
done = FALSE;  result = Zeros();  // 'Don't care' result

return (done, result);```

### Library pseudocode for shared/functions/float/fprecipestimate/FPRecipEstimate

```// FPRecipEstimate()
// =================

bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)

assert N IN {16,32,64};
(fptype,sign,value) = FPUnpack(operand, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, operand, fpcr);
elsif fptype == FPType_Infinity then
result = FPZero(sign);
elsif fptype == FPType_Zero then
result = FPInfinity(sign);
FPProcessException(FPExc_DivideByZero, fpcr);
elsif (
(N == 16 && Abs(value) < 2.0^-16) ||
(N == 32 && Abs(value) < 2.0^-128) ||
(N == 64 && Abs(value) < 2.0^-1024)
) then
case FPRoundingMode(fpcr) of
when FPRounding_TIEEVEN
overflow_to_inf = TRUE;
when FPRounding_POSINF
overflow_to_inf = (sign == '0');
when FPRounding_NEGINF
overflow_to_inf = (sign == '1');
when FPRounding_ZERO
overflow_to_inf = FALSE;
result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign);
FPProcessException(FPExc_Overflow, fpcr);
FPProcessException(FPExc_Inexact, fpcr);
elsif ((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16))
&& (
(N == 16 && Abs(value) >= 2.0^14) ||
(N == 32 && Abs(value) >= 2.0^126) ||
(N == 64 && Abs(value) >= 2.0^1022)
) then
// Result flushed to zero of correct sign
result = FPZero(sign);

// Flush-to-zero never generates a trapped exception.
if UsingAArch32() then
FPSCR.UFC = '1';
else
FPSR.UFC = '1';
else
// Scale to a fixed point value in the range 0.5 <= x < 1.0 in steps of 1/512, and
// calculate result exponent. Scaled value has copied sign bit,
// exponent = 1022 = double-precision biased version of -1,
// fraction = original fraction
case N of
when 16
fraction = operand<9:0> : Zeros(42);
exp = UInt(operand<14:10>);
when 32
fraction = operand<22:0> : Zeros(29);
exp = UInt(operand<30:23>);
when 64
fraction = operand<51:0>;
exp = UInt(operand<62:52>);

if exp == 0 then
if fraction<51> == '0' then
exp = -1;
fraction = fraction<49:0>:'00';
else
fraction = fraction<50:0>:'0';

integer scaled = UInt('1':fraction<51:44>);

case N of
when 16 result_exp =   29 - exp; // In range 29-30 = -1 to 29+1 = 30
when 32 result_exp =  253 - exp; // In range 253-254 = -1 to 253+1 = 254
when 64 result_exp = 2045 - exp; // In range 2045-2046 = -1 to 2045+1 = 2046

// scaled is in range 256..511 representing a fixed-point number in range [0.5..1.0)
estimate = RecipEstimate(scaled);

// estimate is in the range 256..511 representing a fixed point result in the range [1.0..2.0)
// Convert to scaled floating point result with copied sign bit,
// high-order bits from estimate, and exponent calculated above.

fraction = estimate<7:0> : Zeros(44);
if result_exp == 0 then
fraction = '1' : fraction<51:1>;
elsif result_exp == -1 then
fraction = '01' : fraction<51:2>;
result_exp = 0;

case N of
when 16 result = sign : result_exp<N-12:0> : fraction<51:42>;
when 32 result = sign : result_exp<N-25:0> : fraction<51:29>;
when 64 result = sign : result_exp<N-54:0> : fraction<51:0>;

return result;```

### Library pseudocode for shared/functions/float/fprecipestimate/RecipEstimate

```// Compute estimate of reciprocal of 9-bit fixed-point number
//
// a is in range 256 .. 511 representing a number in the range 0.5 <= x < 1.0.
// result is in the range 256 .. 511 representing a number in the range in the range 1.0 to 511/256.

integer RecipEstimate(integer a)
assert 256 <= a && a < 512;
a = a*2+1; // round to nearest
integer b = (2 ^ 19) DIV a;
r = (b+1) DIV 2; // round to nearest
assert 256 <= r && r < 512;
return r;```

### Library pseudocode for shared/functions/float/fprecpx/FPRecpX

```// FPRecpX()
// =========

bits(N) FPRecpX(bits(N) op, FPCRType fpcr)

assert N IN {16,32,64};

case N of
when 16 esize =  5;
when 32 esize =  8;
when 64 esize = 11;

bits(N)           result;
bits(esize)       exp;
bits(esize)       max_exp;
bits(N-(esize+1)) frac = Zeros();

case N of
when 16 exp = op<10+esize-1:10>;
when 32 exp = op<23+esize-1:23>;
when 64 exp = op<52+esize-1:52>;

max_exp = Ones(esize) - 1;

(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr);
else
if IsZero(exp) then // Zero and denormals
result = sign:max_exp:frac;
else // Infinities and normals
result = sign:NOT(exp):frac;

return result;```

### Library pseudocode for shared/functions/float/fpround/FPRound

```// FPRound()
// =========
// Used by data processing and int/fixed <-> FP conversion instructions.
// For half-precision data it ignores AHP, and observes FZ16.

bits(N) FPRound(real op, FPCRType fpcr, FPRounding rounding)
fpcr.AHP = '0';
boolean isbfloat16 = FALSE;
return FPRoundBase(op, fpcr, rounding, isbfloat16);

// FPRound()
// =========

bits(N) FPRound(real op, FPCRType fpcr)
return FPRound(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/fpround/FPRoundBase

```// FPRoundBase()
// =============
// Convert a real number OP into an N-bit floating-point value using the
// supplied rounding mode RMODE.

bits(N) FPRoundBase(real op, FPCRType fpcr, FPRounding rounding, boolean isbfloat16)
assert N IN {16,32,64};
assert op != 0.0;
assert rounding != FPRounding_TIEAWAY;
bits(N) result;

// Obtain format parameters - minimum exponent, numbers of exponent and fraction bits.
if N == 16 then
minimum_exp = -14;  E = 5;  F = 10;
elsif N == 32 && isbfloat16 then
minimum_exp = -126;  E = 8;  F = 7;
elsif N == 32 then
minimum_exp = -126;  E = 8;  F = 23;
else  // N == 64
minimum_exp = -1022;  E = 11;  F = 52;

// Split value into sign, unrounded mantissa and exponent.
if op < 0.0 then
sign = '1';  mantissa = -op;
else
sign = '0';  mantissa = op;
exponent = 0;
while mantissa < 1.0 do
mantissa = mantissa * 2.0;  exponent = exponent - 1;
while mantissa >= 2.0 do
mantissa = mantissa / 2.0;  exponent = exponent + 1;

if (((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16)) &&
exponent < minimum_exp) then
// Flush-to-zero never generates a trapped exception.
if UsingAArch32() then
FPSCR.UFC = '1';
else
FPSR.UFC = '1';
return FPZero(sign);

// Start creating the exponent value for the result. Start by biasing the actual exponent
// so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow).
biased_exp = Max(exponent - minimum_exp + 1, 0);
if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent);

// Get the unrounded mantissa as an integer, and the "units in last place" rounding error.
int_mant = RoundDown(mantissa * 2.0^F);  // < 2.0^F if biased_exp == 0, >= 2.0^F if not
error = mantissa * 2.0^F - Real(int_mant);

// Underflow occurs if exponent is too small before rounding, and result is inexact or
// the Underflow exception is trapped.
if biased_exp == 0 && (error != 0.0 || fpcr.UFE == '1') then
FPProcessException(FPExc_Underflow, fpcr);

// Round result according to rounding mode.
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_mant<0> == '1'));
overflow_to_inf = TRUE;
when FPRounding_POSINF
round_up = (error != 0.0 && sign == '0');
overflow_to_inf = (sign == '0');
when FPRounding_NEGINF
round_up = (error != 0.0 && sign == '1');
overflow_to_inf = (sign == '1');
when FPRounding_ZERO, FPRounding_ODD
round_up = FALSE;
overflow_to_inf = FALSE;

if round_up then
int_mant = int_mant + 1;
if int_mant == 2^F then      // Rounded up from denormalized to normalized
biased_exp = 1;
if int_mant == 2^(F+1) then  // Rounded up to next exponent
biased_exp = biased_exp + 1;  int_mant = int_mant DIV 2;

// Handle rounding to odd aka Von Neumann rounding
if error != 0.0 && rounding == FPRounding_ODD then
int_mant<0> = '1';

// Deal with overflow and generate result.
if N != 16 || fpcr.AHP == '0' then  // Single, double or IEEE half precision
if biased_exp >= 2^E - 1 then
result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign);
FPProcessException(FPExc_Overflow, fpcr);
error = 1.0;  // Ensure that an Inexact exception occurs
else
result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1));
else                                     // Alternative half precision
if biased_exp >= 2^E then
result = sign : Ones(N-1);
FPProcessException(FPExc_InvalidOp, fpcr);
error = 0.0;  // Ensure that an Inexact exception does not occur
else
result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1));

// Deal with Inexact exception.
if error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpround/FPRoundCV

```// FPRoundCV()
// ===========
// Used for FP <-> FP conversion instructions.
// For half-precision data ignores FZ16 and observes AHP.

bits(N) FPRoundCV(real op, FPCRType fpcr, FPRounding rounding)
fpcr.FZ16 = '0';
boolean isbfloat16 = FALSE;
return FPRoundBase(op, fpcr, rounding, isbfloat16);```

### Library pseudocode for shared/functions/float/fprounding/FPRounding

```enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
FPRounding_NEGINF,  FPRounding_ZERO,
FPRounding_TIEAWAY, FPRounding_ODD};```

### Library pseudocode for shared/functions/float/fproundingmode/FPRoundingMode

```// FPRoundingMode()
// ================

// Return the current floating-point rounding mode.

FPRounding FPRoundingMode(FPCRType fpcr)
return FPDecodeRounding(fpcr.RMode);```

### Library pseudocode for shared/functions/float/fproundint/FPRoundInt

```// FPRoundInt()
// ============

// Round op to nearest integral floating point value using rounding mode in FPCR/FPSCR.
// If EXACT is TRUE, set FPSR.IXC if result is not numerically equal to op.

bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)

assert rounding != FPRounding_ODD;
assert N IN {16,32,64};

// Unpack using FPCR to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr);
elsif fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
// Extract integer component.
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1'));
when FPRounding_POSINF
round_up = (error != 0.0);
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = (error != 0.0 && int_result < 0);
when FPRounding_TIEAWAY
round_up = (error > 0.5 || (error == 0.5 && int_result >= 0));

if round_up then int_result = int_result + 1;

// Convert integer value into an equivalent real value.
real_result = Real(int_result);

// Re-encode as a floating-point value, result is always exact.
if real_result == 0.0 then
result = FPZero(sign);
else
result = FPRound(real_result, fpcr, FPRounding_ZERO);

// Generate inexact exceptions.
if error != 0.0 && exact then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fproundintn/FPRoundIntN

```// FPRoundIntN()
// =============

bits(N) FPRoundIntN(bits(N) op, FPCRType fpcr, FPRounding rounding, integer intsize)
assert rounding != FPRounding_ODD;
assert N IN {32,64};
assert intsize IN {32, 64};
integer exp;
constant integer E = (if N == 32 then 8 else 11);
constant integer F = N - (E + 1);

// Unpack using FPCR to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype IN {FPType_SNaN, FPType_QNaN, FPType_Infinity} then
if N == 32 then
exp = 126 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
else
exp = 1022+intsize;
result = '1':exp<(E-1):0>:Zeros(F);
FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
// Extract integer component.
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = error > 0.5 || (error == 0.5 && int_result<0> == '1');
when FPRounding_POSINF
round_up = error != 0.0;
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = error != 0.0 && int_result < 0;
when FPRounding_TIEAWAY
round_up = error > 0.5 || (error == 0.5 && int_result >= 0);

if round_up then int_result = int_result + 1;
overflow = int_result > 2^(intsize-1)-1 || int_result < -1*2^(intsize-1);

if overflow then
if N == 32 then
exp = 126 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
else
exp = 1022 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
FPProcessException(FPExc_InvalidOp, fpcr);
// This case shouldn't set Inexact.
error = 0.0;

else
// Convert integer value into an equivalent real value.
real_result = Real(int_result);

// Re-encode as a floating-point value, result is always exact.
if real_result == 0.0 then
result = FPZero(sign);
else
result = FPRound(real_result, fpcr, FPRounding_ZERO);

// Generate inexact exceptions.
if error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fprsqrtestimate/FPRSqrtEstimate

```// FPRSqrtEstimate()
// =================

bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)

assert N IN {16,32,64};
(fptype,sign,value) = FPUnpack(operand, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, operand, fpcr);
elsif fptype == FPType_Zero then
result = FPInfinity(sign);
FPProcessException(FPExc_DivideByZero, fpcr);
elsif sign == '1' then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Infinity then
result = FPZero('0');
else
// Scale to a fixed-point value in the range 0.25 <= x < 1.0 in steps of 512, with the
// evenness or oddness of the exponent unchanged, and calculate result exponent.
// Scaled value has copied sign bit, exponent = 1022 or 1021 = double-precision
// biased version of -1 or -2, fraction = original fraction extended with zeros.

case N of
when 16
fraction = operand<9:0> : Zeros(42);
exp = UInt(operand<14:10>);
when 32
fraction = operand<22:0> : Zeros(29);
exp = UInt(operand<30:23>);
when 64
fraction = operand<51:0>;
exp = UInt(operand<62:52>);

if exp == 0 then
while fraction<51> == '0' do
fraction = fraction<50:0> : '0';
exp = exp - 1;
fraction = fraction<50:0> : '0';

if exp<0> == '0' then
scaled = UInt('1':fraction<51:44>);
else
scaled = UInt('01':fraction<51:45>);

case N of
when 16 result_exp = (  44 - exp) DIV 2;
when 32 result_exp = ( 380 - exp) DIV 2;
when 64 result_exp = (3068 - exp) DIV 2;

estimate = RecipSqrtEstimate(scaled);

// estimate is in the range 256..511 representing a fixed point result in the range [1.0..2.0)
// Convert to scaled floating point result with copied sign bit and high-order
// fraction bits, and exponent calculated above.
case N of
when 16 result = '0' : result_exp<N-12:0> : estimate<7:0>:Zeros( 2);
when 32 result = '0' : result_exp<N-25:0> : estimate<7:0>:Zeros(15);
when 64 result = '0' : result_exp<N-54:0> : estimate<7:0>:Zeros(44);

return result;```

### Library pseudocode for shared/functions/float/fprsqrtestimate/RecipSqrtEstimate

```// Compute estimate of reciprocal square root of 9-bit fixed-point number
//
// a is in range 128 .. 511 representing a number in the range 0.25 <= x < 1.0.
// result is in the range 256 .. 511 representing a number in the range in the range 1.0 to 511/256.

integer RecipSqrtEstimate(integer a)
assert 128 <= a && a < 512;
if a < 256 then // 0.25 .. 0.5
a = a*2+1;     // a in units of 1/512 rounded to nearest
else // 0.5 .. 1.0
a = (a >> 1) << 1;   // discard bottom bit
a = (a+1)*2;  // a in units of 1/256 rounded to nearest
integer b = 512;
while a*(b+1)*(b+1) < 2^28 do
b = b+1;
// b = largest b such that b < 2^14 / sqrt(a) do
r = (b+1) DIV 2; // round to nearest
assert 256 <= r && r < 512;
return r;```

### Library pseudocode for shared/functions/float/fpsqrt/FPSqrt

```// FPSqrt()
// ========

bits(N) FPSqrt(bits(N) op, FPCRType fpcr)
assert N IN {16,32,64};
(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr);
elsif fptype == FPType_Zero then
result = FPZero(sign);
elsif fptype == FPType_Infinity && sign == '0' then
result = FPInfinity(sign);
elsif sign == '1' then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
else
result = FPRound(Sqrt(value), fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpsub/FPSub

```// FPSub()
// =======

bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)

assert N IN {16,32,64};
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
invalidop =  inf1 && inf2 && sign1 == sign2;

if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '1') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '0') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == NOT(sign2) then
result = FPZero(sign1);
else
result_value = value1 - value2;
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr, rounding);

return result;```

### Library pseudocode for shared/functions/float/fpthree/FPThree

```// FPThree()
// =========

bits(N) FPThree(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '1':Zeros(E-1);
frac = '1':Zeros(F-1);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/fptofixed/FPToFixed

```// FPToFixed()
// ===========

// Convert N-bit precision floating point OP to M-bit fixed point with
// FBITS fractional bits, controlled by UNSIGNED and ROUNDING.

bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)

assert N IN {16,32,64};
assert M IN {16,32,64};
assert fbits >= 0;
assert rounding != FPRounding_ODD;

// Unpack using fpcr to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr);

// If NaN, set cumulative flag or take exception.
if fptype == FPType_SNaN || fptype == FPType_QNaN then
FPProcessException(FPExc_InvalidOp, fpcr);

// Scale by fractional bits and produce integer rounded towards minus-infinity.
value = value * 2.0^fbits;
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1'));
when FPRounding_POSINF
round_up = (error != 0.0);
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = (error != 0.0 && int_result < 0);
when FPRounding_TIEAWAY
round_up = (error > 0.5 || (error == 0.5 && int_result >= 0));

if round_up then int_result = int_result + 1;

// Generate saturated result and exceptions.
(result, overflow) = SatQ(int_result, M, unsigned);
if overflow then
FPProcessException(FPExc_InvalidOp, fpcr);
elsif error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fptofixedjs/FPToFixedJS

```// FPToFixedJS()
// =============

// Converts a double precision floating point input value
// to a signed integer, with rounding to zero.

(bits(N), bit) FPToFixedJS(bits(M) op, FPCRType fpcr, boolean Is64)

assert M == 64 && N == 32;

// Unpack using fpcr to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr);

Z = '1';
// If NaN, set cumulative flag or take exception.
if fptype == FPType_SNaN || fptype == FPType_QNaN then
FPProcessException(FPExc_InvalidOp, fpcr);
Z = '0';

int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.

round_it_up = (error != 0.0 && int_result < 0);
if round_it_up then int_result = int_result + 1;

if int_result < 0 then
result = int_result - 2^32*RoundUp(Real(int_result)/Real(2^32));
else
result = int_result - 2^32*RoundDown(Real(int_result)/Real(2^32));

// Generate exceptions.
if int_result < -(2^31) || int_result > (2^31)-1 then
FPProcessException(FPExc_InvalidOp, fpcr);
Z = '0';
elsif error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);
Z = '0';
elsif sign == '1' && value == 0.0 then
Z = '0';
elsif sign == '0' && value == 0.0 && !IsZero(op<51:0>) then
Z = '0';

if fptype == FPType_Infinity then result = 0;

return (result<N-1:0>, Z);```

### Library pseudocode for shared/functions/float/fptwo/FPTwo

```// FPTwo()
// =======

bits(N) FPTwo(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '1':Zeros(E-1);
frac = Zeros(F);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/fptype/FPType

```enumeration FPType      {FPType_Nonzero, FPType_Zero, FPType_Infinity,
FPType_QNaN, FPType_SNaN};```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpack

```// FPUnpack()
// ==========
//
// Used by data processing and int/fixed <-> FP conversion instructions.
// For half-precision data it ignores AHP, and observes FZ16.

(FPType, bit, real) FPUnpack(bits(N) fpval, FPCRType fpcr)
fpcr.AHP = '0';
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr);
return (fp_type, sign, value);```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpackBase

```// FPUnpackBase()
// ==============
//
// Unpack a floating-point number into its type, sign bit and the real number
// that it represents. The real number result has the correct sign for numbers
// and infinities, is very large in magnitude for infinities, and is 0.0 for
// NaNs. (These values are chosen to simplify the description of comparisons
// and conversions.)
//
// The 'fpcr' argument supplies FPCR control bits. Status information is
// updated directly in the FPSR where appropriate.

(FPType, bit, real) FPUnpackBase(bits(N) fpval, FPCRType fpcr)
assert N IN {16,32,64};

if N == 16 then
sign   = fpval<15>;
exp16  = fpval<14:10>;
frac16 = fpval<9:0>;
if IsZero(exp16) then
// Produce zero if value is zero or flush-to-zero is selected
if IsZero(frac16) || fpcr.FZ16 == '1' then
fptype = FPType_Zero;  value = 0.0;
else
fptype = FPType_Nonzero;  value = 2.0^-14 * (Real(UInt(frac16)) * 2.0^-10);
elsif IsOnes(exp16) && fpcr.AHP == '0' then  // Infinity or NaN in IEEE format
if IsZero(frac16) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac16<9> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp16)-15) * (1.0 + Real(UInt(frac16)) * 2.0^-10);

elsif N == 32 then

sign   = fpval<31>;
exp32  = fpval<30:23>;
frac32 = fpval<22:0>;
if IsZero(exp32) then
// Produce zero if value is zero or flush-to-zero is selected.
if IsZero(frac32) || fpcr.FZ == '1' then
fptype = FPType_Zero;  value = 0.0;
if !IsZero(frac32) then  // Denormalized input flushed to zero
FPProcessException(FPExc_InputDenorm, fpcr);
else
fptype = FPType_Nonzero;  value = 2.0^-126 * (Real(UInt(frac32)) * 2.0^-23);
elsif IsOnes(exp32) then
if IsZero(frac32) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac32<22> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp32)-127) * (1.0 + Real(UInt(frac32)) * 2.0^-23);

else // N == 64

sign   = fpval<63>;
exp64  = fpval<62:52>;
frac64 = fpval<51:0>;
if IsZero(exp64) then
// Produce zero if value is zero or flush-to-zero is selected.
if IsZero(frac64) || fpcr.FZ == '1' then
fptype = FPType_Zero;  value = 0.0;
if !IsZero(frac64) then  // Denormalized input flushed to zero
FPProcessException(FPExc_InputDenorm, fpcr);
else
fptype = FPType_Nonzero;  value = 2.0^-1022 * (Real(UInt(frac64)) * 2.0^-52);
elsif IsOnes(exp64) then
if IsZero(frac64) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac64<51> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp64)-1023) * (1.0 + Real(UInt(frac64)) * 2.0^-52);

if sign == '1' then value = -value;

return (fptype, sign, value);```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpackCV

```// FPUnpackCV()
// ============
//
// Used for FP <-> FP conversion instructions.
// For half-precision data ignores FZ16 and observes AHP.

(FPType, bit, real) FPUnpackCV(bits(N) fpval, FPCRType fpcr)
fpcr.FZ16 = '0';
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr);
return (fp_type, sign, value);```

### Library pseudocode for shared/functions/float/fpzero/FPZero

```// FPZero()
// ========

bits(N) FPZero(bit sign)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = Zeros(E);
frac = Zeros(F);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/vfpexpandimm/VFPExpandImm

```// VFPExpandImm()
// ==============

bits(N) VFPExpandImm(bits(8) imm8)

assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - E - 1;
sign = imm8<7>;
exp  = NOT(imm8<6>):Replicate(imm8<6>,E-3):imm8<5:4>;
frac = imm8<3:0>:Zeros(F-4);
result = sign : exp : frac;

return result;```