You copied the Doc URL to your clipboard.

## Shared Functions.Float Pseudocode

```// BFAdd()
// =======
// Single-precision add following BFloat16 computation behaviors.

bits(32) result;

(type1,sign1,value1) = BFUnpack(op1);
(type2,sign2,value2) = BFUnpack(op2);
if type1 == FPType_QNaN || type2 == FPType_QNaN then
result = FPDefaultNaN();
else
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
if inf1 && inf2 && sign1 == NOT(sign2) then
result = FPDefaultNaN();
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == sign2 then
result = FPZero(sign1);
else
result_value = value1 + value2;
if result_value == 0.0 then
result = FPZero('0');    // Positive sign when Round to Odd
else
result = BFRound(result_value);

return result;```

```// BFMatMulAdd()
// =============
// BFloat16 matrix multiply and add to single-precision matrix
// result[2, 2] = addend[2, 2] + (op1[2, 4] * op2[4, 2])

assert N == 128;

bits(N) result;
bits(32) sum, prod0, prod1;

for i = 0 to 1
for j = 0 to 1
sum = Elem[addend, 2*i + j, 32];
for k = 0 to 1
prod0 = BFMul(Elem[op1, 4*i + 2*k + 0, 16], Elem[op2, 4*j + 2*k + 0, 16]);
prod1 = BFMul(Elem[op1, 4*i + 2*k + 1, 16], Elem[op2, 4*j + 2*k + 1, 16]);
Elem[result, 2*i + j, 32] = sum;

return result;```

### Library pseudocode for shared/functions/float/bfloat/BFMul

```// BFMul()
// =======
// BFloat16 widening multiply to single-precision following BFloat16
// computation behaviors.

bits(32) BFMul(bits(16) op1, bits(16) op2)
bits(32) result;

(type1,sign1,value1) = BFUnpack(op1);
(type2,sign2,value2) = BFUnpack(op2);
if type1 == FPType_QNaN || type2 == FPType_QNaN then
result = FPDefaultNaN();
else
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);
if (inf1 && zero2) || (zero1 && inf2) then
result = FPDefaultNaN();
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = BFRound(value1*value2);

return result;```

```// BFMulAdd()
// ==========
// Used by BFMLALB and BFMLALT instructions.

boolean altfp = HaveAltFP() && fpcr.AH == '1';  // When TRUE:
boolean fpexc = !altfp;                         //     Do not generate floating point exceptions
if altfp then fpcr.<FIZ,FZ> = '11';             //     Flush denormal input and output to zero
if altfp then fpcr.RMode    = '00';             //     Use RNE rounding mode

### Library pseudocode for shared/functions/float/bfloat/BFRound

```// BFRound()
// =========
// Converts a real number OP into a single-precision value using the
// Round to Odd rounding mode and following BFloat16 computation behaviors.

bits(32) BFRound(real op)
assert op != 0.0;
bits(32) result;

// Format parameters - minimum exponent, numbers of exponent and fraction bits.
minimum_exp = -126;  E = 8;  F = 23;

// Split value into sign, unrounded mantissa and exponent.
if op < 0.0 then
sign = '1';  mantissa = -op;
else
sign = '0';  mantissa = op;
exponent = 0;
while mantissa < 1.0 do
mantissa = mantissa * 2.0;  exponent = exponent - 1;
while mantissa >= 2.0 do
mantissa = mantissa / 2.0;  exponent = exponent + 1;

// Fixed Flush-to-zero.
if exponent < minimum_exp then
return FPZero(sign);

// Start creating the exponent value for the result. Start by biasing the actual exponent
// so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow).
biased_exp = Max(exponent - minimum_exp + 1, 0);
if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent);

// Get the unrounded mantissa as an integer, and the "units in last place" rounding error.
int_mant = RoundDown(mantissa * 2.0^F);  // < 2.0^F if biased_exp == 0, >= 2.0^F if not
error = mantissa * 2.0^F - Real(int_mant);

// Round to Odd
if error != 0.0 then
int_mant<0> = '1';

// Deal with overflow and generate result.
if biased_exp >= 2^E - 1 then
result = FPInfinity(sign);      // Overflows generate appropriately-signed Infinity
else
result = sign : biased_exp<30-F:0> : int_mant<F-1:0>;

return result;```

### Library pseudocode for shared/functions/float/bfloat/BFUnpack

```// BFUnpack()
// ==========
// Unpacks a BFloat16 or single-precision value into its type,
// sign bit and real number that it represents.
// The real number result has the correct sign for numbers and infinities,
// is very large in magnitude for infinities, and is 0.0 for NaNs.
// (These values are chosen to simplify the description of
// comparisons and conversions.)

(FPType, bit, real) BFUnpack(bits(N) fpval)
assert N IN {16,32};

if N == 16 then
sign   = fpval<15>;
exp    = fpval<14:7>;
frac   = fpval<6:0> : Zeros(16);
else  // N == 32
sign   = fpval<31>;
exp    = fpval<30:23>;
frac   = fpval<22:0>;

if IsZero(exp) then
fptype = FPType_Zero;  value = 0.0;    // Fixed Flush to Zero
elsif IsOnes(exp) then
if IsZero(frac) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else    // no SNaN for BF16 arithmetic
fptype = FPType_QNaN; value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp)-127) * (1.0 + Real(UInt(frac)) * 2.0^-23);

if sign == '1' then value = -value;

return (fptype, sign, value);```

### Library pseudocode for shared/functions/float/bfloat/FPConvertBF

```// FPConvertBF()
// =============
// Converts a single-precision OP to BFloat16 value with using rounding mode of
// Round to Nearest Even when executed from AArch64 state and
// FPCR.AH == '1', otherwise rounding is controlled by FPCR/FPSCR.

bits(16) FPConvertBF(bits(32) op, FPCRType fpcr, FPRounding rounding)
bits(32) result;                                // BF16 value in top 16 bits
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
boolean fpexc = !altfp;                         // Generate no floating-point exceptions
if altfp then fpcr.<FIZ,FZ> = '11';             // Flush denormal input and output to zero
if altfp then rounding = FPRounding_TIEEVEN;    // Use RNE rounding mode

// Unpack floating-point operand, with always flush-to-zero if fpcr.AH == '1'.
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
if fpcr.DN == '1' then
result = FPDefaultNaN();
else
result = FPConvertNaN(op);
if fptype == FPType_SNaN then
if fpexc then FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
result = FPRoundCVBF(value, fpcr, rounding, fpexc);

// Returns correctly rounded BF16 value from top 16 bits
return result<31:16>;

// FPConvertBF()
// =============
// Converts a single-precision operand to BFloat16 value.

bits(16) FPConvertBF(bits(32) op, FPCRType fpcr)
return FPConvertBF(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/bfloat/FPRoundCVBF

```// FPRoundCVBF()
// =============
// Converts a real number OP into a BFloat16 value using the supplied
// rounding mode RMODE. The 'fpexc' argument controls the generation of
// floating-point exceptions.

bits(32) FPRoundCVBF(real op, FPCRType fpcr, FPRounding rounding, boolean fpexc)
boolean isbfloat16 = TRUE;
return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);```

### Library pseudocode for shared/functions/float/fixedtofp/FixedToFP

```// FixedToFP()
// ===========

// Convert M-bit fixed point OP with FBITS fractional bits to
// N-bit precision floating point, controlled by UNSIGNED and ROUNDING.

bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)
assert N IN {16,32,64};
assert M IN {16,32,64};
bits(N) result;
assert fbits >= 0;
assert rounding != FPRounding_ODD;

// Correct signed-ness
int_operand = Int(op, unsigned);

// Scale by fractional bits and generate a real value
real_operand = Real(int_operand) / 2.0^fbits;

if real_operand == 0.0 then
result = FPZero('0');
else
result = FPRound(real_operand, fpcr, rounding);

return result;```

### Library pseudocode for shared/functions/float/fpabs/FPAbs

```// FPAbs()
// =======

bits(N) FPAbs(bits(N) op)
assert N IN {16,32,64};
if !UsingAArch32() && HaveAltFP() then
FPCRType fpcr = FPCR[];
if fpcr.AH == '1' then
(fptype, -, -) = FPUnpack(op, fpcr, FALSE);
if fptype IN {FPType_SNaN, FPType_QNaN} then
return op;        // When fpcr.AH=1, sign of NaN has no consequence
return '0' : op<N-2:0>;```

```// FPAdd()
// =======

bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1  = (type1 == FPType_Infinity);  inf2  = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);      zero2 = (type2 == FPType_Zero);
if inf1 && inf2 && sign1 == NOT(sign2) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == sign2 then
result = FPZero(sign1);
else
result_value = value1 + value2;
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr, rounding);

FPProcessDenorms(type1, type2, N, fpcr);
return result;```

### Library pseudocode for shared/functions/float/fpcompare/FPCompare

```// FPCompare()
// ===========

bits(4) FPCompare(bits(N) op1, bits(N) op2, boolean signal_nans, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then
result = '0011';
if type1 == FPType_SNaN || type2 == FPType_SNaN || signal_nans then
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
if value1 == value2 then
result = '0110';
elsif value1 < value2 then
result = '1000';
else  // value1 > value2
result = '0010';

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpcompareeq/FPCompareEQ

```// FPCompareEQ()
// =============

boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then
result = FALSE;
if type1 == FPType_SNaN || type2 == FPType_SNaN then
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 == value2);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpcomparege/FPCompareGE

```// FPCompareGE()
// =============

boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then
result = FALSE;
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 >= value2);
FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpcomparegt/FPCompareGT

```// FPCompareGT()
// =============

boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then
result = FALSE;
FPProcessException(FPExc_InvalidOp, fpcr);
else
// All non-NaN cases can be evaluated on the values produced by FPUnpack()
result = (value1 > value2);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpconvert/FPConvert

```// FPConvert()
// ===========

// Convert floating point OP with N-bit precision to M-bit precision,
// with rounding controlled by ROUNDING.
// This is used by the FP-to-FP conversion instructions and so for
// half-precision data ignores FZ16, but observes AHP.

bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding)

assert M IN {16,32,64};
assert N IN {16,32,64};
bits(M) result;

// Unpack floating-point operand optionally with flush-to-zero.
(fptype,sign,value) = FPUnpackCV(op, fpcr);

alt_hp = (M == 16) && (fpcr.AHP == '1');

if fptype == FPType_SNaN || fptype == FPType_QNaN then
if alt_hp then
result = FPZero(sign);
elsif fpcr.DN == '1' then
result = FPDefaultNaN();
else
result = FPConvertNaN(op);
if fptype == FPType_SNaN || alt_hp then
FPProcessException(FPExc_InvalidOp,fpcr);
elsif fptype == FPType_Infinity then
if alt_hp then
result = sign:Ones(M-1);
FPProcessException(FPExc_InvalidOp, fpcr);
else
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
result = FPRoundCV(value, fpcr, rounding);

FPProcessDenorm(fptype, N, fpcr);

return result;

// FPConvert()
// ===========

bits(M) FPConvert(bits(N) op, FPCRType fpcr)
return FPConvert(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/fpconvertnan/FPConvertNaN

```// FPConvertNaN()
// ==============
// Converts a NaN of one floating-point type to another

bits(M) FPConvertNaN(bits(N) op)
assert N IN {16,32,64};
assert M IN {16,32,64};
bits(M) result;
bits(51) frac;

sign = op<N-1>;

// Unpack payload from input NaN
case N of
when 64 frac = op<50:0>;
when 32 frac = op<21:0>:Zeros(29);
when 16 frac = op<8:0>:Zeros(42);

// Repack payload into output NaN, while
// converting an SNaN to a QNaN.
case M of
when 64 result = sign:Ones(M-52):frac;
when 32 result = sign:Ones(M-23):frac<50:29>;
when 16 result = sign:Ones(M-10):frac<50:42>;

return result;```

### Library pseudocode for shared/functions/float/fpcrtype/FPCRType

`type FPCRType;`

### Library pseudocode for shared/functions/float/fpdecoderm/FPDecodeRM

```// FPDecodeRM()
// ============

// Decode most common AArch32 floating-point rounding encoding.

FPRounding FPDecodeRM(bits(2) rm)
case rm of
when '00' result = FPRounding_TIEAWAY; // A
when '01' result = FPRounding_TIEEVEN; // N
when '10' result = FPRounding_POSINF;  // P
when '11' result = FPRounding_NEGINF;  // M

return result;```

### Library pseudocode for shared/functions/float/fpdecoderounding/FPDecodeRounding

```// FPDecodeRounding()
// ==================

// Decode floating-point rounding mode and common AArch64 encoding.

FPRounding FPDecodeRounding(bits(2) rmode)
case rmode of
when '00' return FPRounding_TIEEVEN; // N
when '01' return FPRounding_POSINF;  // P
when '10' return FPRounding_NEGINF;  // M
when '11' return FPRounding_ZERO;    // Z```

### Library pseudocode for shared/functions/float/fpdefaultnan/FPDefaultNaN

```// FPDefaultNaN()
// ==============

bits(N) FPDefaultNaN()
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
bit sign;
if !UsingAArch32() && HaveAltFP() then
FPCRType fpcr = FPCR[];
sign = if fpcr.AH == '1' then '1' else '0';
else
sign = '0';

bits(E) exp  = Ones(E);
bits(F) frac = '1':Zeros(F-1);
return sign : exp : frac;```

### Library pseudocode for shared/functions/float/fpdiv/FPDiv

```// FPDiv()
// =======

bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);

if !done then
inf1  = type1 == FPType_Infinity;
inf2  = type2 == FPType_Infinity;
zero1 = type1 == FPType_Zero;
zero2 = type2 == FPType_Zero;

if (inf1 && inf2) || (zero1 && zero2) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif inf1 || zero2 then
result = FPInfinity(sign1 EOR sign2);
if !inf1 then FPProcessException(FPExc_DivideByZero, fpcr);
elsif zero1 || inf2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1/value2, fpcr);

if !zero2 then
FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpexc/FPExc

```enumeration FPExc       {FPExc_InvalidOp, FPExc_DivideByZero, FPExc_Overflow,
FPExc_Underflow, FPExc_Inexact, FPExc_InputDenorm};```

### Library pseudocode for shared/functions/float/fpinfinity/FPInfinity

```// FPInfinity()
// ============

bits(N) FPInfinity(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
bits(E) exp  = Ones(E);
bits(F) frac = Zeros(F);
return sign : exp : frac;```

```// FPMatMulAdd()
// =============
//
// Floating point matrix multiply and add to same precision matrix
// result[2, 2] = addend[2, 2] + (op1[2, 2] * op2[2, 2])

assert N == esize * 2 * 2;
bits(N)  result;
bits(esize) prod0, prod1, sum;

for i = 0 to 1
for j = 0 to 1
sum   = Elem[addend, 2*i + j, esize];
prod0 = FPMul(Elem[op1, 2*i + 0, esize],
Elem[op2, 2*j + 0, esize], fpcr);
prod1 = FPMul(Elem[op1, 2*i + 1, esize],
Elem[op2, 2*j + 1, esize], fpcr);
Elem[result, 2*i + j, esize] = sum;

return result;```

### Library pseudocode for shared/functions/float/fpmax/FPMax

```// FPMax()
// =======

bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
return FPMax(op1, op2, fpcr, altfp);

// FPMax()
// =======
// Compare two inputs and return the larger value after rounding. The
// 'fpcr' argument supplies the FPCR control bits and 'altfp' determines
// if the function should use alternative floating-point behaviour.

bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr, boolean altfp)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if (altfp && type1 == FPType_Zero && type2 == FPType_Zero &&
((sign1 == '0' && sign2 == '1') || (sign1 == '1' && sign2 == '0'))) then
return FPZero(sign2);

(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr, altfp, TRUE);

if !done then
if value1 > value2 then
(fptype,sign,value) = (type1,sign1,value1);
else
(fptype,sign,value) = (type2,sign2,value2);
if fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
sign = sign1 AND sign2;         // Use most positive sign
result = FPZero(sign);
else
// The use of FPRound() covers the case where there is a trapped underflow exception
// for a denormalized number even though the result is exact.
rounding = FPRoundingMode(fpcr);
if altfp then    // Denormal output is not flushed to zero
fpcr.FZ = '0';
fpcr.FZ16 = '0';

result = FPRound(value, fpcr, rounding, TRUE);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpmaxnormal/FPMaxNormal

```// FPMaxNormal()
// =============

bits(N) FPMaxNormal(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = Ones(E-1):'0';
frac = Ones(F);
return sign : exp : frac;```

### Library pseudocode for shared/functions/float/fpmaxnum/FPMaxNum

```// FPMaxNum()
// ==========

bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,-,-) = FPUnpack(op1, fpcr);
(type2,-,-) = FPUnpack(op2, fpcr);

boolean type1_nan = type1 IN {FPType_QNaN, FPType_SNaN};
boolean type2_nan = type2 IN {FPType_QNaN, FPType_SNaN};
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';

if !(altfp && type1_nan && type2_nan) then
// Treat a single quiet-NaN as -Infinity.
if type1 == FPType_QNaN && type2 != FPType_QNaN then
op1 = FPInfinity('1');
elsif type1 != FPType_QNaN && type2 == FPType_QNaN then
op2 = FPInfinity('1');

altfmaxfmin = FALSE;    // Restrict use of FMAX/FMIN NaN propagation rules
result = FPMax(op1, op2, fpcr, altfmaxfmin);

return result;```

### Library pseudocode for shared/functions/float/fpmerge/IsMerging

```// IsMerging()
// ===========
// Returns TRUE if the output elements other than the lowest are taken from
// the destination register.

boolean IsMerging(FPCRType fpcr)
boolean merge = HaveAltFP() && !UsingAArch32() && fpcr.NEP == '1';
return merge;```

### Library pseudocode for shared/functions/float/fpmin/FPMin

```// FPMin()
// =======

bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
return FPMin(op1, op2, fpcr, altfp);

// FPMin()
// =======
// Compare two operands and return the smaller operand after rounding. The
// 'fpcr' argument supplies the FPCR control bits and 'altfp' determines
// if the function should use alternative behaviour.

bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr, boolean altfp)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

if (altfp && type1 == FPType_Zero && type2 == FPType_Zero &&
((sign1 == '0' && sign2 == '1') || (sign1 == '1' && sign2 == '0'))) then
return FPZero(sign2);

(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr, altfp, TRUE);

if !done then
if value1 < value2 then
(fptype,sign,value) = (type1,sign1,value1);
else
(fptype,sign,value) = (type2,sign2,value2);
if fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
sign = sign1 OR sign2;              // Use most negative sign
result = FPZero(sign);
else
// The use of FPRound() covers the case where there is a trapped underflow exception
// for a denormalized number even though the result is exact.
rounding = FPRoundingMode(fpcr);
if altfp then    // Denormal output is not flushed to zero
fpcr.FZ = '0';
fpcr.FZ16 = '0';

result = FPRound(value, fpcr, rounding, TRUE);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpminnum/FPMinNum

```// FPMinNum()
// ==========

bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,-,-) = FPUnpack(op1, fpcr);
(type2,-,-) = FPUnpack(op2, fpcr);

boolean type1_nan = type1 IN {FPType_QNaN, FPType_SNaN};
boolean type2_nan = type2 IN {FPType_QNaN, FPType_SNaN};
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';

if !(altfp && type1_nan && type2_nan) then
// Treat a single quiet-NaN as +Infinity.
if type1 == FPType_QNaN && type2 != FPType_QNaN then
op1 = FPInfinity('0');
elsif type1 != FPType_QNaN && type2 == FPType_QNaN then
op2 = FPInfinity('0');

altfmaxfmin = FALSE;    // Restrict use of FMAX/FMIN NaN propagation rules
result = FPMin(op1, op2, fpcr, altfmaxfmin);

return result;```

### Library pseudocode for shared/functions/float/fpmul/FPMul

```// FPMul()
// =======

bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);

if (inf1 && zero2) || (zero1 && inf2) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1*value2, fpcr);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

```// FPMulAdd()
// ==========

boolean fpexc = TRUE;       // Generate floating-point exceptions

// ==========
//
// Calculates addend + op1*op2 with a single rounding. The 'fpcr' argument
// supplies the FPCR control bits, and 'fpexc' controls the generation of
// floating-point exceptions.

FPCRType fpcr, boolean fpexc)
assert N IN {16,32,64};

(type1,sign1,value1) = FPUnpack(op1, fpcr, fpexc);
(type2,sign2,value2) = FPUnpack(op2, fpcr, fpexc);
rounding = FPRoundingMode(fpcr);
inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero);
inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero);

(done,result) = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, fpcr, fpexc);

if !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1') then
if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

if !done then
infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero);

// Determine sign and type product will have if it does not cause an
// Invalid Operation.
signP = sign1 EOR sign2;
infP  = inf1 || inf2;
zeroP = zero1 || zero2;

// Non SNaN-generated Invalid Operation cases are multiplies of zero
// by infinity and additions of opposite-signed infinities.
invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP);

if invalidop then
result = FPDefaultNaN();
if fpexc then FPProcessException(FPExc_InvalidOp, fpcr);
// Other cases involving infinities produce an infinity of the same sign.
elsif (infA && signA == '0') || (infP && signP == '0') then
result = FPInfinity('0');
elsif (infA && signA == '1') || (infP && signP == '1') then
result = FPInfinity('1');

// Cases where the result is exactly zero and its sign is not determined by the
// rounding mode are additions of same-signed zeros.
elsif zeroA && zeroP && signA == signP then
result = FPZero(signA);

// Otherwise calculate numerical result and round it.
else
result_value = valueA + (value1 * value2);
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr, rounding, fpexc);

if !invalidop && fpexc then
FPProcessDenorms3(typeA, type1, type2, N, fpcr);

return result;```

```// FPMulAddH()
// ===========

bits(N) FPMulAddH(bits(N) addend, bits(N DIV 2) op1, bits(N DIV 2) op2, FPCRType fpcr)
assert N == 32;
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero);
inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero);
(done,result) = FPProcessNaNs3H(typeA, type1, type2, addend, op1, op2, fpcr);

if !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1') then
if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

if !done then
infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero);

// Determine sign and type product will have if it does not cause an
// Invalid Operation.
signP = sign1 EOR sign2;
infP = inf1 || inf2;
zeroP = zero1 || zero2;

// Non SNaN-generated Invalid Operation cases are multiplies of zero by infinity and
invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP);

if invalidop then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);

// Other cases involving infinities produce an infinity of the same sign.
elsif (infA && signA == '0') || (infP && signP == '0') then
result = FPInfinity('0');
elsif (infA && signA == '1') || (infP && signP == '1') then
result = FPInfinity('1');

// Cases where the result is exactly zero and its sign is not determined by the
// rounding mode are additions of same-signed zeros.
elsif zeroA && zeroP && signA == signP then
result = FPZero(signA);

// Otherwise calculate numerical result and round it.
else
result_value = valueA + (value1 * value2);
if result_value == 0.0 then // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr);

if !invalidop then
FPProcessDenorm(typeA, N, fpcr);

return result;```

```// FPProcessNaNs3H()
// =================

(boolean, bits(N)) FPProcessNaNs3H(FPType type1, FPType type2, FPType type3,
bits(N) op1, bits(N DIV 2) op2, bits(N DIV 2) op3,
FPCRType fpcr)
assert N IN {32,64};

bits(N) result;
// When TRUE, use alternative NaN propagation rules.
boolean altfp   = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
boolean op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
boolean op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};
boolean op3_nan = type3 IN {FPType_SNaN, FPType_QNaN};
boolean fpexc   = TRUE;
if altfp then
if (type1 == FPType_SNaN || type2 == FPType_SNaN || type3 == FPType_SNaN) then
type_nan = FPType_SNaN;
else
type_nan = FPType_QNaN;

if altfp && op1_nan && op2_nan && op3_nan then
done = TRUE;  result = FPConvertNaN(FPProcessNaN(type_nan, op2, fpcr, fpexc));   // <n> register NaN selected
elsif altfp && op2_nan && (op1_nan || op3_nan) then
done = TRUE;  result = FPConvertNaN(FPProcessNaN(type_nan, op2, fpcr, fpexc));   // <n> register NaN selected
elsif altfp && op3_nan && op1_nan then
done = TRUE;  result = FPConvertNaN(FPProcessNaN(type_nan, op3, fpcr, fpexc));   // <m> register NaN selected
elsif type1 == FPType_SNaN then
done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_SNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr, fpexc));
elsif type3 == FPType_SNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr, fpexc));
elsif type1 == FPType_QNaN then
done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_QNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr, fpexc));
elsif type3 == FPType_QNaN then
done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr, fpexc));
else
done = FALSE; result = Zeros(); // 'Don't care' result
return (done, result);```

### Library pseudocode for shared/functions/float/fpmulx/FPMulX

```// FPMulX()
// ========

bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
bits(N) result;
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);

(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);

if (inf1 && zero2) || (zero1 && inf2) then
result = FPTwo(sign1 EOR sign2);
elsif inf1 || inf2 then
result = FPInfinity(sign1 EOR sign2);
elsif zero1 || zero2 then
result = FPZero(sign1 EOR sign2);
else
result = FPRound(value1*value2, fpcr);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpneg/FPNeg

```// FPNeg()
// =======

bits(N) FPNeg(bits(N) op)
assert N IN {16,32,64};
if !UsingAArch32() && HaveAltFP() then
FPCRType fpcr = FPCR[];
if fpcr.AH == '1' then
(fptype, -, -) = FPUnpack(op, fpcr, FALSE);
if fptype IN {FPType_SNaN, FPType_QNaN} then
return op;        // When fpcr.AH=1, sign of NaN has no consequence
return NOT(op<N-1>) : op<N-2:0>;```

### Library pseudocode for shared/functions/float/fponepointfive/FPOnePointFive

```// FPOnePointFive()
// ================

bits(N) FPOnePointFive(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '0':Ones(E-1);
frac = '1':Zeros(F-1);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorm

```// FPProcessDenorm()
// =================
// Handles denormal input in case of single-precision or double-precision
// when using alternative floating-point mode.

FPProcessDenorm(FPType fptype, integer N, FPCRType fpcr)
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
if altfp && N != 16 && fptype == FPType_Denormal then
FPProcessException(FPExc_InputDenorm, fpcr);```

### Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorms

```// FPProcessDenorms()
// ==================
// Handles denormal input in case of single-precision or double-precision
// when using alternative floating-point mode.

FPProcessDenorms(FPType type1, FPType type2, integer N, FPCRType fpcr)
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
if altfp && N != 16 && (type1 == FPType_Denormal || type2 == FPType_Denormal) then
FPProcessException(FPExc_InputDenorm, fpcr);```

### Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorms3

```// FPProcessDenorms3()
// ===================
// Handles denormal input in case of single-precision or double-precision
// when using alternative floating-point mode.

FPProcessDenorms3(FPType type1, FPType type2, FPType type3, integer N, FPCRType fpcr)
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
if altfp && N != 16 && (type1 == FPType_Denormal || type2 == FPType_Denormal ||
type3 == FPType_Denormal) then
FPProcessException(FPExc_InputDenorm, fpcr);```

### Library pseudocode for shared/functions/float/fpprocessexception/FPProcessException

```// FPProcessException()
// ====================
//
// The 'fpcr' argument supplies FPCR control bits. Status information is
// updated directly in the FPSR where appropriate.

FPProcessException(FPExc exception, FPCRType fpcr)
// Determine the cumulative exception bit number
case exception of
when FPExc_InvalidOp     cumul = 0;
when FPExc_DivideByZero  cumul = 1;
when FPExc_Overflow      cumul = 2;
when FPExc_Underflow     cumul = 3;
when FPExc_Inexact       cumul = 4;
when FPExc_InputDenorm   cumul = 7;
enable = cumul + 8;
if fpcr<enable> == '1' then
// Trapping of the exception enabled.
// It is IMPLEMENTATION DEFINED whether the enable bit may be set at all, and
// if so then how exceptions may be accumulated before calling FPTrappedException()
IMPLEMENTATION_DEFINED "floating-point trap handling";
elsif UsingAArch32() then
// Set the cumulative exception bit
FPSCR<cumul> = '1';
else
// Set the cumulative exception bit
FPSR<cumul> = '1';
return;```

### Library pseudocode for shared/functions/float/fpprocessnan/FPProcessNaN

```// FPProcessNaN()
// ==============

bits(N) FPProcessNaN(FPType fptype, bits(N) op, FPCRType fpcr)
boolean fpexc = TRUE;   // Generate floating-point exceptions
return FPProcessNaN(fptype, op, fpcr, fpexc);

// FPProcessNaN()
// ==============
// Handle NaN input operands, returning the operand or default NaN value
// if fpcr.DN is selected. The 'fpcr' argument supplies the FPCR control bits.
// The 'fpexc' argument controls the generation of exceptions, regardless of
// whether 'fptype' is a signalling NaN or a quiet NaN.

bits(N) FPProcessNaN(FPType fptype, bits(N) op, FPCRType fpcr, boolean fpexc)
assert N IN {16,32,64};
assert fptype IN {FPType_QNaN, FPType_SNaN};

case N of
when 16 topfrac =  9;
when 32 topfrac = 22;
when 64 topfrac = 51;

result = op;
if fptype == FPType_SNaN then
result<topfrac> = '1';
if fpexc then FPProcessException(FPExc_InvalidOp, fpcr);
if fpcr.DN == '1' then  // DefaultNaN requested
result = FPDefaultNaN();
return result;```

### Library pseudocode for shared/functions/float/fpprocessnans/FPProcessNaNs

```// FPProcessNaNs()
// ===============

(boolean, bits(N)) FPProcessNaNs(FPType type1, FPType type2, bits(N) op1,
bits(N) op2, FPCRType fpcr)
boolean altfmaxfmin = FALSE;    // Do not use alfp mode for FMIN, FMAX and variants
boolean fpexc       = TRUE;     // Generate floating-point exceptions
return FPProcessNaNs(type1, type2, op1, op2, fpcr, altfmaxfmin, fpexc);

// FPProcessNaNs()
// ===============
//
// The boolean part of the return value says whether a NaN has been found and
// processed. The bits(N) part is only relevant if it has and supplies the
// result of the operation.
//
// The 'fpcr' argument supplies FPCR control bits and 'altfmaxfmin' controls
// alternative floating-point behaviour for FMAX, FMIN and variants. 'fpexc'
// controls the generation of floating-point exceptions. Status information
// is updated directly in the FPSR where appropriate.

(boolean, bits(N)) FPProcessNaNs(FPType type1, FPType type2, bits(N) op1, bits(N) op2,
FPCRType fpcr, boolean altfmaxfmin, boolean fpexc)
assert N IN {16,32,64};
boolean altfp    = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
boolean op1_nan  = type1 IN {FPType_SNaN, FPType_QNaN};
boolean op2_nan  = type2 IN {FPType_SNaN, FPType_QNaN};
boolean any_snan = type1 == FPType_SNaN || type2 == FPType_SNaN;
FPType  type_nan = if any_snan then FPType_SNaN else FPType_QNaN;

if altfmaxfmin && (op1_nan || op2_nan) then
FPProcessException(FPExc_InvalidOp, fpcr);
done = TRUE; sign2 = op2<N-1>;
result = if type2 == FPType_Zero then FPZero(sign2) else op2;
elsif altfp && op1_nan && op2_nan then
done = TRUE;  result = FPProcessNaN(type_nan, op1, fpcr, fpexc);   // <n> register NaN selected
elsif type1 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr, fpexc);
elsif type1 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr, fpexc);
else
done = FALSE;  result = Zeros();  // 'Don't care' result
return (done, result);```

### Library pseudocode for shared/functions/float/fpprocessnans3/FPProcessNaNs3

```// FPProcessNaNs3()
// ================

(boolean, bits(N)) FPProcessNaNs3(FPType type1, FPType type2, FPType type3,
bits(N) op1, bits(N) op2, bits(N) op3,
FPCRType fpcr)
boolean fpexc = TRUE;   // Generate floating-point exceptions
return FPProcessNaNs3(type1, type2, type3, op1, op2, op3, fpcr, fpexc);

// FPProcessNaNs3()
// ================
// The boolean part of the return value says whether a NaN has been found and
// processed. The bits(N) part is only relevant if it has and supplies the
// result of the operation.
//
// The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the
// generation of floating-point exceptions. Status information is updated
// directly in the FPSR where appropriate.

(boolean, bits(N)) FPProcessNaNs3(FPType type1, FPType type2, FPType type3,
bits(N) op1, bits(N) op2, bits(N) op3,
FPCRType fpcr, boolean fpexc)
assert N IN {16,32,64};
boolean op1_nan = type1 IN {FPType_SNaN, FPType_QNaN};
boolean op2_nan = type2 IN {FPType_SNaN, FPType_QNaN};
boolean op3_nan = type3 IN {FPType_SNaN, FPType_QNaN};

boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
if altfp then
if type1 == FPType_SNaN || type2 == FPType_SNaN || type3 == FPType_SNaN then
type_nan = FPType_SNaN;
else
type_nan = FPType_QNaN;

if altfp && op1_nan && op2_nan && op3_nan then
done = TRUE;  result = FPProcessNaN(type_nan, op2, fpcr, fpexc);   // <n> register NaN selected
elsif altfp && op2_nan && (op1_nan || op3_nan) then
done = TRUE;  result = FPProcessNaN(type_nan, op2, fpcr, fpexc);   // <n> register NaN selected
elsif altfp && op3_nan && op1_nan then
done = TRUE;  result = FPProcessNaN(type_nan, op3, fpcr, fpexc);   // <m> register NaN selected
elsif type1 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr, fpexc);
elsif type3 == FPType_SNaN then
done = TRUE;  result = FPProcessNaN(type3, op3, fpcr, fpexc);
elsif type1 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type1, op1, fpcr, fpexc);
elsif type2 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type2, op2, fpcr, fpexc);
elsif type3 == FPType_QNaN then
done = TRUE;  result = FPProcessNaN(type3, op3, fpcr, fpexc);
else
done = FALSE;  result = Zeros();  // 'Don't care' result
return (done, result);```

### Library pseudocode for shared/functions/float/fprecipestimate/FPRecipEstimate

```// FPRecipEstimate()
// =================

bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)
assert N IN {16,32,64};

// When using alternative floating-point behaviour, do not generate
// floating-point exceptions, flush denormal input and output to zero,
// and use RNE rounding mode.
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
boolean fpexc = !altfp;
if altfp then fpcr.<FIZ,FZ> = '11';
if altfp then fpcr.RMode    = '00';

(fptype,sign,value) = FPUnpack(operand, fpcr, fpexc);

FPRounding rounding = FPRoundingMode(fpcr);
if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, operand, fpcr, fpexc);
elsif fptype == FPType_Infinity then
result = FPZero(sign);
elsif fptype == FPType_Zero then
result = FPInfinity(sign);
if fpexc then FPProcessException(FPExc_DivideByZero, fpcr);
elsif (
(N == 16 && Abs(value) < 2.0^-16) ||
(N == 32 && Abs(value) < 2.0^-128) ||
(N == 64 && Abs(value) < 2.0^-1024)
) then
case rounding of
when FPRounding_TIEEVEN
overflow_to_inf = TRUE;
when FPRounding_POSINF
overflow_to_inf = (sign == '0');
when FPRounding_NEGINF
overflow_to_inf = (sign == '1');
when FPRounding_ZERO
overflow_to_inf = FALSE;
result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign);
if fpexc then
FPProcessException(FPExc_Overflow, fpcr);
FPProcessException(FPExc_Inexact, fpcr);
elsif ((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16))
&& (
(N == 16 && Abs(value) >= 2.0^14) ||
(N == 32 && Abs(value) >= 2.0^126) ||
(N == 64 && Abs(value) >= 2.0^1022)
) then
// Result flushed to zero of correct sign
result = FPZero(sign);

// Flush-to-zero never generates a trapped exception.
if UsingAArch32() then
FPSCR.UFC = '1';
else
if fpexc then FPSR.UFC = '1';
else
// Scale to a fixed point value in the range 0.5 <= x < 1.0 in steps of 1/512, and
// calculate result exponent. Scaled value has copied sign bit,
// exponent = 1022 = double-precision biased version of -1,
// fraction = original fraction
case N of
when 16
fraction = operand<9:0> : Zeros(42);
exp = UInt(operand<14:10>);
when 32
fraction = operand<22:0> : Zeros(29);
exp = UInt(operand<30:23>);
when 64
fraction = operand<51:0>;
exp = UInt(operand<62:52>);

if exp == 0 then
if fraction<51> == '0' then
exp = -1;
fraction = fraction<49:0>:'00';
else
fraction = fraction<50:0>:'0';

integer scaled;
boolean increasedprecision = N==32 && HaveFeatRPRES() && altfp;

if !increasedprecision then
scaled = UInt('1':fraction<51:44>);
else
scaled = UInt('1':fraction<51:41>);

case N of
when 16 result_exp =   29 - exp; // In range 29-30 = -1 to 29+1 = 30
when 32 result_exp =  253 - exp; // In range 253-254 = -1 to 253+1 = 254
when 64 result_exp = 2045 - exp; // In range 2045-2046 = -1 to 2045+1 = 2046

// Scaled is in range 256 .. 511 or 2048 .. 4095 range representing a
// fixed-point number in range [0.5 .. 1.0].
estimate = RecipEstimate(scaled, increasedprecision);

// Estimate is in the range 256 .. 511 or 4096 .. 8191 representing a
// fixed-point result in the range [1.0 .. 2.0].
// Convert to scaled floating point result with copied sign bit,
// high-order bits from estimate, and exponent calculated above.
if !increasedprecision then
fraction = estimate<7:0> : Zeros(44);
else
fraction = estimate<11:0> : Zeros(40);

if result_exp == 0 then
fraction = '1' : fraction<51:1>;
elsif result_exp == -1 then
fraction = '01' : fraction<51:2>;
result_exp = 0;

case N of
when 16 result = sign : result_exp<N-12:0> : fraction<51:42>;
when 32 result = sign : result_exp<N-25:0> : fraction<51:29>;
when 64 result = sign : result_exp<N-54:0> : fraction<51:0>;

return result;```

### Library pseudocode for shared/functions/float/fprecipestimate/RecipEstimate

```// RecipEstimate()
// ===============
// Compute estimate of reciprocal of 9-bit fixed-point number.
//
// a is in range 256 .. 511 or 2048 .. 4096 representing a number in
// the range 0.5 <= x < 1.0.
// increasedprecision determines if the mantissa is 8-bit or 12-bit.
// result is in the range 256 .. 511 or 4096 .. 8191 representing a
// number in the range 1.0 to 511/256 or 1.00 to 8191/4096.

integer RecipEstimate(integer a, boolean increasedprecision)
integer r;
if !increasedprecision then
assert 256 <= a && a < 512;
a = a*2+1;                       // Round to nearest
integer b = (2 ^ 19) DIV a;
r = (b+1) DIV 2;                 // Round to nearest
assert 256 <= r && r < 512;
else
assert 2048 <= a && a < 4096;
a = a*2+1;                       // Round to nearest
real real_val = Real(2^25)/Real(a);
r = RoundDown(real_val);
real error = real_val - Real(r);
boolean round_up = error > 0.5;  // Error cannot be exactly 0.5 so do not need tie case
if round_up then r = r+1;
assert 4096 <= r && r < 8192;
return r;```

### Library pseudocode for shared/functions/float/fprecpx/FPRecpX

```// FPRecpX()
// =========

bits(N) FPRecpX(bits(N) op, FPCRType fpcr)
assert N IN {16,32,64};

case N of
when 16 esize =  5;
when 32 esize =  8;
when 64 esize = 11;

bits(N)           result;
bits(esize)       exp;
bits(esize)       max_exp;
bits(N-(esize+1)) frac = Zeros();

boolean altfp = HaveAltFP() && fpcr.AH == '1';
boolean fpexc = !altfp;                 // Generate no floating-point exceptions
if altfp then fpcr.<FIZ,FZ> = '11';     // Flush denormal input and output to zero
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc);

case N of
when 16 exp = op<10+esize-1:10>;
when 32 exp = op<23+esize-1:23>;
when 64 exp = op<52+esize-1:52>;

max_exp = Ones(esize) - 1;

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr, fpexc);
else
if IsZero(exp) then                 // Zero and denormals
result = sign:max_exp:frac;
else                                // Infinities and normals
result = sign:NOT(exp):frac;

return result;```

### Library pseudocode for shared/functions/float/fpround/FPRound

```// FPRound()
// =========
// Used by data processing and int/fixed <-> FP conversion instructions.
// For half-precision data it ignores AHP, and observes FZ16.

bits(N) FPRound(real op, FPCRType fpcr, FPRounding rounding)
fpcr.AHP = '0';
boolean fpexc = TRUE;    // Generate floating-point exceptions
boolean isbfloat16  = FALSE;
return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);

// FPRound()
// =========
// Used by data processing and int/fixed <-> FP conversion instructions.
// For half-precision data it ignores AHP, and observes FZ16.
//
// The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the
// generation of floating-point exceptions. Status information is updated
// directly in the FPSR where appropriate.

bits(N) FPRound(real op, FPCRType fpcr, FPRounding rounding, boolean fpexc)
fpcr.AHP = '0';
boolean isbfloat16 = FALSE;
return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);

// FPRound()
// =========

bits(N) FPRound(real op, FPCRType fpcr)
return FPRound(op, fpcr, FPRoundingMode(fpcr));```

### Library pseudocode for shared/functions/float/fpround/FPRoundBase

```// FPRoundBase()
// =============

bits(N) FPRoundBase(real op, FPCRType fpcr, FPRounding rounding, boolean isbfloat16)
boolean fpexc = TRUE;    // Generate floating-point exceptions
return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);

// FPRoundBase()
// =============
// Convert a real number OP into an N-bit floating-point value using the
// supplied rounding mode RMODE.
//
// The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the
// generation of floating-point exceptions. Status information is updated
// directly in the FPSR where appropriate.

bits(N) FPRoundBase(real op, FPCRType fpcr, FPRounding rounding,
boolean isbfloat16, boolean fpexc)
assert N IN {16,32,64};
assert op != 0.0;
assert rounding != FPRounding_TIEAWAY;
bits(N) result;

// Obtain format parameters - minimum exponent, numbers of exponent and fraction bits.
if N == 16 then
minimum_exp = -14;  E = 5;  F = 10;
elsif N == 32 && isbfloat16 then
minimum_exp = -126;  E = 8;  F = 7;
elsif N == 32 then
minimum_exp = -126;  E = 8;  F = 23;
else  // N == 64
minimum_exp = -1022;  E = 11;  F = 52;

// Split value into sign, unrounded mantissa and exponent.
if op < 0.0 then
sign = '1';  mantissa = -op;
else
sign = '0';  mantissa = op;
exponent = 0;
while mantissa < 1.0 do
mantissa = mantissa * 2.0;  exponent = exponent - 1;
while mantissa >= 2.0 do
mantissa = mantissa / 2.0;  exponent = exponent + 1;

// When TRUE, detection of underflow occurs after rounding and the test for a
// denormalized number for single and double precision values occurs after rounding.
altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';

// Deal with flush-to-zero before rounding if FPCR.AH != '1'.
if (!altfp && ((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16)) &&
exponent < minimum_exp) then
// Flush-to-zero never generates a trapped exception.
if UsingAArch32() then
FPSCR.UFC = '1';
else
FPSR.UFC = '1';
return FPZero(sign);

biased_exp_unconstrained = exponent - minimum_exp + 1;
int_mant_unconstrained = RoundDown(mantissa * 2.0^F);
error_unconstrained = mantissa * 2.0^F - Real(int_mant_unconstrained);

// Start creating the exponent value for the result. Start by biasing the actual exponent
// so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow).
biased_exp = Max(exponent - minimum_exp + 1, 0);
if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent);

// Get the unrounded mantissa as an integer, and the "units in last place" rounding error.
int_mant = RoundDown(mantissa * 2.0^F);  // < 2.0^F if biased_exp == 0, >= 2.0^F if not
error = mantissa * 2.0^F - Real(int_mant);

// Underflow occurs if exponent is too small before rounding, and result is inexact or
// the Underflow exception is trapped. This applies before rounding if FPCR.AH != '1'.
if !altfp && biased_exp == 0 && (error != 0.0 || fpcr.UFE == '1') then
if fpexc then FPProcessException(FPExc_Underflow, fpcr);

// Round result according to rounding mode.
if altfp then
case rounding of
when FPRounding_TIEEVEN
round_up_unconstrained = (error_unconstrained > 0.5 ||
(error_unconstrained == 0.5 && int_mant_unconstrained<0> == '1'));
round_up = (error > 0.5 || (error == 0.5 && int_mant<0> == '1'));
overflow_to_inf = TRUE;
when FPRounding_POSINF
round_up_unconstrained = (error_unconstrained != 0.0 && sign == '0');
round_up = (error != 0.0 && sign == '0');
overflow_to_inf = (sign == '0');
when FPRounding_NEGINF
round_up_unconstrained = (error_unconstrained != 0.0 && sign == '1');
round_up = (error != 0.0 && sign == '1');
overflow_to_inf = (sign == '1');
when FPRounding_ZERO, FPRounding_ODD
round_up_unconstrained = FALSE;
round_up = FALSE;
overflow_to_inf = FALSE;

if round_up_unconstrained then
int_mant_unconstrained = int_mant_unconstrained + 1;
if int_mant_unconstrained == 2^(F+1) then    // Rounded up to next exponent
biased_exp_unconstrained = biased_exp_unconstrained + 1;
int_mant_unconstrained   = int_mant_unconstrained DIV 2;

// Deal with flush-to-zero and underflow after rounding if FPCR.AH == '1'.
if biased_exp_unconstrained < 1 && int_mant_unconstrained != 0 then
// the result of unconstrained rounding is less than the minimum normalized number
if (fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16) then   // Flush-to-zero
if fpexc then
FPSR.UFC = '1';
FPProcessException(FPExc_Inexact, fpcr);
return FPZero(sign);
elsif error != 0.0 || fpcr.UFE == '1' then
if fpexc then FPProcessException(FPExc_Underflow, fpcr);
else    // altfp == FALSE
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_mant<0> == '1'));
overflow_to_inf = TRUE;
when FPRounding_POSINF
round_up = (error != 0.0 && sign == '0');
overflow_to_inf = (sign == '0');
when FPRounding_NEGINF
round_up = (error != 0.0 && sign == '1');
overflow_to_inf = (sign == '1');
when FPRounding_ZERO, FPRounding_ODD
round_up = FALSE;
overflow_to_inf = FALSE;

if round_up then
int_mant = int_mant + 1;
if int_mant == 2^F then      // Rounded up from denormalized to normalized
biased_exp = 1;
if int_mant == 2^(F+1) then  // Rounded up to next exponent
biased_exp = biased_exp + 1;
int_mant = int_mant DIV 2;

// Handle rounding to odd
if error != 0.0 && rounding == FPRounding_ODD then
int_mant<0> = '1';

// Deal with overflow and generate result.
if N != 16 || fpcr.AHP == '0' then  // Single, double or IEEE half precision
if biased_exp >= 2^E - 1 then
result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign);
if fpexc then FPProcessException(FPExc_Overflow, fpcr);
error = 1.0;  // Ensure that an Inexact exception occurs
else
result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1));
else                                     // Alternative half precision
if biased_exp >= 2^E then
result = sign : Ones(N-1);
if fpexc then FPProcessException(FPExc_InvalidOp, fpcr);
error = 0.0;  // Ensure that an Inexact exception does not occur
else
result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1));

// Deal with Inexact exception.
if error != 0.0 then
if fpexc then FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpround/FPRoundCV

```// FPRoundCV()
// ===========
// Used for FP <-> FP conversion instructions.
// For half-precision data ignores FZ16 and observes AHP.

bits(N) FPRoundCV(real op, FPCRType fpcr, FPRounding rounding)
fpcr.FZ16 = '0';
boolean fpexc = TRUE;    // Generate floating-point exceptions
boolean isbfloat16 = FALSE;
return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);```

### Library pseudocode for shared/functions/float/fprounding/FPRounding

```enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
FPRounding_NEGINF,  FPRounding_ZERO,
FPRounding_TIEAWAY, FPRounding_ODD};```

### Library pseudocode for shared/functions/float/fproundingmode/FPRoundingMode

```// FPRoundingMode()
// ================

// Return the current floating-point rounding mode.

FPRounding FPRoundingMode(FPCRType fpcr)
return FPDecodeRounding(fpcr.RMode);```

### Library pseudocode for shared/functions/float/fproundint/FPRoundInt

```// FPRoundInt()
// ============

// Round op to nearest integral floating point value using rounding mode in FPCR/FPSCR.
// If EXACT is TRUE, set FPSR.IXC if result is not numerically equal to op.

bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)
assert rounding != FPRounding_ODD;
assert N IN {16,32,64};

// When alternative floating-point support is TRUE, do not generate
// Input Denormal floating-point exceptions.
altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
fpexc = !altfp;

// Unpack using FPCR to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr);
elsif fptype == FPType_Infinity then
result = FPInfinity(sign);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
// Extract integer component.
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1'));
when FPRounding_POSINF
round_up = (error != 0.0);
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = (error != 0.0 && int_result < 0);
when FPRounding_TIEAWAY
round_up = (error > 0.5 || (error == 0.5 && int_result >= 0));

if round_up then int_result = int_result + 1;

// Convert integer value into an equivalent real value.
real_result = Real(int_result);

// Re-encode as a floating-point value, result is always exact.
if real_result == 0.0 then
result = FPZero(sign);
else
result = FPRound(real_result, fpcr, FPRounding_ZERO);

// Generate inexact exceptions.
if error != 0.0 && exact then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fproundintn/FPRoundIntN

```// FPRoundIntN()
// =============

bits(N) FPRoundIntN(bits(N) op, FPCRType fpcr, FPRounding rounding, integer intsize)
assert rounding != FPRounding_ODD;
assert N IN {32,64};
assert intsize IN {32, 64};
integer exp;
constant integer E = (if N == 32 then 8 else 11);
constant integer F = N - (E + 1);

// When alternative floating-point support is TRUE, do not generate
// Input Denormal floating-point exceptions.
altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
fpexc = !altfp;

// Unpack using FPCR to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc);

if fptype IN {FPType_SNaN, FPType_QNaN, FPType_Infinity} then
if N == 32 then
exp = 126 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
else
exp = 1022+intsize;
result = '1':exp<(E-1):0>:Zeros(F);
FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Zero then
result = FPZero(sign);
else
// Extract integer component.
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = error > 0.5 || (error == 0.5 && int_result<0> == '1');
when FPRounding_POSINF
round_up = error != 0.0;
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = error != 0.0 && int_result < 0;
when FPRounding_TIEAWAY
round_up = error > 0.5 || (error == 0.5 && int_result >= 0);

if round_up then int_result = int_result + 1;
overflow = int_result > 2^(intsize-1)-1 || int_result < -1*2^(intsize-1);

if overflow then
if N == 32 then
exp = 126 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
else
exp = 1022 + intsize;
result = '1':exp<(E-1):0>:Zeros(F);
FPProcessException(FPExc_InvalidOp, fpcr);
// This case shouldn't set Inexact.
error = 0.0;

else
// Convert integer value into an equivalent real value.
real_result = Real(int_result);

// Re-encode as a floating-point value, result is always exact.
if real_result == 0.0 then
result = FPZero(sign);
else
result = FPRound(real_result, fpcr, FPRounding_ZERO);

// Generate inexact exceptions.
if error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fprsqrtestimate/FPRSqrtEstimate

```// FPRSqrtEstimate()
// =================

bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)
assert N IN {16,32,64};

// When using alternative floating-point behaviour, do not generate
// floating-point exceptions and flush denormal input to zero.
boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
boolean fpexc = !altfp;
if altfp then fpcr.<FIZ,FZ> = '11';

(fptype,sign,value) = FPUnpack(operand, fpcr, fpexc);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, operand, fpcr, fpexc);
elsif fptype == FPType_Zero then
result = FPInfinity(sign);
if fpexc then FPProcessException(FPExc_DivideByZero, fpcr);
elsif sign == '1' then
result = FPDefaultNaN();
if fpexc then FPProcessException(FPExc_InvalidOp, fpcr);
elsif fptype == FPType_Infinity then
result = FPZero('0');
else
// Scale to a fixed-point value in the range 0.25 <= x < 1.0 in steps of 512, with the
// evenness or oddness of the exponent unchanged, and calculate result exponent.
// Scaled value has copied sign bit, exponent = 1022 or 1021 = double-precision
// biased version of -1 or -2, fraction = original fraction extended with zeros.

case N of
when 16
fraction = operand<9:0> : Zeros(42);
exp = UInt(operand<14:10>);
when 32
fraction = operand<22:0> : Zeros(29);
exp = UInt(operand<30:23>);
when 64
fraction = operand<51:0>;
exp = UInt(operand<62:52>);

if exp == 0 then
while fraction<51> == '0' do
fraction = fraction<50:0> : '0';
exp = exp - 1;
fraction = fraction<50:0> : '0';

integer scaled;
boolean increasedprecision = N==32 && HaveFeatRPRES() && altfp;

if !increasedprecision then
if exp<0> == '0' then
scaled = UInt('1':fraction<51:44>);
else
scaled = UInt('01':fraction<51:45>);
else
if exp<0> == '0' then
scaled = UInt('1':fraction<51:41>);
else
scaled = UInt('01':fraction<51:42>);

case N of
when 16 result_exp = (  44 - exp) DIV 2;
when 32 result_exp = ( 380 - exp) DIV 2;
when 64 result_exp = (3068 - exp) DIV 2;

estimate = RecipSqrtEstimate(scaled, increasedprecision);

// Estimate is in the range 256 .. 511 or 4096 .. 8191 representing a
// fixed-point result in the range [1.0 .. 2.0].
// Convert to scaled floating point result with copied sign bit and high-order
// fraction bits, and exponent calculated above.
case N of
when 16 result = '0' : result_exp<N-12:0> : estimate<7:0>:Zeros(2);
when 32
if !increasedprecision then
result = '0' : result_exp<N-25:0> : estimate<7:0>:Zeros(15);
else
result = '0' : result_exp<N-25:0> : estimate<11:0>:Zeros(11);
when 64 result = '0' : result_exp<N-54:0> : estimate<7:0>:Zeros(44);

return result;```

### Library pseudocode for shared/functions/float/fprsqrtestimate/RecipSqrtEstimate

```// RecipSqrtEstimate()
// ===================
// Compute estimate of reciprocal square root of 9-bit fixed-point number.
//
// a is in range 128 .. 511 or 1024 .. 4095, with increased precision,
// representing a number in the range 0.25 <= x < 1.0.
// increasedprecision determines if the mantissa is 8-bit or 12-bit.
// result is in the range 256 .. 511 or 4096 .. 8191, with increased precision,
// representing a number in the range 1.0 to 511/256 or 8191/4096.

integer RecipSqrtEstimate(integer a, boolean increasedprecision)
integer r;
if !increasedprecision then
assert 128 <= a && a < 512;
if a < 256 then                      // 0.25 .. 0.5
a = a*2+1;                       // a in units of 1/512 rounded to nearest
else                                 // 0.5 .. 1.0
a = (a >> 1) << 1;               // Discard bottom bit
a = (a+1)*2;                     // a in units of 1/256 rounded to nearest
integer b = 512;
while a*(b+1)*(b+1) < 2^28 do
b = b+1;
// b = largest b such that b < 2^14 / sqrt(a)
r = (b+1) DIV 2;                     // Round to nearest
assert 256 <= r && r < 512;
else
assert 1024 <= a && a < 4096;
real real_val;
real error;
integer int_val;

if a < 2048 then                     // 0.25... 0.5
a = a*2 + 1;                     // Take 10 bits of fraction and force a 1 at the bottom
real_val = Real(a)/2.0;
else                                 // 0.5..1.0
a = (a >> 1) << 1;               // Discard bottom bit
a = a+1;                         // Taking 10 bits of the fraction and force a 1 at the bottom
real_val = Real(a);

real_val = Sqrt(real_val);           // This number will lie in the range of 32 to 64
// Round to nearest even for a DP float number
real_val = real_val * Real(2^47);    // The integer is the size of the whole DP mantissa
int_val  = RoundDown(real_val);      // Calculate rounding value
error    = real_val - Real(int_val);
round_up = error > 0.5;              // Error cannot be exactly 0.5 so do not need tie case
if round_up then int_val = int_val+1;

real_val = Real(2^65)/Real(int_val); // Lies in the range 4096 <= real_val < 8192
int_val  = RoundDown(real_val);      // Round that (to nearest even) to give integer
error    = real_val - Real(int_val);
round_up = (error > 0.5 || (error == 0.5 && int_val<0> == '1'));
if round_up then int_val = int_val+1;

r = int_val;
assert 4096 <= r && r < 8192;
return r;```

### Library pseudocode for shared/functions/float/fpsqrt/FPSqrt

```// FPSqrt()
// ========

bits(N) FPSqrt(bits(N) op, FPCRType fpcr)
assert N IN {16,32,64};
(fptype,sign,value) = FPUnpack(op, fpcr);

if fptype == FPType_SNaN || fptype == FPType_QNaN then
result = FPProcessNaN(fptype, op, fpcr);
elsif fptype == FPType_Zero then
result = FPZero(sign);
elsif fptype == FPType_Infinity && sign == '0' then
result = FPInfinity(sign);
elsif sign == '1' then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
else
result = FPRound(Sqrt(value), fpcr);

FPProcessDenorm(fptype, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpsub/FPSub

```// FPSub()
// =======

bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)
assert N IN {16,32,64};
rounding = FPRoundingMode(fpcr);
(type1,sign1,value1) = FPUnpack(op1, fpcr);
(type2,sign2,value2) = FPUnpack(op2, fpcr);
(done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr);
if !done then
inf1 = (type1 == FPType_Infinity);
inf2 = (type2 == FPType_Infinity);
zero1 = (type1 == FPType_Zero);
zero2 = (type2 == FPType_Zero);

if inf1 && inf2 && sign1 == sign2 then
result = FPDefaultNaN();
FPProcessException(FPExc_InvalidOp, fpcr);
elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '1') then
result = FPInfinity('0');
elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '0') then
result = FPInfinity('1');
elsif zero1 && zero2 && sign1 == NOT(sign2) then
result = FPZero(sign1);
else
result_value = value1 - value2;
if result_value == 0.0 then  // Sign of exact zero result depends on rounding mode
result_sign = if rounding == FPRounding_NEGINF then '1' else '0';
result = FPZero(result_sign);
else
result = FPRound(result_value, fpcr, rounding);

FPProcessDenorms(type1, type2, N, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fpthree/FPThree

```// FPThree()
// =========

bits(N) FPThree(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '1':Zeros(E-1);
frac = '1':Zeros(F-1);
result = sign : exp : frac;

return result;```

### Library pseudocode for shared/functions/float/fptofixed/FPToFixed

```// FPToFixed()
// ===========

// Convert N-bit precision floating point OP to M-bit fixed point with
// FBITS fractional bits, controlled by UNSIGNED and ROUNDING.

bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)
assert N IN {16,32,64};
assert M IN {16,32,64};
assert fbits >= 0;
assert rounding != FPRounding_ODD;

// When alternative floating-point support is TRUE, do not generate
// Input Denormal floating-point exceptions.
altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1';
fpexc = !altfp;

// Unpack using fpcr to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc);

// If NaN, set cumulative flag or take exception.
if fptype == FPType_SNaN || fptype == FPType_QNaN then
FPProcessException(FPExc_InvalidOp, fpcr);

// Scale by fractional bits and produce integer rounded towards minus-infinity.
value = value * 2.0^fbits;
int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.
case rounding of
when FPRounding_TIEEVEN
round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1'));
when FPRounding_POSINF
round_up = (error != 0.0);
when FPRounding_NEGINF
round_up = FALSE;
when FPRounding_ZERO
round_up = (error != 0.0 && int_result < 0);
when FPRounding_TIEAWAY
round_up = (error > 0.5 || (error == 0.5 && int_result >= 0));

if round_up then int_result = int_result + 1;

// Generate saturated result and exceptions.
(result, overflow) = SatQ(int_result, M, unsigned);
if overflow then
FPProcessException(FPExc_InvalidOp, fpcr);
elsif error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);

return result;```

### Library pseudocode for shared/functions/float/fptofixedjs/FPToFixedJS

```// FPToFixedJS()
// =============

// Converts a double precision floating point input value
// to a signed integer, with rounding to zero.

(bits(N), bit) FPToFixedJS(bits(M) op, FPCRType fpcr, boolean Is64)
assert M == 64 && N == 32;

// If FALSE, never generate Input Denormal floating-point exceptions.
fpexc_idenorm = !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1');

// Unpack using fpcr to determine if subnormals are flushed-to-zero.
(fptype,sign,value) = FPUnpack(op, fpcr, fpexc_idenorm);

Z = '1';
// If NaN, set cumulative flag or take exception.
if fptype == FPType_SNaN || fptype == FPType_QNaN then
FPProcessException(FPExc_InvalidOp, fpcr);
Z = '0';

int_result = RoundDown(value);
error = value - Real(int_result);

// Determine whether supplied rounding mode requires an increment.

round_it_up = (error != 0.0 && int_result < 0);
if round_it_up then int_result = int_result + 1;

if int_result < 0 then
result = int_result - 2^32*RoundUp(Real(int_result)/Real(2^32));
else
result = int_result - 2^32*RoundDown(Real(int_result)/Real(2^32));

// Generate exceptions.
if int_result < -(2^31) || int_result > (2^31)-1 then
FPProcessException(FPExc_InvalidOp, fpcr);
Z = '0';
elsif error != 0.0 then
FPProcessException(FPExc_Inexact, fpcr);
Z = '0';
elsif sign == '1' && value == 0.0 then
Z = '0';
elsif sign == '0' && value == 0.0 && !IsZero(op<51:0>) then
Z = '0';

if fptype == FPType_Infinity then result = 0;

return (result<N-1:0>, Z);```

### Library pseudocode for shared/functions/float/fptwo/FPTwo

```// FPTwo()
// =======

bits(N) FPTwo(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = '1':Zeros(E-1);
frac = Zeros(F);
result = sign : exp : frac;
return result;```

### Library pseudocode for shared/functions/float/fptype/FPType

```enumeration FPType {FPType_Zero,
FPType_Denormal,
FPType_Nonzero,
FPType_Infinity,
FPType_QNaN,
FPType_SNaN};```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpack

```// FPUnpack()
// ==========

(FPType, bit, real) FPUnpack(bits(N) fpval, FPCRType fpcr)
fpcr.AHP = '0';
boolean fpexc = TRUE;   // Generate floating-point exceptions
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc);
return (fp_type, sign, value);

// FPUnpack()
// ==========
//
// Used by data processing and int/fixed <-> FP conversion instructions.
// For half-precision data it ignores AHP, and observes FZ16.

(FPType, bit, real) FPUnpack(bits(N) fpval, FPCRType fpcr, boolean fpexc)
fpcr.AHP = '0';
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc);
return (fp_type, sign, value);```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpackBase

```// FPUnpackBase()
// ==============

(FPType, bit, real) FPUnpackBase(bits(N) fpval, FPCRType fpcr)
boolean fpexc = TRUE;   // Generate floating-point exceptions
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc);
return (fp_type, sign, value);

// FPUnpackBase()
// ==============
//
// Unpack a floating-point number into its type, sign bit and the real number
// that it represents. The real number result has the correct sign for numbers
// and infinities, is very large in magnitude for infinities, and is 0.0 for
// NaNs. (These values are chosen to simplify the description of comparisons
// and conversions.)
//
// The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the
// generation of floating-point exceptions. Status information is updated
// directly in the FPSR where appropriate.

(FPType, bit, real) FPUnpackBase(bits(N) fpval, FPCRType fpcr, boolean fpexc)
assert N IN {16,32,64};

boolean altfp = HaveAltFP() && !UsingAArch32();
boolean fiz   = altfp && fpcr.FIZ == '1';
boolean fz    = fpcr.FZ == '1' && !(altfp && fpcr.AH == '1');

if N == 16 then
sign   = fpval<15>;
exp16  = fpval<14:10>;
frac16 = fpval<9:0>;
if IsZero(exp16) then
if IsZero(frac16) || fpcr.FZ16 == '1' then
fptype = FPType_Zero;  value = 0.0;
else
fptype = FPType_Denormal;  value = 2.0^-14 * (Real(UInt(frac16)) * 2.0^-10);
elsif IsOnes(exp16) && fpcr.AHP == '0' then  // Infinity or NaN in IEEE format
if IsZero(frac16) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac16<9> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp16)-15) * (1.0 + Real(UInt(frac16)) * 2.0^-10);

elsif N == 32 then
sign   = fpval<31>;
exp32  = fpval<30:23>;
frac32 = fpval<22:0>;

if IsZero(exp32) then
if IsZero(frac32) then
// Produce zero if value is zero.
fptype = FPType_Zero;  value = 0.0;
elsif fz || fiz then        // Flush-to-zero if FIZ==1 or AH,FZ==01
fptype = FPType_Zero;  value = 0.0;
// Check whether to raise Input Denormal floating-point exception.
// fpcr.FIZ==1 does not raise Input Denormal exception.
if fz then
// Denormalized input flushed to zero
if fpexc then FPProcessException(FPExc_InputDenorm, fpcr);
else
fptype = FPType_Denormal;  value = 2.0^-126 * (Real(UInt(frac32)) * 2.0^-23);
elsif IsOnes(exp32) then
if IsZero(frac32) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac32<22> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp32)-127) * (1.0 + Real(UInt(frac32)) * 2.0^-23);

else // N == 64
sign   = fpval<63>;
exp64  = fpval<62:52>;
frac64 = fpval<51:0>;

if IsZero(exp64) then
if IsZero(frac64) then
// Produce zero if value is zero.
fptype = FPType_Zero;  value = 0.0;
elsif fz || fiz then        // Flush-to-zero if FIZ==1 or AH,FZ==01
fptype = FPType_Zero;  value = 0.0;
// Check whether to raise Input Denormal floating-point exception.
// fpcr.FIZ==1 does not raise Input Denormal exception.
if fz then
// Denormalized input flushed to zero
if fpexc then FPProcessException(FPExc_InputDenorm, fpcr);
else
fptype = FPType_Denormal;  value = 2.0^-1022 * (Real(UInt(frac64)) * 2.0^-52);
elsif IsOnes(exp64) then
if IsZero(frac64) then
fptype = FPType_Infinity;  value = 2.0^1000000;
else
fptype = if frac64<51> == '1' then FPType_QNaN else FPType_SNaN;
value = 0.0;
else
fptype = FPType_Nonzero;
value = 2.0^(UInt(exp64)-1023) * (1.0 + Real(UInt(frac64)) * 2.0^-52);

if sign == '1' then value = -value;

return (fptype, sign, value);```

### Library pseudocode for shared/functions/float/fpunpack/FPUnpackCV

```// FPUnpackCV()
// ============
//
// Used for FP <-> FP conversion instructions.
// For half-precision data ignores FZ16 and observes AHP.

(FPType, bit, real) FPUnpackCV(bits(N) fpval, FPCRType fpcr)
fpcr.FZ16 = '0';
boolean fpexc = TRUE;   // Generate floating-point exceptions
(fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc);
return (fp_type, sign, value);```

### Library pseudocode for shared/functions/float/fpzero/FPZero

```// FPZero()
// ========

bits(N) FPZero(bit sign)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - (E + 1);
exp  = Zeros(E);
frac = Zeros(F);
result = sign : exp : frac;
return result;```

### Library pseudocode for shared/functions/float/vfpexpandimm/VFPExpandImm

```// VFPExpandImm()
// ==============

bits(N) VFPExpandImm(bits(8) imm8)
assert N IN {16,32,64};
constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11);
constant integer F = N - E - 1;
sign = imm8<7>;
exp  = NOT(imm8<6>):Replicate(imm8<6>,E-3):imm8<5:4>;
frac = imm8<3:0>:Zeros(F-4);
result = sign : exp : frac;

return result;```