You copied the Doc URL to your clipboard.
Shared Functions.Float Pseudocode
Library pseudocode for shared/functions/float/bfloat/BFAdd
// BFAdd() // ======= // Single-precision add following BFloat16 computation behaviors. bits(32) BFAdd(bits(32) op1, bits(32) op2) bits(32) result; (type1,sign1,value1) = BFUnpack(op1); (type2,sign2,value2) = BFUnpack(op2); if type1 == FPType_QNaN || type2 == FPType_QNaN then result = FPDefaultNaN(); else inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if inf1 && inf2 && sign1 == NOT(sign2) then result = FPDefaultNaN(); elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then result = FPInfinity('0'); elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then result = FPInfinity('1'); elsif zero1 && zero2 && sign1 == sign2 then result = FPZero(sign1); else result_value = value1 + value2; if result_value == 0.0 then result = FPZero('0'); // Positive sign when Round to Odd else result = BFRound(result_value); return result;
Library pseudocode for shared/functions/float/bfloat/BFMatMulAdd
// BFMatMulAdd() // ============= // BFloat16 matrix multiply and add to single-precision matrix // result[2, 2] = addend[2, 2] + (op1[2, 4] * op2[4, 2]) bits(N) BFMatMulAdd(bits(N) addend, bits(N) op1, bits(N) op2) assert N == 128; bits(N) result; bits(32) sum, prod0, prod1; for i = 0 to 1 for j = 0 to 1 sum = Elem[addend, 2*i + j, 32]; for k = 0 to 1 prod0 = BFMul(Elem[op1, 4*i + 2*k + 0, 16], Elem[op2, 4*j + 2*k + 0, 16]); prod1 = BFMul(Elem[op1, 4*i + 2*k + 1, 16], Elem[op2, 4*j + 2*k + 1, 16]); sum = BFAdd(sum, BFAdd(prod0, prod1)); Elem[result, 2*i + j, 32] = sum; return result;
Library pseudocode for shared/functions/float/bfloat/BFMul
// BFMul() // ======= // BFloat16 widening multiply to single-precision following BFloat16 // computation behaviors. bits(32) BFMul(bits(16) op1, bits(16) op2) bits(32) result; (type1,sign1,value1) = BFUnpack(op1); (type2,sign2,value2) = BFUnpack(op2); if type1 == FPType_QNaN || type2 == FPType_QNaN then result = FPDefaultNaN(); else inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if (inf1 && zero2) || (zero1 && inf2) then result = FPDefaultNaN(); elsif inf1 || inf2 then result = FPInfinity(sign1 EOR sign2); elsif zero1 || zero2 then result = FPZero(sign1 EOR sign2); else result = BFRound(value1*value2); return result;
Library pseudocode for shared/functions/float/bfloat/BFMulAdd
// BFMulAdd() // ========== // Used by BFMLALB and BFMLALT instructions. bits(N) BFMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr) boolean altfp = HaveAltFP() && fpcr.AH == '1'; // When TRUE: boolean fpexc = !altfp; // Do not generate floating point exceptions if altfp then fpcr.<FIZ,FZ> = '11'; // Flush denormal input and output to zero if altfp then fpcr.RMode = '00'; // Use RNE rounding mode return FPMulAdd(addend, op1, op2, fpcr, fpexc);
Library pseudocode for shared/functions/float/bfloat/BFRound
// BFRound() // ========= // Converts a real number OP into a single-precision value using the // Round to Odd rounding mode and following BFloat16 computation behaviors. bits(32) BFRound(real op) assert op != 0.0; bits(32) result; // Format parameters - minimum exponent, numbers of exponent and fraction bits. minimum_exp = -126; E = 8; F = 23; // Split value into sign, unrounded mantissa and exponent. if op < 0.0 then sign = '1'; mantissa = -op; else sign = '0'; mantissa = op; exponent = 0; while mantissa < 1.0 do mantissa = mantissa * 2.0; exponent = exponent - 1; while mantissa >= 2.0 do mantissa = mantissa / 2.0; exponent = exponent + 1; // Fixed Flush-to-zero. if exponent < minimum_exp then return FPZero(sign); // Start creating the exponent value for the result. Start by biasing the actual exponent // so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow). biased_exp = Max(exponent - minimum_exp + 1, 0); if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent); // Get the unrounded mantissa as an integer, and the "units in last place" rounding error. int_mant = RoundDown(mantissa * 2.0^F); // < 2.0^F if biased_exp == 0, >= 2.0^F if not error = mantissa * 2.0^F - Real(int_mant); // Round to Odd if error != 0.0 then int_mant<0> = '1'; // Deal with overflow and generate result. if biased_exp >= 2^E - 1 then result = FPInfinity(sign); // Overflows generate appropriately-signed Infinity else result = sign : biased_exp<30-F:0> : int_mant<F-1:0>; return result;
Library pseudocode for shared/functions/float/bfloat/BFUnpack
// BFUnpack() // ========== // Unpacks a BFloat16 or single-precision value into its type, // sign bit and real number that it represents. // The real number result has the correct sign for numbers and infinities, // is very large in magnitude for infinities, and is 0.0 for NaNs. // (These values are chosen to simplify the description of // comparisons and conversions.) (FPType, bit, real) BFUnpack(bits(N) fpval) assert N IN {16,32}; if N == 16 then sign = fpval<15>; exp = fpval<14:7>; frac = fpval<6:0> : Zeros(16); else // N == 32 sign = fpval<31>; exp = fpval<30:23>; frac = fpval<22:0>; if IsZero(exp) then fptype = FPType_Zero; value = 0.0; // Fixed Flush to Zero elsif IsOnes(exp) then if IsZero(frac) then fptype = FPType_Infinity; value = 2.0^1000000; else // no SNaN for BF16 arithmetic fptype = FPType_QNaN; value = 0.0; else fptype = FPType_Nonzero; value = 2.0^(UInt(exp)-127) * (1.0 + Real(UInt(frac)) * 2.0^-23); if sign == '1' then value = -value; return (fptype, sign, value);
Library pseudocode for shared/functions/float/bfloat/FPConvertBF
// FPConvertBF() // ============= // Converts a single-precision OP to BFloat16 value with using rounding mode of // Round to Nearest Even when executed from AArch64 state and // FPCR.AH == '1', otherwise rounding is controlled by FPCR/FPSCR. bits(16) FPConvertBF(bits(32) op, FPCRType fpcr, FPRounding rounding) bits(32) result; // BF16 value in top 16 bits boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; boolean fpexc = !altfp; // Generate no floating-point exceptions if altfp then fpcr.<FIZ,FZ> = '11'; // Flush denormal input and output to zero if altfp then rounding = FPRounding_TIEEVEN; // Use RNE rounding mode // Unpack floating-point operand, with always flush-to-zero if fpcr.AH == '1'. (fptype,sign,value) = FPUnpack(op, fpcr, fpexc); if fptype == FPType_SNaN || fptype == FPType_QNaN then if fpcr.DN == '1' then result = FPDefaultNaN(); else result = FPConvertNaN(op); if fptype == FPType_SNaN then if fpexc then FPProcessException(FPExc_InvalidOp, fpcr); elsif fptype == FPType_Infinity then result = FPInfinity(sign); elsif fptype == FPType_Zero then result = FPZero(sign); else result = FPRoundCVBF(value, fpcr, rounding, fpexc); // Returns correctly rounded BF16 value from top 16 bits return result<31:16>; // FPConvertBF() // ============= // Converts a single-precision operand to BFloat16 value. bits(16) FPConvertBF(bits(32) op, FPCRType fpcr) return FPConvertBF(op, fpcr, FPRoundingMode(fpcr));
Library pseudocode for shared/functions/float/bfloat/FPRoundCVBF
// FPRoundCVBF() // ============= // Converts a real number OP into a BFloat16 value using the supplied // rounding mode RMODE. The 'fpexc' argument controls the generation of // floating-point exceptions. bits(32) FPRoundCVBF(real op, FPCRType fpcr, FPRounding rounding, boolean fpexc) boolean isbfloat16 = TRUE; return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);
Library pseudocode for shared/functions/float/fixedtofp/FixedToFP
// FixedToFP() // =========== // Convert M-bit fixed point OP with FBITS fractional bits to // N-bit precision floating point, controlled by UNSIGNED and ROUNDING. bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding) assert N IN {16,32,64}; assert M IN {16,32,64}; bits(N) result; assert fbits >= 0; assert rounding != FPRounding_ODD; // Correct signed-ness int_operand = Int(op, unsigned); // Scale by fractional bits and generate a real value real_operand = Real(int_operand) / 2.0^fbits; if real_operand == 0.0 then result = FPZero('0'); else result = FPRound(real_operand, fpcr, rounding); return result;
Library pseudocode for shared/functions/float/fpabs/FPAbs
// FPAbs() // ======= bits(N) FPAbs(bits(N) op) assert N IN {16,32,64}; if !UsingAArch32() && HaveAltFP() then FPCRType fpcr = FPCR[]; if fpcr.AH == '1' then (fptype, -, -) = FPUnpack(op, fpcr, FALSE); if fptype IN {FPType_SNaN, FPType_QNaN} then return op; // When fpcr.AH=1, sign of NaN has no consequence return '0' : op<N-2:0>;
Library pseudocode for shared/functions/float/fpadd/FPAdd
// FPAdd() // ======= bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; rounding = FPRoundingMode(fpcr); (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr); if !done then inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if inf1 && inf2 && sign1 == NOT(sign2) then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '0') then result = FPInfinity('0'); elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '1') then result = FPInfinity('1'); elsif zero1 && zero2 && sign1 == sign2 then result = FPZero(sign1); else result_value = value1 + value2; if result_value == 0.0 then // Sign of exact zero result depends on rounding mode result_sign = if rounding == FPRounding_NEGINF then '1' else '0'; result = FPZero(result_sign); else result = FPRound(result_value, fpcr, rounding); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpcompare/FPCompare
// FPCompare() // =========== bits(4) FPCompare(bits(N) op1, bits(N) op2, boolean signal_nans, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then result = '0011'; if type1 == FPType_SNaN || type2 == FPType_SNaN || signal_nans then FPProcessException(FPExc_InvalidOp, fpcr); else // All non-NaN cases can be evaluated on the values produced by FPUnpack() if value1 == value2 then result = '0110'; elsif value1 < value2 then result = '1000'; else // value1 > value2 result = '0010'; FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpcompareeq/FPCompareEQ
// FPCompareEQ() // ============= boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then result = FALSE; if type1 == FPType_SNaN || type2 == FPType_SNaN then FPProcessException(FPExc_InvalidOp, fpcr); else // All non-NaN cases can be evaluated on the values produced by FPUnpack() result = (value1 == value2); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpcomparege/FPCompareGE
// FPCompareGE() // ============= boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then result = FALSE; FPProcessException(FPExc_InvalidOp, fpcr); else // All non-NaN cases can be evaluated on the values produced by FPUnpack() result = (value1 >= value2); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpcomparegt/FPCompareGT
// FPCompareGT() // ============= boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if type1 IN {FPType_SNaN, FPType_QNaN} || type2 IN {FPType_SNaN, FPType_QNaN} then result = FALSE; FPProcessException(FPExc_InvalidOp, fpcr); else // All non-NaN cases can be evaluated on the values produced by FPUnpack() result = (value1 > value2); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpconvert/FPConvert
// FPConvert() // =========== // Convert floating point OP with N-bit precision to M-bit precision, // with rounding controlled by ROUNDING. // This is used by the FP-to-FP conversion instructions and so for // half-precision data ignores FZ16, but observes AHP. bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding) assert M IN {16,32,64}; assert N IN {16,32,64}; bits(M) result; // Unpack floating-point operand optionally with flush-to-zero. (fptype,sign,value) = FPUnpackCV(op, fpcr); alt_hp = (M == 16) && (fpcr.AHP == '1'); if fptype == FPType_SNaN || fptype == FPType_QNaN then if alt_hp then result = FPZero(sign); elsif fpcr.DN == '1' then result = FPDefaultNaN(); else result = FPConvertNaN(op); if fptype == FPType_SNaN || alt_hp then FPProcessException(FPExc_InvalidOp,fpcr); elsif fptype == FPType_Infinity then if alt_hp then result = sign:Ones(M-1); FPProcessException(FPExc_InvalidOp, fpcr); else result = FPInfinity(sign); elsif fptype == FPType_Zero then result = FPZero(sign); else result = FPRoundCV(value, fpcr, rounding); FPProcessDenorm(fptype, N, fpcr); return result; // FPConvert() // =========== bits(M) FPConvert(bits(N) op, FPCRType fpcr) return FPConvert(op, fpcr, FPRoundingMode(fpcr));
Library pseudocode for shared/functions/float/fpconvertnan/FPConvertNaN
// FPConvertNaN() // ============== // Converts a NaN of one floating-point type to another bits(M) FPConvertNaN(bits(N) op) assert N IN {16,32,64}; assert M IN {16,32,64}; bits(M) result; bits(51) frac; sign = op<N-1>; // Unpack payload from input NaN case N of when 64 frac = op<50:0>; when 32 frac = op<21:0>:Zeros(29); when 16 frac = op<8:0>:Zeros(42); // Repack payload into output NaN, while // converting an SNaN to a QNaN. case M of when 64 result = sign:Ones(M-52):frac; when 32 result = sign:Ones(M-23):frac<50:29>; when 16 result = sign:Ones(M-10):frac<50:42>; return result;
Library pseudocode for shared/functions/float/fpdecoderm/FPDecodeRM
// FPDecodeRM() // ============ // Decode most common AArch32 floating-point rounding encoding. FPRounding FPDecodeRM(bits(2) rm) case rm of when '00' result = FPRounding_TIEAWAY; // A when '01' result = FPRounding_TIEEVEN; // N when '10' result = FPRounding_POSINF; // P when '11' result = FPRounding_NEGINF; // M return result;
Library pseudocode for shared/functions/float/fpdecoderounding/FPDecodeRounding
// FPDecodeRounding() // ================== // Decode floating-point rounding mode and common AArch64 encoding. FPRounding FPDecodeRounding(bits(2) rmode) case rmode of when '00' return FPRounding_TIEEVEN; // N when '01' return FPRounding_POSINF; // P when '10' return FPRounding_NEGINF; // M when '11' return FPRounding_ZERO; // Z
Library pseudocode for shared/functions/float/fpdefaultnan/FPDefaultNaN
// FPDefaultNaN() // ============== bits(N) FPDefaultNaN() assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); bit sign; if !UsingAArch32() && HaveAltFP() then FPCRType fpcr = FPCR[]; sign = if fpcr.AH == '1' then '1' else '0'; else sign = '0'; bits(E) exp = Ones(E); bits(F) frac = '1':Zeros(F-1); return sign : exp : frac;
Library pseudocode for shared/functions/float/fpdiv/FPDiv
// FPDiv() // ======= bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr); if !done then inf1 = type1 == FPType_Infinity; inf2 = type2 == FPType_Infinity; zero1 = type1 == FPType_Zero; zero2 = type2 == FPType_Zero; if (inf1 && inf2) || (zero1 && zero2) then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); elsif inf1 || zero2 then result = FPInfinity(sign1 EOR sign2); if !inf1 then FPProcessException(FPExc_DivideByZero, fpcr); elsif zero1 || inf2 then result = FPZero(sign1 EOR sign2); else result = FPRound(value1/value2, fpcr); if !zero2 then FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpexc/FPExc
enumeration FPExc {FPExc_InvalidOp, FPExc_DivideByZero, FPExc_Overflow, FPExc_Underflow, FPExc_Inexact, FPExc_InputDenorm};
Library pseudocode for shared/functions/float/fpinfinity/FPInfinity
// FPInfinity() // ============ bits(N) FPInfinity(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); bits(E) exp = Ones(E); bits(F) frac = Zeros(F); return sign : exp : frac;
Library pseudocode for shared/functions/float/fpmatmul/FPMatMulAdd
// FPMatMulAdd() // ============= // // Floating point matrix multiply and add to same precision matrix // result[2, 2] = addend[2, 2] + (op1[2, 2] * op2[2, 2]) bits(N) FPMatMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, integer esize, FPCRType fpcr) assert N == esize * 2 * 2; bits(N) result; bits(esize) prod0, prod1, sum; for i = 0 to 1 for j = 0 to 1 sum = Elem[addend, 2*i + j, esize]; prod0 = FPMul(Elem[op1, 2*i + 0, esize], Elem[op2, 2*j + 0, esize], fpcr); prod1 = FPMul(Elem[op1, 2*i + 1, esize], Elem[op2, 2*j + 1, esize], fpcr); sum = FPAdd(sum, FPAdd(prod0, prod1, fpcr), fpcr); Elem[result, 2*i + j, esize] = sum; return result;
Library pseudocode for shared/functions/float/fpmax/FPMax
// FPMax() // ======= bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr) boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; return FPMax(op1, op2, fpcr, altfp); // FPMax() // ======= // Compare two inputs and return the larger value after rounding. The // 'fpcr' argument supplies the FPCR control bits and 'altfp' determines // if the function should use alternative floating-point behaviour. bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr, boolean altfp) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if (altfp && type1 == FPType_Zero && type2 == FPType_Zero && ((sign1 == '0' && sign2 == '1') || (sign1 == '1' && sign2 == '0'))) then return FPZero(sign2); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr, altfp, TRUE); if !done then if value1 > value2 then (fptype,sign,value) = (type1,sign1,value1); else (fptype,sign,value) = (type2,sign2,value2); if fptype == FPType_Infinity then result = FPInfinity(sign); elsif fptype == FPType_Zero then sign = sign1 AND sign2; // Use most positive sign result = FPZero(sign); else // The use of FPRound() covers the case where there is a trapped underflow exception // for a denormalized number even though the result is exact. rounding = FPRoundingMode(fpcr); if altfp then // Denormal output is not flushed to zero fpcr.FZ = '0'; fpcr.FZ16 = '0'; result = FPRound(value, fpcr, rounding, TRUE); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpmaxnormal/FPMaxNormal
// FPMaxNormal() // ============= bits(N) FPMaxNormal(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); exp = Ones(E-1):'0'; frac = Ones(F); return sign : exp : frac;
Library pseudocode for shared/functions/float/fpmaxnum/FPMaxNum
// FPMaxNum() // ========== bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,-,-) = FPUnpack(op1, fpcr); (type2,-,-) = FPUnpack(op2, fpcr); boolean type1_nan = type1 IN {FPType_QNaN, FPType_SNaN}; boolean type2_nan = type2 IN {FPType_QNaN, FPType_SNaN}; boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if !(altfp && type1_nan && type2_nan) then // Treat a single quiet-NaN as -Infinity. if type1 == FPType_QNaN && type2 != FPType_QNaN then op1 = FPInfinity('1'); elsif type1 != FPType_QNaN && type2 == FPType_QNaN then op2 = FPInfinity('1'); altfmaxfmin = FALSE; // Restrict use of FMAX/FMIN NaN propagation rules result = FPMax(op1, op2, fpcr, altfmaxfmin); return result;
Library pseudocode for shared/functions/float/fpmerge/IsMerging
// IsMerging() // =========== // Returns TRUE if the output elements other than the lowest are taken from // the destination register. boolean IsMerging(FPCRType fpcr) boolean merge = HaveAltFP() && !UsingAArch32() && fpcr.NEP == '1'; return merge;
Library pseudocode for shared/functions/float/fpmin/FPMin
// FPMin() // ======= bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr) boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; return FPMin(op1, op2, fpcr, altfp); // FPMin() // ======= // Compare two operands and return the smaller operand after rounding. The // 'fpcr' argument supplies the FPCR control bits and 'altfp' determines // if the function should use alternative behaviour. bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr, boolean altfp) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); if (altfp && type1 == FPType_Zero && type2 == FPType_Zero && ((sign1 == '0' && sign2 == '1') || (sign1 == '1' && sign2 == '0'))) then return FPZero(sign2); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr, altfp, TRUE); if !done then if value1 < value2 then (fptype,sign,value) = (type1,sign1,value1); else (fptype,sign,value) = (type2,sign2,value2); if fptype == FPType_Infinity then result = FPInfinity(sign); elsif fptype == FPType_Zero then sign = sign1 OR sign2; // Use most negative sign result = FPZero(sign); else // The use of FPRound() covers the case where there is a trapped underflow exception // for a denormalized number even though the result is exact. rounding = FPRoundingMode(fpcr); if altfp then // Denormal output is not flushed to zero fpcr.FZ = '0'; fpcr.FZ16 = '0'; result = FPRound(value, fpcr, rounding, TRUE); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpminnum/FPMinNum
// FPMinNum() // ========== bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,-,-) = FPUnpack(op1, fpcr); (type2,-,-) = FPUnpack(op2, fpcr); boolean type1_nan = type1 IN {FPType_QNaN, FPType_SNaN}; boolean type2_nan = type2 IN {FPType_QNaN, FPType_SNaN}; boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if !(altfp && type1_nan && type2_nan) then // Treat a single quiet-NaN as +Infinity. if type1 == FPType_QNaN && type2 != FPType_QNaN then op1 = FPInfinity('0'); elsif type1 != FPType_QNaN && type2 == FPType_QNaN then op2 = FPInfinity('0'); altfmaxfmin = FALSE; // Restrict use of FMAX/FMIN NaN propagation rules result = FPMin(op1, op2, fpcr, altfmaxfmin); return result;
Library pseudocode for shared/functions/float/fpmul/FPMul
// FPMul() // ======= bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr); if !done then inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if (inf1 && zero2) || (zero1 && inf2) then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); elsif inf1 || inf2 then result = FPInfinity(sign1 EOR sign2); elsif zero1 || zero2 then result = FPZero(sign1 EOR sign2); else result = FPRound(value1*value2, fpcr); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpmuladd/FPMulAdd
// FPMulAdd() // ========== bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr) boolean fpexc = TRUE; // Generate floating-point exceptions return FPMulAdd(addend, op1, op2, fpcr, fpexc); // FPMulAdd() // ========== // // Calculates addend + op1*op2 with a single rounding. The 'fpcr' argument // supplies the FPCR control bits, and 'fpexc' controls the generation of // floating-point exceptions. bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr, boolean fpexc) assert N IN {16,32,64}; (typeA,signA,valueA) = FPUnpack(addend, fpcr, fpexc); (type1,sign1,value1) = FPUnpack(op1, fpcr, fpexc); (type2,sign2,value2) = FPUnpack(op2, fpcr, fpexc); rounding = FPRoundingMode(fpcr); inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero); inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero); (done,result) = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, fpcr, fpexc); if !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1') then if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); if !done then infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero); // Determine sign and type product will have if it does not cause an // Invalid Operation. signP = sign1 EOR sign2; infP = inf1 || inf2; zeroP = zero1 || zero2; // Non SNaN-generated Invalid Operation cases are multiplies of zero // by infinity and additions of opposite-signed infinities. invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP); if invalidop then result = FPDefaultNaN(); if fpexc then FPProcessException(FPExc_InvalidOp, fpcr); // Other cases involving infinities produce an infinity of the same sign. elsif (infA && signA == '0') || (infP && signP == '0') then result = FPInfinity('0'); elsif (infA && signA == '1') || (infP && signP == '1') then result = FPInfinity('1'); // Cases where the result is exactly zero and its sign is not determined by the // rounding mode are additions of same-signed zeros. elsif zeroA && zeroP && signA == signP then result = FPZero(signA); // Otherwise calculate numerical result and round it. else result_value = valueA + (value1 * value2); if result_value == 0.0 then // Sign of exact zero result depends on rounding mode result_sign = if rounding == FPRounding_NEGINF then '1' else '0'; result = FPZero(result_sign); else result = FPRound(result_value, fpcr, rounding, fpexc); if !invalidop && fpexc then FPProcessDenorms3(typeA, type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpmuladdh/FPMulAddH
// FPMulAddH() // =========== // Calculates addend + op1*op2. bits(N) FPMulAddH(bits(N) addend, bits(N DIV 2) op1, bits(N DIV 2) op2, FPCRType fpcr) assert N == 32; rounding = FPRoundingMode(fpcr); (typeA,signA,valueA) = FPUnpack(addend, fpcr); (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); inf1 = (type1 == FPType_Infinity); zero1 = (type1 == FPType_Zero); inf2 = (type2 == FPType_Infinity); zero2 = (type2 == FPType_Zero); (done,result) = FPProcessNaNs3H(typeA, type1, type2, addend, op1, op2, fpcr); if !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1') then if typeA == FPType_QNaN && ((inf1 && zero2) || (zero1 && inf2)) then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); if !done then infA = (typeA == FPType_Infinity); zeroA = (typeA == FPType_Zero); // Determine sign and type product will have if it does not cause an // Invalid Operation. signP = sign1 EOR sign2; infP = inf1 || inf2; zeroP = zero1 || zero2; // Non SNaN-generated Invalid Operation cases are multiplies of zero by infinity and // additions of opposite-signed infinities. invalidop = (inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP); if invalidop then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); // Other cases involving infinities produce an infinity of the same sign. elsif (infA && signA == '0') || (infP && signP == '0') then result = FPInfinity('0'); elsif (infA && signA == '1') || (infP && signP == '1') then result = FPInfinity('1'); // Cases where the result is exactly zero and its sign is not determined by the // rounding mode are additions of same-signed zeros. elsif zeroA && zeroP && signA == signP then result = FPZero(signA); // Otherwise calculate numerical result and round it. else result_value = valueA + (value1 * value2); if result_value == 0.0 then // Sign of exact zero result depends on rounding mode result_sign = if rounding == FPRounding_NEGINF then '1' else '0'; result = FPZero(result_sign); else result = FPRound(result_value, fpcr); if !invalidop then FPProcessDenorm(typeA, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpmuladdh/FPProcessNaNs3H
// FPProcessNaNs3H() // ================= (boolean, bits(N)) FPProcessNaNs3H(FPType type1, FPType type2, FPType type3, bits(N) op1, bits(N DIV 2) op2, bits(N DIV 2) op3, FPCRType fpcr) assert N IN {32,64}; bits(N) result; // When TRUE, use alternative NaN propagation rules. boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; boolean op1_nan = type1 IN {FPType_SNaN, FPType_QNaN}; boolean op2_nan = type2 IN {FPType_SNaN, FPType_QNaN}; boolean op3_nan = type3 IN {FPType_SNaN, FPType_QNaN}; boolean fpexc = TRUE; if altfp then if (type1 == FPType_SNaN || type2 == FPType_SNaN || type3 == FPType_SNaN) then type_nan = FPType_SNaN; else type_nan = FPType_QNaN; if altfp && op1_nan && op2_nan && op3_nan then done = TRUE; result = FPConvertNaN(FPProcessNaN(type_nan, op2, fpcr, fpexc)); // <n> register NaN selected elsif altfp && op2_nan && (op1_nan || op3_nan) then done = TRUE; result = FPConvertNaN(FPProcessNaN(type_nan, op2, fpcr, fpexc)); // <n> register NaN selected elsif altfp && op3_nan && op1_nan then done = TRUE; result = FPConvertNaN(FPProcessNaN(type_nan, op3, fpcr, fpexc)); // <m> register NaN selected elsif type1 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_SNaN then done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr, fpexc)); elsif type3 == FPType_SNaN then done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr, fpexc)); elsif type1 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_QNaN then done = TRUE; result = FPConvertNaN(FPProcessNaN(type2, op2, fpcr, fpexc)); elsif type3 == FPType_QNaN then done = TRUE; result = FPConvertNaN(FPProcessNaN(type3, op3, fpcr, fpexc)); else done = FALSE; result = Zeros(); // 'Don't care' result return (done, result);
Library pseudocode for shared/functions/float/fpmulx/FPMulX
// FPMulX() // ======== bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; bits(N) result; (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr); if !done then inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if (inf1 && zero2) || (zero1 && inf2) then result = FPTwo(sign1 EOR sign2); elsif inf1 || inf2 then result = FPInfinity(sign1 EOR sign2); elsif zero1 || zero2 then result = FPZero(sign1 EOR sign2); else result = FPRound(value1*value2, fpcr); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpneg/FPNeg
// FPNeg() // ======= bits(N) FPNeg(bits(N) op) assert N IN {16,32,64}; if !UsingAArch32() && HaveAltFP() then FPCRType fpcr = FPCR[]; if fpcr.AH == '1' then (fptype, -, -) = FPUnpack(op, fpcr, FALSE); if fptype IN {FPType_SNaN, FPType_QNaN} then return op; // When fpcr.AH=1, sign of NaN has no consequence return NOT(op<N-1>) : op<N-2:0>;
Library pseudocode for shared/functions/float/fponepointfive/FPOnePointFive
// FPOnePointFive() // ================ bits(N) FPOnePointFive(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); exp = '0':Ones(E-1); frac = '1':Zeros(F-1); result = sign : exp : frac; return result;
Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorm
// FPProcessDenorm() // ================= // Handles denormal input in case of single-precision or double-precision // when using alternative floating-point mode. FPProcessDenorm(FPType fptype, integer N, FPCRType fpcr) boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if altfp && N != 16 && fptype == FPType_Denormal then FPProcessException(FPExc_InputDenorm, fpcr);
Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorms
// FPProcessDenorms() // ================== // Handles denormal input in case of single-precision or double-precision // when using alternative floating-point mode. FPProcessDenorms(FPType type1, FPType type2, integer N, FPCRType fpcr) boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if altfp && N != 16 && (type1 == FPType_Denormal || type2 == FPType_Denormal) then FPProcessException(FPExc_InputDenorm, fpcr);
Library pseudocode for shared/functions/float/fpprocessdenorms/FPProcessDenorms3
// FPProcessDenorms3() // =================== // Handles denormal input in case of single-precision or double-precision // when using alternative floating-point mode. FPProcessDenorms3(FPType type1, FPType type2, FPType type3, integer N, FPCRType fpcr) boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if altfp && N != 16 && (type1 == FPType_Denormal || type2 == FPType_Denormal || type3 == FPType_Denormal) then FPProcessException(FPExc_InputDenorm, fpcr);
Library pseudocode for shared/functions/float/fpprocessexception/FPProcessException
// FPProcessException() // ==================== // // The 'fpcr' argument supplies FPCR control bits. Status information is // updated directly in the FPSR where appropriate. FPProcessException(FPExc exception, FPCRType fpcr) // Determine the cumulative exception bit number case exception of when FPExc_InvalidOp cumul = 0; when FPExc_DivideByZero cumul = 1; when FPExc_Overflow cumul = 2; when FPExc_Underflow cumul = 3; when FPExc_Inexact cumul = 4; when FPExc_InputDenorm cumul = 7; enable = cumul + 8; if fpcr<enable> == '1' then // Trapping of the exception enabled. // It is IMPLEMENTATION DEFINED whether the enable bit may be set at all, and // if so then how exceptions may be accumulated before calling FPTrappedException() IMPLEMENTATION_DEFINED "floating-point trap handling"; elsif UsingAArch32() then // Set the cumulative exception bit FPSCR<cumul> = '1'; else // Set the cumulative exception bit FPSR<cumul> = '1'; return;
Library pseudocode for shared/functions/float/fpprocessnan/FPProcessNaN
// FPProcessNaN() // ============== bits(N) FPProcessNaN(FPType fptype, bits(N) op, FPCRType fpcr) boolean fpexc = TRUE; // Generate floating-point exceptions return FPProcessNaN(fptype, op, fpcr, fpexc); // FPProcessNaN() // ============== // Handle NaN input operands, returning the operand or default NaN value // if fpcr.DN is selected. The 'fpcr' argument supplies the FPCR control bits. // The 'fpexc' argument controls the generation of exceptions, regardless of // whether 'fptype' is a signalling NaN or a quiet NaN. bits(N) FPProcessNaN(FPType fptype, bits(N) op, FPCRType fpcr, boolean fpexc) assert N IN {16,32,64}; assert fptype IN {FPType_QNaN, FPType_SNaN}; case N of when 16 topfrac = 9; when 32 topfrac = 22; when 64 topfrac = 51; result = op; if fptype == FPType_SNaN then result<topfrac> = '1'; if fpexc then FPProcessException(FPExc_InvalidOp, fpcr); if fpcr.DN == '1' then // DefaultNaN requested result = FPDefaultNaN(); return result;
Library pseudocode for shared/functions/float/fpprocessnans/FPProcessNaNs
// FPProcessNaNs() // =============== (boolean, bits(N)) FPProcessNaNs(FPType type1, FPType type2, bits(N) op1, bits(N) op2, FPCRType fpcr) boolean altfmaxfmin = FALSE; // Do not use alfp mode for FMIN, FMAX and variants boolean fpexc = TRUE; // Generate floating-point exceptions return FPProcessNaNs(type1, type2, op1, op2, fpcr, altfmaxfmin, fpexc); // FPProcessNaNs() // =============== // // The boolean part of the return value says whether a NaN has been found and // processed. The bits(N) part is only relevant if it has and supplies the // result of the operation. // // The 'fpcr' argument supplies FPCR control bits and 'altfmaxfmin' controls // alternative floating-point behaviour for FMAX, FMIN and variants. 'fpexc' // controls the generation of floating-point exceptions. Status information // is updated directly in the FPSR where appropriate. (boolean, bits(N)) FPProcessNaNs(FPType type1, FPType type2, bits(N) op1, bits(N) op2, FPCRType fpcr, boolean altfmaxfmin, boolean fpexc) assert N IN {16,32,64}; boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; boolean op1_nan = type1 IN {FPType_SNaN, FPType_QNaN}; boolean op2_nan = type2 IN {FPType_SNaN, FPType_QNaN}; boolean any_snan = type1 == FPType_SNaN || type2 == FPType_SNaN; FPType type_nan = if any_snan then FPType_SNaN else FPType_QNaN; if altfmaxfmin && (op1_nan || op2_nan) then FPProcessException(FPExc_InvalidOp, fpcr); done = TRUE; sign2 = op2<N-1>; result = if type2 == FPType_Zero then FPZero(sign2) else op2; elsif altfp && op1_nan && op2_nan then done = TRUE; result = FPProcessNaN(type_nan, op1, fpcr, fpexc); // <n> register NaN selected elsif type1 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type2, op2, fpcr, fpexc); elsif type1 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type2, op2, fpcr, fpexc); else done = FALSE; result = Zeros(); // 'Don't care' result return (done, result);
Library pseudocode for shared/functions/float/fpprocessnans3/FPProcessNaNs3
// FPProcessNaNs3() // ================ (boolean, bits(N)) FPProcessNaNs3(FPType type1, FPType type2, FPType type3, bits(N) op1, bits(N) op2, bits(N) op3, FPCRType fpcr) boolean fpexc = TRUE; // Generate floating-point exceptions return FPProcessNaNs3(type1, type2, type3, op1, op2, op3, fpcr, fpexc); // FPProcessNaNs3() // ================ // The boolean part of the return value says whether a NaN has been found and // processed. The bits(N) part is only relevant if it has and supplies the // result of the operation. // // The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the // generation of floating-point exceptions. Status information is updated // directly in the FPSR where appropriate. (boolean, bits(N)) FPProcessNaNs3(FPType type1, FPType type2, FPType type3, bits(N) op1, bits(N) op2, bits(N) op3, FPCRType fpcr, boolean fpexc) assert N IN {16,32,64}; boolean op1_nan = type1 IN {FPType_SNaN, FPType_QNaN}; boolean op2_nan = type2 IN {FPType_SNaN, FPType_QNaN}; boolean op3_nan = type3 IN {FPType_SNaN, FPType_QNaN}; boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; if altfp then if type1 == FPType_SNaN || type2 == FPType_SNaN || type3 == FPType_SNaN then type_nan = FPType_SNaN; else type_nan = FPType_QNaN; if altfp && op1_nan && op2_nan && op3_nan then done = TRUE; result = FPProcessNaN(type_nan, op2, fpcr, fpexc); // <n> register NaN selected elsif altfp && op2_nan && (op1_nan || op3_nan) then done = TRUE; result = FPProcessNaN(type_nan, op2, fpcr, fpexc); // <n> register NaN selected elsif altfp && op3_nan && op1_nan then done = TRUE; result = FPProcessNaN(type_nan, op3, fpcr, fpexc); // <m> register NaN selected elsif type1 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type2, op2, fpcr, fpexc); elsif type3 == FPType_SNaN then done = TRUE; result = FPProcessNaN(type3, op3, fpcr, fpexc); elsif type1 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type1, op1, fpcr, fpexc); elsif type2 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type2, op2, fpcr, fpexc); elsif type3 == FPType_QNaN then done = TRUE; result = FPProcessNaN(type3, op3, fpcr, fpexc); else done = FALSE; result = Zeros(); // 'Don't care' result return (done, result);
Library pseudocode for shared/functions/float/fprecipestimate/FPRecipEstimate
// FPRecipEstimate() // ================= bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr) assert N IN {16,32,64}; // When using alternative floating-point behaviour, do not generate // floating-point exceptions, flush denormal input and output to zero, // and use RNE rounding mode. boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; boolean fpexc = !altfp; if altfp then fpcr.<FIZ,FZ> = '11'; if altfp then fpcr.RMode = '00'; (fptype,sign,value) = FPUnpack(operand, fpcr, fpexc); FPRounding rounding = FPRoundingMode(fpcr); if fptype == FPType_SNaN || fptype == FPType_QNaN then result = FPProcessNaN(fptype, operand, fpcr, fpexc); elsif fptype == FPType_Infinity then result = FPZero(sign); elsif fptype == FPType_Zero then result = FPInfinity(sign); if fpexc then FPProcessException(FPExc_DivideByZero, fpcr); elsif ( (N == 16 && Abs(value) < 2.0^-16) || (N == 32 && Abs(value) < 2.0^-128) || (N == 64 && Abs(value) < 2.0^-1024) ) then case rounding of when FPRounding_TIEEVEN overflow_to_inf = TRUE; when FPRounding_POSINF overflow_to_inf = (sign == '0'); when FPRounding_NEGINF overflow_to_inf = (sign == '1'); when FPRounding_ZERO overflow_to_inf = FALSE; result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign); if fpexc then FPProcessException(FPExc_Overflow, fpcr); FPProcessException(FPExc_Inexact, fpcr); elsif ((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16)) && ( (N == 16 && Abs(value) >= 2.0^14) || (N == 32 && Abs(value) >= 2.0^126) || (N == 64 && Abs(value) >= 2.0^1022) ) then // Result flushed to zero of correct sign result = FPZero(sign); // Flush-to-zero never generates a trapped exception. if UsingAArch32() then FPSCR.UFC = '1'; else if fpexc then FPSR.UFC = '1'; else // Scale to a fixed point value in the range 0.5 <= x < 1.0 in steps of 1/512, and // calculate result exponent. Scaled value has copied sign bit, // exponent = 1022 = double-precision biased version of -1, // fraction = original fraction case N of when 16 fraction = operand<9:0> : Zeros(42); exp = UInt(operand<14:10>); when 32 fraction = operand<22:0> : Zeros(29); exp = UInt(operand<30:23>); when 64 fraction = operand<51:0>; exp = UInt(operand<62:52>); if exp == 0 then if fraction<51> == '0' then exp = -1; fraction = fraction<49:0>:'00'; else fraction = fraction<50:0>:'0'; integer scaled; boolean increasedprecision = N==32 && HaveFeatRPRES() && altfp; if !increasedprecision then scaled = UInt('1':fraction<51:44>); else scaled = UInt('1':fraction<51:41>); case N of when 16 result_exp = 29 - exp; // In range 29-30 = -1 to 29+1 = 30 when 32 result_exp = 253 - exp; // In range 253-254 = -1 to 253+1 = 254 when 64 result_exp = 2045 - exp; // In range 2045-2046 = -1 to 2045+1 = 2046 // Scaled is in range 256 .. 511 or 2048 .. 4095 range representing a // fixed-point number in range [0.5 .. 1.0]. estimate = RecipEstimate(scaled, increasedprecision); // Estimate is in the range 256 .. 511 or 4096 .. 8191 representing a // fixed-point result in the range [1.0 .. 2.0]. // Convert to scaled floating point result with copied sign bit, // high-order bits from estimate, and exponent calculated above. if !increasedprecision then fraction = estimate<7:0> : Zeros(44); else fraction = estimate<11:0> : Zeros(40); if result_exp == 0 then fraction = '1' : fraction<51:1>; elsif result_exp == -1 then fraction = '01' : fraction<51:2>; result_exp = 0; case N of when 16 result = sign : result_exp<N-12:0> : fraction<51:42>; when 32 result = sign : result_exp<N-25:0> : fraction<51:29>; when 64 result = sign : result_exp<N-54:0> : fraction<51:0>; return result;
Library pseudocode for shared/functions/float/fprecipestimate/RecipEstimate
// RecipEstimate() // =============== // Compute estimate of reciprocal of 9-bit fixed-point number. // // a is in range 256 .. 511 or 2048 .. 4096 representing a number in // the range 0.5 <= x < 1.0. // increasedprecision determines if the mantissa is 8-bit or 12-bit. // result is in the range 256 .. 511 or 4096 .. 8191 representing a // number in the range 1.0 to 511/256 or 1.00 to 8191/4096. integer RecipEstimate(integer a, boolean increasedprecision) integer r; if !increasedprecision then assert 256 <= a && a < 512; a = a*2+1; // Round to nearest integer b = (2 ^ 19) DIV a; r = (b+1) DIV 2; // Round to nearest assert 256 <= r && r < 512; else assert 2048 <= a && a < 4096; a = a*2+1; // Round to nearest real real_val = Real(2^25)/Real(a); r = RoundDown(real_val); real error = real_val - Real(r); boolean round_up = error > 0.5; // Error cannot be exactly 0.5 so do not need tie case if round_up then r = r+1; assert 4096 <= r && r < 8192; return r;
Library pseudocode for shared/functions/float/fprecpx/FPRecpX
// FPRecpX() // ========= bits(N) FPRecpX(bits(N) op, FPCRType fpcr) assert N IN {16,32,64}; case N of when 16 esize = 5; when 32 esize = 8; when 64 esize = 11; bits(N) result; bits(esize) exp; bits(esize) max_exp; bits(N-(esize+1)) frac = Zeros(); boolean altfp = HaveAltFP() && fpcr.AH == '1'; boolean fpexc = !altfp; // Generate no floating-point exceptions if altfp then fpcr.<FIZ,FZ> = '11'; // Flush denormal input and output to zero (fptype,sign,value) = FPUnpack(op, fpcr, fpexc); case N of when 16 exp = op<10+esize-1:10>; when 32 exp = op<23+esize-1:23>; when 64 exp = op<52+esize-1:52>; max_exp = Ones(esize) - 1; if fptype == FPType_SNaN || fptype == FPType_QNaN then result = FPProcessNaN(fptype, op, fpcr, fpexc); else if IsZero(exp) then // Zero and denormals result = sign:max_exp:frac; else // Infinities and normals result = sign:NOT(exp):frac; return result;
Library pseudocode for shared/functions/float/fpround/FPRound
// FPRound() // ========= // Used by data processing and int/fixed <-> FP conversion instructions. // For half-precision data it ignores AHP, and observes FZ16. bits(N) FPRound(real op, FPCRType fpcr, FPRounding rounding) fpcr.AHP = '0'; boolean fpexc = TRUE; // Generate floating-point exceptions boolean isbfloat16 = FALSE; return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc); // FPRound() // ========= // Used by data processing and int/fixed <-> FP conversion instructions. // For half-precision data it ignores AHP, and observes FZ16. // // The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the // generation of floating-point exceptions. Status information is updated // directly in the FPSR where appropriate. bits(N) FPRound(real op, FPCRType fpcr, FPRounding rounding, boolean fpexc) fpcr.AHP = '0'; boolean isbfloat16 = FALSE; return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc); // FPRound() // ========= bits(N) FPRound(real op, FPCRType fpcr) return FPRound(op, fpcr, FPRoundingMode(fpcr));
Library pseudocode for shared/functions/float/fpround/FPRoundBase
// FPRoundBase() // ============= bits(N) FPRoundBase(real op, FPCRType fpcr, FPRounding rounding, boolean isbfloat16) boolean fpexc = TRUE; // Generate floating-point exceptions return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc); // FPRoundBase() // ============= // Convert a real number OP into an N-bit floating-point value using the // supplied rounding mode RMODE. // // The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the // generation of floating-point exceptions. Status information is updated // directly in the FPSR where appropriate. bits(N) FPRoundBase(real op, FPCRType fpcr, FPRounding rounding, boolean isbfloat16, boolean fpexc) assert N IN {16,32,64}; assert op != 0.0; assert rounding != FPRounding_TIEAWAY; bits(N) result; // Obtain format parameters - minimum exponent, numbers of exponent and fraction bits. if N == 16 then minimum_exp = -14; E = 5; F = 10; elsif N == 32 && isbfloat16 then minimum_exp = -126; E = 8; F = 7; elsif N == 32 then minimum_exp = -126; E = 8; F = 23; else // N == 64 minimum_exp = -1022; E = 11; F = 52; // Split value into sign, unrounded mantissa and exponent. if op < 0.0 then sign = '1'; mantissa = -op; else sign = '0'; mantissa = op; exponent = 0; while mantissa < 1.0 do mantissa = mantissa * 2.0; exponent = exponent - 1; while mantissa >= 2.0 do mantissa = mantissa / 2.0; exponent = exponent + 1; // When TRUE, detection of underflow occurs after rounding and the test for a // denormalized number for single and double precision values occurs after rounding. altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; // Deal with flush-to-zero before rounding if FPCR.AH != '1'. if (!altfp && ((fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16)) && exponent < minimum_exp) then // Flush-to-zero never generates a trapped exception. if UsingAArch32() then FPSCR.UFC = '1'; else FPSR.UFC = '1'; return FPZero(sign); biased_exp_unconstrained = exponent - minimum_exp + 1; int_mant_unconstrained = RoundDown(mantissa * 2.0^F); error_unconstrained = mantissa * 2.0^F - Real(int_mant_unconstrained); // Start creating the exponent value for the result. Start by biasing the actual exponent // so that the minimum exponent becomes 1, lower values 0 (indicating possible underflow). biased_exp = Max(exponent - minimum_exp + 1, 0); if biased_exp == 0 then mantissa = mantissa / 2.0^(minimum_exp - exponent); // Get the unrounded mantissa as an integer, and the "units in last place" rounding error. int_mant = RoundDown(mantissa * 2.0^F); // < 2.0^F if biased_exp == 0, >= 2.0^F if not error = mantissa * 2.0^F - Real(int_mant); // Underflow occurs if exponent is too small before rounding, and result is inexact or // the Underflow exception is trapped. This applies before rounding if FPCR.AH != '1'. if !altfp && biased_exp == 0 && (error != 0.0 || fpcr.UFE == '1') then if fpexc then FPProcessException(FPExc_Underflow, fpcr); // Round result according to rounding mode. if altfp then case rounding of when FPRounding_TIEEVEN round_up_unconstrained = (error_unconstrained > 0.5 || (error_unconstrained == 0.5 && int_mant_unconstrained<0> == '1')); round_up = (error > 0.5 || (error == 0.5 && int_mant<0> == '1')); overflow_to_inf = TRUE; when FPRounding_POSINF round_up_unconstrained = (error_unconstrained != 0.0 && sign == '0'); round_up = (error != 0.0 && sign == '0'); overflow_to_inf = (sign == '0'); when FPRounding_NEGINF round_up_unconstrained = (error_unconstrained != 0.0 && sign == '1'); round_up = (error != 0.0 && sign == '1'); overflow_to_inf = (sign == '1'); when FPRounding_ZERO, FPRounding_ODD round_up_unconstrained = FALSE; round_up = FALSE; overflow_to_inf = FALSE; if round_up_unconstrained then int_mant_unconstrained = int_mant_unconstrained + 1; if int_mant_unconstrained == 2^(F+1) then // Rounded up to next exponent biased_exp_unconstrained = biased_exp_unconstrained + 1; int_mant_unconstrained = int_mant_unconstrained DIV 2; // Deal with flush-to-zero and underflow after rounding if FPCR.AH == '1'. if biased_exp_unconstrained < 1 && int_mant_unconstrained != 0 then // the result of unconstrained rounding is less than the minimum normalized number if (fpcr.FZ == '1' && N != 16) || (fpcr.FZ16 == '1' && N == 16) then // Flush-to-zero if fpexc then FPSR.UFC = '1'; FPProcessException(FPExc_Inexact, fpcr); return FPZero(sign); elsif error != 0.0 || fpcr.UFE == '1' then if fpexc then FPProcessException(FPExc_Underflow, fpcr); else // altfp == FALSE case rounding of when FPRounding_TIEEVEN round_up = (error > 0.5 || (error == 0.5 && int_mant<0> == '1')); overflow_to_inf = TRUE; when FPRounding_POSINF round_up = (error != 0.0 && sign == '0'); overflow_to_inf = (sign == '0'); when FPRounding_NEGINF round_up = (error != 0.0 && sign == '1'); overflow_to_inf = (sign == '1'); when FPRounding_ZERO, FPRounding_ODD round_up = FALSE; overflow_to_inf = FALSE; if round_up then int_mant = int_mant + 1; if int_mant == 2^F then // Rounded up from denormalized to normalized biased_exp = 1; if int_mant == 2^(F+1) then // Rounded up to next exponent biased_exp = biased_exp + 1; int_mant = int_mant DIV 2; // Handle rounding to odd if error != 0.0 && rounding == FPRounding_ODD then int_mant<0> = '1'; // Deal with overflow and generate result. if N != 16 || fpcr.AHP == '0' then // Single, double or IEEE half precision if biased_exp >= 2^E - 1 then result = if overflow_to_inf then FPInfinity(sign) else FPMaxNormal(sign); if fpexc then FPProcessException(FPExc_Overflow, fpcr); error = 1.0; // Ensure that an Inexact exception occurs else result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1)); else // Alternative half precision if biased_exp >= 2^E then result = sign : Ones(N-1); if fpexc then FPProcessException(FPExc_InvalidOp, fpcr); error = 0.0; // Ensure that an Inexact exception does not occur else result = sign : biased_exp<E-1:0> : int_mant<F-1:0> : Zeros(N-(E+F+1)); // Deal with Inexact exception. if error != 0.0 then if fpexc then FPProcessException(FPExc_Inexact, fpcr); return result;
Library pseudocode for shared/functions/float/fpround/FPRoundCV
// FPRoundCV() // =========== // Used for FP <-> FP conversion instructions. // For half-precision data ignores FZ16 and observes AHP. bits(N) FPRoundCV(real op, FPCRType fpcr, FPRounding rounding) fpcr.FZ16 = '0'; boolean fpexc = TRUE; // Generate floating-point exceptions boolean isbfloat16 = FALSE; return FPRoundBase(op, fpcr, rounding, isbfloat16, fpexc);
Library pseudocode for shared/functions/float/fprounding/FPRounding
enumeration FPRounding {FPRounding_TIEEVEN, FPRounding_POSINF, FPRounding_NEGINF, FPRounding_ZERO, FPRounding_TIEAWAY, FPRounding_ODD};
Library pseudocode for shared/functions/float/fproundingmode/FPRoundingMode
// FPRoundingMode() // ================ // Return the current floating-point rounding mode. FPRounding FPRoundingMode(FPCRType fpcr) return FPDecodeRounding(fpcr.RMode);
Library pseudocode for shared/functions/float/fproundint/FPRoundInt
// FPRoundInt() // ============ // Round op to nearest integral floating point value using rounding mode in FPCR/FPSCR. // If EXACT is TRUE, set FPSR.IXC if result is not numerically equal to op. bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact) assert rounding != FPRounding_ODD; assert N IN {16,32,64}; // When alternative floating-point support is TRUE, do not generate // Input Denormal floating-point exceptions. altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; fpexc = !altfp; // Unpack using FPCR to determine if subnormals are flushed-to-zero. (fptype,sign,value) = FPUnpack(op, fpcr, fpexc); if fptype == FPType_SNaN || fptype == FPType_QNaN then result = FPProcessNaN(fptype, op, fpcr); elsif fptype == FPType_Infinity then result = FPInfinity(sign); elsif fptype == FPType_Zero then result = FPZero(sign); else // Extract integer component. int_result = RoundDown(value); error = value - Real(int_result); // Determine whether supplied rounding mode requires an increment. case rounding of when FPRounding_TIEEVEN round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1')); when FPRounding_POSINF round_up = (error != 0.0); when FPRounding_NEGINF round_up = FALSE; when FPRounding_ZERO round_up = (error != 0.0 && int_result < 0); when FPRounding_TIEAWAY round_up = (error > 0.5 || (error == 0.5 && int_result >= 0)); if round_up then int_result = int_result + 1; // Convert integer value into an equivalent real value. real_result = Real(int_result); // Re-encode as a floating-point value, result is always exact. if real_result == 0.0 then result = FPZero(sign); else result = FPRound(real_result, fpcr, FPRounding_ZERO); // Generate inexact exceptions. if error != 0.0 && exact then FPProcessException(FPExc_Inexact, fpcr); return result;
Library pseudocode for shared/functions/float/fproundintn/FPRoundIntN
// FPRoundIntN() // ============= bits(N) FPRoundIntN(bits(N) op, FPCRType fpcr, FPRounding rounding, integer intsize) assert rounding != FPRounding_ODD; assert N IN {32,64}; assert intsize IN {32, 64}; integer exp; constant integer E = (if N == 32 then 8 else 11); constant integer F = N - (E + 1); // When alternative floating-point support is TRUE, do not generate // Input Denormal floating-point exceptions. altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; fpexc = !altfp; // Unpack using FPCR to determine if subnormals are flushed-to-zero. (fptype,sign,value) = FPUnpack(op, fpcr, fpexc); if fptype IN {FPType_SNaN, FPType_QNaN, FPType_Infinity} then if N == 32 then exp = 126 + intsize; result = '1':exp<(E-1):0>:Zeros(F); else exp = 1022+intsize; result = '1':exp<(E-1):0>:Zeros(F); FPProcessException(FPExc_InvalidOp, fpcr); elsif fptype == FPType_Zero then result = FPZero(sign); else // Extract integer component. int_result = RoundDown(value); error = value - Real(int_result); // Determine whether supplied rounding mode requires an increment. case rounding of when FPRounding_TIEEVEN round_up = error > 0.5 || (error == 0.5 && int_result<0> == '1'); when FPRounding_POSINF round_up = error != 0.0; when FPRounding_NEGINF round_up = FALSE; when FPRounding_ZERO round_up = error != 0.0 && int_result < 0; when FPRounding_TIEAWAY round_up = error > 0.5 || (error == 0.5 && int_result >= 0); if round_up then int_result = int_result + 1; overflow = int_result > 2^(intsize-1)-1 || int_result < -1*2^(intsize-1); if overflow then if N == 32 then exp = 126 + intsize; result = '1':exp<(E-1):0>:Zeros(F); else exp = 1022 + intsize; result = '1':exp<(E-1):0>:Zeros(F); FPProcessException(FPExc_InvalidOp, fpcr); // This case shouldn't set Inexact. error = 0.0; else // Convert integer value into an equivalent real value. real_result = Real(int_result); // Re-encode as a floating-point value, result is always exact. if real_result == 0.0 then result = FPZero(sign); else result = FPRound(real_result, fpcr, FPRounding_ZERO); // Generate inexact exceptions. if error != 0.0 then FPProcessException(FPExc_Inexact, fpcr); return result;
Library pseudocode for shared/functions/float/fprsqrtestimate/FPRSqrtEstimate
// FPRSqrtEstimate() // ================= bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr) assert N IN {16,32,64}; // When using alternative floating-point behaviour, do not generate // floating-point exceptions and flush denormal input to zero. boolean altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; boolean fpexc = !altfp; if altfp then fpcr.<FIZ,FZ> = '11'; (fptype,sign,value) = FPUnpack(operand, fpcr, fpexc); if fptype == FPType_SNaN || fptype == FPType_QNaN then result = FPProcessNaN(fptype, operand, fpcr, fpexc); elsif fptype == FPType_Zero then result = FPInfinity(sign); if fpexc then FPProcessException(FPExc_DivideByZero, fpcr); elsif sign == '1' then result = FPDefaultNaN(); if fpexc then FPProcessException(FPExc_InvalidOp, fpcr); elsif fptype == FPType_Infinity then result = FPZero('0'); else // Scale to a fixed-point value in the range 0.25 <= x < 1.0 in steps of 512, with the // evenness or oddness of the exponent unchanged, and calculate result exponent. // Scaled value has copied sign bit, exponent = 1022 or 1021 = double-precision // biased version of -1 or -2, fraction = original fraction extended with zeros. case N of when 16 fraction = operand<9:0> : Zeros(42); exp = UInt(operand<14:10>); when 32 fraction = operand<22:0> : Zeros(29); exp = UInt(operand<30:23>); when 64 fraction = operand<51:0>; exp = UInt(operand<62:52>); if exp == 0 then while fraction<51> == '0' do fraction = fraction<50:0> : '0'; exp = exp - 1; fraction = fraction<50:0> : '0'; integer scaled; boolean increasedprecision = N==32 && HaveFeatRPRES() && altfp; if !increasedprecision then if exp<0> == '0' then scaled = UInt('1':fraction<51:44>); else scaled = UInt('01':fraction<51:45>); else if exp<0> == '0' then scaled = UInt('1':fraction<51:41>); else scaled = UInt('01':fraction<51:42>); case N of when 16 result_exp = ( 44 - exp) DIV 2; when 32 result_exp = ( 380 - exp) DIV 2; when 64 result_exp = (3068 - exp) DIV 2; estimate = RecipSqrtEstimate(scaled, increasedprecision); // Estimate is in the range 256 .. 511 or 4096 .. 8191 representing a // fixed-point result in the range [1.0 .. 2.0]. // Convert to scaled floating point result with copied sign bit and high-order // fraction bits, and exponent calculated above. case N of when 16 result = '0' : result_exp<N-12:0> : estimate<7:0>:Zeros(2); when 32 if !increasedprecision then result = '0' : result_exp<N-25:0> : estimate<7:0>:Zeros(15); else result = '0' : result_exp<N-25:0> : estimate<11:0>:Zeros(11); when 64 result = '0' : result_exp<N-54:0> : estimate<7:0>:Zeros(44); return result;
Library pseudocode for shared/functions/float/fprsqrtestimate/RecipSqrtEstimate
// RecipSqrtEstimate() // =================== // Compute estimate of reciprocal square root of 9-bit fixed-point number. // // a is in range 128 .. 511 or 1024 .. 4095, with increased precision, // representing a number in the range 0.25 <= x < 1.0. // increasedprecision determines if the mantissa is 8-bit or 12-bit. // result is in the range 256 .. 511 or 4096 .. 8191, with increased precision, // representing a number in the range 1.0 to 511/256 or 8191/4096. integer RecipSqrtEstimate(integer a, boolean increasedprecision) integer r; if !increasedprecision then assert 128 <= a && a < 512; if a < 256 then // 0.25 .. 0.5 a = a*2+1; // a in units of 1/512 rounded to nearest else // 0.5 .. 1.0 a = (a >> 1) << 1; // Discard bottom bit a = (a+1)*2; // a in units of 1/256 rounded to nearest integer b = 512; while a*(b+1)*(b+1) < 2^28 do b = b+1; // b = largest b such that b < 2^14 / sqrt(a) r = (b+1) DIV 2; // Round to nearest assert 256 <= r && r < 512; else assert 1024 <= a && a < 4096; real real_val; real error; integer int_val; if a < 2048 then // 0.25... 0.5 a = a*2 + 1; // Take 10 bits of fraction and force a 1 at the bottom real_val = Real(a)/2.0; else // 0.5..1.0 a = (a >> 1) << 1; // Discard bottom bit a = a+1; // Taking 10 bits of the fraction and force a 1 at the bottom real_val = Real(a); real_val = Sqrt(real_val); // This number will lie in the range of 32 to 64 // Round to nearest even for a DP float number real_val = real_val * Real(2^47); // The integer is the size of the whole DP mantissa int_val = RoundDown(real_val); // Calculate rounding value error = real_val - Real(int_val); round_up = error > 0.5; // Error cannot be exactly 0.5 so do not need tie case if round_up then int_val = int_val+1; real_val = Real(2^65)/Real(int_val); // Lies in the range 4096 <= real_val < 8192 int_val = RoundDown(real_val); // Round that (to nearest even) to give integer error = real_val - Real(int_val); round_up = (error > 0.5 || (error == 0.5 && int_val<0> == '1')); if round_up then int_val = int_val+1; r = int_val; assert 4096 <= r && r < 8192; return r;
Library pseudocode for shared/functions/float/fpsqrt/FPSqrt
// FPSqrt() // ======== bits(N) FPSqrt(bits(N) op, FPCRType fpcr) assert N IN {16,32,64}; (fptype,sign,value) = FPUnpack(op, fpcr); if fptype == FPType_SNaN || fptype == FPType_QNaN then result = FPProcessNaN(fptype, op, fpcr); elsif fptype == FPType_Zero then result = FPZero(sign); elsif fptype == FPType_Infinity && sign == '0' then result = FPInfinity(sign); elsif sign == '1' then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); else result = FPRound(Sqrt(value), fpcr); FPProcessDenorm(fptype, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpsub/FPSub
// FPSub() // ======= bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr) assert N IN {16,32,64}; rounding = FPRoundingMode(fpcr); (type1,sign1,value1) = FPUnpack(op1, fpcr); (type2,sign2,value2) = FPUnpack(op2, fpcr); (done,result) = FPProcessNaNs(type1, type2, op1, op2, fpcr); if !done then inf1 = (type1 == FPType_Infinity); inf2 = (type2 == FPType_Infinity); zero1 = (type1 == FPType_Zero); zero2 = (type2 == FPType_Zero); if inf1 && inf2 && sign1 == sign2 then result = FPDefaultNaN(); FPProcessException(FPExc_InvalidOp, fpcr); elsif (inf1 && sign1 == '0') || (inf2 && sign2 == '1') then result = FPInfinity('0'); elsif (inf1 && sign1 == '1') || (inf2 && sign2 == '0') then result = FPInfinity('1'); elsif zero1 && zero2 && sign1 == NOT(sign2) then result = FPZero(sign1); else result_value = value1 - value2; if result_value == 0.0 then // Sign of exact zero result depends on rounding mode result_sign = if rounding == FPRounding_NEGINF then '1' else '0'; result = FPZero(result_sign); else result = FPRound(result_value, fpcr, rounding); FPProcessDenorms(type1, type2, N, fpcr); return result;
Library pseudocode for shared/functions/float/fpthree/FPThree
// FPThree() // ========= bits(N) FPThree(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); exp = '1':Zeros(E-1); frac = '1':Zeros(F-1); result = sign : exp : frac; return result;
Library pseudocode for shared/functions/float/fptofixed/FPToFixed
// FPToFixed() // =========== // Convert N-bit precision floating point OP to M-bit fixed point with // FBITS fractional bits, controlled by UNSIGNED and ROUNDING. bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding) assert N IN {16,32,64}; assert M IN {16,32,64}; assert fbits >= 0; assert rounding != FPRounding_ODD; // When alternative floating-point support is TRUE, do not generate // Input Denormal floating-point exceptions. altfp = HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'; fpexc = !altfp; // Unpack using fpcr to determine if subnormals are flushed-to-zero. (fptype,sign,value) = FPUnpack(op, fpcr, fpexc); // If NaN, set cumulative flag or take exception. if fptype == FPType_SNaN || fptype == FPType_QNaN then FPProcessException(FPExc_InvalidOp, fpcr); // Scale by fractional bits and produce integer rounded towards minus-infinity. value = value * 2.0^fbits; int_result = RoundDown(value); error = value - Real(int_result); // Determine whether supplied rounding mode requires an increment. case rounding of when FPRounding_TIEEVEN round_up = (error > 0.5 || (error == 0.5 && int_result<0> == '1')); when FPRounding_POSINF round_up = (error != 0.0); when FPRounding_NEGINF round_up = FALSE; when FPRounding_ZERO round_up = (error != 0.0 && int_result < 0); when FPRounding_TIEAWAY round_up = (error > 0.5 || (error == 0.5 && int_result >= 0)); if round_up then int_result = int_result + 1; // Generate saturated result and exceptions. (result, overflow) = SatQ(int_result, M, unsigned); if overflow then FPProcessException(FPExc_InvalidOp, fpcr); elsif error != 0.0 then FPProcessException(FPExc_Inexact, fpcr); return result;
Library pseudocode for shared/functions/float/fptofixedjs/FPToFixedJS
// FPToFixedJS() // ============= // Converts a double precision floating point input value // to a signed integer, with rounding to zero. (bits(N), bit) FPToFixedJS(bits(M) op, FPCRType fpcr, boolean Is64) assert M == 64 && N == 32; // If FALSE, never generate Input Denormal floating-point exceptions. fpexc_idenorm = !(HaveAltFP() && !UsingAArch32() && fpcr.AH == '1'); // Unpack using fpcr to determine if subnormals are flushed-to-zero. (fptype,sign,value) = FPUnpack(op, fpcr, fpexc_idenorm); Z = '1'; // If NaN, set cumulative flag or take exception. if fptype == FPType_SNaN || fptype == FPType_QNaN then FPProcessException(FPExc_InvalidOp, fpcr); Z = '0'; int_result = RoundDown(value); error = value - Real(int_result); // Determine whether supplied rounding mode requires an increment. round_it_up = (error != 0.0 && int_result < 0); if round_it_up then int_result = int_result + 1; if int_result < 0 then result = int_result - 2^32*RoundUp(Real(int_result)/Real(2^32)); else result = int_result - 2^32*RoundDown(Real(int_result)/Real(2^32)); // Generate exceptions. if int_result < -(2^31) || int_result > (2^31)-1 then FPProcessException(FPExc_InvalidOp, fpcr); Z = '0'; elsif error != 0.0 then FPProcessException(FPExc_Inexact, fpcr); Z = '0'; elsif sign == '1' && value == 0.0 then Z = '0'; elsif sign == '0' && value == 0.0 && !IsZero(op<51:0>) then Z = '0'; if fptype == FPType_Infinity then result = 0; return (result<N-1:0>, Z);
Library pseudocode for shared/functions/float/fptwo/FPTwo
// FPTwo() // ======= bits(N) FPTwo(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); exp = '1':Zeros(E-1); frac = Zeros(F); result = sign : exp : frac; return result;
Library pseudocode for shared/functions/float/fptype/FPType
enumeration FPType {FPType_Zero, FPType_Denormal, FPType_Nonzero, FPType_Infinity, FPType_QNaN, FPType_SNaN};
Library pseudocode for shared/functions/float/fpunpack/FPUnpack
// FPUnpack() // ========== (FPType, bit, real) FPUnpack(bits(N) fpval, FPCRType fpcr) fpcr.AHP = '0'; boolean fpexc = TRUE; // Generate floating-point exceptions (fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc); return (fp_type, sign, value); // FPUnpack() // ========== // // Used by data processing and int/fixed <-> FP conversion instructions. // For half-precision data it ignores AHP, and observes FZ16. (FPType, bit, real) FPUnpack(bits(N) fpval, FPCRType fpcr, boolean fpexc) fpcr.AHP = '0'; (fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc); return (fp_type, sign, value);
Library pseudocode for shared/functions/float/fpunpack/FPUnpackBase
// FPUnpackBase() // ============== (FPType, bit, real) FPUnpackBase(bits(N) fpval, FPCRType fpcr) boolean fpexc = TRUE; // Generate floating-point exceptions (fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc); return (fp_type, sign, value); // FPUnpackBase() // ============== // // Unpack a floating-point number into its type, sign bit and the real number // that it represents. The real number result has the correct sign for numbers // and infinities, is very large in magnitude for infinities, and is 0.0 for // NaNs. (These values are chosen to simplify the description of comparisons // and conversions.) // // The 'fpcr' argument supplies FPCR control bits and 'fpexc' controls the // generation of floating-point exceptions. Status information is updated // directly in the FPSR where appropriate. (FPType, bit, real) FPUnpackBase(bits(N) fpval, FPCRType fpcr, boolean fpexc) assert N IN {16,32,64}; boolean altfp = HaveAltFP() && !UsingAArch32(); boolean fiz = altfp && fpcr.FIZ == '1'; boolean fz = fpcr.FZ == '1' && !(altfp && fpcr.AH == '1'); if N == 16 then sign = fpval<15>; exp16 = fpval<14:10>; frac16 = fpval<9:0>; if IsZero(exp16) then if IsZero(frac16) || fpcr.FZ16 == '1' then fptype = FPType_Zero; value = 0.0; else fptype = FPType_Denormal; value = 2.0^-14 * (Real(UInt(frac16)) * 2.0^-10); elsif IsOnes(exp16) && fpcr.AHP == '0' then // Infinity or NaN in IEEE format if IsZero(frac16) then fptype = FPType_Infinity; value = 2.0^1000000; else fptype = if frac16<9> == '1' then FPType_QNaN else FPType_SNaN; value = 0.0; else fptype = FPType_Nonzero; value = 2.0^(UInt(exp16)-15) * (1.0 + Real(UInt(frac16)) * 2.0^-10); elsif N == 32 then sign = fpval<31>; exp32 = fpval<30:23>; frac32 = fpval<22:0>; if IsZero(exp32) then if IsZero(frac32) then // Produce zero if value is zero. fptype = FPType_Zero; value = 0.0; elsif fz || fiz then // Flush-to-zero if FIZ==1 or AH,FZ==01 fptype = FPType_Zero; value = 0.0; // Check whether to raise Input Denormal floating-point exception. // fpcr.FIZ==1 does not raise Input Denormal exception. if fz then // Denormalized input flushed to zero if fpexc then FPProcessException(FPExc_InputDenorm, fpcr); else fptype = FPType_Denormal; value = 2.0^-126 * (Real(UInt(frac32)) * 2.0^-23); elsif IsOnes(exp32) then if IsZero(frac32) then fptype = FPType_Infinity; value = 2.0^1000000; else fptype = if frac32<22> == '1' then FPType_QNaN else FPType_SNaN; value = 0.0; else fptype = FPType_Nonzero; value = 2.0^(UInt(exp32)-127) * (1.0 + Real(UInt(frac32)) * 2.0^-23); else // N == 64 sign = fpval<63>; exp64 = fpval<62:52>; frac64 = fpval<51:0>; if IsZero(exp64) then if IsZero(frac64) then // Produce zero if value is zero. fptype = FPType_Zero; value = 0.0; elsif fz || fiz then // Flush-to-zero if FIZ==1 or AH,FZ==01 fptype = FPType_Zero; value = 0.0; // Check whether to raise Input Denormal floating-point exception. // fpcr.FIZ==1 does not raise Input Denormal exception. if fz then // Denormalized input flushed to zero if fpexc then FPProcessException(FPExc_InputDenorm, fpcr); else fptype = FPType_Denormal; value = 2.0^-1022 * (Real(UInt(frac64)) * 2.0^-52); elsif IsOnes(exp64) then if IsZero(frac64) then fptype = FPType_Infinity; value = 2.0^1000000; else fptype = if frac64<51> == '1' then FPType_QNaN else FPType_SNaN; value = 0.0; else fptype = FPType_Nonzero; value = 2.0^(UInt(exp64)-1023) * (1.0 + Real(UInt(frac64)) * 2.0^-52); if sign == '1' then value = -value; return (fptype, sign, value);
Library pseudocode for shared/functions/float/fpunpack/FPUnpackCV
// FPUnpackCV() // ============ // // Used for FP <-> FP conversion instructions. // For half-precision data ignores FZ16 and observes AHP. (FPType, bit, real) FPUnpackCV(bits(N) fpval, FPCRType fpcr) fpcr.FZ16 = '0'; boolean fpexc = TRUE; // Generate floating-point exceptions (fp_type, sign, value) = FPUnpackBase(fpval, fpcr, fpexc); return (fp_type, sign, value);
Library pseudocode for shared/functions/float/fpzero/FPZero
// FPZero() // ======== bits(N) FPZero(bit sign) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - (E + 1); exp = Zeros(E); frac = Zeros(F); result = sign : exp : frac; return result;
Library pseudocode for shared/functions/float/vfpexpandimm/VFPExpandImm
// VFPExpandImm() // ============== bits(N) VFPExpandImm(bits(8) imm8) assert N IN {16,32,64}; constant integer E = (if N == 16 then 5 elsif N == 32 then 8 else 11); constant integer F = N - E - 1; sign = imm8<7>; exp = NOT(imm8<6>):Replicate(imm8<6>,E-3):imm8<5:4>; frac = imm8<3:0>:Zeros(F-4); result = sign : exp : frac; return result;