using System;
using Unity.Burst;
#if !BURST_INTERNAL
using AOT;
using UnityEngine;
#endif
using System.Runtime.InteropServices;

namespace Unity.Burst.Intrinsics
{
#if !BURST_INTERNAL
    [BurstCompile]
#endif
    public static unsafe partial class X86
    {
        /// <summary>
        /// Bit layout of the 32-bit MXCSR register, which holds control and status information
        /// for SSE and AVX SIMD floating-point operations.
        /// </summary>
        [Flags]
        public enum MXCSRBits
        {
            /// <summary>
            /// Bit 15 (FTZ): enables flush-to-zero mode, which controls the masked response to a
            /// SIMD floating-point underflow condition.
            /// </summary>
            /// <remarks>
            /// <para>
            /// When the underflow exception is masked and flush-to-zero mode is enabled, the processor
            /// does the following when it detects a floating-point underflow condition:
            /// </para>
            /// <list type="bullet">
            /// <item><description>Returns a zero result with the sign of the true result.</description></item>
            /// <item><description>Sets the precision and underflow exception flags.</description></item>
            /// </list>
            /// <para>If the underflow exception is not masked, the flush-to-zero bit is ignored.</para>
            /// <para>
            /// Flush-to-zero mode is not compatible with IEEE Standard 754, whose mandated masked response
            /// to underflow is to deliver the denormalized result. The mode exists primarily for performance:
            /// at the cost of a slight precision loss, execution is faster for applications where underflows
            /// are common and rounding the underflow result to zero can be tolerated. The bit is cleared upon
            /// a power-up or reset of the processor, which disables flush-to-zero mode.
            /// </para>
            /// </remarks>
            FlushToZero = 1 << 15,

            /// <summary>
            /// Mask covering the two rounding control bits (bits 13 and 14).
            /// </summary>
            /// <remarks>
            /// The rounding modes have no effect on comparison operations, operations that produce exact
            /// results, or operations that produce NaN results.
            /// </remarks>
            RoundingControlMask = RoundDown | RoundUp,

            /// <summary>
            /// Rounded result is the closest to the infinitely precise result. If two values are equally
            /// close, the result is the even value (the one whose least-significant bit is zero).
            /// This is the default.
            /// </summary>
            RoundToNearest = 0,

            /// <summary>
            /// Rounded result is closest to but no greater than the infinitely precise result.
            /// </summary>
            RoundDown = 1 << 13,

            /// <summary>
            /// Rounded result is closest to but no less than the infinitely precise result.
            /// </summary>
            RoundUp = 1 << 14,

            /// <summary>
            /// Rounded result is closest to but no greater in absolute value than the infinitely precise result.
            /// </summary>
            RoundTowardZero = RoundDown | RoundUp,

            /// <summary>
            /// Bit 12: mask bit for the precision exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            PrecisionMask = 1 << 12,

            /// <summary>
            /// Bit 11: mask bit for the underflow exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            UnderflowMask = 1 << 11,

            /// <summary>
            /// Bit 10: mask bit for the overflow exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            OverflowMask = 1 << 10,

            /// <summary>
            /// Bit 9: mask bit for the divide-by-zero exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            DivideByZeroMask = 1 << 9,

            /// <summary>
            /// Bit 8: mask bit for the denormal-operation exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            DenormalOperationMask = 1 << 8,

            /// <summary>
            /// Bit 7: mask bit for the invalid-operation exception.
            /// </summary>
            /// <remarks>
            /// Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions.
            /// An exception type is masked if the corresponding mask bit is set, and unmasked if the bit is clear.
            /// These mask bits are set upon a power-up or reset, so all SIMD floating-point exceptions are
            /// initially masked.
            /// </remarks>
            InvalidOperationMask = 1 << 7,

            /// <summary>
            /// Convenience value combining every exception mask bit (bits 7 through 12).
            /// </summary>
            ExceptionMask = PrecisionMask | UnderflowMask | OverflowMask | DivideByZeroMask | DenormalOperationMask | InvalidOperationMask,

            /// <summary>
            /// Bit 6 (DAZ): enables denormals-are-zeros mode, which controls the processor's response to a
            /// SIMD floating-point denormal operand condition.
            /// </summary>
            /// <remarks>
            /// <para>
            /// When the denormals-are-zeros flag is set, the processor converts all denormal source operands
            /// to a zero with the sign of the original operand before performing any computations on them.
            /// The processor does not set the denormal-operand exception flag (DE), regardless of the setting
            /// of the denormal-operand exception mask bit (DM), and it does not generate a denormal-operand
            /// exception if the exception is unmasked. The mode is not compatible with IEEE Standard 754.
            /// </para>
            /// <para>
            /// The mode is provided to improve processor performance for applications such as streaming media
            /// processing, where rounding a denormal operand to zero does not appreciably affect the quality
            /// of the processed data. The flag is cleared upon a power-up or reset of the processor, which
            /// disables denormals-are-zeros mode.
            /// </para>
            /// <para>
            /// The mode was introduced in the Pentium 4 and Intel Xeon processor with the SSE2 extensions;
            /// however, it is fully compatible with the SSE SIMD floating-point instructions (that is, the
            /// flag affects the operation of the SSE SIMD floating-point instructions). In earlier IA-32
            /// processors and in some models of the Pentium 4 processor, this flag (bit 6) is reserved.
            /// Attempting to set bit 6 of the MXCSR register on processors that do not support the DAZ flag
            /// will cause a general-protection exception (#GP).
            /// </para>
            /// </remarks>
            DenormalsAreZeroes = 1 << 6,

            /// <summary>
            /// Bit 5: sticky flag indicating that a precision exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            PrecisionFlag = 1 << 5,

            /// <summary>
            /// Bit 4: sticky flag indicating that an underflow exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            UnderflowFlag = 1 << 4,

            /// <summary>
            /// Bit 3: sticky flag indicating that an overflow exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            OverflowFlag = 1 << 3,

            /// <summary>
            /// Bit 2: sticky flag indicating that a divide-by-zero exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            DivideByZeroFlag = 1 << 2,

            /// <summary>
            /// Bit 1: sticky flag indicating that a denormal exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            DenormalFlag = 1 << 1,

            /// <summary>
            /// Bit 0: sticky flag indicating that an invalid-operation exception has been detected.
            /// </summary>
            /// <remarks>
            /// Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been
            /// detected. They are "sticky" flags: after a flag is set, it remains set until explicitly cleared.
            /// To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.
            /// </remarks>
            InvalidOperationFlag = 1 << 0,

            /// <summary>
            /// Convenience value combining every exception status flag (bits 0 through 5).
            /// </summary>
            FlagMask = PrecisionFlag | UnderflowFlag | OverflowFlag | DivideByZeroFlag | DenormalFlag | InvalidOperationFlag,
        }

        /// <summary>
        /// Rounding mode flags
        /// </summary>
        [Flags]
        public enum RoundingMode
        {
            /// <summary>
            /// Round to the nearest integer
            /// </summary>
            FROUND_TO_NEAREST_INT = 0x00,

            /// <summary>
            /// Round to negative infinity
            /// </summary>
            FROUND_TO_NEG_INF = 0x01,

            /// <summary>
            /// Round to positive infinity
            /// </summary>
            FROUND_TO_POS_INF = 0x02,

            /// <summary>
            /// Round to zero
            /// </summary>
            FROUND_TO_ZERO = 0x03,

            /// <summary>
            /// Round to current direction
            /// </summary>
            FROUND_CUR_DIRECTION = 0x04,

            /// <summary>
            /// Do not suppress exceptions
            /// </summary>
            FROUND_RAISE_EXC = 0x00,

            /// <summary>
            /// Suppress exceptions
            /// </summary>
            FROUND_NO_EXC = 0x08,

            /// <summary>
            /// Round to the nearest integer without suppressing exceptions
            /// </summary>
            FROUND_NINT = FROUND_TO_NEAREST_INT | FROUND_RAISE_EXC,

            /// <summary>
            /// Round using Floor function without suppressing exceptions
            /// </summary>
            FROUND_FLOOR = FROUND_TO_NEG_INF | FROUND_RAISE_EXC,

            /// <summary>
            /// Round using Ceiling function without suppressing exceptions
            /// </summary>
            FROUND_CEIL = FROUND_TO_POS_INF | FROUND_RAISE_EXC,

            /// <summary>
            /// Round by truncating without suppressing exceptions
            /// </summary>
            FROUND_TRUNC = FROUND_TO_ZERO | FROUND_RAISE_EXC,

            /// <summary>
            /// Round using MXCSR.RC without suppressing exceptions
            /// </summary>
            FROUND_RINT = FROUND_CUR_DIRECTION | FROUND_RAISE_EXC,

            /// <summary>
            /// Round using MXCSR.RC and suppressing exceptions
            /// </summary>
            FROUND_NEARBYINT = FROUND_CUR_DIRECTION | FROUND_NO_EXC,

            /// <summary>
            /// Round to nearest integer and suppressing exceptions
            /// </summary>
            FROUND_NINT_NOEXC = FROUND_TO_NEAREST_INT | FROUND_NO_EXC,

            /// <summary>
            /// Round using Floor function and suppressing exceptions
            /// </summary>
            FROUND_FLOOR_NOEXC = FROUND_TO_NEG_INF | FROUND_NO_EXC,

            /// <summary>
            /// Round using Ceiling function and suppressing exceptions
            /// </summary>
            FROUND_CEIL_NOEXC = FROUND_TO_POS_INF | FROUND_NO_EXC,

            /// <summary>
            /// Round by truncating and suppressing exceptions
            /// </summary>
            FROUND_TRUNC_NOEXC = FROUND_TO_ZERO | FROUND_NO_EXC,

            /// <summary>
            /// Round using MXCSR.RC and suppressing exceptions
            /// </summary>
            FROUND_RINT_NOEXC = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
        }

        /// <summary>
        /// Disposable scope that swaps the rounding-control bits of <see cref="MXCSR"/> on construction
        /// and writes the previously read register value back on <see cref="Dispose"/>.
        /// </summary>
        internal struct RoundingScope : IDisposable
        {
            // MXCSR value captured when the scope was opened.
            private readonly MXCSRBits _savedBits;

            /// <summary>
            /// Captures the current MXCSR value, then writes it back with its rounding-control bits
            /// cleared and <paramref name="roundingMode"/> OR-ed in.
            /// </summary>
            /// <param name="roundingMode">Bits to OR into the register after the rounding-control bits are cleared.</param>
            public RoundingScope(MXCSRBits roundingMode)
            {
                var current = MXCSR;
                _savedBits = current;

                var withoutRounding = current & ~MXCSRBits.RoundingControlMask;
                MXCSR = withoutRounding | roundingMode;
            }

            /// <summary>
            /// Writes the MXCSR value captured by the constructor back to the register.
            /// </summary>
            public void Dispose()
            {
                MXCSR = _savedBits;
            }
        }

#if !BURST_INTERNAL
        // Managed stand-ins: the bodies visible here do nothing / return 0.
        // NOTE(review): presumably Burst substitutes the real CSR instructions for these by name - confirm.
        private static void BurstIntrinsicSetCSRFromManaged(int _)
        {
        }

        private static int BurstIntrinsicGetCSRFromManaged()
        {
            return 0;
        }

        // Raw accessors used by the MXCSR property; they forward to the Burst-compiled trampolines below.
        internal static int getcsr_raw()
        {
            return DoGetCSRTrampoline();
        }

        internal static void setcsr_raw(int bits)
        {
            DoSetCSRTrampoline(bits);
        }

        // Writes the CSR only when SSE is reported as supported; otherwise does nothing.
        [BurstCompile(CompileSynchronously = true)]
        private static void DoSetCSRTrampoline(int bits)
        {
            if (!Sse.IsSseSupported)
            {
                return;
            }

            BurstIntrinsicSetCSRFromManaged(bits);
        }

        // Reads the CSR when SSE is reported as supported; otherwise yields 0.
        [BurstCompile(CompileSynchronously = true)]
        private static int DoGetCSRTrampoline()
        {
            return Sse.IsSseSupported ? BurstIntrinsicGetCSRFromManaged() : 0;
        }
#else
        // Inside Burst's own unit tests we can't recurse from the tests back into Burst,
        // so the CSR is manipulated by P/Invoking a dummy wrapper DLL instead.
        [DllImport("burst-dllimport-native", EntryPoint = "x86_getcsr")]
        internal static extern int getcsr_raw();

        [DllImport("burst-dllimport-native", EntryPoint = "x86_setcsr")]
        internal static extern void setcsr_raw(int bits);
#endif

        /// <summary>
        /// Allows access to the MXCSR register: the getter returns the raw register value as
        /// <see cref="MXCSRBits"/>, and the setter writes the given bits back as a raw integer.
        /// </summary>
        public static MXCSRBits MXCSR
        {
            [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
            get
            {
                int raw = getcsr_raw();
                return (MXCSRBits)raw;
            }

            [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
            set
            {
                int raw = (int)value;
                setcsr_raw(raw);
            }
        }
    }
}