first commit

This commit is contained in:
lethanhsonvsp
2025-11-17 15:16:36 +07:00
commit a40d0921eb
17012 changed files with 2652386 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: fb77e3d4fbde3090a07ebac108e13ed8
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: bbe744fdbbc734d3bb0a78042bd4b56a
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,276 @@
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// bmi1 intrinsics
/// </summary>
public static class Bmi1
{
/// <summary>
/// Reports whether bmi1 intrinsics can be used; resolved at compile time.
///
/// Burst couples bmi1 availability to AVX2 availability to keep the set of supported feature combinations small.
/// </summary>
public static bool IsBmi1Supported => Avx2.IsAvx2Supported;
/// <summary>
/// Invert every bit of the 32-bit integer a, AND the inverted value with b, and return the result.
/// </summary>
/// <remarks>
/// **** andn r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer whose bits are inverted</param>
/// <param name="b">32-bit integer that is masked by the inverted a</param>
/// <returns>32-bit integer holding (NOT a) AND b</returns>
[DebuggerStepThrough]
public static uint andn_u32(uint a, uint b)
{
    uint inverted = ~a;
    return b & inverted;
}
/// <summary>
/// Invert every bit of the 64-bit integer a, AND the inverted value with b, and return the result.
/// </summary>
/// <remarks>
/// **** andn r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer whose bits are inverted</param>
/// <param name="b">64-bit integer that is masked by the inverted a</param>
/// <returns>64-bit integer holding (NOT a) AND b</returns>
[DebuggerStepThrough]
public static ulong andn_u64(ulong a, ulong b)
{
    ulong inverted = ~a;
    return b & inverted;
}
/// <summary>
/// Extract a run of contiguous bits from the unsigned 32-bit integer a. The run begins at bit position start and is len bits long; the extracted bits are returned right-aligned.
/// </summary>
/// <remarks>
/// **** bextr r32, r32, r32
///
/// Only the low 8 bits of start and len are used. A start of 32 or more yields 0; a len of 32 or more keeps every bit from start upwards.
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="start">Starting bit</param>
/// <param name="len">Number of bits</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bextr_u32(uint a, uint start, uint len)
{
    uint firstBit = start & 0xffu;
    if (firstBit >= (sizeof(uint) * 8))
    {
        // The field starts beyond the top of the register: nothing to extract.
        return 0;
    }

    uint shifted = a >> (int)firstBit;
    uint count = len & 0xffu;
    if (count < (sizeof(uint) * 8))
    {
        uint fieldMask = (1u << (int)count) - 1u;
        return shifted & fieldMask;
    }

    // The requested length covers the whole register, so no masking is needed.
    return shifted;
}
/// <summary>
/// Extract a run of contiguous bits from the unsigned 64-bit integer a. The run begins at bit position start and is len bits long; the extracted bits are returned right-aligned.
/// </summary>
/// <remarks>
/// **** bextr r64, r64, r64
///
/// Only the low 8 bits of start and len are used. A start of 64 or more yields 0; a len of 64 or more keeps every bit from start upwards.
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="start">Starting bit</param>
/// <param name="len">Number of bits</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bextr_u64(ulong a, uint start, uint len)
{
    uint firstBit = start & 0xffu;
    if (firstBit >= (sizeof(ulong) * 8))
    {
        // The field starts beyond the top of the register: nothing to extract.
        return 0;
    }

    ulong shifted = a >> (int)firstBit;
    uint count = len & 0xffu;
    if (count < (sizeof(ulong) * 8))
    {
        ulong fieldMask = (1ul << (int)count) - 1ul;
        return shifted & fieldMask;
    }

    // The requested length covers the whole register, so no masking is needed.
    return shifted;
}
/// <summary>
/// Extract a run of contiguous bits from the unsigned 32-bit integer a. Bits 7:0 of control give the starting bit and bits 15:8 of control give the number of bits to extract.
/// </summary>
/// <remarks>
/// **** bextr r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="control">Control word: start position in bits 7:0, length in bits 15:8</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bextr2_u32(uint a, uint control)
{
    uint firstBit = control & 0xffu;
    uint count = (control >> 8) & 0xffu;
    return bextr_u32(a, firstBit, count);
}
/// <summary>
/// Extract a run of contiguous bits from the unsigned 64-bit integer a. Bits 7:0 of control give the starting bit and bits 15:8 of control give the number of bits to extract.
/// </summary>
/// <remarks>
/// **** bextr r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="control">Control word: start position in bits 7:0, length in bits 15:8</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bextr2_u64(ulong a, ulong control)
{
    uint firstBit = (uint)(control & 0xfful);
    uint count = (uint)((control >> 8) & 0xfful);
    return bextr_u64(a, firstBit, count);
}
/// <summary>
/// Isolate the lowest set bit of the unsigned 32-bit integer a. The result has only that bit set, or is zero when a is zero.
/// </summary>
/// <remarks>
/// **** blsi r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsi_u32(uint a)
{
    // a AND (two's-complement negation of a) leaves only the lowest set bit.
    uint negated = (uint)(-(int)a);
    return a & negated;
}
/// <summary>
/// Isolate the lowest set bit of the unsigned 64-bit integer a. The result has only that bit set, or is zero when a is zero.
/// </summary>
/// <remarks>
/// **** blsi r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsi_u64(ulong a)
{
    // a AND (two's-complement negation of a) leaves only the lowest set bit.
    ulong negated = (ulong)(-(long)a);
    return a & negated;
}
/// <summary>
/// Build a mask with every bit set from bit 0 up to and including the lowest set bit of the unsigned 32-bit integer a.
/// </summary>
/// <remarks>
/// **** blsmsk r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsmsk_u32(uint a)
{
    uint decremented = a - 1;
    return a ^ decremented;
}
/// <summary>
/// Build a mask with every bit set from bit 0 up to and including the lowest set bit of the unsigned 64-bit integer a.
/// </summary>
/// <remarks>
/// **** blsmsk r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsmsk_u64(ulong a)
{
    ulong decremented = a - 1;
    return a ^ decremented;
}
/// <summary>
/// Return a copy of the unsigned 32-bit integer a with its lowest set bit cleared.
/// </summary>
/// <remarks>
/// **** blsr r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsr_u32(uint a)
{
    uint decremented = a - 1;
    return a & decremented;
}
/// <summary>
/// Return a copy of the unsigned 64-bit integer a with its lowest set bit cleared.
/// </summary>
/// <remarks>
/// **** blsr r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsr_u64(ulong a)
{
    ulong decremented = a - 1;
    return a & decremented;
}
/// <summary>
/// Count the trailing (least-significant) zero bits of the unsigned 32-bit integer a. Returns 32 when a is zero.
/// </summary>
/// <remarks>
/// **** tzcnt r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint tzcnt_u32(uint a)
{
    // Keep only the lowest set bit; its position equals the trailing-zero count.
    uint lowest = a & (uint)-(int)(a);

    // Start at 31 for a non-zero input (32 for zero), then binary-search the position
    // by testing which half / quarter / ... of the word the isolated bit falls in.
    uint count = lowest != 0 ? 31u : 32u;
    if ((lowest & 0x0000FFFF) != 0) { count -= 16; }
    if ((lowest & 0x00FF00FF) != 0) { count -= 8; }
    if ((lowest & 0x0F0F0F0F) != 0) { count -= 4; }
    if ((lowest & 0x33333333) != 0) { count -= 2; }
    if ((lowest & 0x55555555) != 0) { count -= 1; }
    return count;
}
/// <summary>
/// Count the trailing (least-significant) zero bits of the unsigned 64-bit integer a. Returns 64 when a is zero.
/// </summary>
/// <remarks>
/// **** tzcnt r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong tzcnt_u64(ulong a)
{
    // Keep only the lowest set bit; its position equals the trailing-zero count.
    ulong lowest = a & (ulong)-(long)(a);

    // Start at 63 for a non-zero input (64 for zero), then binary-search the position
    // by testing which half / quarter / ... of the word the isolated bit falls in.
    ulong count = lowest != 0 ? 63ul : 64ul;
    if ((lowest & 0x00000000FFFFFFFF) != 0) { count -= 32; }
    if ((lowest & 0x0000FFFF0000FFFF) != 0) { count -= 16; }
    if ((lowest & 0x00FF00FF00FF00FF) != 0) { count -= 8; }
    if ((lowest & 0x0F0F0F0F0F0F0F0F) != 0) { count -= 4; }
    if ((lowest & 0x3333333333333333) != 0) { count -= 2; }
    if ((lowest & 0x5555555555555555) != 0) { count -= 1; }
    return count;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: bae2d17db94135ea84f8110705ba44a0
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,212 @@
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// bmi2 intrinsics
/// </summary>
public static class Bmi2
{
/// <summary>
/// Reports whether bmi2 intrinsics can be used; resolved at compile time.
///
/// Burst couples bmi2 availability to AVX2 availability to keep the set of supported feature combinations small.
/// </summary>
public static bool IsBmi2Supported => Avx2.IsAvx2Supported;
/// <summary>
/// Return a copy of the unsigned 32-bit integer a with every bit at position index and above cleared.
/// </summary>
/// <remarks>
/// **** bzhi r32, r32, r32
///
/// Only the low 8 bits of index are used; an index of 32 or more returns a unchanged.
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="index">Starting point</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bzhi_u32(uint a, uint index)
{
    uint position = index & 0xffu;
    if (position < (sizeof(uint) * 8))
    {
        uint keepMask = (1u << (int)position) - 1u;
        return a & keepMask;
    }

    // Nothing lies at or above the requested position, so every bit is kept.
    return a;
}
/// <summary>
/// Return a copy of the unsigned 64-bit integer a with every bit at position index and above cleared.
/// </summary>
/// <remarks>
/// **** bzhi r64, r64, r64
///
/// Only the low 8 bits of index are used; an index of 64 or more returns a unchanged.
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="index">Starting point</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bzhi_u64(ulong a, ulong index)
{
    ulong position = index & 0xfful;
    if (position < (sizeof(ulong) * 8))
    {
        ulong keepMask = (1ul << (int)position) - 1ul;
        return a & keepMask;
    }

    // Nothing lies at or above the requested position, so every bit is kept.
    return a;
}
/// <summary>
/// Multiply the unsigned 32-bit integers a and b as a full 64-bit product. The low 32 bits are returned and the high 32 bits are written to hi. This does not read or write arithmetic flags.
/// </summary>
/// <remarks>
/// **** mulx r32, r32, m32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="b">32-bit integer</param>
/// <param name="hi">Stores the high 32-bits</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint mulx_u32(uint a, uint b, out uint hi)
{
    // Widen before multiplying so the product cannot wrap.
    ulong product = (ulong)a * (ulong)b;
    hi = (uint)(product >> 32);
    return (uint)(product & uint.MaxValue);
}
/// <summary>
/// Multiply the unsigned 64-bit integers a and b as a full 128-bit product. The low 64 bits are returned and the high 64 bits are written to hi. This does not read or write arithmetic flags.
/// </summary>
/// <remarks>
/// **** mulx r64, r64, m64
///
/// The multiplication itself is delegated to Common.umul128.
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="b">64-bit integer</param>
/// <param name="hi">Stores the high 64-bits</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong mulx_u64(ulong a, ulong b, out ulong hi)
{
    ulong low = Common.umul128(a, b, out hi);
    return low;
}
/// <summary>
/// Scatter the contiguous low bits of the unsigned 32-bit integer a into the bit positions that are set in mask, lowest position first. Every result bit whose mask bit is clear is zero.
/// </summary>
/// <remarks>
/// **** pdep r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint pdep_u32(uint a, uint mask)
{
    uint deposited = 0;
    uint source = a;

    // Walk a single probe bit from position 0 to 31; it becomes 0 after the last shift.
    for (uint bit = 1u; bit != 0; bit <<= 1)
    {
        if ((mask & bit) == 0)
        {
            continue;
        }

        // The next unused low bit of the source lands on this mask position.
        if ((source & 1u) != 0)
        {
            deposited |= bit;
        }
        source >>= 1;
    }
    return deposited;
}
/// <summary>
/// Scatter the contiguous low bits of the unsigned 64-bit integer a into the bit positions that are set in mask, lowest position first. Every result bit whose mask bit is clear is zero.
/// </summary>
/// <remarks>
/// **** pdep r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong pdep_u64(ulong a, ulong mask)
{
    ulong deposited = 0;
    ulong source = a;

    // Walk a single probe bit from position 0 to 63; it becomes 0 after the last shift.
    for (ulong bit = 1ul; bit != 0; bit <<= 1)
    {
        if ((mask & bit) == 0)
        {
            continue;
        }

        // The next unused low bit of the source lands on this mask position.
        if ((source & 1ul) != 0)
        {
            deposited |= bit;
        }
        source >>= 1;
    }
    return deposited;
}
/// <summary>
/// Gather the bits of the unsigned 32-bit integer a that sit at the positions set in mask and pack them, lowest position first, into the contiguous low bits of the result. The remaining upper bits of the result are zero.
/// </summary>
/// <remarks>
/// **** pext r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint pext_u32(uint a, uint mask)
{
    uint extracted = 0;
    uint target = 1u;

    // Walk a single probe bit from position 0 to 31; it becomes 0 after the last shift.
    for (uint bit = 1u; bit != 0; bit <<= 1)
    {
        if ((mask & bit) == 0)
        {
            continue;
        }

        // Each selected source bit is written to the next free low position.
        if ((a & bit) != 0)
        {
            extracted |= target;
        }
        target <<= 1;
    }
    return extracted;
}
/// <summary>
/// Gather the bits of the unsigned 64-bit integer a that sit at the positions set in mask and pack them, lowest position first, into the contiguous low bits of the result. The remaining upper bits of the result are zero.
/// </summary>
/// <remarks>
/// **** pext r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong pext_u64(ulong a, ulong mask)
{
    ulong extracted = 0;
    ulong target = 1ul;

    // Walk a single probe bit from position 0 to 63; it becomes 0 after the last shift.
    for (ulong bit = 1ul; bit != 0; bit <<= 1)
    {
        if ((mask & bit) == 0)
        {
            continue;
        }

        // Each selected source bit is written to the next free low position.
        if ((a & bit) != 0)
        {
            extracted |= target;
        }
        target <<= 1;
    }
    return extracted;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: aa392f69e52b37a486ca7cfa6125fd60
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,66 @@
using System;
namespace Unity.Burst.Intrinsics
{
/// <summary>
/// Static methods and properties for X86 instruction intrinsics.
/// </summary>
public unsafe static partial class X86
{
// Reads one v128 value from the memory that ptr points at.
private static v128 GenericCSharpLoad(void* ptr)
{
    v128* source = (v128*)ptr;
    return *source;
}
// Writes val as one v128 value into the memory that ptr points at.
private static void GenericCSharpStore(void* ptr, v128 val)
{
    v128* destination = (v128*)ptr;
    *destination = val;
}
// Clamps val to the sbyte range [-128, 127] and narrows it to sbyte.
private static sbyte Saturate_To_Int8(int val)
{
    if (val < sbyte.MinValue)
    {
        return sbyte.MinValue;
    }
    return val > sbyte.MaxValue ? sbyte.MaxValue : (sbyte)val;
}
// Clamps val to the byte range [0, 255] and narrows it to byte.
private static byte Saturate_To_UnsignedInt8(int val)
{
    if (val < byte.MinValue)
    {
        return byte.MinValue;
    }
    return val > byte.MaxValue ? byte.MaxValue : (byte)val;
}
// Clamps val to the short range [-32768, 32767] and narrows it to short.
private static short Saturate_To_Int16(int val)
{
    if (val < short.MinValue)
    {
        return short.MinValue;
    }
    return val > short.MaxValue ? short.MaxValue : (short)val;
}
// Clamps val to the ushort range [0, 65535] and narrows it to ushort.
private static ushort Saturate_To_UnsignedInt16(int val)
{
    if (val < ushort.MinValue)
    {
        return ushort.MinValue;
    }
    return val > ushort.MaxValue ? ushort.MaxValue : (ushort)val;
}
// True when the 32-bit pattern v, with its sign bit ignored, is greater than 0x7f800000
// (all exponent bits set and a non-zero mantissa), i.e. a single-precision NaN encoding.
private static bool IsNaN(uint v)
{
    uint magnitude = v & 0x7fffffffu;
    return magnitude > 0x7f800000;
}
// True when the 64-bit pattern v, with its sign bit ignored, is greater than 0x7ff0000000000000
// (all exponent bits set and a non-zero mantissa), i.e. a double-precision NaN encoding.
private static bool IsNaN(ulong v)
{
    ulong magnitude = v & 0x7ffffffffffffffful;
    return magnitude > 0x7ff0000000000000ul;
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 000378914c63384c8062cbad18605802
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,269 @@
using System;
using Unity.Burst;
#if !BURST_INTERNAL
using AOT;
using UnityEngine;
#endif
using System.Runtime.InteropServices;
namespace Unity.Burst.Intrinsics
{
#if !BURST_INTERNAL
[BurstCompile]
#endif
public unsafe static partial class X86
{
/// <summary>
/// The 32-bit MXCSR register contains control and status information for SSE and AVX SIMD floating-point operations.
/// </summary>
/// <remarks>
/// Each member names one bit, or a combination of bits, in the range 0-15 of the register.
/// Note that <see cref="RoundingControlMask"/> and <see cref="RoundTowardZero"/> have the same value (bits 13 and 14),
/// and that <see cref="RoundToNearest"/> is zero, i.e. the absence of both rounding-control bits.
/// </remarks>
[Flags]
public enum MXCSRBits
{
/// <summary>
/// Bit 15 (FTZ) of the MXCSR register enables the flush-to-zero mode, which controls the masked response to a SIMD floating-point underflow condition.
/// </summary>
/// <remarks>
/// When the underflow exception is masked and the flush-to-zero mode is enabled, the processor performs the following operations when it detects a floating-point underflow condition:
/// - Returns a zero result with the sign of the true result.
/// - Sets the precision and underflow exception flags.
///
/// If the underflow exception is not masked, the flush-to-zero bit is ignored.
///
/// The flush-to-zero mode is not compatible with IEEE Standard 754. The IEEE-mandated masked response to underflow is to deliver the denormalized result.
/// The flush-to-zero mode is provided primarily for performance reasons. At the cost of a slight precision loss, faster execution can be achieved for applications where underflows
/// are common and rounding the underflow result to zero can be tolerated. The flush-to-zero bit is cleared upon a power-up or reset of the processor, disabling the flush-to-zero mode.
/// </remarks>
FlushToZero = 1 << 15,
/// <summary>
/// Mask for rounding control bits (bits 13 and 14).
/// </summary>
/// <remarks>
/// The rounding modes have no effect on comparison operations, operations that produce exact results, or operations that produce NaN results.
/// </remarks>
RoundingControlMask = (1 << 14) | (1 << 13),
/// <summary>
/// Rounded result is the closest to the infinitely precise result. If two values are equally close, the result is the even value (that is, the one with the least-significant bit of zero). Default.
/// </summary>
RoundToNearest = 0,
/// <summary>
/// Rounded result is closest to but no greater than the infinitely precise result.
/// </summary>
RoundDown = (1 << 13),
/// <summary>
/// Rounded result is closest to but no less than the infinitely precise result.
/// </summary>
RoundUp = (1 << 14),
/// <summary>
/// Rounded result is closest to but no greater in absolute value than the infinitely precise result.
/// </summary>
RoundTowardZero = (1 << 13) | (1 << 14),
/// <summary>Bit 12: mask bit for the precision exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
PrecisionMask = 1 << 12,
/// <summary>Bit 11: mask bit for the underflow exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
UnderflowMask = 1 << 11,
/// <summary>Bit 10: mask bit for the overflow exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
OverflowMask = 1 << 10,
/// <summary>Bit 9: mask bit for the divide-by-zero exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
DivideByZeroMask = 1 << 9,
/// <summary>Bit 8: mask bit for the denormal-operation exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
DenormalOperationMask = 1 << 8,
/// <summary>Bit 7: mask bit for the invalid-operation exception. Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
InvalidOperationMask = 1 << 7,
/// <summary>
/// Combine all bits for exception masking (bits 7 through 12) into one mask for convenience.
/// </summary>
ExceptionMask = PrecisionMask | UnderflowMask | OverflowMask | DivideByZeroMask | DenormalOperationMask | InvalidOperationMask,
/// <summary>
/// Bit 6 (DAZ) of the MXCSR register enables the denormals-are-zeros mode, which controls the processor's response to a SIMD floating-point denormal operand condition.
/// </summary>
/// <remarks>
/// When the denormals-are-zeros flag is set, the processor converts all denormal source operands to a zero with the sign of the original operand before performing any computations on them.
/// The processor does not set the denormal-operand exception flag (DE), regardless of the setting of the denormal-operand exception mask bit (DM); and it does not generate a denormal-operand
/// exception if the exception is unmasked. The denormals-are-zeros mode is not compatible with IEEE Standard 754.
///
/// The denormals-are-zeros mode is provided to improve processor performance for applications such as streaming media processing, where rounding a denormal operand to zero does not
/// appreciably affect the quality of the processed data. The denormals-are-zeros flag is cleared upon a power-up or reset of the processor, disabling the denormals-are-zeros mode.
///
/// The denormals-are-zeros mode was introduced in the Pentium 4 and Intel Xeon processor with the SSE2 extensions; however, it is fully compatible with the SSE SIMD floating-point instructions
/// (that is, the denormals-are-zeros flag affects the operation of the SSE SIMD floating-point instructions). In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag
/// (bit 6) is reserved. Attempting to set bit 6 of the MXCSR register on processors that do not support the DAZ flag will cause a general-protection exception (#GP).
/// </remarks>
DenormalsAreZeroes = 1 << 6,
/// <summary>Bit 5: precision exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
PrecisionFlag = 1 << 5,
/// <summary>Bit 4: underflow exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
UnderflowFlag = 1 << 4,
/// <summary>Bit 3: overflow exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
OverflowFlag = 1 << 3,
/// <summary>Bit 2: divide-by-zero exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
DivideByZeroFlag = 1 << 2,
/// <summary>Bit 1: denormal exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
DenormalFlag = 1 << 1,
/// <summary>Bit 0: invalid-operation exception flag. Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
InvalidOperationFlag = 1 << 0,
/// <summary>
/// Combines all bits for flags (bits 0 through 5) into one mask for convenience.
/// </summary>
FlagMask = PrecisionFlag | UnderflowFlag | OverflowFlag | DivideByZeroFlag | DenormalFlag | InvalidOperationFlag,
}
/// <summary>
/// Rounding mode flags
/// </summary>
/// <remarks>
/// The low two bits (0x00-0x03) select a rounding direction and are an enumerated field rather than independent flags;
/// 0x04 selects the current direction and 0x08 suppresses exceptions. Because <see cref="FROUND_RAISE_EXC"/> is zero,
/// OR-ing it into the composite members below documents intent without changing the value.
/// <see cref="FROUND_NEARBYINT"/> and <see cref="FROUND_RINT_NOEXC"/> have the same value.
/// </remarks>
[Flags]
public enum RoundingMode
{
/// <summary>
/// Round to the nearest integer
/// </summary>
FROUND_TO_NEAREST_INT = 0x00,
/// <summary>
/// Round to negative infinity
/// </summary>
FROUND_TO_NEG_INF = 0x01,
/// <summary>
/// Round to positive infinity
/// </summary>
FROUND_TO_POS_INF = 0x02,
/// <summary>
/// Round to zero
/// </summary>
FROUND_TO_ZERO = 0x03,
/// <summary>
/// Round to current direction
/// </summary>
FROUND_CUR_DIRECTION = 0x04,
/// <summary>
/// Do not suppress exceptions (zero: the absence of <see cref="FROUND_NO_EXC"/>)
/// </summary>
FROUND_RAISE_EXC = 0x00,
/// <summary>
/// Suppress exceptions
/// </summary>
FROUND_NO_EXC = 0x08,
/// <summary>
/// Round to the nearest integer without suppressing exceptions
/// </summary>
FROUND_NINT = FROUND_TO_NEAREST_INT | FROUND_RAISE_EXC,
/// <summary>
/// Round using Floor function without suppressing exceptions
/// </summary>
FROUND_FLOOR = FROUND_TO_NEG_INF | FROUND_RAISE_EXC,
/// <summary>
/// Round using Ceiling function without suppressing exceptions
/// </summary>
FROUND_CEIL = FROUND_TO_POS_INF | FROUND_RAISE_EXC,
/// <summary>
/// Round by truncating without suppressing exceptions
/// </summary>
FROUND_TRUNC = FROUND_TO_ZERO | FROUND_RAISE_EXC,
/// <summary>
/// Round using MXCSR.RC without suppressing exceptions
/// </summary>
FROUND_RINT = FROUND_CUR_DIRECTION | FROUND_RAISE_EXC,
/// <summary>
/// Round using MXCSR.RC and suppressing exceptions
/// </summary>
FROUND_NEARBYINT = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
/// <summary>
/// Round to nearest integer and suppressing exceptions
/// </summary>
FROUND_NINT_NOEXC = FROUND_TO_NEAREST_INT | FROUND_NO_EXC,
/// <summary>
/// Round using Floor function and suppressing exceptions
/// </summary>
FROUND_FLOOR_NOEXC = FROUND_TO_NEG_INF | FROUND_NO_EXC,
/// <summary>
/// Round using Ceiling function and suppressing exceptions
/// </summary>
FROUND_CEIL_NOEXC = FROUND_TO_POS_INF | FROUND_NO_EXC,
/// <summary>
/// Round by truncating and suppressing exceptions
/// </summary>
FROUND_TRUNC_NOEXC = FROUND_TO_ZERO | FROUND_NO_EXC,
/// <summary>
/// Round using MXCSR.RC and suppressing exceptions (same value as <see cref="FROUND_NEARBYINT"/>)
/// </summary>
FROUND_RINT_NOEXC = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
}
/// <summary>
/// Disposable scope that switches the MXCSR rounding-control bits on construction and writes the
/// previously read MXCSR value back on <see cref="Dispose"/>.
/// </summary>
internal struct RoundingScope : IDisposable
{
    // MXCSR value captured when the scope was opened; written back verbatim on Dispose.
    private MXCSRBits _savedBits;

    /// <summary>
    /// Captures the current MXCSR value, clears its rounding-control bits and ORs in <paramref name="roundingMode"/>.
    /// </summary>
    /// <param name="roundingMode">Bits to OR into MXCSR after the rounding-control bits have been cleared</param>
    public RoundingScope(MXCSRBits roundingMode)
    {
        MXCSRBits current = MXCSR;
        _savedBits = current;
        MXCSRBits withoutRounding = current & ~MXCSRBits.RoundingControlMask;
        MXCSR = withoutRounding | roundingMode;
    }

    /// <summary>
    /// Writes the captured MXCSR value back.
    /// </summary>
    public void Dispose()
    {
        MXCSR = _savedBits;
    }
}
// Two alternative implementations of getcsr_raw / setcsr_raw, selected by the BURST_INTERNAL define.
#if !BURST_INTERNAL
// Managed stubs: as written here the setter does nothing and the getter returns 0.
// NOTE(review): the names suggest Burst substitutes the real CSR access for these when compiling
// the trampolines below - confirm against the Burst compiler before renaming or reshaping them.
private static void BurstIntrinsicSetCSRFromManaged(int _) { }
private static int BurstIntrinsicGetCSRFromManaged() { return 0; }
// Raw CSR accessors used by the MXCSR property; they forward to the Burst-compiled trampolines.
internal static int getcsr_raw() => DoGetCSRTrampoline();
internal static void setcsr_raw(int bits) => DoSetCSRTrampoline(bits);
// Writes the CSR via the stub above, but only when Sse.IsSseSupported is true; otherwise a no-op.
[BurstCompile(CompileSynchronously = true)]
private static void DoSetCSRTrampoline(int bits)
{
if (Sse.IsSseSupported)
BurstIntrinsicSetCSRFromManaged(bits);
}
// Reads the CSR via the stub above when Sse.IsSseSupported is true; otherwise returns 0.
[BurstCompile(CompileSynchronously = true)]
private static int DoGetCSRTrampoline()
{
if (Sse.IsSseSupported)
return BurstIntrinsicGetCSRFromManaged();
return 0;
}
#elif BURST_INTERNAL
// Internally inside Burst, unit tests can't recurse from tests into Burst again,
// so we P/Invoke into a dummy wrapper DLL that exposes CSR manipulation.
[DllImport("burst-dllimport-native", EntryPoint = "x86_getcsr")]
internal static extern int getcsr_raw();
[DllImport("burst-dllimport-native", EntryPoint = "x86_setcsr")]
internal static extern void setcsr_raw(int bits);
#endif
/// <summary>
/// Gets or sets the CSR register. The getter converts the raw integer from getcsr_raw to
/// <see cref="MXCSRBits"/>; the setter passes the integer value of the bits to setcsr_raw.
/// </summary>
public static MXCSRBits MXCSR
{
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    get
    {
        int raw = getcsr_raw();
        return (MXCSRBits)raw;
    }
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    set
    {
        int raw = (int)value;
        setcsr_raw(raw);
    }
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: b88ec138634e3238a82a5b8f3d970ac1
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,306 @@
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// F16C intrinsics
/// </summary>
public static class F16C
{
/// <summary>
/// Reports whether F16C intrinsics can be used; resolved at compile time.
///
/// Burst couples F16C availability to AVX2 availability to keep the set of supported feature combinations small.
/// </summary>
public static bool IsF16CSupported => Avx2.IsAvx2Supported;
/// <summary>
/// Converts a half (hiding in a ushort) to a float (hiding in a uint).
/// </summary>
/// <remarks>
/// Handles all four encodings of the 16-bit input: signed zero, denormals (which are renormalized,
/// since every half denormal is representable as a normal float), normal numbers, and infinity / NaN
/// (half exponent 0x1f maps to float exponent 0xff with the mantissa bits carried over).
/// </remarks>
/// <param name="h">The half to convert</param>
/// <returns>The float result</returns>
[DebuggerStepThrough]
private static uint HalfToFloat(ushort h)
{
    // Split the half into sign (moved straight to float bit 31), 5-bit exponent and 10-bit mantissa.
    uint sign = (h & 0x8000u) << 16;
    int exponent = (h >> 10) & 0x1f;
    uint mantissa = h & 0x3ffu;

    // Signed zero: only the sign bit survives.
    if (exponent == 0 && mantissa == 0)
    {
        return sign;
    }

    // Denormal (converts to normalized)
    if (exponent == 0)
    {
        // Shift the mantissa up until its implicit leading one reaches bit 10,
        // counting how many extra positions (beyond the first) were needed.
        int adjust = -1;
        do
        {
            adjust++;
            mantissa <<= 1;
        } while ((mantissa & 0x400u) == 0);

        uint denormExponent = (uint)(127 - 15 - adjust);
        // Have to re-mask the mantissa here because we've been shifting bits up.
        return sign | (denormExponent << 23) | ((mantissa & 0x3ffu) << 13);
    }

    // Normal numbers are re-biased (15 -> 127); infinity / NaN take the all-ones float exponent.
    // The exponent is selected first and shifted afterwards: the conditional operator binds more
    // loosely than <<, so shifting inside the ternary would leave the 255 case unshifted.
    uint biasedExponent = exponent == 0x1f ? 255u : (uint)(127 - 15 + exponent);
    return sign | (biasedExponent << 23) | (mantissa << 13);
}
/// <summary>
/// Convert the four packed half-precision (16-bit) floating-point elements in the low half of a to packed single-precision (32-bit) floating-point elements, and return them.
/// </summary>
/// <remarks>
/// **** vcvtph2ps xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cvtph_ps(v128 a)
{
    uint lane0 = HalfToFloat(a.UShort0);
    uint lane1 = HalfToFloat(a.UShort1);
    uint lane2 = HalfToFloat(a.UShort2);
    uint lane3 = HalfToFloat(a.UShort3);
    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// Convert the eight packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and return them.
/// </summary>
/// <remarks>
/// **** vcvtph2ps ymm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_cvtph_ps(v128 a)
{
    uint lane0 = HalfToFloat(a.UShort0);
    uint lane1 = HalfToFloat(a.UShort1);
    uint lane2 = HalfToFloat(a.UShort2);
    uint lane3 = HalfToFloat(a.UShort3);
    uint lane4 = HalfToFloat(a.UShort4);
    uint lane5 = HalfToFloat(a.UShort5);
    uint lane6 = HalfToFloat(a.UShort6);
    uint lane7 = HalfToFloat(a.UShort7);
    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
// Using ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
//
// Lookup table used by FloatToHalf. It is indexed by the top 9 bits of the float
// (f >> 23: the sign bit followed by the 8 exponent bits), hence 512 entries.
// The first 256 entries cover a clear sign bit; the last 256 are the same values with
// 0x8000 (the half sign bit) set. Each entry is the base half bit pattern to which
// FloatToHalf adds the float mantissa shifted right by the matching ShiftTable entry.
private static readonly ushort[] BaseTable =
{
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
};
// Companion to BaseTable, indexed the same way (f >> 23, 512 entries).
// Each entry is the right shift FloatToHalf applies to the 23-bit float mantissa before
// adding it to the BaseTable entry: a shift of 24 discards every mantissa bit, a shift of
// 13 keeps the top 10 bits, and the values in between are used where BaseTable holds a
// single denormal bit.
private static readonly sbyte[] ShiftTable =
{
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
};
/// <summary>
/// Converts a float (hiding in a uint) to a half (hiding in a ushort).
/// </summary>
/// <remarks>
/// The sign and exponent bits of the float index BaseTable and ShiftTable to build the truncated half,
/// which is then adjusted for the requested rounding mode. A rounding value other than the four
/// handled below leaves the truncated result unchanged.
/// </remarks>
/// <param name="f">The float to convert</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>The half result</returns>
[DebuggerStepThrough]
private static ushort FloatToHalf(uint f, int rounding)
{
    // Top 9 bits of the float: the sign bit followed by the 8-bit biased exponent.
    var exponentAndSign = f >> 23;
    var mantissa = f & 0x7FFFFFu;

    // NaN input. Truncating the payload to 10 bits could leave it all-zero, which would
    // turn the NaN into an infinity, so always set the top mantissa (quiet) bit.
    // NOTE(review): quieting is believed to match vcvtps2ph - confirm against the Intel SDM.
    if (((exponentAndSign & 0xFF) == 0xFF) && (mantissa != 0))
    {
        return (ushort)(((f >> 16) & 0x8000u) | 0x7E00u | (mantissa >> 13));
    }

    var shift = ShiftTable[exponentAndSign];

    // Truncated conversion: base sign/exponent pattern plus the surviving mantissa bits.
    var result = (uint)(BaseTable[exponentAndSign] + (ushort)(mantissa >> shift));

    // Check if the result is not Inf or NaN.
    var isFinite = (result & 0x7C00) != 0x7C00;
    var isNegative = (result & 0x8000) != 0;

    if (rounding == (int)RoundingMode.FROUND_NINT_NOEXC)
    {
        if ((exponentAndSign & 0xFF) == 102)
        {
            // Magnitude is in [2^-25, 2^-24), i.e. at least half of the smallest half denormal.
            // Exactly 2^-25 is a tie and rounds to even (zero); anything larger rounds up.
            if (mantissa != 0)
            {
                result++;
            }
        }
        else if (isFinite)
        {
            // Round to nearest, ties to even: round up when the first discarded bit is set and
            // either a lower discarded bit is set (above the halfway point) or the kept value is odd.
            var roundBit = (mantissa >> (shift - 1)) & 0x1u;
            var stickyBits = mantissa & ((1u << (shift - 1)) - 1u);
            if ((roundBit != 0) && ((stickyBits != 0) || ((result & 0x1u) != 0)))
            {
                // A carry out of the mantissa correctly bumps the exponent (up to infinity).
                result++;
            }
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_TRUNC_NOEXC)
    {
        if (!isFinite)
        {
            // ShiftTable holds 24 (even) for finite inputs too large for a half and 13 (odd)
            // for a real infinity, so this subtracts 1 only in the overflow case, clamping
            // to the largest finite half instead of returning infinity.
            result -= (uint)(~shift & 0x1);
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_CEIL_NOEXC)
    {
        if (isFinite && !isNegative)
        {
            if ((exponentAndSign <= 102) && (exponentAndSign != 0))
            {
                // Positive normal float below the smallest half denormal: round up to it.
                result++;
            }
            else if ((mantissa & ((1u << shift) - 1u)) != 0)
            {
                // Some discarded mantissa bit was set, so the exact value lies above the truncation.
                result++;
            }
        }

        // Rounding toward +infinity can never produce -infinity from a finite input:
        // clamp negative overflow to the most negative finite half.
        var resultIsNegativeInf = (result == 0xFC00);
        var inputIsNotNegativeInf = (exponentAndSign != 0x1FF);
        if (resultIsNegativeInf && inputIsNotNegativeInf)
        {
            result--;
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_FLOOR_NOEXC)
    {
        if (isFinite && isNegative)
        {
            if ((exponentAndSign <= 358) && (exponentAndSign != 256))
            {
                // Negative normal float (358 = 256 + 102) smaller in magnitude than the smallest
                // half denormal: round down to the negative smallest denormal.
                result++;
            }
            else if ((mantissa & ((1u << shift) - 1u)) != 0)
            {
                // Some discarded mantissa bit was set, so the exact value lies below the truncation.
                result++;
            }
        }

        // Rounding toward -infinity can never produce +infinity from a finite input:
        // clamp positive overflow to the largest finite half.
        var resultIsPositiveInf = (result == 0x7C00);
        var inputIsNotPositiveInf = (exponentAndSign != 0xFF);
        if (resultIsPositiveInf && inputIsNotPositiveInf)
        {
            result--;
        }
    }

    return (ushort)result;
}
/// <summary>
/// Narrows the four single-precision (32-bit) floating-point lanes of a to half-precision (16-bit) values, stored in the first four 16-bit lanes of the result; the remaining four 16-bit lanes are zero.
///
/// Rounding follows the rounding parameter. When it is FROUND_RINT_NOEXC, the rounding-control bits currently held in MXCSR select the mode instead.
/// </summary>
/// <remarks>
/// **** cvtps2ph xmm, xmm, imm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cvtps_ph(v128 a, int rounding)
{
    int mode = rounding;

    if (mode == (int)RoundingMode.FROUND_RINT_NOEXC)
    {
        // "Current direction": translate the MXCSR rounding-control field into an explicit mode.
        var control = MXCSR & MXCSRBits.RoundingControlMask;

        if (control == MXCSRBits.RoundToNearest)
        {
            mode = (int)RoundingMode.FROUND_NINT_NOEXC;
        }
        else if (control == MXCSRBits.RoundDown)
        {
            mode = (int)RoundingMode.FROUND_FLOOR_NOEXC;
        }
        else if (control == MXCSRBits.RoundUp)
        {
            mode = (int)RoundingMode.FROUND_CEIL_NOEXC;
        }
        else if (control == MXCSRBits.RoundTowardZero)
        {
            mode = (int)RoundingMode.FROUND_TRUNC_NOEXC;
        }
    }

    ushort half0 = FloatToHalf(a.UInt0, mode);
    ushort half1 = FloatToHalf(a.UInt1, mode);
    ushort half2 = FloatToHalf(a.UInt2, mode);
    ushort half3 = FloatToHalf(a.UInt3, mode);

    return new v128(half0, half1, half2, half3, 0, 0, 0, 0);
}
/// <summary>
/// Narrows the eight single-precision (32-bit) floating-point lanes of a to half-precision (16-bit) values, stored in the eight 16-bit lanes of the result.
///
/// Rounding follows the rounding parameter. When it is FROUND_RINT_NOEXC, the rounding-control bits currently held in MXCSR select the mode instead.
/// </summary>
/// <remarks>
/// **** cvtps2ph xmm, ymm, imm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 mm256_cvtps_ph(v256 a, int rounding)
{
    int mode = rounding;

    if (mode == (int)RoundingMode.FROUND_RINT_NOEXC)
    {
        // "Current direction": translate the MXCSR rounding-control field into an explicit mode.
        var control = MXCSR & MXCSRBits.RoundingControlMask;

        if (control == MXCSRBits.RoundToNearest)
        {
            mode = (int)RoundingMode.FROUND_NINT_NOEXC;
        }
        else if (control == MXCSRBits.RoundDown)
        {
            mode = (int)RoundingMode.FROUND_FLOOR_NOEXC;
        }
        else if (control == MXCSRBits.RoundUp)
        {
            mode = (int)RoundingMode.FROUND_CEIL_NOEXC;
        }
        else if (control == MXCSRBits.RoundTowardZero)
        {
            mode = (int)RoundingMode.FROUND_TRUNC_NOEXC;
        }
    }

    ushort half0 = FloatToHalf(a.UInt0, mode);
    ushort half1 = FloatToHalf(a.UInt1, mode);
    ushort half2 = FloatToHalf(a.UInt2, mode);
    ushort half3 = FloatToHalf(a.UInt3, mode);
    ushort half4 = FloatToHalf(a.UInt4, mode);
    ushort half5 = FloatToHalf(a.UInt5, mode);
    ushort half6 = FloatToHalf(a.UInt6, mode);
    ushort half7 = FloatToHalf(a.UInt7, mode);

    return new v128(half0, half1, half2, half3, half4, half5, half6, half7);
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: ae12ed22401338869b648a8327f251da
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,624 @@
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// FMA intrinsics
/// </summary>
public static class Fma
{
/// <summary>
/// Evaluates to true at compile time if FMA intrinsics are supported.
///
/// Burst ties FMA support to AVX2 support to keep the number of feature sets small, so this simply forwards Avx2.IsAvx2Supported.
/// </summary>
public static bool IsFmaSupported
{
    get
    {
        return Avx2.IsAvx2Supported;
    }
}
/// <summary>
/// Computes a * b + c for single-precision inputs, doing the multiply and add in double precision before narrowing back to float.
/// </summary>
/// <param name="a">First factor</param>
/// <param name="b">Second factor</param>
/// <param name="c">Addend</param>
/// <returns>The result narrowed to float</returns>
[DebuggerStepThrough]
private static float FmaHelper(float a, float b, float c)
{
    // Widening a first forces the whole expression to be evaluated as double.
    double product = (double)a * b;
    double sum = product + c;
    return (float)sum;
}
// Overlays a float and a uint at the same offset so the raw bits of one can be read as the other.
// NOTE(review): nothing else in this class references Union - it looks like dead code; confirm before removing.
[StructLayout(LayoutKind.Explicit)]
private struct Union
{
// Floating-point view of the storage at offset 0.
[FieldOffset(0)]
public float f;
// Unsigned-integer view of the same storage at offset 0.
[FieldOffset(0)]
public uint u;
}
/// <summary>
/// Computes -(a * b) + c by negating a and delegating to FmaHelper.
/// </summary>
/// <param name="a">First factor (negated before the multiply)</param>
/// <param name="b">Second factor</param>
/// <param name="c">Addend</param>
/// <returns>The result narrowed to float</returns>
[DebuggerStepThrough]
private static float FnmaHelper(float a, float b, float c)
{
    float negatedA = -a;
    return FmaHelper(negatedA, b, c);
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmadd213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmadd_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmadd213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmadd_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// For each of the four single-precision (32-bit) lanes, computes a * b + c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfmadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmadd_ps(v128 a, v128 b, v128 c)
{
    float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// For each of the eight single-precision (32-bit) lanes, computes a * b + c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfmadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmadd_ps(v256 a, v256 b, v256 c)
{
    float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
    float lane4 = FmaHelper(a.Float4, b.Float4, c.Float4);
    float lane5 = FmaHelper(a.Float5, b.Float5, c.Float5);
    float lane6 = FmaHelper(a.Float6, b.Float6, c.Float6);
    float lane7 = FmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmadd213sd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmadd_sd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Computes a * b + c on the lowest single-precision (32-bit) lane only. The result goes in the lowest lane of dst; the upper three lanes are copied from a.
/// </summary>
/// <remarks>
/// **** vfmadd213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmadd_ss(v128 a, v128 b, v128 c)
{
    float low = FmaHelper(a.Float0, b.Float0, c.Float0);

    // Start from a copy of a so the upper lanes pass through untouched.
    v128 dst = a;
    dst.Float0 = low;
    return dst;
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmaddsub213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmaddsub_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmaddsub213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmaddsub_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiplies the four single-precision (32-bit) lanes of a and b, then alternates per lane: even lanes (0, 2) subtract c from the product, odd lanes (1, 3) add c to it. The results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmaddsub_ps(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// Multiplies the eight single-precision (32-bit) lanes of a and b, then alternates per lane: even lanes subtract c from the product, odd lanes add c to it. The results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmaddsub_ps(v256 a, v256 b, v256 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
    float lane4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
    float lane5 = FmaHelper(a.Float5, b.Float5, c.Float5);
    float lane6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
    float lane7 = FmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmsub213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmsub_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmsub213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmsub_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// For each of the four single-precision (32-bit) lanes, computes a * b - c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfmsub213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmsub_ps(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// For each of the eight single-precision (32-bit) lanes, computes a * b - c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfmsub213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmsub_ps(v256 a, v256 b, v256 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
    float lane4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
    float lane5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
    float lane6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
    float lane7 = FmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmsub213sd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmsub_sd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Computes a * b - c on the lowest single-precision (32-bit) lane only. The result goes in the lowest lane of dst; the upper three lanes are copied from a.
/// </summary>
/// <remarks>
/// **** vfmsub213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmsub_ss(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float low = FmaHelper(a.Float0, b.Float0, -c.Float0);

    // Start from a copy of a so the upper lanes pass through untouched.
    v128 dst = a;
    dst.Float0 = low;
    return dst;
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmsubadd213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fmsubadd_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfmsubadd213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmsubadd_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiplies the four single-precision (32-bit) lanes of a and b, then alternates per lane: even lanes (0, 2) add c to the product, odd lanes (1, 3) subtract c from it. The results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmsubadd_ps(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// Multiplies the eight single-precision (32-bit) lanes of a and b, then alternates per lane: even lanes add c to the product, odd lanes subtract c from it. The results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmsubadd_ps(v256 a, v256 b, v256 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
    float lane4 = FmaHelper(a.Float4, b.Float4, c.Float4);
    float lane5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
    float lane6 = FmaHelper(a.Float6, b.Float6, c.Float6);
    float lane7 = FmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmadd213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fnmadd_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmadd213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fnmadd_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// For each of the four single-precision (32-bit) lanes, computes -(a * b) + c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmadd_ps(v128 a, v128 b, v128 c)
{
    float lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// For each of the eight single-precision (32-bit) lanes, computes -(a * b) + c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fnmadd_ps(v256 a, v256 b, v256 c)
{
    float lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
    float lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
    float lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
    float lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);
    float lane4 = FnmaHelper(a.Float4, b.Float4, c.Float4);
    float lane5 = FnmaHelper(a.Float5, b.Float5, c.Float5);
    float lane6 = FnmaHelper(a.Float6, b.Float6, c.Float6);
    float lane7 = FnmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmadd213sd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fnmadd_sd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Computes -(a * b) + c on the lowest single-precision (32-bit) lane only. The result goes in the lowest lane of dst; the upper three lanes are copied from a.
/// </summary>
/// <remarks>
/// **** vfnmadd213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmadd_ss(v128 a, v128 b, v128 c)
{
    float low = FnmaHelper(a.Float0, b.Float0, c.Float0);

    // Start from a copy of a so the upper lanes pass through untouched.
    v128 dst = a;
    dst.Float0 = low;
    return dst;
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmsub213pd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fnmsub_pd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmsub213pd ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v256 mm256_fnmsub_pd(v256 a, v256 b, v256 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// For each of the four single-precision (32-bit) lanes, computes -(a * b) - c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmsub_ps(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
/// <summary>
/// For each of the eight single-precision (32-bit) lanes, computes -(a * b) - c and stores the result in the matching lane of dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fnmsub_ps(v256 a, v256 b, v256 c)
{
    // Subtraction is expressed as adding the negated c lane.
    float lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    float lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
    float lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
    float lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);
    float lane4 = FnmaHelper(a.Float4, b.Float4, -c.Float4);
    float lane5 = FnmaHelper(a.Float5, b.Float5, -c.Float5);
    float lane6 = FnmaHelper(a.Float6, b.Float6, -c.Float6);
    float lane7 = FnmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </summary>
/// <remarks>
/// **** vfnmsub213sd xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Never returns normally from this C# implementation</returns>
/// <exception cref="NotSupportedException">Always thrown: double-precision FMA is not emulated in C#.</exception>
[DebuggerStepThrough]
public static v128 fnmsub_sd(v128 a, v128 b, v128 c)
{
    // NotSupportedException instead of the reserved base Exception type; it still derives
    // from Exception, so existing catch (Exception) handlers keep working.
    throw new NotSupportedException("Double-precision FMA not emulated in C#");
}
/// <summary>
/// Multiply the lowest single-precision (32-bit) elements of a and b, negate the product and subtract the lowest element of c from it. The result is placed in the lowest element of dst; the upper three elements of dst are copied from a.
/// </summary>
/// <remarks>
/// **** vfnmsub213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmsub_ss(v128 a, v128 b, v128 c)
{
    // Start from a copy of a so that only lane 0 is replaced.
    v128 dst = a;
    dst.Float0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    return dst;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 4d7325591616354d86b1492e282843f4
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,62 @@
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// popcnt intrinsics
/// </summary>
public static class Popcnt
{
/// <summary>
/// Evaluates to true at compile time if popcnt intrinsics are supported.
///
/// Burst ties popcnt support to SSE4.2 support to simplify feature sets to support,
/// so this simply forwards to <c>Sse4_2.IsSse42Supported</c>.
/// </summary>
public static bool IsPopcntSupported => Sse4_2.IsSse42Supported;
/// <summary>
/// Return the number of bits that are set to 1 in the unsigned 32-bit integer v.
/// </summary>
/// <remarks>
/// **** popcnt r32, r32
/// </remarks>
/// <param name="v">Integer whose set bits are counted</param>
/// <returns>Number of set bits (0..32)</returns>
[DebuggerStepThrough]
public static int popcnt_u32(uint v)
{
    int count = 0;
    // Inspect each of the 32 bit positions in turn.
    for (int bit = 0; bit < 32; ++bit)
    {
        if (((v >> bit) & 1u) != 0u)
        {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Return the number of bits that are set to 1 in the unsigned 64-bit integer v.
/// </summary>
/// <remarks>
/// **** popcnt r64, r64
/// </remarks>
/// <param name="v">Integer whose set bits are counted</param>
/// <returns>Number of set bits (0..64)</returns>
[DebuggerStepThrough]
public static int popcnt_u64(ulong v)
{
    int count = 0;
    // Inspect each of the 64 bit positions in turn.
    for (int bit = 0; bit < 64; ++bit)
    {
        if (((v >> bit) & 1ul) != 0ul)
        {
            count++;
        }
    }
    return count;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: e4725d04fd6336efbc80f25ae908c344
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 9edae0ecbfb63f239983f9a81f80ddf9
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: f0de54c00de3304699fdf0bedf123944
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,155 @@
using System;
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// SSE3 intrinsics
/// </summary>
public static class Sse3
{
/// <summary>
/// Evaluates to true at compile time if SSE3 intrinsics are supported.
/// </summary>
/// <remarks>
/// This managed implementation always reports false.
/// </remarks>
public static bool IsSse3Supported => false;
// _mm_addsub_ps
/// <summary> Alternately subtract and add packed single-precision (32-bit) floating-point elements: even-indexed lanes compute "a" - "b", odd-indexed lanes compute "a" + "b". The results are stored in "dst". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 addsub_ps(v128 a, v128 b)
{
    float lane0 = a.Float0 - b.Float0;
    float lane1 = a.Float1 + b.Float1;
    float lane2 = a.Float2 - b.Float2;
    float lane3 = a.Float3 + b.Float3;
    return new v128(lane0, lane1, lane2, lane3);
}
// _mm_addsub_pd
/// <summary> Alternately subtract and add packed double-precision (64-bit) floating-point elements: the low lane computes "a" - "b", the high lane computes "a" + "b". The results are stored in "dst". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 addsub_pd(v128 a, v128 b)
{
    double difference = a.Double0 - b.Double0;
    double sum = a.Double1 + b.Double1;

    v128 result = default(v128);
    result.Double0 = difference;
    result.Double1 = sum;
    return result;
}
// _mm_hadd_pd
/// <summary> Horizontally add the pair of double-precision (64-bit) floating-point elements in "a" and the pair in "b"; the sum from "a" goes to the low lane of "dst" and the sum from "b" to the high lane. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_pd(v128 a, v128 b)
{
    double sumA = a.Double0 + a.Double1;
    double sumB = b.Double0 + b.Double1;

    v128 result = default(v128);
    result.Double0 = sumA;
    result.Double1 = sumB;
    return result;
}
// _mm_hadd_ps
/// <summary> Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements: the two pair sums of "a" fill the low half of "dst" and the two pair sums of "b" fill the high half. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_ps(v128 a, v128 b)
{
    float lowA = a.Float0 + a.Float1;
    float highA = a.Float2 + a.Float3;
    float lowB = b.Float0 + b.Float1;
    float highB = b.Float2 + b.Float3;
    return new v128(lowA, highA, lowB, highB);
}
// _mm_hsub_pd
/// <summary> Horizontally subtract the pair of double-precision (64-bit) floating-point elements in "a" and the pair in "b" (low element minus high element); the difference from "a" goes to the low lane of "dst" and the difference from "b" to the high lane. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_pd(v128 a, v128 b)
{
    double diffA = a.Double0 - a.Double1;
    double diffB = b.Double0 - b.Double1;

    v128 result = default(v128);
    result.Double0 = diffA;
    result.Double1 = diffB;
    return result;
}
// _mm_hsub_ps
/// <summary> Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements (even-indexed element minus the following odd-indexed element): the two pair differences of "a" fill the low half of "dst" and the two pair differences of "b" fill the high half. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_ps(v128 a, v128 b)
{
    float lowA = a.Float0 - a.Float1;
    float highA = a.Float2 - a.Float3;
    float lowB = b.Float0 - b.Float1;
    float highB = b.Float2 - b.Float3;
    return new v128(lowA, highA, lowB, highB);
}
// _mm_movedup_pd
/// <summary> Copy the low double-precision (64-bit) floating-point element of "a" into both lanes of "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 movedup_pd(v128 a)
{
    // Burst IR is fine
    double low = a.Double0;

    v128 result = default(v128);
    result.Double0 = low;
    result.Double1 = low;
    return result;
}
// _mm_movehdup_ps
/// <summary> Duplicate the odd-indexed single-precision (32-bit) floating-point elements of "a": lanes 0 and 1 of "dst" receive element 1, lanes 2 and 3 receive element 3. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 movehdup_ps(v128 a)
{
    // Burst IR is fine
    float odd1 = a.Float1;
    float odd3 = a.Float3;
    return new v128(odd1, odd1, odd3, odd3);
}
// _mm_moveldup_ps
/// <summary> Duplicate the even-indexed single-precision (32-bit) floating-point elements of "a": lanes 0 and 1 of "dst" receive element 0, lanes 2 and 3 receive element 2. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 moveldup_ps(v128 a)
{
    // Burst IR is fine
    float even0 = a.Float0;
    float even2 = a.Float2;
    return new v128(even0, even0, even2, even2);
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 084c864f475138fba5e71aa0c9653558
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 79fa55e43ac038089dbaa9227eea27ae
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,822 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// SSE 4.2 intrinsics
/// </summary>
public static class Sse4_2
{
/// <summary>
/// Evaluates to true at compile time if SSE 4.2 intrinsics are supported.
/// </summary>
/// <remarks>
/// This managed implementation always reports false.
/// </remarks>
public static bool IsSse42Supported => false;
/// <summary>
/// Constants for string comparison intrinsics.
/// </summary>
/// <remarks>
/// The values are combined into the imm8 control argument of the cmpistr*/cmpestr* methods in this class:
/// bits 0-1 select the element type, bits 2-3 the comparison mode, bits 4-5 the result polarity,
/// and bit 6 the output form (index variants: which set bit is returned; mask variants: bit mask or unit mask).
/// Several members share the value 0 because they are the default choice of their group.
/// </remarks>
[Flags]
public enum SIDD
{
/// <summary>
/// Compare 8-bit unsigned characters
/// </summary>
UBYTE_OPS = 0x00,
/// <summary>
/// Compare 16-bit unsigned characters
/// </summary>
UWORD_OPS = 0x01,
/// <summary>
/// Compare 8-bit signed characters
/// </summary>
SBYTE_OPS = 0x02,
/// <summary>
/// Compare 16-bit signed characters
/// </summary>
SWORD_OPS = 0x03,
/// <summary>
/// Compare any equal
/// </summary>
CMP_EQUAL_ANY = 0x00,
/// <summary>
/// Compare ranges
/// </summary>
CMP_RANGES = 0x04,
/// <summary>
/// Compare equal each
/// </summary>
CMP_EQUAL_EACH = 0x08,
/// <summary>
/// Compare equal ordered
/// </summary>
CMP_EQUAL_ORDERED = 0x0C,
/// <summary>
/// Normal result polarity
/// </summary>
POSITIVE_POLARITY = 0x00,
/// <summary>
/// Negate results
/// </summary>
NEGATIVE_POLARITY = 0x10,
/// <summary>
/// Normal results only before end of string
/// </summary>
MASKED_POSITIVE_POLARITY = 0x20,
/// <summary>
/// Negate results only before end of string
/// </summary>
MASKED_NEGATIVE_POLARITY = 0x30,
/// <summary>
/// Index only: return least significant bit
/// </summary>
LEAST_SIGNIFICANT = 0x00,
/// <summary>
/// Index only: return most significant bit
/// </summary>
MOST_SIGNIFICANT = 0x40,
/// <summary>
/// Mask only: return bit mask
/// </summary>
BIT_MASK = 0x00,
/// <summary>
/// Mask only: return byte/word mask
/// </summary>
UNIT_MASK = 0x40,
}
/*
* Intrinsics for text/string processing.
*/
/// <summary>
/// 16 x 16 matrix of booleans stored as sixteen 16-bit rows; used to hold the pairwise
/// comparison results of the string-compare emulation.
/// </summary>
private unsafe struct StrBoolArray
{
    /// <summary>One 16-bit row per a-index; bit bindex of a row holds the flag for that pair.</summary>
    public fixed ushort Bits[16];

    /// <summary>Set or clear the flag stored at row aindex, bit bindex.</summary>
    public void SetBit(int aindex, int bindex, bool val)
    {
        ushort selector = (ushort)(1 << bindex);
        fixed (ushort* rows = Bits)
        {
            if (!val)
            {
                rows[aindex] &= (ushort)~selector;
            }
            else
            {
                rows[aindex] |= selector;
            }
        }
    }

    /// <summary>Read the flag stored at row aindex, bit bindex.</summary>
    public bool GetBit(int aindex, int bindex)
    {
        fixed (ushort* rows = Bits)
        {
            int row = rows[aindex];
            return ((row >> bindex) & 1) != 0;
        }
    }
}
/// <summary>
/// Implicit-length mask variant: derives both string lengths from the first zero element,
/// runs the shared comparison and converts the result bits into the mask output.
/// </summary>
private static v128 cmpistrm_emulation<T>(T* a, T* b, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int aLength = ComputeStringLength<T>(a, len);
    int bLength = ComputeStringLength<T>(b, len);
    int resultBits = ComputeStrCmpIntRes2<T>(a, aLength, b, bLength, len, imm8, allOnes);
    return ComputeStrmOutput(len, imm8, allOnesT, resultBits);
}
/// <summary>
/// Explicit-length mask variant: runs the shared comparison with the caller-supplied lengths
/// and converts the result bits into the mask output.
/// </summary>
private static v128 cmpestrm_emulation<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int resultBits = ComputeStrCmpIntRes2<T>(a, alen, b, blen, len, imm8, allOnes);
    v128 output = ComputeStrmOutput(len, imm8, allOnesT, resultBits);
    return output;
}
/// <summary>
/// Converts the per-element result bits into the vector returned by the mask variants.
/// When imm8 bit 6 is clear the bits are stored as-is in the low 32-bit lane; when it is set,
/// each of the len elements becomes allOnesT if its result bit is set and zero otherwise.
/// </summary>
private static v128 ComputeStrmOutput<T>(int len, int imm8, T allOnesT, int intRes2) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    v128 output = default;
    bool unitMask = (imm8 & 0x40) != 0;

    if (!unitMask)
    {
        // bit mask
        output.SInt0 = intRes2;
        return output;
    }

    // byte / word mask: reinterpret the vector as an array of T elements
    T* elements = (T*)&output.Byte0;
    for (int index = 0; index < len; ++index)
    {
        bool selected = ((intRes2 >> index) & 1) != 0;
        elements[index] = selected ? allOnesT : default(T);
    }
    return output;
}
/// <summary>
/// Implicit-length index variant: derives both string lengths from the first zero element,
/// runs the shared comparison and converts the result bits into an index.
/// </summary>
private static int cmpistri_emulation<T>(T* a, T* b, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int aLength = ComputeStringLength<T>(a, len);
    int bLength = ComputeStringLength<T>(b, len);
    int resultBits = ComputeStrCmpIntRes2<T>(a, aLength, b, bLength, len, imm8, allOnes);
    return ComputeStriOutput(len, imm8, resultBits);
}
/// <summary>
/// Explicit-length index variant: runs the shared comparison with the caller-supplied lengths
/// and converts the result bits into an index.
/// </summary>
private static int cmpestri_emulation<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int resultBits = ComputeStrCmpIntRes2<T>(a, alen, b, blen, len, imm8, allOnes);
    int index = ComputeStriOutput(len, imm8, resultBits);
    return index;
}
/// <summary>
/// Converts the per-element result bits into the index returned by the index variants:
/// the position of the lowest set bit when imm8 bit 6 is clear, the highest set bit within
/// the first len bits when it is set, or len when none of those bits is set.
/// </summary>
private static int ComputeStriOutput(int len, int imm8, int intRes2)
{
    bool fromTop = (imm8 & 0x40) != 0;

    if (fromTop)
    {
        // scan downwards for the most significant set bit
        for (int pos = len - 1; pos >= 0; --pos)
        {
            if (((intRes2 >> pos) & 1) != 0)
            {
                return pos;
            }
        }
    }
    else
    {
        // scan upwards for the least significant set bit
        for (int pos = 0; pos < len; ++pos)
        {
            if (((intRes2 >> pos) & 1) != 0)
            {
                return pos;
            }
        }
    }

    // no bit set within the first len positions
    return len;
}
/// <summary>
/// Returns the index of the first element equal to default(T) among the first max elements
/// at ptr, or max when no such element is found.
/// </summary>
private static int ComputeStringLength<T>(T* ptr, int max) where T : unmanaged, IEquatable<T>
{
    EqualityComparer<T> comparer = EqualityComparer<T>.Default;
    T terminator = default(T);

    int index = 0;
    while (index < max)
    {
        if (comparer.Equals(ptr[index], terminator))
        {
            return index;
        }
        index++;
    }
    return max;
}
/// <summary>
/// Shared core of the managed string-compare emulation. Compares every element of a with every
/// element of b using the mode in imm8 bits 2-3, aggregates the pairwise results into one result
/// bit per element index, and finally applies the polarity selected by imm8 bits 4-5.
/// </summary>
/// <remarks>
/// With masked negative polarity (imm8 bits 4 and 5 both set) only the result bits whose index is
/// below blen are negated. Validity of b is taken from blen rather than from a zero-element scan,
/// so the explicit-length callers are honoured; the implicit-length callers pass the index of the
/// first zero element as blen, which yields the same outcome as scanning for it.
/// The "ranges" aggregation reads a in pairs and therefore relies on len being even.
/// </remarks>
/// <typeparam name="T">Element type being compared</typeparam>
/// <param name="a">Pointer to the len elements of the first operand</param>
/// <param name="alen">Number of valid leading elements in a</param>
/// <param name="b">Pointer to the len elements of the second operand</param>
/// <param name="blen">Number of valid leading elements in b</param>
/// <param name="len">Number of elements per operand (at most 16, the capacity of the result matrix)</param>
/// <param name="imm8">Control value</param>
/// <param name="allOnes">Value with the low len bits set; the starting point of the "equal ordered" aggregation</param>
/// <returns>Result bits, one per element index</returns>
/// <exception cref="NotImplementedException">Always thrown when NET_DOTS is defined.</exception>
private static int ComputeStrCmpIntRes2<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes) where T : unmanaged, IComparable<T>, IEquatable<T>
{
#if !NET_DOTS
    int mode = (imm8 >> 2) & 3;
    bool aInvalid = false;
    bool bInvalid;
    StrBoolArray boolRes = default;
    int i, j;

    // Stage 1: pairwise comparison. boolRes[i][j] holds the result for a[i] versus b[j].
    for (i = 0; i < len; ++i)
    {
        T aCh = a[i];
        if (i == alen)
            aInvalid = true; // sticky: every element from alen onwards is past the end of a
        bInvalid = false;
        for (j = 0; j < len; ++j)
        {
            T bCh = b[j];
            if (j == blen)
                bInvalid = true; // sticky within this row
            bool match;
            // Comparisons involving invalid elements are overridden with a mode-specific constant.
            switch (mode)
            {
                case 0: // equal any: any invalid element forces "no match"
                    if (aInvalid || bInvalid)
                        match = false;
                    else
                        match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    break;
                case 1: // ranges: even a-elements are lower bounds, odd a-elements are upper bounds
                    if (aInvalid || bInvalid)
                        match = false;
                    else if (0 == (i & 1))
                        match = Comparer<T>.Default.Compare(bCh, aCh) >= 0;
                    else
                        match = Comparer<T>.Default.Compare(bCh, aCh) <= 0;
                    break;
                case 2: // equal each: both invalid counts as a match, exactly one invalid does not
                    if (aInvalid && bInvalid)
                        match = true;
                    else if (aInvalid || bInvalid)
                        match = false;
                    else
                        match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    break;
                default: // equal ordered: an invalid a-element matches anything
                    if (aInvalid)
                        match = true;
                    else if (bInvalid)
                        match = false;
                    else
                        match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    break;
            }
            boolRes.SetBit(i, j, match);
        }
    }

    // Stage 2: aggregate the matrix into one bit per index.
    int intRes1 = 0;
    switch (mode)
    {
        case 0: // equal any: b[i] matched at least one element of a
            for (i = 0; i < len; ++i)
            {
                for (j = 0; j < len; ++j)
                {
                    intRes1 |= (boolRes.GetBit(j, i) ? 1 : 0) << i;
                }
            }
            break;
        case 1: // ranges: b[i] lies inside at least one (lower, upper) pair of a
            for (i = 0; i < len; ++i)
            {
                for (j = 0; j < len; j += 2)
                {
                    intRes1 |= ((boolRes.GetBit(j, i) && boolRes.GetBit(j + 1, i)) ? 1 : 0) << i;
                }
            }
            break;
        case 2: // equal each: only the diagonal matters
            for (i = 0; i < len; ++i)
            {
                intRes1 |= (boolRes.GetBit(i, i) ? 1 : 0) << i;
            }
            break;
        case 3: // equal ordered: clear bit i unless a matches b starting at position i
            intRes1 = allOnes;
            for (i = 0; i < len; ++i)
            {
                int k = i;
                for (j = 0; j < len - i; ++j)
                {
                    if (!boolRes.GetBit(j, k))
                        intRes1 &= ~(1 << i);
                    k += 1;
                }
            }
            break;
    }

    // Stage 3: optionally negate the aggregated bits.
    bool negate = (imm8 & (1 << 4)) != 0;
    bool onlyValid = (imm8 & (1 << 5)) != 0;
    int intRes2 = 0;
    for (i = 0; i < len; ++i)
    {
        int bit = 1 << i;
        // Masked negation leaves positions at or beyond the end of b untouched.
        bool flip = negate && (!onlyValid || i < blen);
        intRes2 |= (flip ? ~intRes1 : intRes1) & bit;
    }
    return intRes2;
#else
    throw new NotImplementedException("dots runtime C# lacks comparer");
#endif
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return the generated mask.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpistrm(v128 a, v128 b, int imm8)
{
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            return cmpistrm_emulation(&a.Byte0, &b.Byte0, 16, imm8, 0xffff, (byte)0xff);
        case 2:
            return cmpistrm_emulation(&a.SByte0, &b.SByte0, 16, imm8, 0xffff, (sbyte)-1);
        case 1:
            return cmpistrm_emulation(&a.UShort0, &b.UShort0, 8, imm8, 0xff, (ushort)0xffff);
        default:
            return cmpistrm_emulation(&a.SShort0, &b.SShort0, 8, imm8, 0xff, (short)-1);
    }
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return the generated index.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Index</returns>
[DebuggerStepThrough]
public static int cmpistri(v128 a, v128 b, int imm8)
{
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            return cmpistri_emulation(&a.Byte0, &b.Byte0, 16, imm8, 0xffff, (byte)0xff);
        case 2:
            return cmpistri_emulation(&a.SByte0, &b.SByte0, 16, imm8, 0xffff, (sbyte)-1);
        case 1:
            return cmpistri_emulation(&a.UShort0, &b.UShort0, 8, imm8, 0xff, (ushort)0xffff);
        default:
            return cmpistri_emulation(&a.SShort0, &b.SShort0, 8, imm8, 0xff, (short)-1);
    }
}
/// <summary>
/// Compare packed strings in a and b with explicit lengths la and lb using the control in imm8, and return the generated mask.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpestrm(v128 a, int la, v128 b, int lb, int imm8)
{
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            return cmpestrm_emulation(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff, (byte)0xff);
        case 2:
            return cmpestrm_emulation(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff, (sbyte)-1);
        case 1:
            return cmpestrm_emulation(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff, (ushort)0xffff);
        default:
            return cmpestrm_emulation(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff, (short)-1);
    }
}
/// <summary>
/// Compare packed strings in a and b with explicit lengths la and lb using the control in imm8, and return the generated index.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Index</returns>
[DebuggerStepThrough]
public static int cmpestri(v128 a, int la, v128 b, int lb, int imm8)
{
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            return cmpestri_emulation(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff, (byte)0xff);
        case 2:
            return cmpestri_emulation(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff, (sbyte)-1);
        case 1:
            return cmpestri_emulation(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff, (ushort)0xffff);
        default:
            return cmpestri_emulation(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff, (short)-1);
    }
}
/*
* Intrinsics for text/string processing and reading values of EFlags.
*/
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return 1 if any character in b was null, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrz(v128 a, v128 b, int imm8)
{
    // imm8 bit 0 selects 16-bit elements (8 per vector) over 8-bit elements (16 per vector).
    bool wideElements = (imm8 & 1) != 0;
    if (wideElements)
    {
        int wideLength = ComputeStringLength<ushort>(&b.UShort0, 8);
        return wideLength < 8 ? 1 : 0;
    }

    int narrowLength = ComputeStringLength<byte>(&b.Byte0, 16);
    return narrowLength < 16 ? 1 : 0;
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return 1 if the resulting mask was non-zero, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrc(v128 a, v128 b, int imm8)
{
    v128 mask = cmpistrm(a, b, imm8);
    // Any non-zero 32-bit lane means the mask is non-zero.
    bool anySet = mask.SInt0 != 0 || mask.SInt1 != 0 || mask.SInt2 != 0 || mask.SInt3 != 0;
    return anySet ? 1 : 0;
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return 1 if any character in a was null, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrs(v128 a, v128 b, int imm8)
{
    // imm8 bit 0 selects 16-bit elements (8 per vector) over 8-bit elements (16 per vector).
    bool wideElements = (imm8 & 1) != 0;
    if (wideElements)
    {
        int wideLength = ComputeStringLength<ushort>(&a.UShort0, 8);
        return wideLength < 8 ? 1 : 0;
    }

    int narrowLength = ComputeStringLength<byte>(&a.Byte0, 16);
    return narrowLength < 16 ? 1 : 0;
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return bit 0 of the resulting bit mask.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Bit 0</returns>
[DebuggerStepThrough]
public static int cmpistro(v128 a, v128 b, int imm8)
{
    bool wideElements = (imm8 & 1) != 0;
    bool signedElements = (imm8 & 2) != 0;
    int resultBits;

    if (wideElements)
    {
        // Lengths are found on the unsigned view; a zero element is zero in either view.
        int aLength = ComputeStringLength<ushort>(&a.UShort0, 8);
        int bLength = ComputeStringLength<ushort>(&b.UShort0, 8);
        resultBits = signedElements
            ? ComputeStrCmpIntRes2<short>(&a.SShort0, aLength, &b.SShort0, bLength, 8, imm8, 0xff)
            : ComputeStrCmpIntRes2<ushort>(&a.UShort0, aLength, &b.UShort0, bLength, 8, imm8, 0xff);
    }
    else
    {
        int aLength = ComputeStringLength<byte>(&a.Byte0, 16);
        int bLength = ComputeStringLength<byte>(&b.Byte0, 16);
        resultBits = signedElements
            ? ComputeStrCmpIntRes2<sbyte>(&a.SByte0, aLength, &b.SByte0, bLength, 16, imm8, 0xffff)
            : ComputeStrCmpIntRes2<byte>(&a.Byte0, aLength, &b.Byte0, bLength, 16, imm8, 0xffff);
    }

    return resultBits & 1;
}
/// <summary>
/// Compare packed strings with implicit lengths in a and b using the control in imm8, and return 1 if b did not contain a null character and the resulting mask was zero, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistra(v128 a, v128 b, int imm8)
{
    int maskNonZero = cmpistrc(a, b, imm8);
    int bHasNull = cmpistrz(a, b, imm8);
    // Both flags are 0 or 1; the result is 1 only when both are 0.
    return (maskNonZero == 0 && bHasNull == 0) ? 1 : 0;
}
/// <summary>
/// Compare packed strings in a and b with lengths la and lb using the control in imm8, and return 1 if any character in b was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// With explicit lengths this reduces to checking whether lb is smaller than the number of elements a vector holds.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrz(v128 a, int la, v128 b, int lb, int imm8)
{
    // 8 elements for 16-bit characters, 16 elements for 8-bit characters.
    int elementCount = (imm8 & 1) != 0 ? 8 : 16;
    if (lb < elementCount)
    {
        return 1;
    }
    return 0;
}
/// <summary>
/// Compare packed strings in a and b with lengths la and lb using the control in imm8, and return 1 if the resulting mask was non-zero, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrc(v128 a, int la, v128 b, int lb, int imm8)
{
    int resultBits;
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            resultBits = ComputeStrCmpIntRes2<byte>(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff);
            break;
        case 2:
            resultBits = ComputeStrCmpIntRes2<sbyte>(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff);
            break;
        case 1:
            resultBits = ComputeStrCmpIntRes2<ushort>(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff);
            break;
        default:
            resultBits = ComputeStrCmpIntRes2<short>(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff);
            break;
    }
    return resultBits == 0 ? 0 : 1;
}
/// <summary>
/// Compare packed strings in a and b with lengths la and lb using the control in imm8, and return 1 if any character in a was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// With explicit lengths this reduces to checking whether la is smaller than the number of elements a vector holds.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrs(v128 a, int la, v128 b, int lb, int imm8)
{
    // 8 elements for 16-bit characters, 16 elements for 8-bit characters.
    int elementCount = (imm8 & 1) != 0 ? 8 : 16;
    if (la < elementCount)
    {
        return 1;
    }
    return 0;
}
/// <summary>
/// Compare packed strings in a and b with lengths la and lb using the control in imm8, and return bit 0 of the resulting bit mask.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Bit 0</returns>
[DebuggerStepThrough]
public static int cmpestro(v128 a, int la, v128 b, int lb, int imm8)
{
    int resultBits;
    // imm8 bits 0-1 pick the element type: bit 0 = 16-bit elements, bit 1 = signed.
    switch (imm8 & 3)
    {
        case 0:
            resultBits = ComputeStrCmpIntRes2<byte>(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff);
            break;
        case 2:
            resultBits = ComputeStrCmpIntRes2<sbyte>(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff);
            break;
        case 1:
            resultBits = ComputeStrCmpIntRes2<ushort>(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff);
            break;
        default:
            resultBits = ComputeStrCmpIntRes2<short>(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff);
            break;
    }
    return resultBits & 1;
}
/// <summary>
/// Compare packed strings in a and b with lengths la and lb using the control in imm8, and return 1 if b did not contain a null character and the resulting mask was zero, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestra(v128 a, int la, v128 b, int lb, int imm8)
{
    int maskNonZero = cmpestrc(a, la, b, lb, imm8);
    int bHasNull = cmpestrz(a, la, b, lb, imm8);
    // Both flags are 0 or 1; the result is 1 only when both are 0.
    return (maskNonZero == 0 && bHasNull == 0) ? 1 : 0;
}
/// <summary>
/// Compare the packed signed 64-bit integers in val1 and val2 for greater-than. Each result lane is all ones (-1) when the val1 lane is greater and zero otherwise.
/// </summary>
/// <param name="val1">Vector a</param>
/// <param name="val2">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpgt_epi64(v128 val1, v128 val2)
{
    // Lanes start out as zero; only the "greater" lanes are overwritten.
    v128 mask = default;
    if (val1.SLong0 > val2.SLong0)
    {
        mask.SLong0 = -1;
    }
    if (val1.SLong1 > val2.SLong1)
    {
        mask.SLong1 = -1;
    }
    return mask;
}
/*
* Accumulate CRC32 (polynomial 0x11EDC6F41) value
*
* crctab is the 256-entry lookup table for the byte-at-a-time update in crc32_u8,
* which indexes it with (crc ^ byte) & 0xff.
*/
private static readonly uint[] crctab = new uint[]
{
0x00000000U,0xF26B8303U,0xE13B70F7U,0x1350F3F4U,0xC79A971FU,0x35F1141CU,0x26A1E7E8U,0xD4CA64EBU,
0x8AD958CFU,0x78B2DBCCU,0x6BE22838U,0x9989AB3BU,0x4D43CFD0U,0xBF284CD3U,0xAC78BF27U,0x5E133C24U,
0x105EC76FU,0xE235446CU,0xF165B798U,0x030E349BU,0xD7C45070U,0x25AFD373U,0x36FF2087U,0xC494A384U,
0x9A879FA0U,0x68EC1CA3U,0x7BBCEF57U,0x89D76C54U,0x5D1D08BFU,0xAF768BBCU,0xBC267848U,0x4E4DFB4BU,
0x20BD8EDEU,0xD2D60DDDU,0xC186FE29U,0x33ED7D2AU,0xE72719C1U,0x154C9AC2U,0x061C6936U,0xF477EA35U,
0xAA64D611U,0x580F5512U,0x4B5FA6E6U,0xB93425E5U,0x6DFE410EU,0x9F95C20DU,0x8CC531F9U,0x7EAEB2FAU,
0x30E349B1U,0xC288CAB2U,0xD1D83946U,0x23B3BA45U,0xF779DEAEU,0x05125DADU,0x1642AE59U,0xE4292D5AU,
0xBA3A117EU,0x4851927DU,0x5B016189U,0xA96AE28AU,0x7DA08661U,0x8FCB0562U,0x9C9BF696U,0x6EF07595U,
0x417B1DBCU,0xB3109EBFU,0xA0406D4BU,0x522BEE48U,0x86E18AA3U,0x748A09A0U,0x67DAFA54U,0x95B17957U,
0xCBA24573U,0x39C9C670U,0x2A993584U,0xD8F2B687U,0x0C38D26CU,0xFE53516FU,0xED03A29BU,0x1F682198U,
0x5125DAD3U,0xA34E59D0U,0xB01EAA24U,0x42752927U,0x96BF4DCCU,0x64D4CECFU,0x77843D3BU,0x85EFBE38U,
0xDBFC821CU,0x2997011FU,0x3AC7F2EBU,0xC8AC71E8U,0x1C661503U,0xEE0D9600U,0xFD5D65F4U,0x0F36E6F7U,
0x61C69362U,0x93AD1061U,0x80FDE395U,0x72966096U,0xA65C047DU,0x5437877EU,0x4767748AU,0xB50CF789U,
0xEB1FCBADU,0x197448AEU,0x0A24BB5AU,0xF84F3859U,0x2C855CB2U,0xDEEEDFB1U,0xCDBE2C45U,0x3FD5AF46U,
0x7198540DU,0x83F3D70EU,0x90A324FAU,0x62C8A7F9U,0xB602C312U,0x44694011U,0x5739B3E5U,0xA55230E6U,
0xFB410CC2U,0x092A8FC1U,0x1A7A7C35U,0xE811FF36U,0x3CDB9BDDU,0xCEB018DEU,0xDDE0EB2AU,0x2F8B6829U,
0x82F63B78U,0x709DB87BU,0x63CD4B8FU,0x91A6C88CU,0x456CAC67U,0xB7072F64U,0xA457DC90U,0x563C5F93U,
0x082F63B7U,0xFA44E0B4U,0xE9141340U,0x1B7F9043U,0xCFB5F4A8U,0x3DDE77ABU,0x2E8E845FU,0xDCE5075CU,
0x92A8FC17U,0x60C37F14U,0x73938CE0U,0x81F80FE3U,0x55326B08U,0xA759E80BU,0xB4091BFFU,0x466298FCU,
0x1871A4D8U,0xEA1A27DBU,0xF94AD42FU,0x0B21572CU,0xDFEB33C7U,0x2D80B0C4U,0x3ED04330U,0xCCBBC033U,
0xA24BB5A6U,0x502036A5U,0x4370C551U,0xB11B4652U,0x65D122B9U,0x97BAA1BAU,0x84EA524EU,0x7681D14DU,
0x2892ED69U,0xDAF96E6AU,0xC9A99D9EU,0x3BC21E9DU,0xEF087A76U,0x1D63F975U,0x0E330A81U,0xFC588982U,
0xB21572C9U,0x407EF1CAU,0x532E023EU,0xA145813DU,0x758FE5D6U,0x87E466D5U,0x94B49521U,0x66DF1622U,
0x38CC2A06U,0xCAA7A905U,0xD9F75AF1U,0x2B9CD9F2U,0xFF56BD19U,0x0D3D3E1AU,0x1E6DCDEEU,0xEC064EEDU,
0xC38D26C4U,0x31E6A5C7U,0x22B65633U,0xD0DDD530U,0x0417B1DBU,0xF67C32D8U,0xE52CC12CU,0x1747422FU,
0x49547E0BU,0xBB3FFD08U,0xA86F0EFCU,0x5A048DFFU,0x8ECEE914U,0x7CA56A17U,0x6FF599E3U,0x9D9E1AE0U,
0xD3D3E1ABU,0x21B862A8U,0x32E8915CU,0xC083125FU,0x144976B4U,0xE622F5B7U,0xF5720643U,0x07198540U,
0x590AB964U,0xAB613A67U,0xB831C993U,0x4A5A4A90U,0x9E902E7BU,0x6CFBAD78U,0x7FAB5E8CU,0x8DC0DD8FU,
0xE330A81AU,0x115B2B19U,0x020BD8EDU,0xF0605BEEU,0x24AA3F05U,0xD6C1BC06U,0xC5914FF2U,0x37FACCF1U,
0x69E9F0D5U,0x9B8273D6U,0x88D28022U,0x7AB90321U,0xAE7367CAU,0x5C18E4C9U,0x4F48173DU,0xBD23943EU,
0xF36E6F75U,0x0105EC76U,0x12551F82U,0xE03E9C81U,0x34F4F86AU,0xC69F7B69U,0xD5CF889DU,0x27A40B9EU,
0x79B737BAU,0x8BDCB4B9U,0x988C474DU,0x6AE7C44EU,0xBE2DA0A5U,0x4C4623A6U,0x5F16D052U,0xAD7D5351U,
};
/// <summary>
/// Starting with the initial value in crc, accumulate a CRC32 value for the unsigned 32-bit integer v and return the result.
/// </summary>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 32-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u32(uint crc, uint v)
{
    // Feed the four bytes of v, least significant first.
    for (int shift = 0; shift < 32; shift += 8)
    {
        crc = crc32_u8(crc, (byte)(v >> shift));
    }
    return crc;
}
/// <summary>
/// Starting with the initial value in crc, accumulate a CRC32 value for the unsigned 8-bit integer v and return the result.
/// </summary>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 8-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u8(uint crc, byte v)
{
    // Table-driven update: the low byte of (crc ^ v) selects the table entry.
    uint tableIndex = (crc ^ v) & 0xff;
    uint shifted = crc >> 8;
    return shifted ^ crctab[tableIndex];
}
/// <summary>
/// Starting with the initial value in crc, accumulate a CRC32 value for the unsigned 16-bit integer v and return the result.
/// </summary>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 16-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u16(uint crc, ushort v)
{
    // Feed the low byte first, then the high byte.
    byte lowByte = (byte)v;
    byte highByte = (byte)(v >> 8);
    uint afterLow = crc32_u8(crc, lowByte);
    return crc32_u8(afterLow, highByte);
}
/// <summary>
/// Starting with the initial value in crc_ul, accumulate a CRC32 value for the 64-bit integer v and return the result.
/// The signed argument is cast to ulong and passed to the ulong overload.
/// </summary>
/// <param name="crc_ul">Initial value</param>
/// <param name="v">Signed 64-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
[Obsolete("Use the ulong version of this intrinsic instead.")]
public static ulong crc32_u64(ulong crc_ul, long v) => crc32_u64(crc_ul, (ulong)v);
/// <summary>
/// Starting with the initial value in crc_ul, accumulate a CRC32 value for the unsigned 64-bit integer v and return the result.
/// Only the low 32 bits of crc_ul are used as the initial value.
/// </summary>
/// <param name="crc_ul">Initial value</param>
/// <param name="v">Unsigned 64-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static ulong crc32_u64(ulong crc_ul, ulong v)
{
    uint running = (uint)crc_ul;
    // Feed the eight bytes of v, least significant first.
    for (int shift = 0; shift < 64; shift += 8)
    {
        running = crc32_u8(running, (byte)(v >> shift));
    }
    return running;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 34483fa8e8413ba9b6e02809c5adfdd3
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,371 @@
using System;
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// SSSE3 intrinsics
/// </summary>
public static class Ssse3
{
/// <summary>
/// Evaluates to true at compile time if SSSE3 intrinsics are supported.
/// This managed implementation always returns false.
/// </summary>
public static bool IsSsse3Supported
{
    get
    {
        return false;
    }
}
// _mm_abs_epi8
/// <summary> Takes the absolute value of each of the 16 packed signed 8-bit integers in "a" and stores the unsigned results in "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi8(v128 a)
{
    v128 result = default(v128);
    sbyte* src = &a.SByte0;
    byte* dst = &result.Byte0;
    for (int lane = 0; lane < 16; ++lane)
    {
        // Widen to int first so that -128 has a representable magnitude (128).
        int value = src[lane];
        dst[lane] = (byte)(value < 0 ? -value : value);
    }
    return result;
}
// _mm_abs_epi16
/// <summary> Takes the absolute value of each of the 8 packed signed 16-bit integers in "a" and stores the unsigned results in "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi16(v128 a)
{
    v128 result = default(v128);
    short* src = &a.SShort0;
    ushort* dst = &result.UShort0;
    for (int lane = 0; lane < 8; ++lane)
    {
        // Widen to int first so that -32768 has a representable magnitude (32768).
        int value = src[lane];
        dst[lane] = (ushort)(value < 0 ? -value : value);
    }
    return result;
}
// _mm_abs_epi32
/// <summary> Takes the absolute value of each of the 4 packed signed 32-bit integers in "a" and stores the unsigned results in "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi32(v128 a)
{
    v128 result = default(v128);
    int* src = &a.SInt0;
    uint* dst = &result.UInt0;
    for (int lane = 0; lane < 4; ++lane)
    {
        // Widen to long first so that int.MinValue has a representable magnitude.
        long value = src[lane];
        dst[lane] = (uint)(value < 0 ? -value : value);
    }
    return result;
}
// _mm_shuffle_epi8
/// <summary> Shuffle packed 8-bit integers in "a" using the per-byte control mask in "b", and store the results in "dst". A control byte with its top bit set yields zero; otherwise its low four bits select the source byte of "a". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 shuffle_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    byte* source = &a.Byte0;
    byte* control = &b.Byte0;
    byte* output = &result.Byte0;
    for (int lane = 0; lane < 16; ++lane)
    {
        byte selector = control[lane];
        bool zeroLane = (selector & 0x80) != 0;
        output[lane] = zeroLane ? (byte)0x00 : source[selector & 15];
    }
    return result;
}
// _mm_alignr_epi8
/// <summary> Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result (with "a" in the upper 16 bytes and "b" in the lower 16 bytes), shift the result right by "count" bytes while shifting in zeros, and store the low 16 bytes in "dst". </summary>
/// <remarks>
/// Result byte i is taken from position (i + count) of the 32-byte temporary: positions 0..15 come from "b",
/// positions 16..31 come from "a", and positions 32 and above are zero.
/// </remarks>
/// <param name="a">Vector a (upper half of the concatenation)</param>
/// <param name="b">Vector b (lower half of the concatenation)</param>
/// <param name="count">Byte count</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 alignr_epi8(v128 a, v128 b, int count)
{
    var dst = default(v128);
    byte* dptr = &dst.Byte0;
    byte* aptr = &a.Byte0;
    byte* bptr = &b.Byte0;

    // The PALIGNR instruction takes an 8-bit immediate, so only the low 8 bits of
    // count are considered. This also keeps every index below non-negative.
    int shift = count & 0xff;

    for (int i = 0; i < 16; ++i)
    {
        int position = i + shift;
        if (position < 16)
        {
            // Still inside the low half of the concatenation.
            dptr[i] = bptr[position];
        }
        else if (position < 32)
        {
            // Crossed into the high half of the concatenation.
            dptr[i] = aptr[position - 16];
        }
        // Otherwise the byte was shifted in from beyond the temporary and stays zero.
    }
    return dst;
}
// _mm_hadd_epi16
/// <summary> Add each adjacent pair of 16-bit integers in "a" and in "b" (wrapping on overflow), and pack the signed 16-bit sums in "dst": sums from "a" fill the low four lanes, sums from "b" the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* lhs = &a.SShort0;
    short* rhs = &b.SShort0;
    short* output = &result.SShort0;
    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        output[pair] = (short)(lhs[odd] + lhs[even]);
        output[pair + 4] = (short)(rhs[odd] + rhs[even]);
    }
    return result;
}
// _mm_hadds_epi16
/// <summary> Add each adjacent pair of 16-bit integers in "a" and in "b" using saturation, and pack the signed 16-bit sums in "dst": sums from "a" fill the low four lanes, sums from "b" the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadds_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* lhs = &a.SShort0;
    short* rhs = &b.SShort0;
    short* output = &result.SShort0;
    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        // The sums are formed as 32-bit ints, then clamped by the helper.
        int sumA = lhs[odd] + lhs[even];
        int sumB = rhs[odd] + rhs[even];
        output[pair] = Saturate_To_Int16(sumA);
        output[pair + 4] = Saturate_To_Int16(sumB);
    }
    return result;
}
// _mm_hadd_epi32
/// <summary> Add each adjacent pair of 32-bit integers in "a" and in "b" (wrapping on overflow), and pack the signed 32-bit sums in "dst": sums from "a" fill the low two lanes, sums from "b" the high two. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* lhs = &a.SInt0;
    int* rhs = &b.SInt0;
    int* output = &result.SInt0;
    for (int pair = 0; pair < 2; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        output[pair] = lhs[odd] + lhs[even];
        output[pair + 2] = rhs[odd] + rhs[even];
    }
    return result;
}
// _mm_hsub_epi16
/// <summary> For each adjacent pair of 16-bit integers in "a" and in "b", subtract the odd-indexed element from the even-indexed one (wrapping on overflow), and pack the signed 16-bit differences in "dst": results from "a" fill the low four lanes, results from "b" the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* lhs = &a.SShort0;
    short* rhs = &b.SShort0;
    short* output = &result.SShort0;
    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        output[pair] = (short)(lhs[even] - lhs[odd]);
        output[pair + 4] = (short)(rhs[even] - rhs[odd]);
    }
    return result;
}
// _mm_hsubs_epi16
/// <summary> For each adjacent pair of 16-bit integers in "a" and in "b", subtract the odd-indexed element from the even-indexed one using saturation, and pack the signed 16-bit differences in "dst": results from "a" fill the low four lanes, results from "b" the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* lhs = &a.SShort0;
    short* rhs = &b.SShort0;
    short* output = &result.SShort0;
    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        // The differences are formed as 32-bit ints, then clamped by the helper.
        int diffA = lhs[even] - lhs[odd];
        int diffB = rhs[even] - rhs[odd];
        output[pair] = Saturate_To_Int16(diffA);
        output[pair + 4] = Saturate_To_Int16(diffB);
    }
    return result;
}
// _mm_hsub_epi32
/// <summary> For each adjacent pair of 32-bit integers in "a" and in "b", subtract the odd-indexed element from the even-indexed one (wrapping on overflow), and pack the signed 32-bit differences in "dst": results from "a" fill the low two lanes, results from "b" the high two. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* lhs = &a.SInt0;
    int* rhs = &b.SInt0;
    int* output = &result.SInt0;
    for (int pair = 0; pair < 2; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        output[pair] = lhs[even] - lhs[odd];
        output[pair + 2] = rhs[even] - rhs[odd];
    }
    return result;
}
// _mm_maddubs_epi16
/// <summary> Multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", add each adjacent pair of products, and pack the sums, saturated to signed 16-bit, in "dst". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 maddubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    byte* unsignedSrc = &a.Byte0;
    sbyte* signedSrc = &b.SByte0;
    short* output = &result.SShort0;
    for (int pair = 0; pair < 8; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;
        // Both products are formed as 32-bit ints, so the sum cannot wrap before clamping.
        int highProduct = unsignedSrc[odd] * signedSrc[odd];
        int lowProduct = unsignedSrc[even] * signedSrc[even];
        output[pair] = Saturate_To_Int16(highProduct + lowProduct);
    }
    return result;
}
// _mm_mulhrs_epi16
/// <summary> Multiply packed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Shift each intermediate right by 14 bits, round by adding 1, and store bits [16:1] of that value to "dst". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 mulhrs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* lhs = &a.SShort0;
    short* rhs = &b.SShort0;
    short* output = &result.SShort0;
    for (int lane = 0; lane < 8; ++lane)
    {
        int product = lhs[lane] * rhs[lane];
        // Drop the low 14 bits, add the rounding bit, then discard one more bit.
        int rounded = ((product >> 14) + 1) >> 1;
        output[lane] = (short)rounded;
    }
    return result;
}
// _mm_sign_epi8
/// <summary> Apply the sign of each signed 8-bit integer in "b" to the corresponding 8-bit integer in "a" and store the results in "dst": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* values = &a.SByte0;
    sbyte* selectors = &b.SByte0;
    sbyte* output = &result.SByte0;
    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte selector = selectors[lane];
        sbyte value = values[lane];
        if (selector == 0)
        {
            output[lane] = 0;
        }
        else if (selector > 0)
        {
            output[lane] = value;
        }
        else
        {
            output[lane] = (sbyte)-value;
        }
    }
    return result;
}
// _mm_sign_epi16
/// <summary> Apply the sign of each signed 16-bit integer in "b" to the corresponding 16-bit integer in "a" and store the results in "dst": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* values = &a.SShort0;
    short* selectors = &b.SShort0;
    short* output = &result.SShort0;
    for (int lane = 0; lane < 8; ++lane)
    {
        short selector = selectors[lane];
        short value = values[lane];
        if (selector == 0)
        {
            output[lane] = 0;
        }
        else if (selector > 0)
        {
            output[lane] = value;
        }
        else
        {
            output[lane] = (short)-value;
        }
    }
    return result;
}
// _mm_sign_epi32
/// <summary> Apply the sign of each signed 32-bit integer in "b" to the corresponding 32-bit integer in "a" and store the results in "dst": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* values = &a.SInt0;
    int* selectors = &b.SInt0;
    int* output = &result.SInt0;
    for (int lane = 0; lane < 4; ++lane)
    {
        int selector = selectors[lane];
        int value = values[lane];
        if (selector == 0)
        {
            output[lane] = 0;
        }
        else if (selector > 0)
        {
            output[lane] = value;
        }
        else
        {
            output[lane] = -value;
        }
    }
    return result;
}
}
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 0904d56406a93977ad6ef642b548155d
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant: