first commit
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: fb77e3d4fbde3090a07ebac108e13ed8
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: bbe744fdbbc734d3bb0a78042bd4b56a
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,276 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// bmi1 intrinsics
|
||||
/// </summary>
|
||||
public static class Bmi1
|
||||
{
|
||||
/// <summary>
/// Reports whether the bmi1 intrinsics can be used; evaluates to a constant at compile time.
/// </summary>
/// <remarks>
/// Burst couples bmi1 availability to AVX2 availability so that fewer feature sets need to be supported,
/// which is why this simply forwards to the AVX2 check.
/// </remarks>
public static bool IsBmi1Supported => Avx2.IsAvx2Supported;
|
||||
|
||||
/// <summary>
/// Returns (NOT a) AND b for 32-bit operands: every bit that is set in a is cleared from b.
/// </summary>
/// <remarks>
/// **** andn r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer that is inverted before the AND</param>
/// <param name="b">32-bit integer that is masked by the inverted a</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint andn_u32(uint a, uint b)
{
    uint invertedA = ~a;
    return b & invertedA;
}
|
||||
|
||||
/// <summary>
/// Returns (NOT a) AND b for 64-bit operands: every bit that is set in a is cleared from b.
/// </summary>
/// <remarks>
/// **** andn r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer that is inverted before the AND</param>
/// <param name="b">64-bit integer that is masked by the inverted a</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong andn_u64(ulong a, ulong b)
{
    ulong invertedA = ~a;
    return b & invertedA;
}
|
||||
|
||||
/// <summary>
/// Extracts a contiguous bit field from unsigned 32-bit integer a: len bits, beginning at bit index start,
/// returned right-aligned.
/// </summary>
/// <remarks>
/// **** bextr r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="start">Starting bit (only the low 8 bits are used)</param>
/// <param name="len">Number of bits (only the low 8 bits are used)</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bextr_u32(uint a, uint start, uint len)
{
    const uint BitWidth = sizeof(uint) * 8;

    // Only the low byte of each control value takes part in the operation.
    uint firstBit = start & 0xff;
    if (firstBit >= BitWidth)
    {
        // The field begins beyond the top bit, so nothing is left to extract.
        return 0;
    }

    uint shifted = a >> (int)firstBit;
    uint bitCount = len & 0xff;

    // A field at least as wide as the register keeps everything that survived the shift;
    // this also avoids building a mask with an out-of-range shift count.
    return bitCount >= BitWidth
        ? shifted
        : shifted & ((1u << (int)bitCount) - 1u);
}
|
||||
|
||||
/// <summary>
/// Extracts a contiguous bit field from unsigned 64-bit integer a: len bits, beginning at bit index start,
/// returned right-aligned.
/// </summary>
/// <remarks>
/// **** bextr r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="start">Starting bit (only the low 8 bits are used)</param>
/// <param name="len">Number of bits (only the low 8 bits are used)</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bextr_u64(ulong a, uint start, uint len)
{
    const uint BitWidth = sizeof(ulong) * 8;

    // Only the low byte of each control value takes part in the operation.
    uint firstBit = start & 0xff;
    if (firstBit >= BitWidth)
    {
        // The field begins beyond the top bit, so nothing is left to extract.
        return 0;
    }

    ulong shifted = a >> (int)firstBit;
    uint bitCount = len & 0xff;

    // A field at least as wide as the register keeps everything that survived the shift;
    // this also avoids building a mask with an out-of-range shift count.
    return bitCount >= BitWidth
        ? shifted
        : shifted & ((1ul << (int)bitCount) - 1ul);
}
|
||||
|
||||
/// <summary>
/// Extracts a contiguous bit field from unsigned 32-bit integer a using a packed control word:
/// bits 7:0 of control give the starting bit and bits 15:8 give the number of bits.
/// </summary>
/// <remarks>
/// **** bextr r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="control">Control (start in bits 7:0, length in bits 15:8)</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bextr2_u32(uint a, uint control)
{
    // Unpack the two byte-wide fields and defer to the three-argument form.
    return bextr_u32(a, control & 0xffu, (control >> 8) & 0xffu);
}
|
||||
|
||||
/// <summary>
/// Extracts a contiguous bit field from unsigned 64-bit integer a using a packed control word:
/// bits 7:0 of control give the starting bit and bits 15:8 give the number of bits.
/// </summary>
/// <remarks>
/// **** bextr r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="control">Control (start in bits 7:0, length in bits 15:8)</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bextr2_u64(ulong a, ulong control)
{
    // Each field is masked to a single byte, so narrowing to uint cannot lose information.
    uint start = (uint)(control & 0xfful);
    uint len = (uint)((control >> 8) & 0xfful);
    return bextr_u64(a, start, len);
}
|
||||
|
||||
/// <summary>
/// Isolates the lowest set bit of unsigned 32-bit integer a. The result has only that bit set,
/// or is zero when a has no bits set.
/// </summary>
/// <remarks>
/// **** blsi r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsi_u32(uint a)
{
    // In the two's complement negation only the lowest set bit coincides with the original value.
    uint negated = (uint)(-(int)a);
    return a & negated;
}
|
||||
|
||||
/// <summary>
/// Isolates the lowest set bit of unsigned 64-bit integer a. The result has only that bit set,
/// or is zero when a has no bits set.
/// </summary>
/// <remarks>
/// **** blsi r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsi_u64(ulong a)
{
    // In the two's complement negation only the lowest set bit coincides with the original value.
    ulong negated = (ulong)(-(long)a);
    return a & negated;
}
|
||||
/// <summary>
/// Builds a mask with every bit set from bit 0 up to and including the lowest set bit of
/// unsigned 32-bit integer a.
/// </summary>
/// <remarks>
/// **** blsmsk r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsmsk_u32(uint a)
{
    // a - 1 flips the lowest set bit and everything below it; XOR keeps exactly those flipped bits.
    uint decremented = a - 1;
    return a ^ decremented;
}
|
||||
|
||||
/// <summary>
/// Builds a mask with every bit set from bit 0 up to and including the lowest set bit of
/// unsigned 64-bit integer a.
/// </summary>
/// <remarks>
/// **** blsmsk r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsmsk_u64(ulong a)
{
    // a - 1 flips the lowest set bit and everything below it; XOR keeps exactly those flipped bits.
    ulong decremented = a - 1;
    return a ^ decremented;
}
|
||||
|
||||
/// <summary>
/// Returns unsigned 32-bit integer a with its lowest set bit reset to 0; all other bits are copied unchanged.
/// </summary>
/// <remarks>
/// **** blsr r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint blsr_u32(uint a)
{
    // a - 1 clears the lowest set bit and sets the bits below it; AND discards those lower bits again.
    uint decremented = a - 1;
    return a & decremented;
}
|
||||
|
||||
/// <summary>
/// Returns unsigned 64-bit integer a with its lowest set bit reset to 0; all other bits are copied unchanged.
/// </summary>
/// <remarks>
/// **** blsr r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong blsr_u64(ulong a)
{
    // a - 1 clears the lowest set bit and sets the bits below it; AND discards those lower bits again.
    ulong decremented = a - 1;
    return a & decremented;
}
|
||||
|
||||
/// <summary>
/// Counts the trailing (least-significant) zero bits of unsigned 32-bit integer a.
/// Returns 32 when a is zero.
/// </summary>
/// <remarks>
/// **** tzcnt r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint tzcnt_u32(uint a)
{
    // Reduce the input to its lowest set bit; the position of that bit is the answer.
    uint isolated = a & (uint)(-(int)a);

    // Begin at 31 for a non-zero input (32 for zero), then subtract each power of two
    // whose "low half" mask contains the isolated bit.
    uint count = isolated != 0 ? 31u : 32u;

    if ((isolated & 0x0000FFFF) != 0) { count -= 16; }
    if ((isolated & 0x00FF00FF) != 0) { count -= 8; }
    if ((isolated & 0x0F0F0F0F) != 0) { count -= 4; }
    if ((isolated & 0x33333333) != 0) { count -= 2; }
    if ((isolated & 0x55555555) != 0) { count -= 1; }

    return count;
}
|
||||
|
||||
/// <summary>
/// Counts the trailing (least-significant) zero bits of unsigned 64-bit integer a.
/// Returns 64 when a is zero.
/// </summary>
/// <remarks>
/// **** tzcnt r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong tzcnt_u64(ulong a)
{
    // Reduce the input to its lowest set bit; the position of that bit is the answer.
    ulong isolated = a & (ulong)(-(long)a);

    // Begin at 63 for a non-zero input (64 for zero), then subtract each power of two
    // whose "low half" mask contains the isolated bit.
    ulong count = isolated != 0 ? 63ul : 64ul;

    if ((isolated & 0x00000000FFFFFFFF) != 0) { count -= 32; }
    if ((isolated & 0x0000FFFF0000FFFF) != 0) { count -= 16; }
    if ((isolated & 0x00FF00FF00FF00FF) != 0) { count -= 8; }
    if ((isolated & 0x0F0F0F0F0F0F0F0F) != 0) { count -= 4; }
    if ((isolated & 0x3333333333333333) != 0) { count -= 2; }
    if ((isolated & 0x5555555555555555) != 0) { count -= 1; }

    return count;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: bae2d17db94135ea84f8110705ba44a0
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,212 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// bmi2 intrinsics
|
||||
/// </summary>
|
||||
public static class Bmi2
|
||||
{
|
||||
/// <summary>
/// Reports whether the bmi2 intrinsics can be used; evaluates to a constant at compile time.
/// </summary>
/// <remarks>
/// Burst couples bmi2 availability to AVX2 availability so that fewer feature sets need to be supported,
/// which is why this simply forwards to the AVX2 check.
/// </remarks>
public static bool IsBmi2Supported => Avx2.IsAvx2Supported;
|
||||
|
||||
/// <summary>
/// Returns unsigned 32-bit integer a with all bits at position index and above reset to 0.
/// </summary>
/// <remarks>
/// **** bzhi r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="index">Starting point (only the low 8 bits are used)</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint bzhi_u32(uint a, uint index)
{
    const uint BitWidth = sizeof(uint) * 8;

    // Only the low byte of index is significant.
    uint keepCount = index & 0xff;

    // An index at or past the register width leaves nothing to clear.
    return keepCount >= BitWidth
        ? a
        : a & ((1u << (int)keepCount) - 1u);
}
|
||||
|
||||
/// <summary>
/// Returns unsigned 64-bit integer a with all bits at position index and above reset to 0.
/// </summary>
/// <remarks>
/// **** bzhi r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="index">Starting point (only the low 8 bits are used)</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong bzhi_u64(ulong a, ulong index)
{
    const ulong BitWidth = sizeof(ulong) * 8;

    // Only the low byte of index is significant.
    ulong keepCount = index & 0xff;

    // An index at or past the register width leaves nothing to clear.
    return keepCount >= BitWidth
        ? a
        : a & ((1ul << (int)keepCount) - 1ul);
}
|
||||
|
||||
/// <summary>
/// Multiplies unsigned 32-bit integers a and b, returning the low 32 bits of the 64-bit product
/// and writing the high 32 bits to hi. This does not read or write arithmetic flags.
/// </summary>
/// <remarks>
/// **** mulx r32, r32, m32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="b">32-bit integer</param>
/// <param name="hi">Stores the high 32-bits</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint mulx_u32(uint a, uint b, out uint hi)
{
    // Widen one operand first so the multiplication is carried out in 64 bits and cannot wrap.
    ulong product = (ulong)a * b;

    uint low = (uint)(product & 0xffffffff);
    hi = (uint)(product >> 32);
    return low;
}
|
||||
|
||||
/// <summary>
/// Multiplies unsigned 64-bit integers a and b, returning the low 64 bits of the product
/// and writing the high 64 bits to hi. This does not read or write arithmetic flags.
/// </summary>
/// <remarks>
/// **** mulx r64, r64, m64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="b">64-bit integer</param>
/// <param name="hi">Stores the high 64-bits</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong mulx_u64(ulong a, ulong b, out ulong hi)
{
    // The widening multiply itself is delegated to the shared Common.umul128 helper.
    ulong low = Common.umul128(a, b, out hi);
    return low;
}
|
||||
|
||||
/// <summary>
/// Deposits the contiguous low bits of unsigned 32-bit integer a into the bit positions selected by mask
/// (lowest source bit to lowest set mask bit, and so on); every other result bit is zero.
/// </summary>
/// <remarks>
/// **** pdep r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint pdep_u32(uint a, uint mask)
{
    uint deposited = 0;

    // sourceBit selects the next not-yet-consumed low bit of a.
    uint sourceBit = 1u;

    // maskBit visits bit 0 through bit 31 and becomes 0 once it is shifted out of the top.
    for (uint maskBit = 1u; maskBit != 0; maskBit <<= 1)
    {
        if ((mask & maskBit) == 0)
        {
            continue;
        }

        if ((a & sourceBit) != 0)
        {
            deposited |= maskBit;
        }

        sourceBit <<= 1;
    }

    return deposited;
}
|
||||
|
||||
/// <summary>
/// Deposits the contiguous low bits of unsigned 64-bit integer a into the bit positions selected by mask
/// (lowest source bit to lowest set mask bit, and so on); every other result bit is zero.
/// </summary>
/// <remarks>
/// **** pdep r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong pdep_u64(ulong a, ulong mask)
{
    ulong deposited = 0;

    // sourceBit selects the next not-yet-consumed low bit of a.
    ulong sourceBit = 1ul;

    // maskBit visits bit 0 through bit 63 and becomes 0 once it is shifted out of the top.
    for (ulong maskBit = 1ul; maskBit != 0; maskBit <<= 1)
    {
        if ((mask & maskBit) == 0)
        {
            continue;
        }

        if ((a & sourceBit) != 0)
        {
            deposited |= maskBit;
        }

        sourceBit <<= 1;
    }

    return deposited;
}
|
||||
|
||||
/// <summary>
/// Gathers the bits of unsigned 32-bit integer a found at the positions selected by mask and packs them
/// into the contiguous low bits of the result; the remaining upper bits are zero.
/// </summary>
/// <remarks>
/// **** pext r32, r32, r32
/// </remarks>
/// <param name="a">32-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>32-bit integer</returns>
[DebuggerStepThrough]
public static uint pext_u32(uint a, uint mask)
{
    uint extracted = 0;

    // targetBit marks the next free low position in the packed result.
    uint targetBit = 1u;

    // maskBit visits bit 0 through bit 31 and becomes 0 once it is shifted out of the top.
    for (uint maskBit = 1u; maskBit != 0; maskBit <<= 1)
    {
        if ((mask & maskBit) == 0)
        {
            continue;
        }

        if ((a & maskBit) != 0)
        {
            extracted |= targetBit;
        }

        targetBit <<= 1;
    }

    return extracted;
}
|
||||
|
||||
/// <summary>
/// Gathers the bits of unsigned 64-bit integer a found at the positions selected by mask and packs them
/// into the contiguous low bits of the result; the remaining upper bits are zero.
/// </summary>
/// <remarks>
/// **** pext r64, r64, r64
/// </remarks>
/// <param name="a">64-bit integer</param>
/// <param name="mask">Mask</param>
/// <returns>64-bit integer</returns>
[DebuggerStepThrough]
public static ulong pext_u64(ulong a, ulong mask)
{
    ulong extracted = 0;

    // targetBit marks the next free low position in the packed result.
    ulong targetBit = 1ul;

    // maskBit visits bit 0 through bit 63 and becomes 0 once it is shifted out of the top.
    for (ulong maskBit = 1ul; maskBit != 0; maskBit <<= 1)
    {
        if ((mask & maskBit) == 0)
        {
            continue;
        }

        if ((a & maskBit) != 0)
        {
            extracted |= targetBit;
        }

        targetBit <<= 1;
    }

    return extracted;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: aa392f69e52b37a486ca7cfa6125fd60
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,66 @@
|
||||
using System;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
/// <summary>
|
||||
/// Static methods and properties for X86 instruction intrinsics.
|
||||
/// </summary>
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
/// Reinterprets ptr as a pointer to v128 and returns the value stored there.
/// </summary>
private static v128 GenericCSharpLoad(void* ptr)
{
    v128* source = (v128*)ptr;
    return *source;
}
|
||||
|
||||
/// <summary>
/// Reinterprets ptr as a pointer to v128 and writes val to that location.
/// </summary>
private static void GenericCSharpStore(void* ptr, v128 val)
{
    v128* destination = (v128*)ptr;
    *destination = val;
}
|
||||
|
||||
/// <summary>
/// Converts val to sbyte, clamping values outside the sbyte range to sbyte.MinValue or sbyte.MaxValue.
/// </summary>
private static sbyte Saturate_To_Int8(int val)
{
    if (val < sbyte.MinValue)
    {
        return sbyte.MinValue;
    }

    return val > sbyte.MaxValue ? sbyte.MaxValue : (sbyte)val;
}
|
||||
|
||||
/// <summary>
/// Converts val to byte, clamping values outside the byte range to byte.MinValue or byte.MaxValue.
/// </summary>
private static byte Saturate_To_UnsignedInt8(int val)
{
    if (val < byte.MinValue)
    {
        return byte.MinValue;
    }

    return val > byte.MaxValue ? byte.MaxValue : (byte)val;
}
|
||||
|
||||
/// <summary>
/// Converts val to short, clamping values outside the short range to short.MinValue or short.MaxValue.
/// </summary>
private static short Saturate_To_Int16(int val)
{
    if (val < short.MinValue)
    {
        return short.MinValue;
    }

    return val > short.MaxValue ? short.MaxValue : (short)val;
}
|
||||
|
||||
/// <summary>
/// Converts val to ushort, clamping values outside the ushort range to ushort.MinValue or ushort.MaxValue.
/// </summary>
private static ushort Saturate_To_UnsignedInt16(int val)
{
    if (val < ushort.MinValue)
    {
        return ushort.MinValue;
    }

    return val > ushort.MaxValue ? ushort.MaxValue : (ushort)val;
}
|
||||
|
||||
/// <summary>
/// Tests a 32-bit pattern for NaN: with the top (sign) bit masked off, the value must exceed 0x7f800000.
/// </summary>
private static bool IsNaN(uint v)
{
    const uint SignCleared = 0x7fffffffu;
    const uint Threshold = 0x7f800000;

    uint magnitude = v & SignCleared;
    return magnitude > Threshold;
}
|
||||
|
||||
/// <summary>
/// Tests a 64-bit pattern for NaN: with the top (sign) bit masked off, the value must exceed 0x7ff0000000000000.
/// </summary>
private static bool IsNaN(ulong v)
{
    const ulong SignCleared = 0x7ffffffffffffffful;
    const ulong Threshold = 0x7ff0000000000000ul;

    ulong magnitude = v & SignCleared;
    return magnitude > Threshold;
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 000378914c63384c8062cbad18605802
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,269 @@
|
||||
using System;
|
||||
using Unity.Burst;
|
||||
|
||||
#if !BURST_INTERNAL
|
||||
using AOT;
|
||||
using UnityEngine;
|
||||
#endif
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
#if !BURST_INTERNAL
|
||||
[BurstCompile]
|
||||
#endif
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
/// The 32-bit MXCSR register contains control and status information for SSE and AVX SIMD floating-point operations.
/// </summary>
[Flags]
public enum MXCSRBits
{
    /// <summary>
    /// Bit 15 (FTZ) of the MXCSR register enables the flush-to-zero mode, which controls the masked response to a SIMD floating-point underflow condition.
    /// </summary>
    /// <remarks>
    /// When the underflow exception is masked and the flush-to-zero mode is enabled, the processor performs the following operations when it detects a floating-point underflow condition.
    /// - Returns a zero result with the sign of the true result
    /// - Sets the precision and underflow exception flags.
    ///
    /// If the underflow exception is not masked, the flush-to-zero bit is ignored.
    ///
    /// The flush-to-zero mode is not compatible with IEEE Standard 754. The IEEE-mandated masked response to underflow is to deliver the denormalized result.
    /// The flush-to-zero mode is provided primarily for performance reasons. At the cost of a slight precision loss, faster execution can be achieved for applications where underflows
    /// are common and rounding the underflow result to zero can be tolerated. The flush-to-zero bit is cleared upon a power-up or reset of the processor, disabling the flush-to-zero mode.
    /// </remarks>
    FlushToZero = 1 << 15,

    /// <summary>
    /// Mask for rounding control bits (bits 13 and 14).
    /// </summary>
    /// <remarks>
    /// The rounding modes have no effect on comparison operations, operations that produce exact results, or operations that produce NaN results.
    /// </remarks>
    RoundingControlMask = (1 << 14) | (1 << 13),

    /// <summary>
    /// Rounded result is the closest to the infinitely precise result. If two values are equally close, the result is the even value (that is, the one with the least-significant bit of zero). Default.
    /// </summary>
    RoundToNearest = 0,

    /// <summary>
    /// Rounded result is closest to but no greater than the infinitely precise result.
    /// </summary>
    RoundDown = (1 << 13),

    /// <summary>
    /// Rounded result is closest to but no less than the infinitely precise result.
    /// </summary>
    RoundUp = (1 << 14),

    /// <summary>
    /// Rounded result is closest to but no greater in absolute value than the infinitely precise result.
    /// </summary>
    RoundTowardZero = (1 << 13) | (1 << 14),

    /// <summary>Bit 12: mask bit for the precision (inexact result) exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    PrecisionMask = 1 << 12,

    /// <summary>Bit 11: mask bit for the underflow exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    UnderflowMask = 1 << 11,

    /// <summary>Bit 10: mask bit for the overflow exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    OverflowMask = 1 << 10,

    /// <summary>Bit 9: mask bit for the divide-by-zero exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    DivideByZeroMask = 1 << 9,

    /// <summary>Bit 8: mask bit for the denormal-operand exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    DenormalOperationMask = 1 << 8,

    /// <summary>Bit 7: mask bit for the invalid-operation exception.</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    InvalidOperationMask = 1 << 7,

    /// <summary>
    /// Combine all bits for exception masking (bits 7 through 12) into one mask for convenience.
    /// </summary>
    ExceptionMask = PrecisionMask | UnderflowMask | OverflowMask | DivideByZeroMask | DenormalOperationMask | InvalidOperationMask,

    /// <summary>
    /// Bit 6 (DAZ) of the MXCSR register enables the denormals-are-zeros mode, which controls the processor's response to a SIMD floating-point denormal operand condition.
    /// </summary>
    /// <remarks>
    /// When the denormals-are-zeros flag is set, the processor converts all denormal source operands to a zero with the sign of the original operand before performing any computations on them.
    /// The processor does not set the denormal-operand exception flag (DE), regardless of the setting of the denormal-operand exception mask bit (DM); and it does not generate a denormal-operand
    /// exception if the exception is unmasked. The denormals-are-zeros mode is not compatible with IEEE Standard 754.
    ///
    /// The denormals-are-zeros mode is provided to improve processor performance for applications such as streaming media processing, where rounding a denormal operand to zero does not
    /// appreciably affect the quality of the processed data. The denormals-are-zeros flag is cleared upon a power-up or reset of the processor, disabling the denormals-are-zeros mode.
    ///
    /// The denormals-are-zeros mode was introduced in the Pentium 4 and Intel Xeon processor with the SSE2 extensions; however, it is fully compatible with the SSE SIMD floating-point instructions
    /// (that is, the denormals-are-zeros flag affects the operation of the SSE SIMD floating-point instructions). In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag
    /// (bit 6) is reserved. Attempting to set bit 6 of the MXCSR register on processors that do not support the DAZ flag will cause a general-protection exception (#GP).
    /// </remarks>
    DenormalsAreZeroes = 1 << 6,

    /// <summary>Bit 5: sticky flag recording that a precision (inexact result) exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    PrecisionFlag = 1 << 5,

    /// <summary>Bit 4: sticky flag recording that an underflow exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    UnderflowFlag = 1 << 4,

    /// <summary>Bit 3: sticky flag recording that an overflow exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    OverflowFlag = 1 << 3,

    /// <summary>Bit 2: sticky flag recording that a divide-by-zero exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    DivideByZeroFlag = 1 << 2,

    /// <summary>Bit 1: sticky flag recording that a denormal-operand exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    DenormalFlag = 1 << 1,

    /// <summary>Bit 0: sticky flag recording that an invalid-operation exception has been detected.</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    InvalidOperationFlag = 1 << 0,

    /// <summary>
    /// Combines all bits for flags (bits 0 through 5) into one mask for convenience.
    /// </summary>
    FlagMask = PrecisionFlag | UnderflowFlag | OverflowFlag | DivideByZeroFlag | DenormalFlag | InvalidOperationFlag,
}
|
||||
|
||||
/// <summary>
/// Rounding-control values: a rounding direction optionally combined with exception suppression.
/// </summary>
[Flags]
public enum RoundingMode
{
    /// <summary>
    /// Direction: round to the nearest integer.
    /// </summary>
    FROUND_TO_NEAREST_INT = 0,
    /// <summary>
    /// Direction: round toward negative infinity.
    /// </summary>
    FROUND_TO_NEG_INF = 1,
    /// <summary>
    /// Direction: round toward positive infinity.
    /// </summary>
    FROUND_TO_POS_INF = 2,
    /// <summary>
    /// Direction: round toward zero.
    /// </summary>
    FROUND_TO_ZERO = 3,
    /// <summary>
    /// Direction: use the current rounding direction.
    /// </summary>
    FROUND_CUR_DIRECTION = 4,

    /// <summary>
    /// Exceptions are not suppressed.
    /// </summary>
    FROUND_RAISE_EXC = 0,
    /// <summary>
    /// Exceptions are suppressed.
    /// </summary>
    FROUND_NO_EXC = 8,

    /// <summary>
    /// Nearest-integer rounding, exceptions not suppressed.
    /// </summary>
    FROUND_NINT = FROUND_RAISE_EXC | FROUND_TO_NEAREST_INT,
    /// <summary>
    /// Floor rounding, exceptions not suppressed.
    /// </summary>
    FROUND_FLOOR = FROUND_RAISE_EXC | FROUND_TO_NEG_INF,
    /// <summary>
    /// Ceiling rounding, exceptions not suppressed.
    /// </summary>
    FROUND_CEIL = FROUND_RAISE_EXC | FROUND_TO_POS_INF,
    /// <summary>
    /// Truncating rounding, exceptions not suppressed.
    /// </summary>
    FROUND_TRUNC = FROUND_RAISE_EXC | FROUND_TO_ZERO,
    /// <summary>
    /// Rounding according to MXCSR.RC, exceptions not suppressed.
    /// </summary>
    FROUND_RINT = FROUND_RAISE_EXC | FROUND_CUR_DIRECTION,
    /// <summary>
    /// Rounding according to MXCSR.RC, exceptions suppressed.
    /// </summary>
    FROUND_NEARBYINT = FROUND_NO_EXC | FROUND_CUR_DIRECTION,

    /// <summary>
    /// Nearest-integer rounding, exceptions suppressed.
    /// </summary>
    FROUND_NINT_NOEXC = FROUND_NO_EXC | FROUND_TO_NEAREST_INT,
    /// <summary>
    /// Floor rounding, exceptions suppressed.
    /// </summary>
    FROUND_FLOOR_NOEXC = FROUND_NO_EXC | FROUND_TO_NEG_INF,
    /// <summary>
    /// Ceiling rounding, exceptions suppressed.
    /// </summary>
    FROUND_CEIL_NOEXC = FROUND_NO_EXC | FROUND_TO_POS_INF,
    /// <summary>
    /// Truncating rounding, exceptions suppressed.
    /// </summary>
    FROUND_TRUNC_NOEXC = FROUND_NO_EXC | FROUND_TO_ZERO,
    /// <summary>
    /// Rounding according to MXCSR.RC, exceptions suppressed (same value as FROUND_NEARBYINT).
    /// </summary>
    FROUND_RINT_NOEXC = FROUND_NO_EXC | FROUND_CUR_DIRECTION,
}
|
||||
|
||||
/// <summary>
/// Disposable scope that replaces the rounding-control bits of <see cref="MXCSR"/> when it is
/// constructed and writes the previously read MXCSR value back when it is disposed.
/// </summary>
internal struct RoundingScope : IDisposable
{
    // MXCSR value captured in the constructor, before the rounding bits were replaced.
    private MXCSRBits SavedBits;

    /// <summary>
    /// Captures the current MXCSR value, then clears its rounding-control bits and ORs in
    /// <paramref name="roundingMode"/>.
    /// </summary>
    /// <param name="roundingMode">Bits to OR into MXCSR after the rounding-control field is cleared</param>
    public RoundingScope(MXCSRBits roundingMode)
    {
        var current = MXCSR;
        var withoutRoundingControl = current & ~MXCSRBits.RoundingControlMask;

        SavedBits = current;
        MXCSR = withoutRoundingControl | roundingMode;
    }

    /// <summary>
    /// Writes the MXCSR value captured by the constructor back to the register.
    /// </summary>
    public void Dispose()
    {
        MXCSR = SavedBits;
    }
}
|
||||
|
||||
#if !BURST_INTERNAL
|
||||
// Managed body is intentionally empty: the argument is ignored here.
// NOTE(review): presumably Burst substitutes the real CSR write when compiling the trampoline - confirm.
private static void BurstIntrinsicSetCSRFromManaged(int _)
{
}
|
||||
// Managed body always yields 0.
// NOTE(review): presumably Burst substitutes the real CSR read when compiling the trampoline - confirm.
private static int BurstIntrinsicGetCSRFromManaged()
{
    return 0;
}
|
||||
|
||||
// Reads the raw CSR value by forwarding to the Burst-compiled get trampoline.
internal static int getcsr_raw()
{
    return DoGetCSRTrampoline();
}
|
||||
|
||||
// Writes the raw CSR value by forwarding to the Burst-compiled set trampoline.
internal static void setcsr_raw(int bits)
{
    DoSetCSRTrampoline(bits);
}
|
||||
|
||||
// Trampoline that forwards the CSR bits to BurstIntrinsicSetCSRFromManaged,
// but only when SSE is reported as supported; otherwise it does nothing.
[BurstCompile(CompileSynchronously = true)]
private static void DoSetCSRTrampoline(int bits)
{
    if (!Sse.IsSseSupported)
    {
        return;
    }

    BurstIntrinsicSetCSRFromManaged(bits);
}
|
||||
|
||||
// Trampoline that returns the value of BurstIntrinsicGetCSRFromManaged when SSE is
// reported as supported, and 0 otherwise.
[BurstCompile(CompileSynchronously = true)]
private static int DoGetCSRTrampoline()
{
    return Sse.IsSseSupported ? BurstIntrinsicGetCSRFromManaged() : 0;
}
|
||||
|
||||
#elif BURST_INTERNAL
|
||||
// Inside Burst itself (BURST_INTERNAL), unit tests cannot recurse from tests into Burst again,
|
||||
// so CSR access is P/Invoked from a dummy wrapper DLL that exposes CSR manipulation.
|
||||
// Reads the raw CSR value through the native "x86_getcsr" entry point.
[DllImport("burst-dllimport-native", EntryPoint = "x86_getcsr")]
|
||||
internal static extern int getcsr_raw();
|
||||
|
||||
// Writes the raw CSR value through the native "x86_setcsr" entry point of the wrapper DLL.
[DllImport("burst-dllimport-native", EntryPoint = "x86_setcsr")]
|
||||
internal static extern void setcsr_raw(int bits);
|
||||
#endif
|
||||
/// <summary>
/// Gets or sets the CSR register value, exposed as <see cref="MXCSRBits"/>.
/// </summary>
/// <remarks>
/// The getter casts the result of getcsr_raw; the setter passes the value, cast to int, to setcsr_raw.
/// </remarks>
public static MXCSRBits MXCSR
{
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    get
    {
        int raw = getcsr_raw();
        return (MXCSRBits)raw;
    }
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    set
    {
        int raw = (int)value;
        setcsr_raw(raw);
    }
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: b88ec138634e3238a82a5b8f3d970ac1
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,306 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// F16C intrinsics
|
||||
/// </summary>
|
||||
public static class F16C
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if F16C intrinsics are supported.
///
/// Burst ties F16C support to AVX2 support to simplify feature sets to support,
/// so this simply reports <c>Avx2.IsAvx2Supported</c>.
/// </summary>
public static bool IsF16CSupported
{
    get
    {
        return Avx2.IsAvx2Supported;
    }
}
|
||||
|
||||
/// <summary>
/// Converts a half (hiding in a ushort) to a float (hiding in a uint).
/// </summary>
/// <remarks>
/// Signed zero keeps only its sign bit. Denormal halves are renormalized into normal floats.
/// Halves with an all-ones exponent (Inf/NaN) map to the all-ones float exponent (255), with
/// the mantissa bits carried over into the top of the float mantissa.
/// </remarks>
/// <param name="h">The half to convert</param>
/// <returns>The float result</returns>
[DebuggerStepThrough]
private static uint HalfToFloat(ushort h)
{
    var signed = (h & 0x8000u) != 0;
    var exponent = (h >> 10) & 0x1fu;
    var mantissa = h & 0x3ffu;

    var result = signed ? 0x80000000u : 0u;

    // +0 / -0: nothing but the sign bit to emit.
    if (!(exponent == 0 && mantissa == 0))
    {
        // Denormal (converts to normalized)
        if (exponent == 0)
        {
            // Adjust mantissa so it's normalized (and keep track of exponent adjustment)
            exponent = -1;
            do
            {
                exponent++;
                mantissa <<= 1;
            } while ((mantissa & 0x400) == 0);

            result |= (uint)((127 - 15 - exponent) << 23);

            // Have to re-mask the mantissa here because we've been shifting bits up.
            result |= (mantissa & 0x3ff) << 13;
        }
        else
        {
            var isInfOrNan = exponent == 0x1f;

            // The parentheses around the conditional are required: '<<' binds tighter than '?:',
            // so without them the Inf/NaN exponent (255) would not be shifted into bits 23..30.
            result |= (uint)((isInfOrNan ? 255 : (127 - 15 + exponent)) << 23);
            result |= mantissa << 13;
        }
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Convert the four lowest packed half-precision (16-bit) floating-point elements in a to packed
/// single-precision (32-bit) floating-point elements, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vcvtph2ps xmm, xmm
/// </remarks>
/// <param name="a">Vector a; elements UShort0..UShort3 are converted</param>
/// <returns>Vector holding the four converted float bit patterns</returns>
[DebuggerStepThrough]
public static v128 cvtph_ps(v128 a)
{
    uint lane0 = HalfToFloat(a.UShort0);
    uint lane1 = HalfToFloat(a.UShort1);
    uint lane2 = HalfToFloat(a.UShort2);
    uint lane3 = HalfToFloat(a.UShort3);

    return new v128(lane0, lane1, lane2, lane3);
}
|
||||
|
||||
/// <summary>
/// Convert the eight packed half-precision (16-bit) floating-point elements in a to packed
/// single-precision (32-bit) floating-point elements, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vcvtph2ps ymm, xmm
/// </remarks>
/// <param name="a">Vector a; elements UShort0..UShort7 are converted</param>
/// <returns>Vector holding the eight converted float bit patterns</returns>
[DebuggerStepThrough]
public static v256 mm256_cvtph_ps(v128 a)
{
    uint lane0 = HalfToFloat(a.UShort0);
    uint lane1 = HalfToFloat(a.UShort1);
    uint lane2 = HalfToFloat(a.UShort2);
    uint lane3 = HalfToFloat(a.UShort3);
    uint lane4 = HalfToFloat(a.UShort4);
    uint lane5 = HalfToFloat(a.UShort5);
    uint lane6 = HalfToFloat(a.UShort6);
    uint lane7 = HalfToFloat(a.UShort7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
|
||||
|
||||
// Base half-precision bit patterns for the float-to-half conversion.
// FloatToHalf indexes this table with the float's 9-bit sign+exponent field (f >> 23), so it has
// 512 entries: the first 256 for positive inputs, the last 256 (sign bit 0x8000 set) for negative inputs.
// The shifted float mantissa is added to the entry read from here.
// Using ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
|
||||
private static readonly ushort[] BaseTable =
|
||||
{
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
|
||||
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
|
||||
0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
|
||||
0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
|
||||
0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
};
|
||||
|
||||
// Right-shift amounts for the float-to-half conversion.
// FloatToHalf indexes this table with the same 9-bit sign+exponent field (f >> 23) as BaseTable
// and shifts the float's 23-bit mantissa right by the entry before adding it to the base value.
// A shift of 24 discards the whole mantissa; 13 keeps its top 10 bits.
private static readonly sbyte[] ShiftTable =
|
||||
{
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
|
||||
};
|
||||
|
||||
/// <summary>
/// Converts a float (hiding in a uint) to a half (hiding in a ushort).
/// </summary>
/// <remarks>
/// A table lookup on the float's sign+exponent field produces a truncated half value; that value is
/// then adjusted for the requested rounding mode. Only FROUND_NINT_NOEXC, FROUND_TRUNC_NOEXC,
/// FROUND_CEIL_NOEXC and FROUND_FLOOR_NOEXC trigger an adjustment; any other value returns the
/// table result unchanged.
/// </remarks>
/// <param name="f">The float to convert</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>The half result</returns>
[DebuggerStepThrough]
private static ushort FloatToHalf(uint f, int rounding)
{
    // Sign bit plus 8-bit exponent: the index into both lookup tables.
    var tableIndex = f >> 23;
    var mantissaBits = f & 0x7FFFFFu;
    var shift = ShiftTable[tableIndex];

    var half = (uint)(BaseTable[tableIndex] + (ushort)(mantissaBits >> shift));

    // Both flags describe the table result, before any rounding adjustment below.
    var isFinite = (half & 0x7C00) != 0x7C00;
    var isNegative = (half & 0x8000) != 0;

    if (rounding == (int)RoundingMode.FROUND_NINT_NOEXC)
    {
        // Highest mantissa bit that the table shift threw away.
        var roundingBit = (mantissaBits >> (shift - 1)) & 0x1u;

        if ((tableIndex & 0xFF) == 102)
        {
            half++;
        }

        if (isFinite && (roundingBit != 0))
        {
            half++;
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_TRUNC_NOEXC)
    {
        if (!isFinite)
        {
            half -= (uint)(~shift & 0x1);
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_CEIL_NOEXC)
    {
        if (isFinite && !isNegative)
        {
            var belowHalfRange = (tableIndex <= 102) && (tableIndex != 0);
            var droppedBitsSet = (mantissaBits & ((1u << shift) - 1u)) != 0;

            if (belowHalfRange || droppedBitsSet)
            {
                half++;
            }
        }

        // A -Inf result is only kept when the input's sign+exponent field was all ones.
        if ((half == 0xFC00) && (tableIndex != 0x1FF))
        {
            half--;
        }
    }
    else if (rounding == (int)RoundingMode.FROUND_FLOOR_NOEXC)
    {
        if (isFinite && isNegative)
        {
            var belowHalfRange = (tableIndex <= 358) && (tableIndex != 256);
            var droppedBitsSet = (mantissaBits & ((1u << shift) - 1u)) != 0;

            if (belowHalfRange || droppedBitsSet)
            {
                half++;
            }
        }

        // A +Inf result is only kept when the input was positive with an all-ones exponent.
        if ((half == 0x7C00) && (tableIndex != 0xFF))
        {
            half--;
        }
    }

    return (ushort)half;
}
|
||||
|
||||
/// <summary>
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of the RoundingMode
/// values FROUND_NINT_NOEXC, FROUND_FLOOR_NOEXC, FROUND_CEIL_NOEXC, FROUND_TRUNC_NOEXC or
/// FROUND_RINT_NOEXC. FROUND_RINT_NOEXC is resolved to one of the other four from the current
/// MXCSR rounding-control bits.
/// </summary>
/// <remarks>
/// **** vcvtps2ph xmm, xmm, imm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector with the four converted halves in the low elements and zeros in the upper four</returns>
[DebuggerStepThrough]
public static v128 cvtps_ph(v128 a, int rounding)
{
    if (rounding == (int)RoundingMode.FROUND_RINT_NOEXC)
    {
        // "Current direction": translate the MXCSR rounding control into an explicit mode.
        var control = MXCSR & MXCSRBits.RoundingControlMask;

        if (control == MXCSRBits.RoundToNearest)
        {
            rounding = (int)RoundingMode.FROUND_NINT_NOEXC;
        }
        else if (control == MXCSRBits.RoundDown)
        {
            rounding = (int)RoundingMode.FROUND_FLOOR_NOEXC;
        }
        else if (control == MXCSRBits.RoundUp)
        {
            rounding = (int)RoundingMode.FROUND_CEIL_NOEXC;
        }
        else if (control == MXCSRBits.RoundTowardZero)
        {
            rounding = (int)RoundingMode.FROUND_TRUNC_NOEXC;
        }
    }

    ushort half0 = FloatToHalf(a.UInt0, rounding);
    ushort half1 = FloatToHalf(a.UInt1, rounding);
    ushort half2 = FloatToHalf(a.UInt2, rounding);
    ushort half3 = FloatToHalf(a.UInt3, rounding);

    return new v128(half0, half1, half2, half3, 0, 0, 0, 0);
}
|
||||
|
||||
/// <summary>
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of the RoundingMode
/// values FROUND_NINT_NOEXC, FROUND_FLOOR_NOEXC, FROUND_CEIL_NOEXC, FROUND_TRUNC_NOEXC or
/// FROUND_RINT_NOEXC. FROUND_RINT_NOEXC is resolved to one of the other four from the current
/// MXCSR rounding-control bits.
/// </summary>
/// <remarks>
/// **** vcvtps2ph xmm, ymm, imm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector holding the eight converted halves</returns>
[DebuggerStepThrough]
public static v128 mm256_cvtps_ph(v256 a, int rounding)
{
    if (rounding == (int)RoundingMode.FROUND_RINT_NOEXC)
    {
        // "Current direction": translate the MXCSR rounding control into an explicit mode.
        var control = MXCSR & MXCSRBits.RoundingControlMask;

        if (control == MXCSRBits.RoundToNearest)
        {
            rounding = (int)RoundingMode.FROUND_NINT_NOEXC;
        }
        else if (control == MXCSRBits.RoundDown)
        {
            rounding = (int)RoundingMode.FROUND_FLOOR_NOEXC;
        }
        else if (control == MXCSRBits.RoundUp)
        {
            rounding = (int)RoundingMode.FROUND_CEIL_NOEXC;
        }
        else if (control == MXCSRBits.RoundTowardZero)
        {
            rounding = (int)RoundingMode.FROUND_TRUNC_NOEXC;
        }
    }

    ushort half0 = FloatToHalf(a.UInt0, rounding);
    ushort half1 = FloatToHalf(a.UInt1, rounding);
    ushort half2 = FloatToHalf(a.UInt2, rounding);
    ushort half3 = FloatToHalf(a.UInt3, rounding);
    ushort half4 = FloatToHalf(a.UInt4, rounding);
    ushort half5 = FloatToHalf(a.UInt5, rounding);
    ushort half6 = FloatToHalf(a.UInt6, rounding);
    ushort half7 = FloatToHalf(a.UInt7, rounding);

    return new v128(half0, half1, half2, half3, half4, half5, half6, half7);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: ae12ed22401338869b648a8327f251da
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,624 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// FMA intrinsics
|
||||
/// </summary>
|
||||
public static class Fma
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if FMA intrinsics are supported.
///
/// Burst ties FMA support to AVX2 support to simplify feature sets to support,
/// so this simply reports <c>Avx2.IsAvx2Supported</c>.
/// </summary>
public static bool IsFmaSupported
{
    get
    {
        return Avx2.IsAvx2Supported;
    }
}
|
||||
|
||||
// Computes a * b + c for the managed FMA implementations: the multiply and the add are both
// carried out in double precision and only the final sum is narrowed back to float.
[DebuggerStepThrough]
private static float FmaHelper(float a, float b, float c)
{
    double product = (double)a * b;
    double sum = product + c;
    return (float)sum;
}
|
||||
|
||||
// Overlays a float and a uint on the same four bytes (both fields sit at offset 0),
// so writing one field exposes its bit pattern through the other.
[StructLayout(LayoutKind.Explicit)]
private struct Union
{
    [FieldOffset(0)] public float f;

    [FieldOffset(0)] public uint u;
}
|
||||
|
||||
// Negated multiply-add: evaluates (-a) * b + c by handing the negated a to FmaHelper.
[DebuggerStepThrough]
private static float FnmaHelper(float a, float b, float c)
{
    float negatedA = -a;
    return FmaHelper(negatedA, b, c);
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmadd213pd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmadd_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmadd213pd ymm, ymm, ymm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmadd_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector whose element i is a[i] * b[i] + c[i]</returns>
[DebuggerStepThrough]
public static v128 fmadd_ps(v128 a, v128 b, v128 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(r0, r1, r2, r3);
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector whose element i is a[i] * b[i] + c[i]</returns>
[DebuggerStepThrough]
public static v256 mm256_fmadd_ps(v256 a, v256 b, v256 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, c.Float3);
    float r4 = FmaHelper(a.Float4, b.Float4, c.Float4);
    float r5 = FmaHelper(a.Float5, b.Float5, c.Float5);
    float r6 = FmaHelper(a.Float6, b.Float6, c.Float6);
    float r7 = FmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(r0, r1, r2, r3, r4, r5, r6, r7);
}
|
||||
|
||||
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// </summary>
/// <remarks>
/// **** vfmadd213sd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmadd_sd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
/// </summary>
/// <remarks>
/// **** vfmadd213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Copy of a with its lowest float replaced by a[0] * b[0] + c[0]</returns>
[DebuggerStepThrough]
public static v128 fmadd_ss(v128 a, v128 b, v128 c)
{
    float lowest = FmaHelper(a.Float0, b.Float0, c.Float0);

    // a is a by-value copy, so overwriting its lowest lane leaves the upper lanes as passed in.
    a.Float0 = lowest;
    return a;
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213pd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmaddsub_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213pd ymm, ymm, ymm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmaddsub_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213ps xmm, xmm, xmm
///
/// Even-indexed elements compute a * b - c, odd-indexed elements compute a * b + c.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmaddsub_ps(v128 a, v128 b, v128 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(r0, r1, r2, r3);
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmaddsub213ps ymm, ymm, ymm
///
/// Even-indexed elements compute a * b - c, odd-indexed elements compute a * b + c.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmaddsub_ps(v256 a, v256 b, v256 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, c.Float3);
    float r4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
    float r5 = FmaHelper(a.Float5, b.Float5, c.Float5);
    float r6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
    float r7 = FmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(r0, r1, r2, r3, r4, r5, r6, r7);
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsub213pd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmsub_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsub213pd ymm, ymm, ymm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmsub_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsub213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector whose element i is a[i] * b[i] - c[i]</returns>
[DebuggerStepThrough]
public static v128 fmsub_ps(v128 a, v128 b, v128 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(r0, r1, r2, r3);
}
|
||||
|
||||
/// <summary>
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsub213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector whose element i is a[i] * b[i] - c[i]</returns>
[DebuggerStepThrough]
public static v256 mm256_fmsub_ps(v256 a, v256 b, v256 c)
{
    float r0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
    float r1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    float r2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
    float r3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
    float r4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
    float r5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
    float r6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
    float r7 = FmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(r0, r1, r2, r3, r4, r5, r6, r7);
}
|
||||
|
||||
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// </summary>
/// <remarks>
/// **** vfmsub213sd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmsub_sd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
/// </summary>
/// <remarks>
/// **** vfmsub213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Copy of a with its lowest float replaced by a[0] * b[0] - c[0]</returns>
[DebuggerStepThrough]
public static v128 fmsub_ss(v128 a, v128 b, v128 c)
{
    float lowest = FmaHelper(a.Float0, b.Float0, -c.Float0);

    // a is a by-value copy, so overwriting its lowest lane leaves the upper lanes as passed in.
    a.Float0 = lowest;
    return a;
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213pd xmm, xmm, xmm
///
/// The managed (C#) implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this managed implementation.</exception>
[DebuggerStepThrough]
public static v128 fmsubadd_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare Exception base class; still caught by catch (Exception).
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213pd ymm, ymm, ymm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fmsubadd_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Packed single-precision fused multiply with alternating subtract/add of c: each element of a is multiplied by the matching element of b, then c is alternately subtracted from and added to the product. Results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fmsubadd_ps(v128 a, v128 b, v128 c)
{
    // Even lanes hand c to FmaHelper unchanged; odd lanes hand it -c.
    var lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    var lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    var lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    var lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
|
||||
|
||||
/// <summary>
/// 256-bit packed single-precision fused multiply with alternating subtract/add of c: each element of a is multiplied by the matching element of b, then c is alternately subtracted from and added to the product. Results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfmsubadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fmsubadd_ps(v256 a, v256 b, v256 c)
{
    // Even lanes hand c to FmaHelper unchanged; odd lanes hand it -c.
    var lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
    var lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
    var lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
    var lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
    var lane4 = FmaHelper(a.Float4, b.Float4, c.Float4);
    var lane5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
    var lane6 = FmaHelper(a.Float6, b.Float6, c.Float6);
    var lane7 = FmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213pd xmm, xmm, xmm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v128 fnmadd_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213pd ymm, ymm, ymm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fnmadd_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Packed single-precision fused negated-multiply-add: each element of a is multiplied by the matching element of b, the negated product is added to the matching element of c, and the results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmadd_ps(v128 a, v128 b, v128 c)
{
    // One FnmaHelper call per lane, evaluated in lane order.
    var lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
    var lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
    var lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
    var lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
|
||||
|
||||
/// <summary>
/// 256-bit packed single-precision fused negated-multiply-add: each element of a is multiplied by the matching element of b, the negated product is added to the matching element of c, and the results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fnmadd_ps(v256 a, v256 b, v256 c)
{
    // One FnmaHelper call per lane, evaluated in lane order.
    var lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
    var lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
    var lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
    var lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);
    var lane4 = FnmaHelper(a.Float4, b.Float4, c.Float4);
    var lane5 = FnmaHelper(a.Float5, b.Float5, c.Float5);
    var lane6 = FnmaHelper(a.Float6, b.Float6, c.Float6);
    var lane7 = FnmaHelper(a.Float7, b.Float7, c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
|
||||
|
||||
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// </summary>
/// <remarks>
/// **** vfnmadd213sd xmm, xmm, xmm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v128 fnmadd_sd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Scalar single-precision fused negated-multiply-add: multiplies the lowest elements of a and b and adds the negated product to the lowest element of c. The result goes to the lowest element of dst; the upper three elements are taken from a.
/// </summary>
/// <remarks>
/// **** vfnmadd213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmadd_ss(v128 a, v128 b, v128 c)
{
    // Only lane 0 of the local copy of a is replaced; lanes 1-3 pass through.
    a.Float0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
    return a;
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213pd xmm, xmm, xmm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v128 fnmsub_pd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213pd ymm, ymm, ymm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v256 mm256_fnmsub_pd(v256 a, v256 b, v256 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Packed single-precision fused negated-multiply-subtract: each element of a is multiplied by the matching element of b, the matching element of c is subtracted from the negated product, and the results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213ps xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmsub_ps(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed by handing FnmaHelper the negated element of c.
    var lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    var lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
    var lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
    var lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);

    return new v128(lane0, lane1, lane2, lane3);
}
|
||||
|
||||
/// <summary>
/// 256-bit packed single-precision fused negated-multiply-subtract: each element of a is multiplied by the matching element of b, the matching element of c is subtracted from the negated product, and the results are stored in dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213ps ymm, ymm, ymm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v256 mm256_fnmsub_ps(v256 a, v256 b, v256 c)
{
    // Subtraction is expressed by handing FnmaHelper the negated element of c.
    var lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    var lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
    var lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
    var lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);
    var lane4 = FnmaHelper(a.Float4, b.Float4, -c.Float4);
    var lane5 = FnmaHelper(a.Float5, b.Float5, -c.Float5);
    var lane6 = FnmaHelper(a.Float6, b.Float6, -c.Float6);
    var lane7 = FnmaHelper(a.Float7, b.Float7, -c.Float7);

    return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
}
|
||||
|
||||
/// <summary>
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
/// </summary>
/// <remarks>
/// **** vfnmsub213sd xmm, xmm, xmm
///
/// This C# implementation does not emulate double-precision FMA and always throws.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
/// <exception cref="NotImplementedException">Always thrown by this C# implementation.</exception>
[DebuggerStepThrough]
public static v128 fnmsub_sd(v128 a, v128 b, v128 c)
{
    // A specific exception type instead of the bare base class; it still derives
    // from Exception, so existing catch blocks keep working.
    throw new NotImplementedException("Double-precision FMA not emulated in C#");
}
|
||||
|
||||
/// <summary>
/// Scalar single-precision fused negated-multiply-subtract: multiplies the lowest elements of a and b and subtracts the lowest element of c from the negated product. The result goes to the lowest element of dst; the upper three elements are taken from a.
/// </summary>
/// <remarks>
/// **** vfnmsub213ss xmm, xmm, xmm
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="c">Vector c</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 fnmsub_ss(v128 a, v128 b, v128 c)
{
    // Subtraction is expressed by handing FnmaHelper the negated element of c.
    // Only lane 0 of the local copy of a is replaced; lanes 1-3 pass through.
    a.Float0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
    return a;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 4d7325591616354d86b1492e282843f4
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,62 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// popcnt intrinsics
|
||||
/// </summary>
|
||||
public static class Popcnt
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if popcnt intrinsics are supported.
///
/// Burst couples popcnt availability to SSE4.2 availability to keep the number of supported feature sets small, so this simply forwards to <see cref="Sse4_2.IsSse42Supported"/>.
/// </summary>
public static bool IsPopcntSupported => Sse4_2.IsSse42Supported;
|
||||
|
||||
/// <summary>
/// Returns how many bits of the unsigned 32-bit integer are set to 1.
/// </summary>
/// <remarks>
/// **** popcnt r32, r32
/// </remarks>
/// <param name="v">Integer to be counted in</param>
/// <returns>Count</returns>
[DebuggerStepThrough]
public static int popcnt_u32(uint v)
{
    int count = 0;

    // Peel off the lowest bit until nothing is left.
    for (uint rest = v; rest != 0; rest >>= 1)
    {
        count += (int)(rest & 1u);
    }

    return count;
}
|
||||
|
||||
/// <summary>
/// Returns how many bits of the unsigned 64-bit integer are set to 1.
/// </summary>
/// <remarks>
/// **** popcnt r64, r64
/// </remarks>
/// <param name="v">Integer to be counted in</param>
/// <returns>Count</returns>
[DebuggerStepThrough]
public static int popcnt_u64(ulong v)
{
    int count = 0;

    // Peel off the lowest bit until nothing is left.
    for (ulong rest = v; rest != 0; rest >>= 1)
    {
        count += (int)(rest & 1UL);
    }

    return count;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: e4725d04fd6336efbc80f25ae908c344
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 9edae0ecbfb63f239983f9a81f80ddf9
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: f0de54c00de3304699fdf0bedf123944
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,155 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// SSE3 intrinsics
|
||||
/// </summary>
|
||||
public static class Sse3
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if SSE3 intrinsics are supported.
/// </summary>
/// <remarks>
/// The C# body shown here always returns false.
/// </remarks>
public static bool IsSse3Supported => false;
|
||||
|
||||
// _mm_addsub_ps
/// <summary> Alternately subtracts and adds packed single-precision (32-bit) floating-point elements: even elements of "dst" are "a" minus "b", odd elements are "a" plus "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 addsub_ps(v128 a, v128 b)
{
    return new v128(
        a.Float0 - b.Float0,
        a.Float1 + b.Float1,
        a.Float2 - b.Float2,
        a.Float3 + b.Float3);
}
|
||||
|
||||
// _mm_addsub_pd
/// <summary> Alternately subtracts and adds packed double-precision (64-bit) floating-point elements: the low element of "dst" is "a" minus "b", the high element is "a" plus "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 addsub_pd(v128 a, v128 b)
{
    double low = a.Double0 - b.Double0;
    double high = a.Double1 + b.Double1;

    // Start from a zeroed vector and fill in both lanes.
    return new v128 { Double0 = low, Double1 = high };
}
|
||||
|
||||
// _mm_hadd_pd
/// <summary> Horizontal add of double-precision (64-bit) floating-point pairs: the low element of "dst" is the sum of both elements of "a", the high element is the sum of both elements of "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_pd(v128 a, v128 b)
{
    double sumA = a.Double0 + a.Double1;
    double sumB = b.Double0 + b.Double1;

    // Start from a zeroed vector and fill in both lanes.
    return new v128 { Double0 = sumA, Double1 = sumB };
}
|
||||
|
||||
// _mm_hadd_ps
/// <summary> Horizontal add of single-precision (32-bit) floating-point pairs: the two lower elements of "dst" hold the pair sums of "a", the two upper elements hold the pair sums of "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_ps(v128 a, v128 b)
{
    return new v128(
        a.Float0 + a.Float1,
        a.Float2 + a.Float3,
        b.Float0 + b.Float1,
        b.Float2 + b.Float3);
}
|
||||
|
||||
// _mm_hsub_pd
/// <summary> Horizontal subtract of double-precision (64-bit) floating-point pairs: the low element of "dst" is the low element of "a" minus its high element, the high element of "dst" is the same difference taken from "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_pd(v128 a, v128 b)
{
    double diffA = a.Double0 - a.Double1;
    double diffB = b.Double0 - b.Double1;

    // Start from a zeroed vector and fill in both lanes.
    return new v128 { Double0 = diffA, Double1 = diffB };
}
|
||||
|
||||
// _mm_hsub_ps
/// <summary> Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst": the two lower elements hold the pair differences of "a", the two upper elements hold the pair differences of "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_ps(v128 a, v128 b)
{
    // Each pair is (even element) - (odd element).
    return new v128(
        a.Float0 - a.Float1,
        a.Float2 - a.Float3,
        b.Float0 - b.Float1,
        b.Float2 - b.Float3);
}
|
||||
|
||||
// _mm_movedup_pd
/// <summary> Broadcasts the low double-precision (64-bit) floating-point element of "a" into both elements of "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 movedup_pd(v128 a)
{
    // Burst IR is fine
    double low = a.Double0;
    return new v128 { Double0 = low, Double1 = low };
}
|
||||
|
||||
// _mm_movehdup_ps
/// <summary> Copies each odd-indexed single-precision (32-bit) floating-point element of "a" into itself and the even-indexed element below it, and stores the results in "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 movehdup_ps(v128 a)
{
    // Burst IR is fine
    return new v128(a.Float1, a.Float1, a.Float3, a.Float3);
}
|
||||
|
||||
// _mm_moveldup_ps
/// <summary> Copies each even-indexed single-precision (32-bit) floating-point element of "a" into itself and the odd-indexed element above it, and stores the results in "dst". </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 moveldup_ps(v128 a)
{
    // Burst IR is fine
    return new v128(a.Float0, a.Float0, a.Float2, a.Float2);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 084c864f475138fba5e71aa0c9653558
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 79fa55e43ac038089dbaa9227eea27ae
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,822 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// SSE 4.2 intrinsics
|
||||
/// </summary>
|
||||
public static class Sse4_2
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if SSE 4.2 intrinsics are supported.
/// </summary>
/// <remarks>
/// The C# body shown here always returns false.
/// </remarks>
public static bool IsSse42Supported => false;
|
||||
|
||||
/// <summary>
/// Constants for string comparison intrinsics
/// </summary>
/// <remarks>
/// The values are written as shifted bit fields to mirror how the emulation decodes imm8:
/// bits 0-1 select the element type, bits 2-3 the comparison mode, bits 4-5 the polarity
/// and bit 6 the output selection.
/// </remarks>
[Flags]
public enum SIDD
{
    // ---- bits 0-1: element type ----

    /// <summary>
    /// Compare 8-bit unsigned characters
    /// </summary>
    UBYTE_OPS = 0,
    /// <summary>
    /// Compare 16-bit unsigned characters
    /// </summary>
    UWORD_OPS = 1,
    /// <summary>
    /// Compare 8-bit signed characters
    /// </summary>
    SBYTE_OPS = 2,
    /// <summary>
    /// Compare 16-bit signed characters
    /// </summary>
    SWORD_OPS = 3,

    // ---- bits 2-3: comparison mode ----

    /// <summary>
    /// Compare any equal
    /// </summary>
    CMP_EQUAL_ANY = 0 << 2,
    /// <summary>
    /// Compare ranges
    /// </summary>
    CMP_RANGES = 1 << 2,
    /// <summary>
    /// Compare equal each
    /// </summary>
    CMP_EQUAL_EACH = 2 << 2,
    /// <summary>
    /// Compare equal ordered
    /// </summary>
    CMP_EQUAL_ORDERED = 3 << 2,

    // ---- bits 4-5: polarity ----

    /// <summary>
    /// Normal result polarity
    /// </summary>
    POSITIVE_POLARITY = 0 << 4,
    /// <summary>
    /// Negate results
    /// </summary>
    NEGATIVE_POLARITY = 1 << 4,
    /// <summary>
    /// Normal results only before end of string
    /// </summary>
    MASKED_POSITIVE_POLARITY = 2 << 4,
    /// <summary>
    /// Negate results only before end of string
    /// </summary>
    MASKED_NEGATIVE_POLARITY = 3 << 4,

    // ---- bit 6: output selection (index-returning intrinsics) ----

    /// <summary>
    /// Index only: return least significant bit
    /// </summary>
    LEAST_SIGNIFICANT = 0 << 6,
    /// <summary>
    /// Index only: return most significant bit
    /// </summary>
    MOST_SIGNIFICANT = 1 << 6,

    // ---- bit 6: output selection (mask-returning intrinsics) ----

    /// <summary>
    /// Mask only: return bit mask
    /// </summary>
    BIT_MASK = 0 << 6,
    /// <summary>
    /// Mask only: return byte/word mask
    /// </summary>
    UNIT_MASK = 1 << 6,
}
|
||||
|
||||
/*
|
||||
* Intrinsics for text/string processing.
|
||||
*/
|
||||
|
||||
/// <summary>
/// Bit matrix of 16 rows, each row a 16-bit word, used to hold the pairwise comparison results of the string intrinsics emulation.
/// </summary>
private unsafe struct StrBoolArray
{
    public fixed ushort Bits[16];

    /// <summary>
    /// Sets or clears bit <paramref name="bindex"/> of row <paramref name="aindex"/>.
    /// </summary>
    public void SetBit(int aindex, int bindex, bool val)
    {
        fixed (ushort* rows = Bits)
        {
            ushort* row = rows + aindex;

            if (!val)
            {
                *row &= (ushort)(~(1 << bindex));
                return;
            }

            *row |= (ushort)(1 << bindex);
        }
    }

    /// <summary>
    /// Returns whether bit <paramref name="bindex"/> of row <paramref name="aindex"/> is set.
    /// </summary>
    public bool GetBit(int aindex, int bindex)
    {
        fixed (ushort* rows = Bits)
        {
            int row = rows[aindex];
            int probe = 1 << bindex;
            return (row & probe) != 0;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Implicit-length, mask-returning string compare: derives both string lengths from the first zero element, computes the comparison bits and converts them into the mask output selected by imm8.
/// </summary>
private static v128 cmpistrm_emulation<T>(T* a, T* b, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int aLength = ComputeStringLength<T>(a, len);
    int bLength = ComputeStringLength<T>(b, len);

    int comparisonBits = ComputeStrCmpIntRes2<T>(a, aLength, b, bLength, len, imm8, allOnes);
    return ComputeStrmOutput(len, imm8, allOnesT, comparisonBits);
}
|
||||
|
||||
/// <summary>
/// Explicit-length, mask-returning string compare: computes the comparison bits for the caller-supplied lengths and converts them into the mask output selected by imm8.
/// </summary>
private static v128 cmpestrm_emulation<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int comparisonBits = ComputeStrCmpIntRes2<T>(a, alen, b, blen, len, imm8, allOnes);
    return ComputeStrmOutput(len, imm8, allOnesT, comparisonBits);
}
|
||||
|
||||
/// <summary>
/// Turns the comparison bits into the vector returned by the mask-producing string intrinsics.
/// </summary>
/// <remarks>
/// With imm8 bit 6 clear the bits are stored as-is in the low 32-bit element.
/// With imm8 bit 6 set every one of the first <paramref name="len"/> elements becomes
/// <paramref name="allOnesT"/> when its bit is set and zero otherwise.
/// </remarks>
private static v128 ComputeStrmOutput<T>(int len, int imm8, T allOnesT, int intRes2) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    v128 result = default;

    // Bit mask output: nothing to expand.
    if ((imm8 & (1 << 6)) == 0)
    {
        result.SInt0 = intRes2;
        return result;
    }

    // Byte / word mask output: one full element per result bit.
    T* lanes = (T*)&result.Byte0;
    for (int lane = 0; lane < len; ++lane)
    {
        bool hit = (intRes2 & (1 << lane)) != 0;
        lanes[lane] = hit ? allOnesT : default(T);
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Implicit-length, index-returning string compare: derives both string lengths from the first zero element, computes the comparison bits and reduces them to the index selected by imm8.
/// </summary>
private static int cmpistri_emulation<T>(T* a, T* b, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int aLength = ComputeStringLength<T>(a, len);
    int bLength = ComputeStringLength<T>(b, len);

    int comparisonBits = ComputeStrCmpIntRes2<T>(a, aLength, b, bLength, len, imm8, allOnes);
    return ComputeStriOutput(len, imm8, comparisonBits);
}
|
||||
|
||||
/// <summary>
/// Explicit-length, index-returning string compare: computes the comparison bits for the caller-supplied lengths and reduces them to the index selected by imm8.
/// </summary>
private static int cmpestri_emulation<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes, T allOnesT) where T : unmanaged, IComparable<T>, IEquatable<T>
{
    int comparisonBits = ComputeStrCmpIntRes2<T>(a, alen, b, blen, len, imm8, allOnes);
    return ComputeStriOutput(len, imm8, comparisonBits);
}
|
||||
|
||||
/// <summary>
/// Reduces the comparison bits to a single element index.
/// </summary>
/// <remarks>
/// With imm8 bit 6 clear the lowest set bit below <paramref name="len"/> is returned,
/// with imm8 bit 6 set the highest one. If none of the first <paramref name="len"/> bits
/// is set, <paramref name="len"/> itself is returned.
/// </remarks>
private static int ComputeStriOutput(int len, int imm8, int intRes2)
{
    bool wantHighest = (imm8 & (1 << 6)) != 0;

    if (wantHighest)
    {
        for (int bit = len - 1; bit >= 0; --bit)
        {
            if ((intRes2 & (1 << bit)) != 0)
            {
                return bit;
            }
        }
    }
    else
    {
        for (int bit = 0; bit < len; ++bit)
        {
            if ((intRes2 & (1 << bit)) != 0)
            {
                return bit;
            }
        }
    }

    // No match among the first len bits.
    return len;
}
|
||||
|
||||
/// <summary>
/// Returns the index of the first element equal to default(T) among the first <paramref name="max"/> elements, or <paramref name="max"/> when there is none.
/// </summary>
private static int ComputeStringLength<T>(T* ptr, int max) where T : unmanaged, IEquatable<T>
{
    var equality = EqualityComparer<T>.Default;
    int index = 0;

    while (index < max)
    {
        // A zero element terminates the string.
        if (equality.Equals(ptr[index], default(T)))
        {
            return index;
        }

        ++index;
    }

    return max;
}
|
||||
|
||||
/// <summary>
/// Computes the comparison bits (one bit per element of <paramref name="b"/>) shared by all emulated string-compare intrinsics.
/// </summary>
/// <remarks>
/// The work happens in three stages:
/// 1. every element of a is compared with every element of b; results involving elements
///    at or past a string's length are overridden according to the comparison mode;
/// 2. the pairwise results are aggregated into one bit per element according to the
///    comparison mode (imm8 bits 2-3);
/// 3. the aggregated bits are optionally negated (imm8 bit 4), and with imm8 bit 5 set only
///    the bits of valid elements of b are negated.
/// An element of a string is treated as invalid from index alen / blen onwards.
/// </remarks>
/// <param name="a">First string; <paramref name="len"/> elements are read.</param>
/// <param name="alen">Index of the first invalid element of a.</param>
/// <param name="b">Second string; <paramref name="len"/> elements are read.</param>
/// <param name="blen">Index of the first invalid element of b.</param>
/// <param name="len">Number of elements per vector.</param>
/// <param name="imm8">Control byte.</param>
/// <param name="allOnes">Initial value of the result for the "equal ordered" mode.</param>
/// <returns>The comparison bits after polarity has been applied.</returns>
private static int ComputeStrCmpIntRes2<T>(T* a, int alen, T* b, int blen, int len, int imm8, int allOnes) where T : unmanaged, IComparable<T>, IEquatable<T>
{
#if !NET_DOTS
    int mode = (imm8 >> 2) & 3;
    bool aInvalid = false;
    bool bInvalid;
    StrBoolArray boolRes = default;
    int i, j;

    // Stage 1: pairwise comparison. Row index = element of a, bit index = element of b.
    for (i = 0; i < len; ++i)
    {
        T aCh = a[i];

        // Sticky: once the end of a is reached every later element is invalid too.
        if (i == alen)
            aInvalid = true;

        bInvalid = false;
        for (j = 0; j < len; ++j)
        {
            T bCh = b[j];
            if (j == blen)
                bInvalid = true;

            bool match;

            // Compare, then override the comparison for invalid characters.
            switch (mode)
            {
                case 0: // equal any
                    match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    if (aInvalid || bInvalid)
                        match = false;
                    break;

                case 1: // ranges: even elements of a are lower bounds, odd elements upper bounds
                    if (0 == (i & 1))
                        match = Comparer<T>.Default.Compare(bCh, aCh) >= 0;
                    else
                        match = Comparer<T>.Default.Compare(bCh, aCh) <= 0;

                    if (aInvalid || bInvalid)
                        match = false;
                    break;

                case 2: // equal each
                    match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    if (aInvalid && bInvalid)
                        match = true;
                    else if (aInvalid || bInvalid)
                        match = false;
                    break;

                default: // equal ordered
                    match = EqualityComparer<T>.Default.Equals(aCh, bCh);
                    if (aInvalid)
                        match = true;
                    else if (bInvalid)
                        match = false;
                    break;
            }

            boolRes.SetBit(i, j, match);
        }
    }

    int intRes1 = 0;

    // Stage 2: aggregate results into one bit per element of b.
    switch (mode)
    {
        case 0: // equal any: b[i] matches if it equals any element of a
            for (i = 0; i < len; ++i)
            {
                for (j = 0; j < len; ++j)
                {
                    intRes1 |= (boolRes.GetBit(j, i) ? 1 : 0) << i;
                }
            }
            break;

        case 1: // ranges: b[i] matches if it satisfies both bounds of any (lower, upper) pair
            for (i = 0; i < len; ++i)
            {
                for (j = 0; j < len; j += 2)
                {
                    intRes1 |= ((boolRes.GetBit(j, i) && boolRes.GetBit(j + 1, i)) ? 1 : 0) << i;
                }
            }
            break;

        case 2: // equal each: only the diagonal matters
            for (i = 0; i < len; ++i)
            {
                intRes1 |= (boolRes.GetBit(i, i) ? 1 : 0) << i;
            }
            break;

        case 3: // equal ordered: start from all ones and clear bit i on the first mismatch
            intRes1 = allOnes;
            for (i = 0; i < len; ++i)
            {
                int k = i;
                for (j = 0; j < len - i; ++j)
                {
                    if (!boolRes.GetBit(j, k))
                        intRes1 &= ~(1 << i);
                    k += 1;
                }
            }
            break;
    }

    // Stage 3: optionally negate results.
    int intRes2 = 0;
    bool negate = (imm8 & (1 << 4)) != 0;
    bool onlyNegateValid = (imm8 & (1 << 5)) != 0;

    bInvalid = false;
    for (i = 0; i < len; ++i)
    {
        // Validity of b[i] follows blen, exactly as in stage 1. For implicit-length
        // callers blen is the index of the first zero element, so this matches a
        // zero-element scan; for explicit-length callers it honours the given length
        // instead of looking for a terminator.
        if (i == blen)
            bInvalid = true;

        int bit = 1 << i;
        if (negate && !(onlyNegateValid && bInvalid))
            intRes2 |= (~intRes1) & bit; // negate
        else
            intRes2 |= intRes1 & bit;    // keep as is
    }

    return intRes2;
#else
    throw new NotImplementedException("dots runtime C# lacks comparer");
#endif
}
|
||||
|
||||
/// <summary>
/// Implicit-length packed string compare of a and b, controlled by imm8, producing the mask result.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose which typed view of the vectors is handed to the emulation helper:
/// bit 0 clear means sixteen 8-bit elements, bit 0 set means eight 16-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpistrm(v128 a, v128 b, int imm8)
{
    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            return cmpistrm_emulation(&a.Byte0, &b.Byte0, 16, imm8, 0xffff, (byte)0xff);
        case 2: // signed bytes
            return cmpistrm_emulation(&a.SByte0, &b.SByte0, 16, imm8, 0xffff, (sbyte)-1);
        case 1: // unsigned words
            return cmpistrm_emulation(&a.UShort0, &b.UShort0, 8, imm8, 0xff, (ushort)0xffff);
        default: // signed words
            return cmpistrm_emulation(&a.SShort0, &b.SShort0, 8, imm8, 0xff, (short)-1);
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Implicit-length packed string compare of a and b, controlled by imm8, producing the index result.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose which typed view of the vectors is handed to the emulation helper:
/// bit 0 clear means sixteen 8-bit elements, bit 0 set means eight 16-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Index</returns>
[DebuggerStepThrough]
public static int cmpistri(v128 a, v128 b, int imm8)
{
    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            return cmpistri_emulation(&a.Byte0, &b.Byte0, 16, imm8, 0xffff, (byte)0xff);
        case 2: // signed bytes
            return cmpistri_emulation(&a.SByte0, &b.SByte0, 16, imm8, 0xffff, (sbyte)-1);
        case 1: // unsigned words
            return cmpistri_emulation(&a.UShort0, &b.UShort0, 8, imm8, 0xff, (ushort)0xffff);
        default: // signed words
            return cmpistri_emulation(&a.SShort0, &b.SShort0, 8, imm8, 0xff, (short)-1);
    }
}
|
||||
|
||||
/// <summary>
/// Explicit-length packed string compare of a (length la) and b (length lb), controlled by imm8, producing the mask result.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose which typed view of the vectors is handed to the emulation helper:
/// bit 0 clear means sixteen 8-bit elements, bit 0 set means eight 16-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpestrm(v128 a, int la, v128 b, int lb, int imm8)
{
    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            return cmpestrm_emulation(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff, (byte)0xff);
        case 2: // signed bytes
            return cmpestrm_emulation(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff, (sbyte)-1);
        case 1: // unsigned words
            return cmpestrm_emulation(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff, (ushort)0xffff);
        default: // signed words
            return cmpestrm_emulation(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff, (short)-1);
    }
}
|
||||
|
||||
/// <summary>
/// Explicit-length packed string compare of a (length la) and b (length lb), controlled by imm8, producing the index result.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose which typed view of the vectors is handed to the emulation helper:
/// bit 0 clear means sixteen 8-bit elements, bit 0 set means eight 16-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Index</returns>
[DebuggerStepThrough]
public static int cmpestri(v128 a, int la, v128 b, int lb, int imm8)
{
    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            return cmpestri_emulation(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff, (byte)0xff);
        case 2: // signed bytes
            return cmpestri_emulation(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff, (sbyte)-1);
        case 1: // unsigned words
            return cmpestri_emulation(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff, (ushort)0xffff);
        default: // signed words
            return cmpestri_emulation(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff, (short)-1);
    }
}
|
||||
|
||||
/*
|
||||
* Intrinsics for text/string processing and reading values of EFlags.
|
||||
*/
|
||||
|
||||
/// <summary>
/// Implicit-length string compare flag: returns 1 if any character in b was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// Only bit 0 of imm8 is consulted (8-bit vs. 16-bit elements); a is not read.
/// The result is 1 when the computed string length of b is smaller than the element count of the vector.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrz(v128 a, v128 b, int imm8)
{
    bool wideElements = (imm8 & 1) != 0;
    if (wideElements)
    {
        int wordLength = ComputeStringLength<ushort>(&b.UShort0, 8);
        return wordLength < 8 ? 1 : 0;
    }

    int byteLength = ComputeStringLength<byte>(&b.Byte0, 16);
    return byteLength < 16 ? 1 : 0;
}
|
||||
|
||||
|
||||
/// <summary>
/// Implicit-length string compare flag: returns 1 if the mask produced by cmpistrm for the same arguments is non-zero, and 0 otherwise.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrc(v128 a, v128 b, int imm8)
{
    v128 mask = cmpistrm(a, b, imm8);

    // OR-ing the four 32-bit lanes yields zero exactly when every lane is zero.
    int anyBits = mask.SInt0 | mask.SInt1 | mask.SInt2 | mask.SInt3;
    return anyBits != 0 ? 1 : 0;
}
|
||||
/// <summary>
/// Implicit-length string compare flag: returns 1 if any character in a was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// Only bit 0 of imm8 is consulted (8-bit vs. 16-bit elements); b is not read.
/// The result is 1 when the computed string length of a is smaller than the element count of the vector.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistrs(v128 a, v128 b, int imm8)
{
    bool wideElements = (imm8 & 1) != 0;
    if (wideElements)
    {
        int wordLength = ComputeStringLength<ushort>(&a.UShort0, 8);
        return wordLength < 8 ? 1 : 0;
    }

    int byteLength = ComputeStringLength<byte>(&a.Byte0, 16);
    return byteLength < 16 ? 1 : 0;
}
|
||||
|
||||
/// <summary>
/// Implicit-length string compare flag: returns bit 0 of the resulting bit mask.
/// </summary>
/// <remarks>
/// String lengths of a and b are computed first (unsigned view, element width chosen by bit 0 of imm8),
/// then the comparison result is computed on the signed or unsigned view chosen by bit 1 of imm8.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Bit 0</returns>
[DebuggerStepThrough]
public static int cmpistro(v128 a, v128 b, int imm8)
{
    bool wideElements = (imm8 & 1) != 0;
    bool signedElements = (imm8 & 2) != 0;
    int resultMask;

    if (wideElements)
    {
        int lenA = ComputeStringLength<ushort>(&a.UShort0, 8);
        int lenB = ComputeStringLength<ushort>(&b.UShort0, 8);

        resultMask = signedElements
            ? ComputeStrCmpIntRes2<short>(&a.SShort0, lenA, &b.SShort0, lenB, 8, imm8, 0xff)
            : ComputeStrCmpIntRes2<ushort>(&a.UShort0, lenA, &b.UShort0, lenB, 8, imm8, 0xff);
    }
    else
    {
        int lenA = ComputeStringLength<byte>(&a.Byte0, 16);
        int lenB = ComputeStringLength<byte>(&b.Byte0, 16);

        resultMask = signedElements
            ? ComputeStrCmpIntRes2<sbyte>(&a.SByte0, lenA, &b.SByte0, lenB, 16, imm8, 0xffff)
            : ComputeStrCmpIntRes2<byte>(&a.Byte0, lenA, &b.Byte0, lenB, 16, imm8, 0xffff);
    }

    return resultMask & 1;
}
|
||||
|
||||
/// <summary>
/// Implicit-length string compare flag: returns 1 if b did not contain a null character and the resulting mask was zero, and 0 otherwise.
/// </summary>
/// <remarks>
/// Computed as bit 0 of NOT(cmpistrc OR cmpistrz) for the same arguments.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpistra(v128 a, v128 b, int imm8)
{
    int carry = cmpistrc(a, b, imm8);
    int zero = cmpistrz(a, b, imm8);

    // De Morgan: (~carry & ~zero) == ~(carry | zero)
    return ~(carry | zero) & 1;
}
|
||||
|
||||
/// <summary>
/// Explicit-length string compare flag: returns 1 if any character in b was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// Decided purely from lb: the result is 1 when lb does not exceed the highest element index of the vector
/// (15 for 8-bit elements, 7 for 16-bit elements; bit 0 of imm8 selects the width). a, la and b are not read.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrz(v128 a, int la, v128 b, int lb, int imm8)
{
    int elementBits = (imm8 & 1) == 1 ? 16 : 8;
    int highestIndex = (128 / elementBits) - 1;

    if (lb > highestIndex)
    {
        return 0;
    }

    return 1;
}
|
||||
|
||||
/// <summary>
/// Explicit-length string compare flag: returns 1 if the resulting mask was non-zero, and 0 otherwise.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose the element view passed to the comparison helper:
/// bit 0 set means eight 16-bit elements instead of sixteen 8-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrc(v128 a, int la, v128 b, int lb, int imm8)
{
    int resultMask;

    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            resultMask = ComputeStrCmpIntRes2<byte>(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff);
            break;
        case 2: // signed bytes
            resultMask = ComputeStrCmpIntRes2<sbyte>(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff);
            break;
        case 1: // unsigned words
            resultMask = ComputeStrCmpIntRes2<ushort>(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff);
            break;
        default: // signed words
            resultMask = ComputeStrCmpIntRes2<short>(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff);
            break;
    }

    return resultMask == 0 ? 0 : 1;
}
|
||||
/// <summary>
/// Explicit-length string compare flag: returns 1 if any character in a was null, and 0 otherwise.
/// </summary>
/// <remarks>
/// Decided purely from la: the result is 1 when la does not exceed the highest element index of the vector
/// (15 for 8-bit elements, 7 for 16-bit elements; bit 0 of imm8 selects the width). a, b and lb are not read.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestrs(v128 a, int la, v128 b, int lb, int imm8)
{
    int elementBits = (imm8 & 1) == 1 ? 16 : 8;
    int highestIndex = (128 / elementBits) - 1;

    if (la > highestIndex)
    {
        return 0;
    }

    return 1;
}
|
||||
|
||||
/// <summary>
/// Explicit-length string compare flag: returns bit 0 of the resulting bit mask.
/// </summary>
/// <remarks>
/// The low two bits of imm8 choose the element view passed to the comparison helper:
/// bit 0 set means eight 16-bit elements instead of sixteen 8-bit elements; bit 1 set selects the signed view.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Bit 0</returns>
[DebuggerStepThrough]
public static int cmpestro(v128 a, int la, v128 b, int lb, int imm8)
{
    int resultMask;

    switch (imm8 & 3)
    {
        case 0: // unsigned bytes
            resultMask = ComputeStrCmpIntRes2<byte>(&a.Byte0, la, &b.Byte0, lb, 16, imm8, 0xffff);
            break;
        case 2: // signed bytes
            resultMask = ComputeStrCmpIntRes2<sbyte>(&a.SByte0, la, &b.SByte0, lb, 16, imm8, 0xffff);
            break;
        case 1: // unsigned words
            resultMask = ComputeStrCmpIntRes2<ushort>(&a.UShort0, la, &b.UShort0, lb, 8, imm8, 0xff);
            break;
        default: // signed words
            resultMask = ComputeStrCmpIntRes2<short>(&a.SShort0, la, &b.SShort0, lb, 8, imm8, 0xff);
            break;
    }

    return resultMask & 1;
}
|
||||
/// <summary>
/// Explicit-length string compare flag: returns 1 if b did not contain a null character and the resulting mask was zero, and 0 otherwise.
/// </summary>
/// <remarks>
/// Computed as bit 0 of NOT(cmpestrc OR cmpestrz) for the same arguments.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="la">Length a</param>
/// <param name="lb">Length b</param>
/// <param name="imm8">Control</param>
/// <returns>Boolean value</returns>
[DebuggerStepThrough]
public static int cmpestra(v128 a, int la, v128 b, int lb, int imm8)
{
    int carry = cmpestrc(a, la, b, lb, imm8);
    int zero = cmpestrz(a, la, b, lb, imm8);

    // De Morgan: (~carry & ~zero) == ~(carry | zero)
    return ~(carry | zero) & 1;
}
|
||||
|
||||
/// <summary>
/// Lane-wise signed 64-bit greater-than: each 64-bit lane of the result is all ones (-1) when the lane of val1 is greater than the lane of val2, otherwise zero.
/// </summary>
/// <param name="val1">Vector a</param>
/// <param name="val2">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 cmpgt_epi64(v128 val1, v128 val2)
{
    // Lanes start out zero; only the "greater" lanes are overwritten.
    v128 mask = default(v128);

    if (val1.SLong0 > val2.SLong0)
    {
        mask.SLong0 = -1;
    }

    if (val1.SLong1 > val2.SLong1)
    {
        mask.SLong1 = -1;
    }

    return mask;
}
|
||||
|
||||
/*
|
||||
* Accumulate CRC32 (polynomial 0x11EDC6F41) value
|
||||
*/
|
||||
|
||||
/// <summary>
/// 256-entry lookup table used by crc32_u8, indexed by the low byte of (crc XOR input byte).
/// </summary>
private static readonly uint[] crctab =
{
    0x00000000U,0xF26B8303U,0xE13B70F7U,0x1350F3F4U,0xC79A971FU,0x35F1141CU,0x26A1E7E8U,0xD4CA64EBU,
    0x8AD958CFU,0x78B2DBCCU,0x6BE22838U,0x9989AB3BU,0x4D43CFD0U,0xBF284CD3U,0xAC78BF27U,0x5E133C24U,
    0x105EC76FU,0xE235446CU,0xF165B798U,0x030E349BU,0xD7C45070U,0x25AFD373U,0x36FF2087U,0xC494A384U,
    0x9A879FA0U,0x68EC1CA3U,0x7BBCEF57U,0x89D76C54U,0x5D1D08BFU,0xAF768BBCU,0xBC267848U,0x4E4DFB4BU,
    0x20BD8EDEU,0xD2D60DDDU,0xC186FE29U,0x33ED7D2AU,0xE72719C1U,0x154C9AC2U,0x061C6936U,0xF477EA35U,
    0xAA64D611U,0x580F5512U,0x4B5FA6E6U,0xB93425E5U,0x6DFE410EU,0x9F95C20DU,0x8CC531F9U,0x7EAEB2FAU,
    0x30E349B1U,0xC288CAB2U,0xD1D83946U,0x23B3BA45U,0xF779DEAEU,0x05125DADU,0x1642AE59U,0xE4292D5AU,
    0xBA3A117EU,0x4851927DU,0x5B016189U,0xA96AE28AU,0x7DA08661U,0x8FCB0562U,0x9C9BF696U,0x6EF07595U,
    0x417B1DBCU,0xB3109EBFU,0xA0406D4BU,0x522BEE48U,0x86E18AA3U,0x748A09A0U,0x67DAFA54U,0x95B17957U,
    0xCBA24573U,0x39C9C670U,0x2A993584U,0xD8F2B687U,0x0C38D26CU,0xFE53516FU,0xED03A29BU,0x1F682198U,
    0x5125DAD3U,0xA34E59D0U,0xB01EAA24U,0x42752927U,0x96BF4DCCU,0x64D4CECFU,0x77843D3BU,0x85EFBE38U,
    0xDBFC821CU,0x2997011FU,0x3AC7F2EBU,0xC8AC71E8U,0x1C661503U,0xEE0D9600U,0xFD5D65F4U,0x0F36E6F7U,
    0x61C69362U,0x93AD1061U,0x80FDE395U,0x72966096U,0xA65C047DU,0x5437877EU,0x4767748AU,0xB50CF789U,
    0xEB1FCBADU,0x197448AEU,0x0A24BB5AU,0xF84F3859U,0x2C855CB2U,0xDEEEDFB1U,0xCDBE2C45U,0x3FD5AF46U,
    0x7198540DU,0x83F3D70EU,0x90A324FAU,0x62C8A7F9U,0xB602C312U,0x44694011U,0x5739B3E5U,0xA55230E6U,
    0xFB410CC2U,0x092A8FC1U,0x1A7A7C35U,0xE811FF36U,0x3CDB9BDDU,0xCEB018DEU,0xDDE0EB2AU,0x2F8B6829U,
    0x82F63B78U,0x709DB87BU,0x63CD4B8FU,0x91A6C88CU,0x456CAC67U,0xB7072F64U,0xA457DC90U,0x563C5F93U,
    0x082F63B7U,0xFA44E0B4U,0xE9141340U,0x1B7F9043U,0xCFB5F4A8U,0x3DDE77ABU,0x2E8E845FU,0xDCE5075CU,
    0x92A8FC17U,0x60C37F14U,0x73938CE0U,0x81F80FE3U,0x55326B08U,0xA759E80BU,0xB4091BFFU,0x466298FCU,
    0x1871A4D8U,0xEA1A27DBU,0xF94AD42FU,0x0B21572CU,0xDFEB33C7U,0x2D80B0C4U,0x3ED04330U,0xCCBBC033U,
    0xA24BB5A6U,0x502036A5U,0x4370C551U,0xB11B4652U,0x65D122B9U,0x97BAA1BAU,0x84EA524EU,0x7681D14DU,
    0x2892ED69U,0xDAF96E6AU,0xC9A99D9EU,0x3BC21E9DU,0xEF087A76U,0x1D63F975U,0x0E330A81U,0xFC588982U,
    0xB21572C9U,0x407EF1CAU,0x532E023EU,0xA145813DU,0x758FE5D6U,0x87E466D5U,0x94B49521U,0x66DF1622U,
    0x38CC2A06U,0xCAA7A905U,0xD9F75AF1U,0x2B9CD9F2U,0xFF56BD19U,0x0D3D3E1AU,0x1E6DCDEEU,0xEC064EEDU,
    0xC38D26C4U,0x31E6A5C7U,0x22B65633U,0xD0DDD530U,0x0417B1DBU,0xF67C32D8U,0xE52CC12CU,0x1747422FU,
    0x49547E0BU,0xBB3FFD08U,0xA86F0EFCU,0x5A048DFFU,0x8ECEE914U,0x7CA56A17U,0x6FF599E3U,0x9D9E1AE0U,
    0xD3D3E1ABU,0x21B862A8U,0x32E8915CU,0xC083125FU,0x144976B4U,0xE622F5B7U,0xF5720643U,0x07198540U,
    0x590AB964U,0xAB613A67U,0xB831C993U,0x4A5A4A90U,0x9E902E7BU,0x6CFBAD78U,0x7FAB5E8CU,0x8DC0DD8FU,
    0xE330A81AU,0x115B2B19U,0x020BD8EDU,0xF0605BEEU,0x24AA3F05U,0xD6C1BC06U,0xC5914FF2U,0x37FACCF1U,
    0x69E9F0D5U,0x9B8273D6U,0x88D28022U,0x7AB90321U,0xAE7367CAU,0x5C18E4C9U,0x4F48173DU,0xBD23943EU,
    0xF36E6F75U,0x0105EC76U,0x12551F82U,0xE03E9C81U,0x34F4F86AU,0xC69F7B69U,0xD5CF889DU,0x27A40B9EU,
    0x79B737BAU,0x8BDCB4B9U,0x988C474DU,0x6AE7C44EU,0xBE2DA0A5U,0x4C4623A6U,0x5F16D052U,0xAD7D5351U,
};
|
||||
|
||||
/// <summary>
/// Starting with the initial value in crc, accumulates a CRC32 value for unsigned 32-bit integer v.
/// </summary>
/// <remarks>
/// The four bytes of v are folded in one at a time through crc32_u8, lowest byte first.
/// </remarks>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 32-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u32(uint crc, uint v)
{
    for (int shift = 0; shift < 32; shift += 8)
    {
        crc = crc32_u8(crc, (byte)(v >> shift));
    }

    return crc;
}
|
||||
|
||||
/// <summary>
/// Starting with the initial value in crc, accumulates a CRC32 value for unsigned 8-bit integer v.
/// </summary>
/// <remarks>
/// Single table-driven step: the low byte of (crc XOR v) selects a crctab entry, which is XOR-ed with crc shifted right by 8.
/// </remarks>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 8-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u8(uint crc, byte v)
{
    uint tableIndex = (crc ^ v) & 0xff;
    uint shifted = crc >> 8;
    return shifted ^ crctab[tableIndex];
}
|
||||
|
||||
/// <summary>
/// Starting with the initial value in crc, accumulates a CRC32 value for unsigned 16-bit integer v.
/// </summary>
/// <remarks>
/// The low byte of v is folded in first, then the high byte, both through crc32_u8.
/// </remarks>
/// <param name="crc">Initial value</param>
/// <param name="v">Unsigned 16-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static uint crc32_u16(uint crc, ushort v)
{
    uint afterLowByte = crc32_u8(crc, (byte)v);
    return crc32_u8(afterLowByte, (byte)(v >> 8));
}
|
||||
|
||||
/// <summary>
/// Starting with the initial value in crc, accumulates a CRC32 value for the 64-bit integer v.
/// Obsolete overload taking a signed value; it reinterprets v as unsigned and forwards to the ulong overload.
/// </summary>
/// <param name="crc_ul">Initial value</param>
/// <param name="v">Signed 64-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
[Obsolete("Use the ulong version of this intrinsic instead.")]
public static ulong crc32_u64(ulong crc_ul, long v)
{
    ulong unsignedValue = (ulong)v;
    return crc32_u64(crc_ul, unsignedValue);
}
|
||||
|
||||
/// <summary>
/// Starting with the initial value in crc, accumulates a CRC32 value for unsigned 64-bit integer v.
/// </summary>
/// <remarks>
/// Only the low 32 bits of crc_ul are used as the starting value. The eight bytes of v are folded in
/// through crc32_u8, lowest byte first, and the 32-bit result is returned widened to 64 bits.
/// </remarks>
/// <param name="crc_ul">Initial value</param>
/// <param name="v">Unsigned 64-bit integer</param>
/// <returns>Result</returns>
[DebuggerStepThrough]
public static ulong crc32_u64(ulong crc_ul, ulong v)
{
    uint running = (uint)crc_ul;

    for (int shift = 0; shift < 64; shift += 8)
    {
        running = crc32_u8(running, (byte)(v >> shift));
    }

    return running;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 34483fa8e8413ba9b6e02809c5adfdd3
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
@@ -0,0 +1,371 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Unity.Burst.Intrinsics
|
||||
{
|
||||
public unsafe static partial class X86
|
||||
{
|
||||
/// <summary>
|
||||
/// SSSE3 intrinsics
|
||||
/// </summary>
|
||||
public static class Ssse3
|
||||
{
|
||||
/// <summary>
/// Evaluates to true at compile time if SSSE3 intrinsics are supported.
/// This managed implementation always returns false.
/// </summary>
public static bool IsSsse3Supported => false;
|
||||
|
||||
// _mm_abs_epi8
/// <summary>
/// Compute the absolute value of packed 8-bit integers in "a", and store the unsigned results in "dst".
/// </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi8(v128 a)
{
    v128 result = default(v128);
    sbyte* input = &a.SByte0;
    byte* output = &result.Byte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        // Widen to int first so that -128 negates to 128 and then fits the unsigned byte.
        int value = input[lane];
        output[lane] = (byte)(value < 0 ? -value : value);
    }

    return result;
}
|
||||
|
||||
// _mm_abs_epi16
/// <summary>
/// Compute the absolute value of packed 16-bit integers in "a", and store the unsigned results in "dst".
/// </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi16(v128 a)
{
    v128 result = default(v128);
    short* input = &a.SShort0;
    ushort* output = &result.UShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        // Widen to int first so that -32768 negates to 32768 and then fits the unsigned word.
        int value = input[lane];
        output[lane] = (ushort)(value < 0 ? -value : value);
    }

    return result;
}
|
||||
|
||||
// _mm_abs_epi32
/// <summary>
/// Compute the absolute value of packed 32-bit integers in "a", and store the unsigned results in "dst".
/// </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 abs_epi32(v128 a)
{
    v128 result = default(v128);
    int* input = &a.SInt0;
    uint* output = &result.UInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // Widen to long first so that int.MinValue negates without overflow.
        long value = input[lane];
        output[lane] = (uint)(value < 0 ? -value : value);
    }

    return result;
}
|
||||
|
||||
// _mm_shuffle_epi8
/// <summary>
/// Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".
/// </summary>
/// <remarks>
/// For each control byte: if its top bit (0x80) is set the output byte is zero, otherwise its low four bits index into "a".
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 shuffle_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    byte* output = &result.Byte0;
    byte* source = &a.Byte0;
    byte* control = &b.Byte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        byte selector = control[lane];
        bool zeroLane = (selector & 0x80) != 0;
        output[lane] = zeroLane ? (byte)0x00 : source[selector & 15];
    }

    return result;
}
|
||||
|
||||
|
||||
// _mm_alignr_epi8
/// <summary>
/// Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "count" bytes, and store the low 16 bytes in "dst".
/// </summary>
/// <remarks>
/// "a" forms the upper 16 bytes of the temporary and "b" the lower 16 bytes, so a count of 0 yields "b"
/// and a count of 16 yields "a". Bytes shifted in from beyond the 32-byte temporary are zero, so counts
/// of 32 or more produce an all-zero vector. Only the low 8 bits of "count" are used, matching the
/// 8-bit immediate of the underlying instruction.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="count">Byte count</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 alignr_epi8(v128 a, v128 b, int count)
{
    var dst = default(v128);
    byte* dptr = &dst.Byte0;
    byte* aptr = &a.Byte0;
    byte* bptr = &b.Byte0;

    // The instruction takes an 8-bit immediate; masking also keeps the indexing below in bounds.
    int shift = count & 0xff;

    for (int i = 0; i < 16; ++i)
    {
        // Index into the conceptual 32-byte value { b[0..15], a[0..15] }.
        int src = i + shift;

        if (src < 16)
        {
            dptr[i] = bptr[src];
        }
        else if (src < 32)
        {
            dptr[i] = aptr[src - 16];
        }
        // else: past the end of the temporary; dst was zero-initialized.
    }

    return dst;
}
|
||||
|
||||
// _mm_hadd_epi16
/// <summary>
/// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
/// </summary>
/// <remarks>
/// The four pair sums from "a" fill the low four lanes and the four from "b" fill the high four lanes; sums are truncated to 16 bits.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        output[pair] = (short)(left[odd] + left[even]);
        output[pair + 4] = (short)(right[odd] + right[even]);
    }

    return result;
}
|
||||
|
||||
// _mm_hadds_epi16
/// <summary>
/// Horizontally add adjacent pairs of 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
/// </summary>
/// <remarks>
/// The four pair sums from "a" fill the low four lanes and the four from "b" fill the high four lanes;
/// each 32-bit sum is passed through Saturate_To_Int16.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadds_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        int leftSum = left[odd] + left[even];
        int rightSum = right[odd] + right[even];

        output[pair] = Saturate_To_Int16(leftSum);
        output[pair + 4] = Saturate_To_Int16(rightSum);
    }

    return result;
}
|
||||
|
||||
// _mm_hadd_epi32
/// <summary>
/// Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
/// </summary>
/// <remarks>
/// Lanes 0-1 of the result hold the two pair sums of "a", lanes 2-3 the two pair sums of "b".
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hadd_epi32(v128 a, v128 b)
{
    int lowPairA = a.SInt0 + a.SInt1;
    int highPairA = a.SInt2 + a.SInt3;
    int lowPairB = b.SInt0 + b.SInt1;
    int highPairB = b.SInt2 + b.SInt3;

    v128 result = default(v128);
    result.SInt0 = lowPairA;
    result.SInt1 = highPairA;
    result.SInt2 = lowPairB;
    result.SInt3 = highPairB;
    return result;
}
|
||||
|
||||
// _mm_hsub_epi16
/// <summary>
/// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
/// </summary>
/// <remarks>
/// Each result is (even element - odd element) of a pair, truncated to 16 bits; results from "a" fill the low four lanes, results from "b" the high four.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        output[pair] = (short)(left[even] - left[odd]);
        output[pair + 4] = (short)(right[even] - right[odd]);
    }

    return result;
}
|
||||
|
||||
// _mm_hsubs_epi16
/// <summary>
/// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
/// </summary>
/// <remarks>
/// Each difference is (even element - odd element) of a pair, passed through Saturate_To_Int16;
/// results from "a" fill the low four lanes, results from "b" the high four.
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        int leftDiff = left[even] - left[odd];
        int rightDiff = right[even] - right[odd];

        output[pair] = Saturate_To_Int16(leftDiff);
        output[pair + 4] = Saturate_To_Int16(rightDiff);
    }

    return result;
}
|
||||
|
||||
// _mm_hsub_epi32
/// <summary>
/// Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
/// </summary>
/// <remarks>
/// Each result is (even element - odd element) of a pair; lanes 0-1 come from "a", lanes 2-3 from "b".
/// </remarks>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 hsub_epi32(v128 a, v128 b)
{
    int lowPairA = a.SInt0 - a.SInt1;
    int highPairA = a.SInt2 - a.SInt3;
    int lowPairB = b.SInt0 - b.SInt1;
    int highPairB = b.SInt2 - b.SInt3;

    v128 result = default(v128);
    result.SInt0 = lowPairA;
    result.SInt1 = highPairA;
    result.SInt2 = lowPairB;
    result.SInt3 = highPairB;
    return result;
}
|
||||
|
||||
// _mm_maddubs_epi16
/// <summary>
/// Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers.
/// Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 maddubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    byte* unsignedBytes = &a.Byte0;
    sbyte* signedBytes = &b.SByte0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int even = 2 * lane;
        int odd = even + 1;

        // Products and their sum are computed in 32-bit arithmetic, then saturated to 16 bits.
        int oddProduct = unsignedBytes[odd] * signedBytes[odd];
        int evenProduct = unsignedBytes[even] * signedBytes[even];

        output[lane] = Saturate_To_Int16(oddProduct + evenProduct);
    }

    return result;
}
|
||||
|
||||
|
||||
// _mm_mulhrs_epi16
/// <summary>
/// Multiply packed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers.
/// Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 mulhrs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int product = left[lane] * right[lane];

        // Keep the top 18 bits, add the rounding bit, then drop the lowest bit.
        int rounded = ((product >> 14) + 1) >> 1;
        output[lane] = (short)rounded;
    }

    return result;
}
|
||||
|
||||
// _mm_sign_epi8
/// <summary>
/// Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst".
/// Elements in "dst" are zeroed out when the corresponding element in "b" is zero; otherwise the element of "a" is copied unchanged.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* output = &result.SByte0;
    sbyte* values = &a.SByte0;
    sbyte* controls = &b.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte control = controls[lane];
        sbyte value = values[lane];

        output[lane] = control < 0
            ? (sbyte)-value
            : (control == 0 ? (sbyte)0 : value);
    }

    return result;
}
|
||||
|
||||
// _mm_sign_epi16
/// <summary>
/// Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst".
/// Elements in "dst" are zeroed out when the corresponding element in "b" is zero; otherwise the element of "a" is copied unchanged.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* output = &result.SShort0;
    short* values = &a.SShort0;
    short* controls = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        short control = controls[lane];
        short value = values[lane];

        output[lane] = control < 0
            ? (short)-value
            : (control == 0 ? (short)0 : value);
    }

    return result;
}
|
||||
|
||||
// _mm_sign_epi32
/// <summary>
/// Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst".
/// Elements in "dst" are zeroed out when the corresponding element in "b" is zero; otherwise the element of "a" is copied unchanged.
/// </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 sign_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* output = &result.SInt0;
    int* values = &a.SInt0;
    int* controls = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        int control = controls[lane];
        int value = values[lane];

        output[lane] = control < 0
            ? -value
            : (control == 0 ? 0 : value);
    }

    return result;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
fileFormatVersion: 2
|
||||
guid: 0904d56406a93977ad6ef642b548155d
|
||||
MonoImporter:
|
||||
serializedVersion: 2
|
||||
defaultReferences: []
|
||||
executionOrder: 0
|
||||
icon: {instanceID: 0}
|
||||
externalObjects: {}
|
||||
userData:
|
||||
assetBundleName:
|
||||
assetBundleVariant:
|
||||
Reference in New Issue
Block a user