Skip to content

Commit c5da1c4

Browse files
committed
fix BroadcastScalarToVector128/256 and simplify MoveMask
1 parent dacab04 commit c5da1c4

File tree

7 files changed

+307
-43
lines changed

7 files changed

+307
-43
lines changed

src/jit/gentree.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17894,11 +17894,11 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad()
1789417894
// Avx2.BroadcastScalarToVector128/256 have vector and pointer overloads both, e.g.,
1789517895
// Vector128<byte> BroadcastScalarToVector128(Vector128<byte> value)
1789617896
// Vector128<byte> BroadcastScalarToVector128(byte* source)
17897-
// So, we need to check the argument's type is memory-reference (TYP_I_IMPL) or not
17897+
// So, we treat the node as a memory load unless the argument's type is Vector128 (TYP_SIMD16)
1789817898
assert(HWIntrinsicInfo::lookupNumArgs(this) == 1);
1789917899
return (gtHWIntrinsicId == NI_AVX2_BroadcastScalarToVector128 ||
1790017900
gtHWIntrinsicId == NI_AVX2_BroadcastScalarToVector256) &&
17901-
gtOp.gtOp1->TypeGet() == TYP_I_IMPL;
17901+
gtOp.gtOp1->TypeGet() != TYP_SIMD16;
1790217902
}
1790317903
else if (category == HW_Category_IMM)
1790417904
{

src/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,16 +1520,6 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
15201520
break;
15211521
}
15221522

1523-
case NI_SSE_MoveMask:
1524-
{
1525-
assert(baseType == TYP_FLOAT);
1526-
assert(op2 == nullptr);
1527-
1528-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
1529-
emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
1530-
break;
1531-
}
1532-
15331523
case NI_SSE_Prefetch0:
15341524
case NI_SSE_Prefetch1:
15351525
case NI_SSE_Prefetch2:
@@ -1749,16 +1739,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
17491739
break;
17501740
}
17511741

1752-
case NI_SSE2_MoveMask:
1753-
{
1754-
assert(op2 == nullptr);
1755-
assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
1756-
1757-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
1758-
emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
1759-
break;
1760-
}
1761-
17621742
case NI_SSE2_StoreNonTemporal:
17631743
case NI_SSE2_X64_StoreNonTemporal:
17641744
{

src/jit/hwintrinsiclistxarch.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ HARDWARE_INTRINSIC(SSE_Min, "Min",
125125
HARDWARE_INTRINSIC(SSE_MinScalar, "MinScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
126126
HARDWARE_INTRINSIC(SSE_MoveHighToLow, "MoveHighToLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
127127
HARDWARE_INTRINSIC(SSE_MoveLowToHigh, "MoveLowToHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
128-
HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
128+
HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
129129
HARDWARE_INTRINSIC(SSE_MoveScalar, "MoveScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
130130
HARDWARE_INTRINSIC(SSE_Multiply, "Multiply", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
131131
HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
@@ -239,7 +239,7 @@ HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence
239239
HARDWARE_INTRINSIC(SSE2_MaxScalar, "MaxScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
240240
HARDWARE_INTRINSIC(SSE2_Min, "Min", SSE2, -1, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
241241
HARDWARE_INTRINSIC(SSE2_MinScalar, "MinScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
242-
HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
242+
HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
243243
HARDWARE_INTRINSIC(SSE2_MoveScalar, "MoveScalar", SSE2, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
244244
HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
245245
HARDWARE_INTRINSIC(SSE2_MultiplyHigh, "MultiplyHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)

src/jit/hwintrinsicxarch.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,14 +1608,6 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
16081608

16091609
switch (intrinsic)
16101610
{
1611-
case NI_SSE_MoveMask:
1612-
assert(sig->numArgs == 1);
1613-
assert(JITtype2varType(sig->retType) == TYP_INT);
1614-
assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
1615-
op1 = impSIMDPopStack(TYP_SIMD16);
1616-
retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
1617-
break;
1618-
16191611
case NI_SSE_Prefetch0:
16201612
case NI_SSE_Prefetch1:
16211613
case NI_SSE_Prefetch2:
@@ -1691,17 +1683,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
16911683
break;
16921684
}
16931685

1694-
case NI_SSE2_MoveMask:
1695-
{
1696-
assert(sig->numArgs == 1);
1697-
retType = JITtype2varType(sig->retType);
1698-
assert(retType == TYP_INT);
1699-
op1 = impSIMDPopStack(TYP_SIMD16);
1700-
baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1701-
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
1702-
break;
1703-
}
1704-
17051686
case NI_SSE2_StoreNonTemporal:
17061687
{
17071688
assert(sig->numArgs == 2);
GitHub_22815.cs

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
using System;
2+
using System.Runtime.Intrinsics.X86;
3+
using System.Runtime.Intrinsics;
4+
5+
namespace GitHub_22815
6+
{
7+
class Program
8+
{
9+
const int Pass = 100;
10+
const int Fail = 0;
11+
12+
static int Main(string[] args)
13+
{
14+
bool result = true;
15+
if (Avx2.IsSupported)
16+
{
17+
result = test128((byte)1) && test128((sbyte)1) && test128((short)1) &&
18+
test128((ushort)1) && test128((int)1) && test128((uint)1) &&
19+
test128((long)1) && test128((ulong)1) &&
20+
test256((byte)1) && test256((sbyte)1) && test256((short)1) &&
21+
test256((ushort)1) && test256((int)1) && test256((uint)1) &&
22+
test256((long)1) && test256((ulong)1);
23+
}
24+
return result ? Pass : Fail;
25+
}
26+
27+
static unsafe bool test128(byte v)
28+
{
29+
var vec = Avx2.BroadcastScalarToVector128(&v);
30+
for (int i = 0; i < Vector128<byte>.Count; i++)
31+
{
32+
if (vec.GetElement(i) != v)
33+
{
34+
return false;
35+
}
36+
}
37+
return true;
38+
}
39+
40+
static unsafe bool test128(sbyte v)
41+
{
42+
var vec = Avx2.BroadcastScalarToVector128(&v);
43+
for (int i = 0; i < Vector128<sbyte>.Count; i++)
44+
{
45+
if (vec.GetElement(i) != v)
46+
{
47+
return false;
48+
}
49+
}
50+
return true;
51+
}
52+
53+
static unsafe bool test128(short v)
54+
{
55+
var vec = Avx2.BroadcastScalarToVector128(&v);
56+
for (int i = 0; i < Vector128<short>.Count; i++)
57+
{
58+
if (vec.GetElement(i) != v)
59+
{
60+
return false;
61+
}
62+
}
63+
return true;
64+
}
65+
66+
static unsafe bool test128(ushort v)
67+
{
68+
var vec = Avx2.BroadcastScalarToVector128(&v);
69+
for (int i = 0; i < Vector128<ushort>.Count; i++)
70+
{
71+
if (vec.GetElement(i) != v)
72+
{
73+
return false;
74+
}
75+
}
76+
return true;
77+
}
78+
79+
static unsafe bool test128(int v)
80+
{
81+
var vec = Avx2.BroadcastScalarToVector128(&v);
82+
for (int i = 0; i < Vector128<int>.Count; i++)
83+
{
84+
if (vec.GetElement(i) != v)
85+
{
86+
return false;
87+
}
88+
}
89+
return true;
90+
}
91+
92+
static unsafe bool test128(uint v)
93+
{
94+
var vec = Avx2.BroadcastScalarToVector128(&v);
95+
for (int i = 0; i < Vector128<uint>.Count; i++)
96+
{
97+
if (vec.GetElement(i) != v)
98+
{
99+
return false;
100+
}
101+
}
102+
return true;
103+
}
104+
105+
static unsafe bool test128(long v)
106+
{
107+
var vec = Avx2.BroadcastScalarToVector128(&v);
108+
for (int i = 0; i < Vector128<long>.Count; i++)
109+
{
110+
if (vec.GetElement(i) != v)
111+
{
112+
return false;
113+
}
114+
}
115+
return true;
116+
}
117+
118+
static unsafe bool test128(ulong v)
119+
{
120+
var vec = Avx2.BroadcastScalarToVector128(&v);
121+
for (int i = 0; i < Vector128<ulong>.Count; i++)
122+
{
123+
if (vec.GetElement(i) != v)
124+
{
125+
return false;
126+
}
127+
}
128+
return true;
129+
}
130+
131+
static unsafe bool test256(byte v)
132+
{
133+
var vec = Avx2.BroadcastScalarToVector256(&v);
134+
for (int i = 0; i < Vector256<byte>.Count; i++)
135+
{
136+
if (vec.GetElement(i) != v)
137+
{
138+
return false;
139+
}
140+
}
141+
return true;
142+
}
143+
144+
static unsafe bool test256(sbyte v)
145+
{
146+
var vec = Avx2.BroadcastScalarToVector256(&v);
147+
for (int i = 0; i < Vector256<sbyte>.Count; i++)
148+
{
149+
if (vec.GetElement(i) != v)
150+
{
151+
return false;
152+
}
153+
}
154+
return true;
155+
}
156+
157+
static unsafe bool test256(short v)
158+
{
159+
var vec = Avx2.BroadcastScalarToVector256(&v);
160+
for (int i = 0; i < Vector256<short>.Count; i++)
161+
{
162+
if (vec.GetElement(i) != v)
163+
{
164+
return false;
165+
}
166+
}
167+
return true;
168+
}
169+
170+
static unsafe bool test256(ushort v)
171+
{
172+
var vec = Avx2.BroadcastScalarToVector256(&v);
173+
for (int i = 0; i < Vector256<ushort>.Count; i++)
174+
{
175+
if (vec.GetElement(i) != v)
176+
{
177+
return false;
178+
}
179+
}
180+
return true;
181+
}
182+
183+
static unsafe bool test256(int v)
184+
{
185+
var vec = Avx2.BroadcastScalarToVector256(&v);
186+
for (int i = 0; i < Vector256<int>.Count; i++)
187+
{
188+
if (vec.GetElement(i) != v)
189+
{
190+
return false;
191+
}
192+
}
193+
return true;
194+
}
195+
196+
static unsafe bool test256(uint v)
197+
{
198+
var vec = Avx2.BroadcastScalarToVector256(&v);
199+
for (int i = 0; i < Vector256<uint>.Count; i++)
200+
{
201+
if (vec.GetElement(i) != v)
202+
{
203+
return false;
204+
}
205+
}
206+
return true;
207+
}
208+
209+
static unsafe bool test256(long v)
210+
{
211+
var vec = Avx2.BroadcastScalarToVector256(&v);
212+
for (int i = 0; i < Vector256<long>.Count; i++)
213+
{
214+
if (vec.GetElement(i) != v)
215+
{
216+
return false;
217+
}
218+
}
219+
return true;
220+
}
221+
222+
static unsafe bool test256(ulong v)
223+
{
224+
var vec = Avx2.BroadcastScalarToVector256(&v);
225+
for (int i = 0; i < Vector256<ulong>.Count; i++)
226+
{
227+
if (vec.GetElement(i) != v)
228+
{
229+
return false;
230+
}
231+
}
232+
return true;
233+
}
234+
}
235+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
4+
<PropertyGroup>
5+
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
6+
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
7+
<SchemaVersion>2.0</SchemaVersion>
8+
<ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
9+
<OutputType>Exe</OutputType>
10+
<ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
11+
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
12+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
13+
</PropertyGroup>
14+
<!-- Default configurations to help VS understand the configurations -->
15+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
16+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
17+
<ItemGroup>
18+
<CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
19+
<Visible>False</Visible>
20+
</CodeAnalysisDependentAssemblyPaths>
21+
</ItemGroup>
22+
<PropertyGroup>
23+
<DebugType>Embedded</DebugType>
24+
<Optimize></Optimize>
25+
</PropertyGroup>
26+
<ItemGroup>
27+
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
28+
</ItemGroup>
29+
<ItemGroup>
30+
<Compile Include="GitHub_22815.cs" />
31+
</ItemGroup>
32+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
33+
<PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
34+
</Project>

0 commit comments

Comments
 (0)