//===---- SparcInstrVIS.td - Visual Instruction Set extensions (VIS) -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction formats, definitions and patterns needed for
// VIS, VIS II, VIS III instructions on SPARC.
//===----------------------------------------------------------------------===//

// Shared base of the VIS records: an F3_3 whose first two arguments are fixed
// to 0b10 and 0b110110. The 9-bit opf value, operand dags and assembly string
// come from the user; the pattern list is always empty.
class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr>
    : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, []>;

// Two sources ($rs1, $rs2) and one destination ($rd), all taken from the same
// register class RC (DFPRegs when not overridden).
class VISInst<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
    : VISInstFormat<opfval,
                    (outs RC:$rd),
                    (ins RC:$rs1, RC:$rs2),
                    OpcStr # " $rs1, $rs2, $rd">;

// Two DFPRegs sources producing a result in an integer (I64Regs) destination.
class VISInstID<bits<9> opfval, string OpcStr>
    : VISInstFormat<opfval,
                    (outs I64Regs:$rd),
                    (ins DFPRegs:$rs1, DFPRegs:$rs2),
                    OpcStr # " $rs1, $rs2, $rd">;

// No operands at all: rd, rs1 and rs2 are pinned to zero and the caller
// passes the complete assembly string.
let rd = 0, rs1 = 0, rs2 = 0 in
class VISInst0<bits<9> opfval, string asmstr>
    : VISInstFormat<opfval, (outs), (ins), asmstr>;

// Only $rs1 and $rd are operands; rs2 is pinned to zero.
let rs2 = 0 in
class VISInst1<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
    : VISInstFormat<opfval,
                    (outs RC:$rd),
                    (ins RC:$rs1),
                    OpcStr # " $rs1, $rd">;

// Only $rs2 and $rd are operands; rs1 is pinned to zero.
let rs1 = 0 in
class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
    : VISInstFormat<opfval,
                    (outs RC:$rd),
                    (ins RC:$rs2),
                    OpcStr # " $rs2, $rd">;

// For VIS Instructions with only rd operand.
let rs1 = 0, rs2 = 0 in
class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
    : VISInstFormat<opfval,
                    (outs RC:$rd),
                    (ins),
                    OpcStr # " $rd">;

// VIS 1 Instructions
let Predicates = [HasVIS] in {

// fpadd* / fpsub*: the "s"-suffixed records use FPRegs, the others keep the
// DFPRegs default of VISInst.
def FPADD16  : VISInst<0b001010000, "fpadd16">;
def FPADD16S : VISInst<0b001010001, "fpadd16s", FPRegs>;
def FPADD32  : VISInst<0b001010010, "fpadd32">;
def FPADD32S : VISInst<0b001010011, "fpadd32s", FPRegs>;
def FPSUB16  : VISInst<0b001010100, "fpsub16">;
def FPSUB16S : VISInst<0b001010101, "fpsub16s", FPRegs>;
def FPSUB32  : VISInst<0b001010110, "fpsub32">;
def FPSUB32S : VISInst<0b001010111, "fpsub32s", FPRegs>;

// fpack* / fexpand / fpmerge. FPACKFIX and FEXPAND mix FPRegs and DFPRegs, so
// they are spelled out on VISInstFormat with rs1 pinned to zero.
def FPACK16 : VISInst2<0b000111011, "fpack16">;
def FPACK32 : VISInst<0b000111010, "fpack32">;
let rs1 = 0 in {
  def FPACKFIX : VISInstFormat<0b000111101,
                               (outs FPRegs:$rd),
                               (ins DFPRegs:$rs2),
                               "fpackfix $rs2, $rd">;
  def FEXPAND  : VISInstFormat<0b001001101,
                               (outs DFPRegs:$rd),
                               (ins FPRegs:$rs2),
                               "fexpand $rs2, $rd">;
}
def FPMERGE : VISInstFormat<0b001001011,
                            (outs DFPRegs:$rd),
                            (ins FPRegs:$rs1, FPRegs:$rs2),
                            "fpmerge $rs1, $rs2, $rd">;

// fmul8* / fmuld8*: records whose source and destination register classes
// differ are written directly on VISInstFormat.
def FMUL8X16    : VISInstFormat<0b000110001,
                                (outs DFPRegs:$rd),
                                (ins FPRegs:$rs1, DFPRegs:$rs2),
                                "fmul8x16 $rs1, $rs2, $rd">;
def FMUL8X16AU  : VISInstFormat<0b000110011,
                                (outs DFPRegs:$rd),
                                (ins FPRegs:$rs1, FPRegs:$rs2),
                                "fmul8x16au $rs1, $rs2, $rd">;
def FMUL8X16AL  : VISInstFormat<0b000110101,
                                (outs DFPRegs:$rd),
                                (ins FPRegs:$rs1, FPRegs:$rs2),
                                "fmul8x16al $rs1, $rs2, $rd">;
def FMUL8SUX16  : VISInst<0b000110110, "fmul8sux16">;
def FMUL8ULX16  : VISInst<0b000110111, "fmul8ulx16">;
def FMULD8SUX16 : VISInstFormat<0b000111000,
                                (outs DFPRegs:$rd),
                                (ins FPRegs:$rs1, FPRegs:$rs2),
                                "fmuld8sux16 $rs1, $rs2, $rd">;
def FMULD8ULX16 : VISInstFormat<0b000111001,
                                (outs DFPRegs:$rd),
                                (ins FPRegs:$rs1, FPRegs:$rs2),
                                "fmuld8ulx16 $rs1, $rs2, $rd">;

// alignaddr* operate on I64Regs; faligndata on the default DFPRegs.
// NOTE(review): the record is named FALIGNADATA although the mnemonic is
// "faligndata"; the name is kept because other files may refer to it.
def ALIGNADDR   : VISInst<0b000011000, "alignaddr", I64Regs>;
def ALIGNADDRL  : VISInst<0b000011010, "alignaddrl", I64Regs>;
def FALIGNADATA : VISInst<0b001001000, "faligndata">;

// Destination-only records (VISInstD).
def FZERO  : VISInstD<0b001100000, "fzero">;
def FZEROS : VISInstD<0b001100001, "fzeros", FPRegs>;
def FONE   : VISInstD<0b001111110, "fone">;
def FONES  : VISInstD<0b001111111, "fones", FPRegs>;

// One-source records: the *1 forms read $rs1 (VISInst1), the *2 forms read
// $rs2 (VISInst2).
def FSRC1  : VISInst1<0b001110100, "fsrc1">;
def FSRC1S : VISInst1<0b001110101, "fsrc1s", FPRegs>;
def FSRC2  : VISInst2<0b001111000, "fsrc2">;
def FSRC2S : VISInst2<0b001111001, "fsrc2s", FPRegs>;
def FNOT1  : VISInst1<0b001101010, "fnot1">;
def FNOT1S : VISInst1<0b001101011, "fnot1s", FPRegs>;
def FNOT2  : VISInst2<0b001100110, "fnot2">;
def FNOT2S : VISInst2<0b001100111, "fnot2s", FPRegs>;

// Two-source for/fnor/fand/fnand/fxor/fxnor records and their FPRegs variants.
def FOR    : VISInst<0b001111100, "for">;
def FORS   : VISInst<0b001111101, "fors", FPRegs>;
def FNOR   : VISInst<0b001100010, "fnor">;
def FNORS  : VISInst<0b001100011, "fnors", FPRegs>;
def FAND   : VISInst<0b001110000, "fand">;
def FANDS  : VISInst<0b001110001, "fands", FPRegs>;
def FNAND  : VISInst<0b001101110, "fnand">;
def FNANDS : VISInst<0b001101111, "fnands", FPRegs>;
def FXOR   : VISInst<0b001101100, "fxor">;
def FXORS  : VISInst<0b001101101, "fxors", FPRegs>;
def FXNOR  : VISInst<0b001110010, "fxnor">;
def FXNORS : VISInst<0b001110011, "fxnors", FPRegs>;

// fornot* / fandnot* records and their FPRegs variants.
def FORNOT1   : VISInst<0b001111010, "fornot1">;
def FORNOT1S  : VISInst<0b001111011, "fornot1s", FPRegs>;
def FORNOT2   : VISInst<0b001110110, "fornot2">;
def FORNOT2S  : VISInst<0b001110111, "fornot2s", FPRegs>;
def FANDNOT1  : VISInst<0b001101000, "fandnot1">;
def FANDNOT1S : VISInst<0b001101001, "fandnot1s", FPRegs>;
def FANDNOT2  : VISInst<0b001100100, "fandnot2">;
def FANDNOT2S : VISInst<0b001100101, "fandnot2s", FPRegs>;

// fcmp* records: DFPRegs sources, I64Regs destination (VISInstID).
def FCMPGT16 : VISInstID<0b000101000, "fcmpgt16">;
def FCMPGT32 : VISInstID<0b000101100, "fcmpgt32">;
def FCMPLE16 : VISInstID<0b000100000, "fcmple16">;
def FCMPLE32 : VISInstID<0b000100100, "fcmple32">;
def FCMPNE16 : VISInstID<0b000100010, "fcmpne16">;
def FCMPNE32 : VISInstID<0b000100110, "fcmpne32">;
def FCMPEQ16 : VISInstID<0b000101010, "fcmpeq16">;
def FCMPEQ32 : VISInstID<0b000101110, "fcmpeq32">;


// edge* records, all on I64Regs.
def EDGE8   : VISInst<0b000000000, "edge8", I64Regs>;
def EDGE8L  : VISInst<0b000000010, "edge8l", I64Regs>;
def EDGE16  : VISInst<0b000000100, "edge16", I64Regs>;
def EDGE16L : VISInst<0b000000110, "edge16l", I64Regs>;
def EDGE32  : VISInst<0b000001000, "edge32", I64Regs>;
def EDGE32L : VISInst<0b000001010, "edge32l", I64Regs>;

def PDIST : VISInst<0b000111110, "pdist">;

// array* records, all on I64Regs.
def ARRAY8  : VISInst<0b000010000, "array8", I64Regs>;
def ARRAY16 : VISInst<0b000010010, "array16", I64Regs>;
def ARRAY32 : VISInst<0b000010100, "array32", I64Regs>;

// Operand-less record (VISInst0).
def SHUTDOWN : VISInst0<0b010000000, "shutdown">;

} // Predicates = [HasVIS]


// VIS 2 Instructions.
let Predicates = [HasVIS2] in {

def BMASK    : VISInst<0b000011001, "bmask", I64Regs>;
def BSHUFFLE : VISInst<0b001001100, "bshuffle">;

// SIAM takes a single immediate ($mode) and is built on F3_3_siam instead of
// VISInstFormat; rd and rs1 are pinned to zero.
let rd = 0, rs1 = 0 in
def SIAM : F3_3_siam<0b10, 0b110110, 0b010000001,
                     (outs),
                     (ins i32imm:$mode),
                     "siam $mode", []>;

// "n"-suffixed edge* records, all on I64Regs.
def EDGE8N   : VISInst<0b000000001, "edge8n", I64Regs>;
def EDGE8LN  : VISInst<0b000000011, "edge8ln", I64Regs>;
def EDGE16N  : VISInst<0b000000101, "edge16n", I64Regs>;
def EDGE16LN : VISInst<0b000000111, "edge16ln", I64Regs>;
def EDGE32N  : VISInst<0b000001001, "edge32n", I64Regs>;
def EDGE32LN : VISInst<0b000001011, "edge32ln", I64Regs>;
} // Predicates = [HasVIS2]


// VIS 3 Instructions.
let Predicates = [HasVIS3] in {

// ADDXC is marked as reading ICC; ADDXCCC as both reading and writing it.
let Uses = [ICC] in
def ADDXC : VISInst<0b000010001, "addxc", I64Regs>;

let Defs = [ICC], Uses = [ICC] in
def ADDXCCC : VISInst<0b000010011, "addxccc", I64Regs>;

// cmask* records take only $rs2 and produce no register result; rd and rs1
// are pinned to zero.
let rd = 0, rs1 = 0 in {
  def CMASK8  : VISInstFormat<0b000011011,
                              (outs), (ins I64Regs:$rs2),
                              "cmask8 $rs2">;
  def CMASK16 : VISInstFormat<0b000011101,
                              (outs), (ins I64Regs:$rs2),
                              "cmask16 $rs2">;
  def CMASK32 : VISInstFormat<0b000011111,
                              (outs), (ins I64Regs:$rs2),
                              "cmask32 $rs2">;
}

def FCHKSM16 : VISInst<0b001000100, "fchksm16">;

// The fh*/fn* arithmetic records below are built directly on F3_3 with
// 0b110100 as the second argument, not on VISInstFormat (which uses 0b110110).
def FHADDS : F3_3<0b10, 0b110100, 0b001100001,
                  (outs FPRegs:$rd),
                  (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fhadds $rs1, $rs2, $rd", []>;
def FHADDD : F3_3<0b10, 0b110100, 0b001100010,
                  (outs DFPRegs:$rd),
                  (ins DFPRegs:$rs1, DFPRegs:$rs2),
                  "fhaddd $rs1, $rs2, $rd", []>;
def FHSUBS : F3_3<0b10, 0b110100, 0b001100101,
                  (outs FPRegs:$rd),
                  (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fhsubs $rs1, $rs2, $rd", []>;
def FHSUBD : F3_3<0b10, 0b110100, 0b001100110,
                  (outs DFPRegs:$rd),
                  (ins DFPRegs:$rs1, DFPRegs:$rs2),
                  "fhsubd $rs1, $rs2, $rd", []>;

// flcmp* write an FCCRegs destination, which is printed first in the
// assembly string.
def FLCMPS : VISInstFormat<0b101010001,
                           (outs FCCRegs:$rd),
                           (ins FPRegs:$rs1, FPRegs:$rs2),
                           "flcmps $rd, $rs1, $rs2">;
def FLCMPD : VISInstFormat<0b101010010,
                           (outs FCCRegs:$rd),
                           (ins DFPRegs:$rs1, DFPRegs:$rs2),
                           "flcmpd $rd, $rs1, $rs2">;

def FMEAN16 : VISInst<0b001000000, "fmean16">;

def FNADDS  : F3_3<0b10, 0b110100, 0b001010001,
                   (outs FPRegs:$rd),
                   (ins FPRegs:$rs1, FPRegs:$rs2),
                   "fnadds $rs1, $rs2, $rd", []>;
def FNADDD  : F3_3<0b10, 0b110100, 0b001010010,
                   (outs DFPRegs:$rd),
                   (ins DFPRegs:$rs1, DFPRegs:$rs2),
                   "fnaddd $rs1, $rs2, $rd", []>;
def FNHADDS : F3_3<0b10, 0b110100, 0b001110001,
                   (outs FPRegs:$rd),
                   (ins FPRegs:$rs1, FPRegs:$rs2),
                   "fnhadds $rs1, $rs2, $rd", []>;
def FNHADDD : F3_3<0b10, 0b110100, 0b001110010,
                   (outs DFPRegs:$rd),
                   (ins DFPRegs:$rs1, DFPRegs:$rs2),
                   "fnhaddd $rs1, $rs2, $rd", []>;

def FNMULS  : F3_3<0b10, 0b110100, 0b001011001,
                   (outs FPRegs:$rd),
                   (ins FPRegs:$rs1, FPRegs:$rs2),
                   "fnmuls $rs1, $rs2, $rd", []>;
def FNMULD  : F3_3<0b10, 0b110100, 0b001011010,
                   (outs DFPRegs:$rd),
                   (ins DFPRegs:$rs1, DFPRegs:$rs2),
                   "fnmuld $rs1, $rs2, $rd", []>;
// FNSMULD takes two FPRegs sources and writes a DFPRegs destination.
def FNSMULD : F3_3<0b10, 0b110100, 0b001111001,
                   (outs DFPRegs:$rd),
                   (ins FPRegs:$rs1, FPRegs:$rs2),
                   "fnsmuld $rs1, $rs2, $rd", []>;

def FPADD64 : VISInst<0b001000010, "fpadd64">;

// fsll/fsrl/fslas/fsra records on the default DFPRegs class.
def FSLL16  : VISInst<0b000100001, "fsll16">;
def FSRL16  : VISInst<0b000100011, "fsrl16">;
def FSLL32  : VISInst<0b000100101, "fsll32">;
def FSRL32  : VISInst<0b000100111, "fsrl32">;
def FSLAS16 : VISInst<0b000101001, "fslas16">;
def FSRA16  : VISInst<0b000101011, "fsra16">;
def FSLAS32 : VISInst<0b000101101, "fslas32">;
def FSRA32  : VISInst<0b000101111, "fsra32">;

// Single-source records reading $rs2 with rs1 pinned to zero: lzcnt on
// I64Regs, followed by the mov* records that pair an integer register class
// with a floating-point one.
let rs1 = 0 in {
  def LZCNT    : VISInstFormat<0b000010111,
                               (outs I64Regs:$rd), (ins I64Regs:$rs2),
                               "lzcnt $rs2, $rd">;

  def MOVSTOSW : VISInstFormat<0b100010011,
                               (outs IntRegs:$rd), (ins FPRegs:$rs2),
                               "movstosw $rs2, $rd">;
  def MOVSTOUW : VISInstFormat<0b100010001,
                               (outs IntRegs:$rd), (ins FPRegs:$rs2),
                               "movstouw $rs2, $rd">;
  def MOVDTOX  : VISInstFormat<0b100010000,
                               (outs I64Regs:$rd), (ins DFPRegs:$rs2),
                               "movdtox $rs2, $rd">;
  def MOVWTOS  : VISInstFormat<0b100011001,
                               (outs FPRegs:$rd), (ins IntRegs:$rs2),
                               "movwtos $rs2, $rd">;
  def MOVXTOD  : VISInstFormat<0b100011000,
                               (outs DFPRegs:$rd), (ins I64Regs:$rs2),
                               "movxtod $rs2, $rd">;
}

def PDISTN : VISInstID<0b000111111, "pdistn">;

def UMULXHI : VISInst<0b000010110, "umulxhi", I64Regs>;
def XMULX   : VISInst<0b100010101, "xmulx", I64Regs>;
def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
} // Predicates = [HasVIS3]

// FP immediate patterns.
// Leaves matching the exact floating-point constants +0.0, -0.0, +0.5 and
// -0.5 for any FP type (fAny).
def fpimm0       : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
def fpnegimm0    : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
def fpimmhalf    : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;

// VIS instruction patterns.
let Predicates = [HasVIS] in {
  // Zero immediate: +0.0 selects FZERO/FZEROS directly, -0.0 negates that
  // result with FNEGD/FNEGS.
  def : Pat<(f64 fpimm0),    (FZERO)>;
  def : Pat<(f32 fpimm0),    (FZEROS)>;
  def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
  def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
} // Predicates = [HasVIS]

// VIS3 instruction patterns.
let Predicates = [HasVIS3] in {
  // +/-0.5 immediate.
  // Required so that the halving instructions further down can be selected.
  // FIXME generalize this to arbitrary immediates.
  // A SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
  // faster than loading them from the constant pool.
  def : Pat<(f32 fpimmhalf),
            (MOVWTOS (SETHIi 0x0FC000))>;
  def : Pat<(f32 fpnegimmhalf),
            (MOVWTOS (SETHIi 0x2FC000))>;
  def : Pat<(f64 fpimmhalf),
            (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
  def : Pat<(f64 fpnegimmhalf),
            (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;

  // 64-bit add-with-carry.
  def : Pat<(i64 (adde i64:$lhs, i64:$rhs)),
            (ADDXCCC $lhs, $rhs)>;

  // Unsigned high multiply.
  def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)),
            (UMULXHI $lhs, $rhs)>;

  // Signed "MULXHI".
  // Follows the formula given in OSA2011 §7.140, except that bit operations
  // (arithmetic shift by 63, then AND) pick the values to be added.
  // TODO: This expansion should probably be moved to DAG legalization phase.
  def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
            (SUBrr (UMULXHI $lhs, $rhs),
                   (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
                          (ANDrr (SRAXri $rhs, 63), $lhs)))>;

  // 64-bit count-leading-zeros.
  def : Pat<(i64 (ctlz i64:$src)),
            (LZCNT $src)>;
  def : Pat<(i64 (ctlz_zero_undef i64:$src)),
            (LZCNT $src)>;

  // 32-bit LZCNT.
  // Zero-extending the input leaves 32 extra leading zeros in front of it,
  // so 32 is subtracted from the LZCNT result to compensate.
  // FIXME remove this when the codegen supports using 64-bit values directly
  // in V8+ mode.
  def : Pat<(i32 (ctlz i32:$src)),
            (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
  def : Pat<(i32 (ctlz_zero_undef i32:$src)),
            (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;

  // Bitcasts between integer and FP registers, including the zero/any- and
  // sign-extended i64 forms of an f32 bitcast.
  def : Pat<(i32 (bitconvert f32:$src)),
            (MOVSTOUW $src)>;
  def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))),
            (MOVSTOUW $src)>;
  def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))),
            (MOVSTOSW $src)>;
  def : Pat<(f32 (bitconvert i32:$src)),
            (MOVWTOS $src)>;
  def : Pat<(i64 (bitconvert f64:$src)),
            (MOVDTOX $src)>;
  def : Pat<(f64 (bitconvert i64:$src)),
            (MOVXTOD $src)>;

  // OP-then-neg FP operations.
  // TODO handle equivalent patterns like `rs1*-rs2`.
  def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))),
            (FNADDS $rs1, $rs2)>;
  def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))),
            (FNADDD $rs1, $rs2)>;
  def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))),
            (FNMULS $rs1, $rs2)>;
  def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))),
            (FNMULD $rs1, $rs2)>;
  def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))),
            (FNSMULD $rs1, $rs2)>;

  // Op-then-halve FP operations: an add or subtract multiplied by +0.5, or an
  // add multiplied by -0.5.
  def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)),
            (FHADDS $rs1, $rs2)>;
  def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)),
            (FHADDD $rs1, $rs2)>;
  def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)),
            (FHSUBS $rs1, $rs2)>;
  def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)),
            (FHSUBD $rs1, $rs2)>;
  def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)),
            (FNHADDS $rs1, $rs2)>;
  def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)),
            (FNHADDD $rs1, $rs2)>;
} // Predicates = [HasVIS3]