// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ #pragma once namespace x86Emitter { // -------------------------------------------------------------------------------------- // _SimdShiftHelper // -------------------------------------------------------------------------------------- struct _SimdShiftHelper { u8 Prefix; u16 Opcode; u16 OpcodeImm; u8 Modcode; void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const; void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const; void operator()(const xRegisterSSE& to, u8 imm8) const; }; // -------------------------------------------------------------------------------------- // xImplSimd_Shift / xImplSimd_ShiftWithoutQ // -------------------------------------------------------------------------------------- // Used for PSRA, which lacks the Q form. // struct xImplSimd_ShiftWithoutQ { const _SimdShiftHelper W; const _SimdShiftHelper D; }; // Implements PSRL and PSLL // struct xImplSimd_Shift { const _SimdShiftHelper W; const _SimdShiftHelper D; const _SimdShiftHelper Q; void DQ(const xRegisterSSE& to, u8 imm8) const; }; ////////////////////////////////////////////////////////////////////////////////////////// // struct xImplSimd_AddSub { const xImplSimd_DestRegEither B; const xImplSimd_DestRegEither W; const xImplSimd_DestRegEither D; const xImplSimd_DestRegEither Q; // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. const xImplSimd_DestRegEither SB; // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. const xImplSimd_DestRegEither SW; // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. const xImplSimd_DestRegEither USB; // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. const xImplSimd_DestRegEither USW; }; ////////////////////////////////////////////////////////////////////////////////////////// // struct xImplSimd_PMul { const xImplSimd_DestRegEither LW; const xImplSimd_DestRegEither HW; const xImplSimd_DestRegEither HUW; const xImplSimd_DestRegEither UDQ; // [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit // integers. Each intermediate 32-bit integer is truncated to the 18 most significant // bits. Rounding is always performed by adding 1 to the least significant bit of the // 18-bit intermediate result. The final result is obtained by selecting the 16 bits // immediately to the right of the most significant bit of each 18-bit intermediate // result and packed to the destination operand. // // Both operands can be MMX or XMM registers. Source can be register or memory. // const xImplSimd_DestRegEither HRSW; // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store // the low 32 bits of each product in xmm1. const xImplSimd_DestRegSSE LD; // [SSE-4.1] Multiply the packed signed dword integers in dest with src. const xImplSimd_DestRegSSE DQ; }; ////////////////////////////////////////////////////////////////////////////////////////// // For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) // struct xImplSimd_rSqrt { const xImplSimd_DestRegSSE PS; const xImplSimd_DestRegSSE SS; }; ////////////////////////////////////////////////////////////////////////////////////////// // SQRT has PS/SS/SD forms, but not the PD form. // struct xImplSimd_Sqrt { const xImplSimd_DestRegSSE PS; const xImplSimd_DestRegSSE SS; const xImplSimd_DestRegSSE SD; }; ////////////////////////////////////////////////////////////////////////////////////////// // struct xImplSimd_AndNot { const xImplSimd_DestRegSSE PS; const xImplSimd_DestRegSSE PD; }; ////////////////////////////////////////////////////////////////////////////////////////// // Packed absolute value. [sSSE3 only] // struct xImplSimd_PAbsolute { // [sSSE-3] Computes the absolute value of bytes in the src, and stores the result // in dest, as UNSIGNED. const xImplSimd_DestRegEither B; // [sSSE-3] Computes the absolute value of word in the src, and stores the result // in dest, as UNSIGNED. const xImplSimd_DestRegEither W; // [sSSE-3] Computes the absolute value of doublewords in the src, and stores the // result in dest, as UNSIGNED. const xImplSimd_DestRegEither D; }; ////////////////////////////////////////////////////////////////////////////////////////// // Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the // corresponding sign in src. // struct xImplSimd_PSign { // [sSSE-3] negates each byte element of dest if the signed integer value of the // corresponding data element in src is less than zero. If the signed integer value // of a data element in src is positive, the corresponding data element in dest is // unchanged. If a data element in src is zero, the corresponding data element in // dest is set to zero. const xImplSimd_DestRegEither B; // [sSSE-3] negates each word element of dest if the signed integer value of the // corresponding data element in src is less than zero. If the signed integer value // of a data element in src is positive, the corresponding data element in dest is // unchanged. If a data element in src is zero, the corresponding data element in // dest is set to zero. const xImplSimd_DestRegEither W; // [sSSE-3] negates each doubleword element of dest if the signed integer value // of the corresponding data element in src is less than zero. If the signed integer // value of a data element in src is positive, the corresponding data element in dest // is unchanged. If a data element in src is zero, the corresponding data element in // dest is set to zero. const xImplSimd_DestRegEither D; }; ////////////////////////////////////////////////////////////////////////////////////////// // Packed Multiply and Add!! // struct xImplSimd_PMultAdd { // Multiplies the individual signed words of dest by the corresponding signed words // of src, producing temporary signed, doubleword results. The adjacent doubleword // results are then summed and stored in the destination operand. // // DEST[31:0] = ( DEST[15:0] * SRC[15:0]) + (DEST[31:16] * SRC[31:16] ); // DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] ); // [.. repeat in the case of XMM src/dest operands ..] // const xImplSimd_DestRegEither WD; // [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding // signed byte of src, producing intermediate signed 16-bit integers. Each adjacent // pair of signed words is added and the saturated result is packed to dest. // For example, the lowest-order bytes (bits 7-0) in src and dest are multiplied // and the intermediate signed word result is added with the corresponding // intermediate result from the 2nd lowest-order bytes (bits 15-8) of the operands; // the sign-saturated result is stored in the lowest word of dest (bits 15-0). // The same operation is performed on the other pairs of adjacent bytes. // // In Coder Speak: // DEST[15-0] = SaturateToSignedWord( SRC[15-8] * DEST[15-8] + SRC[7-0] * DEST[7-0] ); // DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] ); // [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..] // const xImplSimd_DestRegEither UBSW; }; ////////////////////////////////////////////////////////////////////////////////////////// // Packed Horizontal Add [SSE3 only] // struct xImplSimd_HorizAdd { // [SSE-3] Horizontal Add of Packed Data. A three step process: // * Adds the single-precision floating-point values in the first and second dwords of // dest and stores the result in the first dword of dest. // * Adds single-precision floating-point values in the third and fourth dword of dest // stores the result in the second dword of dest. // * Adds single-precision floating-point values in the first and second dword of *src* // and stores the result in the third dword of dest. const xImplSimd_DestRegSSE PS; // [SSE-3] Horizontal Add of Packed Data. A two step process: // * Adds the double-precision floating-point values in the high and low quadwords of // dest and stores the result in the low quadword of dest. // * Adds the double-precision floating-point values in the high and low quadwords of // *src* stores the result in the high quadword of dest. const xImplSimd_DestRegSSE PD; }; ////////////////////////////////////////////////////////////////////////////////////////// // DotProduct calculation (SSE4.1 only!) // struct xImplSimd_DotProduct { // [SSE-4.1] Conditionally multiplies the packed single precision floating-point // values in dest with the packed single-precision floats in src depending on a // mask extracted from the high 4 bits of the immediate byte. If a condition mask // bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value // of 0.0. The four resulting single-precision values are summed into an inter- // mediate result. // // The intermediate result is conditionally broadcasted to the destination using a // broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast // mask bit is 1, the intermediate result is copied to the corresponding dword // element in dest. If a broadcast mask bit is zero, the corresponding element in // the destination is set to zero. // xImplSimd_DestRegImmSSE PS; // [SSE-4.1] xImplSimd_DestRegImmSSE PD; }; ////////////////////////////////////////////////////////////////////////////////////////// // Rounds floating point values (packed or single scalar) by an arbitrary rounding mode. // (SSE4.1 only!) struct xImplSimd_Round { // [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest. // // Imm8 specifies control fields for the rounding operation: // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. // Bits 1:0 - Specifies a rounding mode for this instruction only. // // Rounding Mode Reference: // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. // const xImplSimd_DestRegImmSSE PS; // [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest. // // Imm8 specifies control fields for the rounding operation: // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. // Bits 1:0 - Specifies a rounding mode for this instruction only. // // Rounding Mode Reference: // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. // const xImplSimd_DestRegImmSSE PD; // [SSE-4.1] Rounds the single-precision src value and stores in dest. // // Imm8 specifies control fields for the rounding operation: // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. // Bits 1:0 - Specifies a rounding mode for this instruction only. // // Rounding Mode Reference: // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. // const xImplSimd_DestRegImmSSE SS; // [SSE-4.1] Rounds the double-precision src value and stores in dest. // // Imm8 specifies control fields for the rounding operation: // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. // Bits 1:0 - Specifies a rounding mode for this instruction only. // // Rounding Mode Reference: // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. // const xImplSimd_DestRegImmSSE SD; }; } // End namespace x86Emitter