mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-01 02:21:26 +02:00
Merge pull request #1109 from FioraAeterna/ps_cmp
JIT: add ps_cmp0/ps_cmp1/ps_res/ps_rsqrte
This commit is contained in:
commit
522d7eb275
@ -64,6 +64,8 @@ enum NormalSSEOps
|
|||||||
sseMOVLPDtoRM = 0x13,
|
sseMOVLPDtoRM = 0x13,
|
||||||
sseMOVHPDfromRM= 0x16,
|
sseMOVHPDfromRM= 0x16,
|
||||||
sseMOVHPDtoRM = 0x17,
|
sseMOVHPDtoRM = 0x17,
|
||||||
|
sseMOVHLPS = 0x12,
|
||||||
|
sseMOVLHPS = 0x16,
|
||||||
sseMASKMOVDQU = 0xF7,
|
sseMASKMOVDQU = 0xF7,
|
||||||
sseLDDQU = 0xF0,
|
sseLDDQU = 0xF0,
|
||||||
sseSHUF = 0xC6,
|
sseSHUF = 0xC6,
|
||||||
@ -1526,6 +1528,9 @@ void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVHPDfromRM,
|
|||||||
void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVLPDtoRM, false, regOp, arg);}
|
void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVLPDtoRM, false, regOp, arg);}
|
||||||
void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVHPDtoRM, false, regOp, arg);}
|
void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVHPDtoRM, false, regOp, arg);}
|
||||||
|
|
||||||
|
// Emits MOVHLPS regOp1, regOp2 (register-to-register only).
// Per the Intel SDM this copies the high 64 bits of regOp2 into the low
// 64 bits of regOp1 and leaves the high 64 bits of regOp1 unchanged.
// Forwards to WriteSSEOp with size 32 and opcode sseMOVHLPS; regOp2 is
// wrapped as a register operand with R().
void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(32, sseMOVHLPS, true, regOp1, R(regOp2));}
|
||||||
|
// Emits MOVLHPS regOp1, regOp2 (register-to-register only).
// Per the Intel SDM this copies the low 64 bits of regOp2 into the high
// 64 bits of regOp1 and leaves the low 64 bits of regOp1 unchanged.
// Forwards to WriteSSEOp with size 32 and opcode sseMOVLHPS; regOp2 is
// wrapped as a register operand with R().
void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(32, sseMOVLHPS, true, regOp1, R(regOp2));}
|
||||||
|
|
||||||
void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, true, regOp, arg);}
|
void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, true, regOp, arg);}
|
||||||
void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, regOp, arg);}
|
void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, regOp, arg);}
|
||||||
|
|
||||||
|
@ -583,6 +583,9 @@ public:
|
|||||||
void MOVLPD(OpArg arg, X64Reg regOp);
|
void MOVLPD(OpArg arg, X64Reg regOp);
|
||||||
void MOVHPD(OpArg arg, X64Reg regOp);
|
void MOVHPD(OpArg arg, X64Reg regOp);
|
||||||
|
|
||||||
|
void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
|
||||||
|
void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
|
||||||
|
|
||||||
void MOVD_xmm(X64Reg dest, const OpArg &arg);
|
void MOVD_xmm(X64Reg dest, const OpArg &arg);
|
||||||
void MOVQ_xmm(X64Reg dest, OpArg arg);
|
void MOVQ_xmm(X64Reg dest, OpArg arg);
|
||||||
void MOVD_xmm(const OpArg &arg, X64Reg src);
|
void MOVD_xmm(const OpArg &arg, X64Reg src);
|
||||||
|
@ -142,6 +142,7 @@ public:
|
|||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||||
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||||
|
void FloatCompare(UGeckoInstruction inst, bool upper = false);
|
||||||
|
|
||||||
// OPCODES
|
// OPCODES
|
||||||
void unknown_instruction(UGeckoInstruction _inst);
|
void unknown_instruction(UGeckoInstruction _inst);
|
||||||
@ -197,8 +198,11 @@ public:
|
|||||||
void ps_arith(UGeckoInstruction inst); //aggregate
|
void ps_arith(UGeckoInstruction inst); //aggregate
|
||||||
void ps_mergeXX(UGeckoInstruction inst);
|
void ps_mergeXX(UGeckoInstruction inst);
|
||||||
void ps_maddXX(UGeckoInstruction inst);
|
void ps_maddXX(UGeckoInstruction inst);
|
||||||
|
void ps_res(UGeckoInstruction inst);
|
||||||
|
void ps_rsqrte(UGeckoInstruction inst);
|
||||||
void ps_sum(UGeckoInstruction inst);
|
void ps_sum(UGeckoInstruction inst);
|
||||||
void ps_muls(UGeckoInstruction inst);
|
void ps_muls(UGeckoInstruction inst);
|
||||||
|
void ps_cmpXX(UGeckoInstruction inst);
|
||||||
|
|
||||||
void fp_arith(UGeckoInstruction inst);
|
void fp_arith(UGeckoInstruction inst);
|
||||||
|
|
||||||
|
@ -110,14 +110,14 @@ static GekkoOPTemplate primarytable[] =
|
|||||||
|
|
||||||
static GekkoOPTemplate table4[] =
|
static GekkoOPTemplate table4[] =
|
||||||
{ //SUBOP10
|
{ //SUBOP10
|
||||||
{0, &Jit64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
{0, &Jit64::ps_cmpXX}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
||||||
{32, &Jit64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
{32, &Jit64::ps_cmpXX}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
||||||
{40, &Jit64::ps_sign}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
{40, &Jit64::ps_sign}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{136, &Jit64::ps_sign}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
{136, &Jit64::ps_sign}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{264, &Jit64::ps_sign}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
{264, &Jit64::ps_sign}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{64, &Jit64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
{64, &Jit64::ps_cmpXX}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{72, &Jit64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
{72, &Jit64::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{96, &Jit64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
{96, &Jit64::ps_cmpXX}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{528, &Jit64::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
{528, &Jit64::ps_mergeXX}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{560, &Jit64::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
{560, &Jit64::ps_mergeXX}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
||||||
{592, &Jit64::ps_mergeXX}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
|
{592, &Jit64::ps_mergeXX}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
|
||||||
@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
|
|||||||
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||||
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||||
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
||||||
{24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
{24, &Jit64::ps_res}, //"ps_res", OPTYPE_PS, 0}},
|
||||||
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||||
{26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
{26, &Jit64::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||||
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||||
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||||
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||||
|
@ -267,25 +267,32 @@ void Jit64::fmrx(UGeckoInstruction inst)
|
|||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::fcmpx(UGeckoInstruction inst)
|
void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
|
||||||
FALLBACK_IF(jo.fpAccurateFcmp);
|
|
||||||
|
|
||||||
//bool ordered = inst.SUBOP10 == 32;
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int crf = inst.CRFD;
|
|
||||||
bool fprf = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableFPRF && js.op->wantsFPRF;
|
bool fprf = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableFPRF && js.op->wantsFPRF;
|
||||||
|
//bool ordered = !!(inst.SUBOP10 & 32);
|
||||||
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
int crf = inst.CRFD;
|
||||||
|
|
||||||
fpr.Lock(a,b);
|
fpr.Lock(a, b);
|
||||||
fpr.BindToRegister(b, true);
|
fpr.BindToRegister(b, true, false);
|
||||||
|
|
||||||
if (fprf)
|
if (fprf)
|
||||||
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
|
AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
|
||||||
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
|
||||||
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
|
if (upper)
|
||||||
|
{
|
||||||
|
fpr.BindToRegister(a, true, false);
|
||||||
|
MOVHLPS(XMM0, fpr.RX(a));
|
||||||
|
MOVHLPS(XMM1, fpr.RX(b));
|
||||||
|
UCOMISD(XMM1, R(XMM0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
||||||
|
UCOMISD(fpr.RX(b), fpr.R(a));
|
||||||
|
}
|
||||||
|
|
||||||
FixupBranch pNaN, pLesser, pGreater;
|
FixupBranch pNaN, pLesser, pGreater;
|
||||||
FixupBranch continue1, continue2, continue3;
|
FixupBranch continue1, continue2, continue3;
|
||||||
@ -293,7 +300,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||||||
if (a != b)
|
if (a != b)
|
||||||
{
|
{
|
||||||
// if B > A, goto Lesser's jump target
|
// if B > A, goto Lesser's jump target
|
||||||
pLesser = J_CC(CC_A);
|
pLesser = J_CC(CC_A);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (B != B) or (A != A), goto NaN's jump target
|
// if (B != B) or (A != A), goto NaN's jump target
|
||||||
@ -344,6 +351,15 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT handler for the scalar floating-point compare instruction.
// Runs the standard entry/guard macros, then hands off to the shared
// FloatCompare() helper. The helper's `upper` parameter is left at its
// default (false), so the non-upper comparison path is emitted.
void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
|
// NOTE(review): presumably defers to the interpreter when accurate
// fcmp emulation is requested -- confirm against the macro definition.
FALLBACK_IF(jo.fpAccurateFcmp);
|
||||||
|
|
||||||
|
FloatCompare(inst);
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::fctiwx(UGeckoInstruction inst)
|
void Jit64::fctiwx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
@ -282,6 +282,59 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
|||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT handler for ps_rsqrte.
// Applies the asm_routines.frsqrte routine separately to the low and the
// high 64-bit half of FPR b and stores the two results into the matching
// halves of FPR d. The operand is placed in XMM0 before each call and the
// result is read back from XMM0 afterwards.
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
|
// NOTE(review): presumably the Rc=1 form is left to the interpreter --
// confirm against the FALLBACK_IF definition.
FALLBACK_IF(inst.Rc);
|
||||||
|
int b = inst.FB;
|
||||||
|
int d = inst.FD;
|
||||||
|
|
||||||
|
// NOTE(review): presumably RSCRATCH_EXTRA is reserved because the called
// routine uses it -- confirm against the frsqrte routine.
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
|
fpr.Lock(b, d);
|
||||||
|
// NOTE(review): arguments look like (reg, doLoad, makeDirty): b is loaded
// and not dirtied, d is bound without loading -- confirm against the
// register cache API.
fpr.BindToRegister(b, true, false);
|
||||||
|
fpr.BindToRegister(d, false);
|
||||||
|
|
||||||
|
// Low half: b -> XMM0 -> frsqrte -> low half of d.
MOVSD(XMM0, fpr.R(b));
|
||||||
|
CALL((void *)asm_routines.frsqrte);
|
||||||
|
// With d bound to a register, the register form of MOVSD only replaces
// the low 64 bits, so if b == d the high half of b is still available
// for the second pass below.
MOVSD(fpr.R(d), XMM0);
|
||||||
|
|
||||||
|
// High half: MOVHLPS brings the high 64 bits of b down into XMM0's low
// half; MOVLHPS then writes XMM0's low half into the high half of d.
MOVHLPS(XMM0, fpr.RX(b));
|
||||||
|
CALL((void *)asm_routines.frsqrte);
|
||||||
|
MOVLHPS(fpr.RX(d), XMM0);
|
||||||
|
|
||||||
|
// Post-processing helpers defined elsewhere; presumably they round both
// halves to single precision and update FPRF when required -- verify.
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
|
||||||
|
// JIT handler for ps_res.
// Applies the asm_routines.fres routine separately to the low and the
// high 64-bit half of FPR b and stores the two results into the matching
// halves of FPR d. The operand is placed in XMM0 before each call and the
// result is read back from XMM0 afterwards.
void Jit64::ps_res(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
|
// NOTE(review): presumably the Rc=1 form is left to the interpreter --
// confirm against the FALLBACK_IF definition.
FALLBACK_IF(inst.Rc);
|
||||||
|
int b = inst.FB;
|
||||||
|
int d = inst.FD;
|
||||||
|
|
||||||
|
// NOTE(review): presumably RSCRATCH_EXTRA is reserved because the called
// routine uses it -- confirm against the fres routine.
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
|
fpr.Lock(b, d);
|
||||||
|
// NOTE(review): arguments look like (reg, doLoad, makeDirty): b is loaded
// and not dirtied, d is bound without loading -- confirm against the
// register cache API.
fpr.BindToRegister(b, true, false);
|
||||||
|
fpr.BindToRegister(d, false);
|
||||||
|
|
||||||
|
// Low half: b -> XMM0 -> fres -> low half of d.
MOVSD(XMM0, fpr.R(b));
|
||||||
|
CALL((void *)asm_routines.fres);
|
||||||
|
// With d bound to a register, the register form of MOVSD only replaces
// the low 64 bits, so if b == d the high half of b is still available
// for the second pass below.
MOVSD(fpr.R(d), XMM0);
|
||||||
|
|
||||||
|
// High half: MOVHLPS brings the high 64 bits of b down into XMM0's low
// half; MOVLHPS then writes XMM0's low half into the high half of d.
MOVHLPS(XMM0, fpr.RX(b));
|
||||||
|
CALL((void *)asm_routines.fres);
|
||||||
|
MOVLHPS(fpr.RX(d), XMM0);
|
||||||
|
|
||||||
|
// Post-processing helpers defined elsewhere; presumably they round both
// halves to single precision and update FPRF when required -- verify.
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
|
||||||
//TODO: add optimized cases
|
//TODO: add optimized cases
|
||||||
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||||
@ -351,3 +404,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||||||
MOVAPD(fpr.RX(d), R(XMM0));
|
MOVAPD(fpr.RX(d), R(XMM0));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT handler shared by ps_cmpu0, ps_cmpo0, ps_cmpu1 and ps_cmpo1
// (SUBOP10 values 0, 32, 64 and 96 in the opcode table).
// Runs the standard entry/guard macros, then delegates to FloatCompare().
void Jit64::ps_cmpXX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
|
// NOTE(review): presumably defers to the interpreter when accurate
// fcmp emulation is requested -- confirm against the macro definition.
FALLBACK_IF(jo.fpAccurateFcmp);
|
||||||
|
|
||||||
|
// Bit 64 of SUBOP10 is set only for the "...1" variants (64, 96), so it
// selects the upper-half comparison. The ordered/unordered bit (32) is
// not forwarded to FloatCompare().
FloatCompare(inst, !!(inst.SUBOP10 & 64));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user