Compare commits

...

2 Commits

Author SHA1 Message Date
gdkchan
e44a43c7e1 Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling (#3251)
* Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling

* Shader cache version bump

* Fix typo
2022-04-08 12:42:39 +02:00
gdkchan
3139a85a2b Allow copy texture views to have mismatching multisample state (#3152) 2022-04-08 11:26:48 +02:00
11 changed files with 222 additions and 73 deletions

View File

@@ -1136,17 +1136,33 @@ namespace Ryujinx.Graphics.Gpu.Image
/// <param name="range">Texture view physical memory ranges</param>
/// <param name="layerSize">Layer size on the given texture</param>
/// <param name="caps">Host GPU capabilities</param>
/// <param name="allowMs">Indicates that multisample textures are allowed to match non-multisample requested textures</param>
/// <param name="firstLayer">Texture view initial layer on this texture</param>
/// <param name="firstLevel">Texture view first mipmap level on this texture</param>
/// <returns>The level of compatiblilty a view with the given parameters created from this texture has</returns>
public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, out int firstLayer, out int firstLevel)
public TextureViewCompatibility IsViewCompatible(TextureInfo info, MultiRange range, int layerSize, Capabilities caps, bool allowMs, out int firstLayer, out int firstLevel)
{
TextureViewCompatibility result = TextureViewCompatibility.Full;
result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewFormatCompatible(Info, info, caps));
if (result != TextureViewCompatibility.Incompatible)
{
result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info));
bool msTargetCompatible = false;
if (allowMs)
{
msTargetCompatible = Info.Target == Target.Texture2DMultisample && info.Target == Target.Texture2D;
}
if (!msTargetCompatible)
{
result = TextureCompatibility.PropagateViewCompatibility(result, TextureCompatibility.ViewTargetCompatible(Info, info));
if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY)
{
result = TextureViewCompatibility.Incompatible;
}
}
if (result == TextureViewCompatibility.Full && Info.FormatInfo.Format != info.FormatInfo.Format && !_context.Capabilities.SupportsMismatchingViewFormat)
{
@@ -1156,11 +1172,6 @@ namespace Ryujinx.Graphics.Gpu.Image
result = TextureViewCompatibility.CopyOnly;
}
if (Info.SamplesInX != info.SamplesInX || Info.SamplesInY != info.SamplesInY)
{
result = TextureViewCompatibility.Incompatible;
}
}
firstLayer = 0;

View File

@@ -542,7 +542,14 @@ namespace Ryujinx.Graphics.Gpu.Image
for (int index = 0; index < overlapsCount; index++)
{
Texture overlap = _textureOverlaps[index];
TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible(info, range.Value, sizeInfo.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel);
TextureViewCompatibility overlapCompatibility = overlap.IsViewCompatible(
info,
range.Value,
sizeInfo.LayerSize,
_context.Capabilities,
flags.HasFlag(TextureSearchFlags.ForCopy),
out int firstLayer,
out int firstLevel);
if (overlapCompatibility == TextureViewCompatibility.Full)
{
@@ -650,7 +657,14 @@ namespace Ryujinx.Graphics.Gpu.Image
Texture overlap = _textureOverlaps[index];
bool overlapInCache = overlap.CacheNode != null;
TextureViewCompatibility compatibility = texture.IsViewCompatible(overlap.Info, overlap.Range, overlap.LayerSize, _context.Capabilities, out int firstLayer, out int firstLevel);
TextureViewCompatibility compatibility = texture.IsViewCompatible(
overlap.Info,
overlap.Range,
overlap.LayerSize,
_context.Capabilities,
false,
out int firstLayer,
out int firstLevel);
if (overlap.IsView && compatibility == TextureViewCompatibility.Full)
{
@@ -1000,20 +1014,34 @@ namespace Ryujinx.Graphics.Gpu.Image
depthOrLayers = info.DepthOrLayers;
}
// 2D and 2D multisample textures are not considered compatible.
// This specific case is required for copies, where the source texture might be multisample.
// In this case, we inherit the parent texture multisample state.
Target target = info.Target;
int samplesInX = info.SamplesInX;
int samplesInY = info.SamplesInY;
if (target == Target.Texture2D && parent.Target == Target.Texture2DMultisample)
{
target = Target.Texture2DMultisample;
samplesInX = parent.Info.SamplesInX;
samplesInY = parent.Info.SamplesInY;
}
return new TextureInfo(
info.GpuAddress,
width,
height,
depthOrLayers,
info.Levels,
info.SamplesInX,
info.SamplesInY,
samplesInX,
samplesInY,
info.Stride,
info.IsLinear,
info.GobBlocksInY,
info.GobBlocksInZ,
info.GobBlocksInTileX,
info.Target,
target,
info.FormatInfo,
info.DepthStencilMode,
info.SwizzleR,

View File

@@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Version of the codegen (to be changed when codegen or guest format change).
/// </summary>
private const ulong ShaderCodeGenVersion = 3184;
private const ulong ShaderCodeGenVersion = 3251;
// Progress reporting helpers
private volatile int _shaderCount;

View File

@@ -250,9 +250,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
: "gl_SubgroupInvocationID";
}
// TODO: There must be a better way to handle this...
if (config.Stage == ShaderStage.Fragment)
{
// TODO: There must be a better way to handle this...
switch (value)
{
case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])";

View File

@@ -5144,6 +5144,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
public int SrcC => (int)((_opcode >> 39) & 0xFF);
public int Pred => (int)((_opcode >> 16) & 0x7);
public bool PredInv => (_opcode & 0x80000) != 0;
public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
public bool WriteCC => (_opcode & 0x800000000000) != 0;
public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3);
public bool DFormat => (_opcode & 0x40000000000000) != 0;
@@ -5164,6 +5165,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
public int SrcC => (int)((_opcode >> 39) & 0xFF);
public int Pred => (int)((_opcode >> 16) & 0x7);
public bool PredInv => (_opcode & 0x80000) != 0;
public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
public bool WriteCC => (_opcode & 0x800000000000) != 0;
public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));

View File

@@ -13,16 +13,28 @@ namespace Ryujinx.Graphics.Shader
{
public static string ToGlslString(this InputTopology topology)
{
switch (topology)
return topology switch
{
case InputTopology.Points: return "points";
case InputTopology.Lines: return "lines";
case InputTopology.LinesAdjacency: return "lines_adjacency";
case InputTopology.Triangles: return "triangles";
case InputTopology.TrianglesAdjacency: return "triangles_adjacency";
}
InputTopology.Points => "points",
InputTopology.Lines => "lines",
InputTopology.LinesAdjacency => "lines_adjacency",
InputTopology.Triangles => "triangles",
InputTopology.TrianglesAdjacency => "triangles_adjacency",
_ => "points"
};
}
return "points";
public static int ToInputVertices(this InputTopology topology)
{
return topology switch
{
InputTopology.Points => 1,
InputTopology.Lines or
InputTopology.LinesAdjacency => 2,
InputTopology.Triangles or
InputTopology.TrianglesAdjacency => 3,
_ => 1
};
}
}
}

View File

@@ -73,6 +73,26 @@ namespace Ryujinx.Graphics.Shader.Instructions
};
}
public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
{
return type switch
{
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
_ => src
};
}
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
{
if (!setCC)
@@ -118,6 +138,15 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
{
low = context.BitwiseNot(low);
high = context.BitwiseNot(high);
low = AddWithCarry(context, low, Const(1), out Operand carryOut);
high = context.IAdd(high, carryOut);
return (low, high);
}
public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
{
Operand result = context.IAdd(lhs, rhs);

View File

@@ -168,10 +168,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstIsberd op = context.GetOp<InstIsberd>();
// This instruction performs a load from ISBE memory,
// however it seems to be only used to get some vertex
// input data, so we instead propagate the offset so that
// it can be used on the attribute load.
// This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory.
// Here, we just propagate the offset, as the result from this instruction is usually
// used with ALD to perform vertex load on geometry or tessellation shaders.
// The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
// Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex.
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
}

View File

@@ -94,31 +94,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
case SReg.InvocationInfo:
if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment)
{
Operand primitiveId = Attribute(AttributeConsts.PrimitiveId);
Operand patchVerticesIn;
// Note: Lowest 8-bits seems to contain some primitive index,
// but it seems to be NVIDIA implementation specific as it's only used
// to calculate ISBE offsets, so we can just keep it as zero.
if (context.Config.Stage == ShaderStage.TessellationEvaluation)
if (context.Config.Stage == ShaderStage.TessellationControl ||
context.Config.Stage == ShaderStage.TessellationEvaluation)
{
patchVerticesIn = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16));
src = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16));
}
else
{
InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
int inputVertices = inputTopology switch
{
InputTopology.Points => 1,
InputTopology.Lines or
InputTopology.LinesAdjacency => 2,
InputTopology.Triangles or
InputTopology.TrianglesAdjacency => 3,
_ => 1
};
patchVerticesIn = Const(inputVertices << 16);
src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
}
src = context.BitwiseOr(primitiveId, patchVerticesIn);
}
else
{

View File

@@ -1,7 +1,9 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
@@ -11,8 +13,106 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstVmad op = context.GetOp<InstVmad>();
// TODO: Implement properly.
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcC));
bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if (bSigned)
{
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand productLow = context.IMultiply(srcA, srcB);
Operand productHigh;
if (aSigned == bSigned)
{
productHigh = aSigned
? context.MultiplyHighS32(srcA, srcB)
: context.MultiplyHighU32(srcA, srcB);
}
else
{
Operand temp = aSigned
? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))
: context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)));
productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB));
}
if (op.AvgMode == AvgMode.NegA)
{
(productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh);
}
Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry);
Operand resHigh = context.IAdd(productHigh, sumCarry);
if (op.AvgMode == AvgMode.PlusOne)
{
resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry);
resHigh = context.IAdd(resHigh, poCarry);
}
bool resSigned = op.ASelect == VectorSelect.S32 ||
op.BSelect == VectorSelect.S32 ||
op.AvgMode == AvgMode.NegB ||
op.AvgMode == AvgMode.NegA;
int shift = op.VideoScale switch
{
VideoScale.Shr7 => 7,
VideoScale.Shr15 => 15,
_ => 0
};
if (shift != 0)
{
// Low = (Low >> Shift) | (High << (32 - Shift))
// High >>= Shift
resLow = context.ShiftRightU32(resLow, Const(shift));
resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift)));
resHigh = resSigned
? context.ShiftRightS32(resHigh, Const(shift))
: context.ShiftRightU32(resHigh, Const(shift));
}
Operand res = resLow;
if (op.Sat)
{
Operand sign = context.ShiftRightS32(resHigh, Const(31));
if (resSigned)
{
Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31)));
Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue));
res = context.ConditionalSelect(overflow, clampValue, resLow);
}
else
{
Operand overflow = context.ICompareNotEqual(resHigh, Const(0));
res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow);
}
}
context.Copy(GetDest(op.Dest), res);
// TODO: CC.
}
}
}

View File

@@ -13,14 +13,13 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstVmnmx op = context.GetOp<InstVmnmx>();
Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = GetSrcReg(context, op.SrcC);
Operand srcB;
if (op.BVideo)
{
srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
@@ -124,13 +123,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstVsetp op = context.GetOp<InstVsetp>();
Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcB;
if (op.BVideo)
{
srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
@@ -181,25 +179,5 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
}
private static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
{
return type switch
{
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
_ => src
};
}
}
}