I've written a C# code and Compute Shader that makes frustum culling and it works correctly but it makes the grass flicker. I can't figure out what is behind this issue so I'm asking here. Does anybody know what's wrong with it?
C# Script:
private Camera camera;
struct DrawData
{
public Vector3 position;
public Quaternion rotation;
public Vector3 scale;
}
[HideInInspector]
public Mesh mesh;
[HideInInspector]
public List<Matrix4x4> matrices;
[HideInInspector]
public Material material;
[Range(0, 1000f)]
public float distanceCutoff;
private List<DrawData> instances;
private ComputeShader cullShader;
private ComputeBuffer drawDataBuffer, argsBuffer, voteBuffer, scanBuffer, groupSumArrayBuffer, scannedGroupSumBuffer, resultBuffer;
private int numThreadGroups, numGroupScanThreadGroups;
private uint[] args = new uint[5];
private MaterialPropertyBlock mpb;
private void Awake()
{
camera = Camera.main;
mpb = new MaterialPropertyBlock();
instances = new List<DrawData>();
LoadInstances();
drawDataBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
drawDataBuffer.SetData(instances);
numThreadGroups = Mathf.CeilToInt(instances.Count / 128.0f);
numGroupScanThreadGroups = Mathf.CeilToInt(instances.Count / 1024.0f);
cullShader = Resources.Load<ComputeShader>("ComputeShaders/Cull");
voteBuffer = new ComputeBuffer(instances.Count, 4);
scanBuffer = new ComputeBuffer(instances.Count, 4);
groupSumArrayBuffer = new ComputeBuffer(instances.Count, 4);
scannedGroupSumBuffer = new ComputeBuffer(instances.Count, 4);
resultBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
argsBuffer = new ComputeBuffer(5, sizeof(uint), ComputeBufferType.IndirectArguments);
args[0] = mesh.GetIndexCount(0);
args[1] = (uint)instances.Count;
args[2] = (uint)mesh.GetIndexStart(0);
args[3] = (uint)mesh.GetBaseVertex(0);
mpb.SetBuffer("_DrawData", resultBuffer);
}
private void CullGrass(Matrix4x4 VP)
{
argsBuffer.SetData(args);
// Vote
cullShader.SetMatrix("MATRIX_VP", VP);
cullShader.SetBuffer(0, "drawDataBuffer", drawDataBuffer);
cullShader.SetBuffer(0, "voteBuffer", voteBuffer);
cullShader.SetVector("cameraPosition", camera.transform.position);
cullShader.SetFloat("_distance", distanceCutoff);
cullShader.Dispatch(0, numThreadGroups, 1, 1);
// Scan Instances
cullShader.SetBuffer(1, "voteBuffer", voteBuffer);
cullShader.SetBuffer(1, "scanBuffer", scanBuffer);
cullShader.SetBuffer(1, "groupSumArray", groupSumArrayBuffer);
cullShader.Dispatch(1, numThreadGroups, 1, 1);
// Scan Groups
cullShader.SetInt("numOfGroups", numThreadGroups);
cullShader.SetBuffer(2, "groupSumArrayIn", groupSumArrayBuffer);
cullShader.SetBuffer(2, "groupSumArrayOut", scannedGroupSumBuffer);
cullShader.Dispatch(2, numGroupScanThreadGroups, 1, 1);
// Compact
cullShader.SetBuffer(3, "drawDataBuffer", drawDataBuffer);
cullShader.SetBuffer(3, "voteBuffer", voteBuffer);
cullShader.SetBuffer(3, "scanBuffer", scanBuffer);
cullShader.SetBuffer(3, "argsBuffer", argsBuffer);
cullShader.SetBuffer(3, "resultBuffer", resultBuffer);
cullShader.SetBuffer(3, "groupSumArray", scannedGroupSumBuffer);
cullShader.Dispatch(3, numThreadGroups, 1, 1);
}
private void Update()
{
Matrix4x4 P = camera.projectionMatrix;
Matrix4x4 V = camera.transform.worldToLocalMatrix;
Matrix4x4 VP = P * V;
CullGrass(VP);
Graphics.DrawMeshInstancedIndirect(mesh, 0, material, new Bounds(Vector3.zero, Vector3.one * 100.0f), argsBuffer, 0, mpb);
}
private void LoadInstances()
{
instances.Clear();
foreach (var matrix in matrices)
{
instances.Add(new DrawData()
{
position = GetPositionFromMatrix(matrix),
rotation = GetRotationFromMatrix(matrix),
scale = GetScaleFromMatrix(matrix)
});
}
Debug.Log($"Initialized {instances.Count} instances of grass.");
}
private Vector3 GetPositionFromMatrix(Matrix4x4 matrix)
{
return matrix.GetColumn(3);
}
private Vector3 GetScaleFromMatrix(Matrix4x4 matrix)
{
return new Vector3(matrix.GetColumn(0).magnitude, matrix.GetColumn(1).magnitude, matrix.GetColumn(2).magnitude);
}
private Quaternion GetRotationFromMatrix(Matrix4x4 matrix)
{
float w = Mathf.Sqrt(1 + matrix.m00 + matrix.m11 + matrix.m22) / 2f;
float x = (matrix.m21 - matrix.m12) / (w * 4);
float y = (matrix.m02 - matrix.m20) / (w * 4);
float z = (matrix.m10 - matrix.m01) / (w * 4);
return new Quaternion(x, y, z, w);
}
private void OnDisable()
{
argsBuffer?.Release();
drawDataBuffer?.Release();
voteBuffer?.Release();
scanBuffer?.Release();
groupSumArrayBuffer?.Release();
scannedGroupSumBuffer?.Release();
resultBuffer?.Release();
}
Compute shader:
#pragma kernel Vote
#pragma kernel Scan
#pragma kernel ScanGroupSums
#pragma kernel Compact
#pragma kernel ResetArgs
#define NUM_THREAD_GROUPS_X 64
struct DrawData
{
float3 position;
float4 rotation;
float3 scale;
};
RWStructuredBuffer<uint> argsBuffer;
RWStructuredBuffer<DrawData> drawDataBuffer;
RWStructuredBuffer<uint> voteBuffer;
RWStructuredBuffer<uint> scanBuffer;
RWStructuredBuffer<uint> groupSumArray;
RWStructuredBuffer<uint> groupSumArrayIn;
RWStructuredBuffer<uint> groupSumArrayOut;
RWStructuredBuffer<DrawData> resultBuffer;
float4x4 MATRIX_VP;
int numOfGroups;
groupshared uint temp[2 * NUM_THREAD_GROUPS_X];
groupshared uint grouptemp[2 * 1024];
float _distance;
float3 cameraPosition;
[numthreads(128, 1, 1)]
void Vote(uint3 id : SV_DispatchThreadID)
{
float4 position = float4(drawDataBuffer[id.x].position, 1.0f);
float4 viewspace = mul(MATRIX_VP, position);
float3 clipspace = viewspace.xyz;
clipspace /= -viewspace.w;
clipspace.x = clipspace.x / 2.0f + 0.5f;
clipspace.y = clipspace.y / 2.0f + 0.5f;
clipspace.z = -viewspace.w;
bool inView = clipspace.x < -0.2f || clipspace.x > 1.2f || clipspace.z <= -0.1f ? 0 : 1;
bool withinDistance = distance(cameraPosition, position.xyz) < _distance;
voteBuffer[id.x] = inView * withinDistance;
}
[numthreads(NUM_THREAD_GROUPS_X, 1, 1)]
void Scan(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
uint3 groupThreadID : SV_GROUPTHREADID)
{
int tid = (int)id.x;
int groupTID = (int)groupThreadID.x;
int groupID = (int)_groupID.x;
int offset = 1;
temp[2 * groupTID] = voteBuffer[2 * tid];
temp[2 * groupTID + 1] = voteBuffer[2 * tid + 1];
int d;
int numElements = 2 * NUM_THREAD_GROUPS_X;
for (d = numElements >> 1; d > 0; d >>= 1)
{
GroupMemoryBarrierWithGroupSync();
if (groupTID < d)
{
int ai = offset * (2 * groupTID + 1) - 1;
int bi = offset * (2 * groupTID + 2) - 1;
temp[bi] += temp[ai];
}
offset *= 2;
}
if (groupTID == 0)
{
groupSumArray[_groupID.x] = temp[numElements - 1];
temp[numElements - 1] = 0;
}
for (d = 1; d < numElements; d *= 2)
{
offset >>= 1;
GroupMemoryBarrierWithGroupSync();
if (groupTID < d)
{
int ai = offset * (2 * groupTID + 1) - 1;
int bi = offset * (2 * groupTID + 2) - 1;
int t = temp[ai];
temp[ai] = temp[bi];
temp[bi] += t;
}
}
GroupMemoryBarrierWithGroupSync();
scanBuffer[2 * tid] = temp[2 * groupTID];
scanBuffer[2 * tid + 1] = temp[2 * groupTID + 1];
}
[numthreads(1024, 1, 1)]
void ScanGroupSums(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
uint3 groupThreadID : SV_GROUPTHREADID)
{
int tid = (int)id.x;
int groupTID = (int)groupThreadID.x;
int groupID = (int)_groupID.x;
int offset = 1;
grouptemp[2 * groupTID] = groupSumArrayIn[2 * tid];
grouptemp[2 * groupTID + 1] = groupSumArrayIn[2 * tid + 1];
int d;
for (d = numOfGroups >> 1; d > 0; d >>= 1)
{
GroupMemoryBarrierWithGroupSync();
if (groupTID < d)
{
int ai = offset * (2 * groupTID + 1) - 1;
int bi = offset * (2 * groupTID + 2) - 1;
grouptemp[bi] += grouptemp[ai];
}
offset *= 2;
}
if (tid == 0)
grouptemp[numOfGroups - 1] = 0;
for (d = 1; d < numOfGroups; d *= 2)
{
offset >>= 1;
GroupMemoryBarrierWithGroupSync();
if (tid < d)
{
int ai = offset * (2 * groupTID + 1) - 1;
int bi = offset * (2 * groupTID + 2) - 1;
int t = grouptemp[ai];
grouptemp[ai] = grouptemp[bi];
grouptemp[bi] += t;
}
}
GroupMemoryBarrierWithGroupSync();
groupSumArrayOut[2 * tid] = grouptemp[2 * tid];
groupSumArrayOut[2 * tid + 1] = grouptemp[2 * tid + 1];
}
[numthreads(128, 1, 1)]
void Compact(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
uint3 groupThreadID : SV_GROUPTHREADID)
{
uint tid = id.x;
uint groupID = _groupID.x;
uint groupSum = groupID.x > 0 ? groupSumArray[groupID.x] : 0;
bool inCamera = voteBuffer[id.x];
if (inCamera == 1)
{
InterlockedAdd(argsBuffer[1], 1);
resultBuffer[scanBuffer[tid] + groupSum] = drawDataBuffer[tid];
}
}
[numthreads(1, 1, 1)]
void ResetArgs(uint3 id : SV_DISPATCHTHREADID)
{
argsBuffer[1] = (uint)0;
}
Video: https://youtu.be/K53G4NpnYxY