0
\$\begingroup\$

I've written a C# code and Compute Shader that makes frustum culling and it works correctly but it makes the grass flicker. I can't figure out what is behind this issue so I'm asking here. Does anybody know what's wrong with it?

C# Script:

private Camera camera;

struct DrawData
{
    public Vector3 position;
    public Quaternion rotation;
    public Vector3 scale;
}

[HideInInspector]
public Mesh mesh;
[HideInInspector]
public List<Matrix4x4> matrices;
[HideInInspector]
public Material material;

[Range(0, 1000f)] 
public float distanceCutoff;

private List<DrawData> instances;

private ComputeShader cullShader;
private ComputeBuffer drawDataBuffer, argsBuffer, voteBuffer, scanBuffer, groupSumArrayBuffer, scannedGroupSumBuffer, resultBuffer;

private int numThreadGroups, numGroupScanThreadGroups;

private uint[] args = new uint[5];
private MaterialPropertyBlock mpb;

private void Awake()
{
    camera = Camera.main;
    mpb = new MaterialPropertyBlock();
    
    instances = new List<DrawData>();
    LoadInstances();

    drawDataBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
    drawDataBuffer.SetData(instances);
    
    numThreadGroups = Mathf.CeilToInt(instances.Count / 128.0f);
    numGroupScanThreadGroups = Mathf.CeilToInt(instances.Count / 1024.0f);

    cullShader = Resources.Load<ComputeShader>("ComputeShaders/Cull");

    voteBuffer = new ComputeBuffer(instances.Count, 4);
    scanBuffer = new ComputeBuffer(instances.Count, 4);
    groupSumArrayBuffer = new ComputeBuffer(instances.Count, 4);
    scannedGroupSumBuffer = new ComputeBuffer(instances.Count, 4);
    resultBuffer = new ComputeBuffer(instances.Count, Marshal.SizeOf<DrawData>());
    
    argsBuffer = new ComputeBuffer(5, sizeof(uint), ComputeBufferType.IndirectArguments);
    args[0] = mesh.GetIndexCount(0);
    args[1] = (uint)instances.Count;
    args[2] = (uint)mesh.GetIndexStart(0);
    args[3] = (uint)mesh.GetBaseVertex(0);

    mpb.SetBuffer("_DrawData", resultBuffer);
}

private void CullGrass(Matrix4x4 VP)
{

    argsBuffer.SetData(args);       

    // Vote
    cullShader.SetMatrix("MATRIX_VP", VP);
    cullShader.SetBuffer(0, "drawDataBuffer", drawDataBuffer);
    cullShader.SetBuffer(0, "voteBuffer", voteBuffer);
    cullShader.SetVector("cameraPosition", camera.transform.position);
    cullShader.SetFloat("_distance", distanceCutoff);
    cullShader.Dispatch(0, numThreadGroups, 1, 1);
    
    // Scan Instances
    cullShader.SetBuffer(1, "voteBuffer", voteBuffer);
    cullShader.SetBuffer(1, "scanBuffer", scanBuffer);
    cullShader.SetBuffer(1, "groupSumArray", groupSumArrayBuffer);
    cullShader.Dispatch(1, numThreadGroups, 1, 1);
    
    // Scan Groups
    cullShader.SetInt("numOfGroups", numThreadGroups);
    cullShader.SetBuffer(2, "groupSumArrayIn", groupSumArrayBuffer);
    cullShader.SetBuffer(2, "groupSumArrayOut", scannedGroupSumBuffer);
    cullShader.Dispatch(2, numGroupScanThreadGroups, 1, 1);
    
    // Compact
    cullShader.SetBuffer(3, "drawDataBuffer", drawDataBuffer);
    cullShader.SetBuffer(3, "voteBuffer", voteBuffer);
    cullShader.SetBuffer(3, "scanBuffer", scanBuffer);
    cullShader.SetBuffer(3, "argsBuffer", argsBuffer);
    cullShader.SetBuffer(3, "resultBuffer", resultBuffer);
    cullShader.SetBuffer(3, "groupSumArray", scannedGroupSumBuffer);
    cullShader.Dispatch(3, numThreadGroups, 1, 1);
}

private void Update()
{
    Matrix4x4 P = camera.projectionMatrix;
    Matrix4x4 V = camera.transform.worldToLocalMatrix;
    Matrix4x4 VP = P * V; 
    
    CullGrass(VP);
    
    Graphics.DrawMeshInstancedIndirect(mesh, 0, material, new Bounds(Vector3.zero, Vector3.one * 100.0f), argsBuffer, 0, mpb);
}

private void LoadInstances()
{
    instances.Clear();
    foreach (var matrix in matrices)
    {
        instances.Add(new DrawData()
        {
            position = GetPositionFromMatrix(matrix),
            rotation = GetRotationFromMatrix(matrix),
            scale = GetScaleFromMatrix(matrix)
        });
    }

    Debug.Log($"Initialized {instances.Count} instances of grass.");
}

private Vector3 GetPositionFromMatrix(Matrix4x4 matrix)
{
    return matrix.GetColumn(3);
}

private Vector3 GetScaleFromMatrix(Matrix4x4 matrix)
{
    return new Vector3(matrix.GetColumn(0).magnitude, matrix.GetColumn(1).magnitude, matrix.GetColumn(2).magnitude);
}

private Quaternion GetRotationFromMatrix(Matrix4x4 matrix)
{
    float w = Mathf.Sqrt(1 + matrix.m00 + matrix.m11 + matrix.m22) / 2f;
    float x = (matrix.m21 - matrix.m12) / (w * 4);
    float y = (matrix.m02 - matrix.m20) / (w * 4);
    float z = (matrix.m10 - matrix.m01) / (w * 4);

    return new Quaternion(x, y, z, w);
}

private void OnDisable()
{
    argsBuffer?.Release();
    drawDataBuffer?.Release();
    voteBuffer?.Release();
    scanBuffer?.Release();
    groupSumArrayBuffer?.Release();
    scannedGroupSumBuffer?.Release();
    resultBuffer?.Release();
}

Compute shader:

#pragma kernel Vote
#pragma kernel Scan
#pragma kernel ScanGroupSums
#pragma kernel Compact
#pragma kernel ResetArgs

#define NUM_THREAD_GROUPS_X 64

struct DrawData
{
    float3 position;
    float4 rotation;
    float3 scale;
};

RWStructuredBuffer<uint> argsBuffer;
RWStructuredBuffer<DrawData> drawDataBuffer;
RWStructuredBuffer<uint> voteBuffer;
RWStructuredBuffer<uint> scanBuffer;
RWStructuredBuffer<uint> groupSumArray;
RWStructuredBuffer<uint> groupSumArrayIn;
RWStructuredBuffer<uint> groupSumArrayOut;
RWStructuredBuffer<DrawData> resultBuffer;

float4x4 MATRIX_VP;
int numOfGroups;
groupshared uint temp[2 * NUM_THREAD_GROUPS_X];
groupshared uint grouptemp[2 * 1024];
float _distance;
float3 cameraPosition;

[numthreads(128, 1, 1)]
void Vote(uint3 id : SV_DispatchThreadID)
{
    float4 position = float4(drawDataBuffer[id.x].position, 1.0f);

    float4 viewspace = mul(MATRIX_VP, position);

    float3 clipspace = viewspace.xyz;

    clipspace /= -viewspace.w;

    clipspace.x = clipspace.x / 2.0f + 0.5f;
    clipspace.y = clipspace.y / 2.0f + 0.5f;
    clipspace.z = -viewspace.w;

    bool inView = clipspace.x < -0.2f || clipspace.x > 1.2f || clipspace.z <= -0.1f ? 0 : 1;
    bool withinDistance = distance(cameraPosition, position.xyz) < _distance;

    voteBuffer[id.x] = inView * withinDistance;
}

[numthreads(NUM_THREAD_GROUPS_X, 1, 1)]
void Scan(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
          uint3 groupThreadID : SV_GROUPTHREADID)
{
    int tid = (int)id.x;
    int groupTID = (int)groupThreadID.x;
    int groupID = (int)_groupID.x;

    int offset = 1;
    temp[2 * groupTID] = voteBuffer[2 * tid];
    temp[2 * groupTID + 1] = voteBuffer[2 * tid + 1];
    int d;
    int numElements = 2 * NUM_THREAD_GROUPS_X;

    for (d = numElements >> 1; d > 0; d >>= 1)
    {
        GroupMemoryBarrierWithGroupSync();

        if (groupTID < d)
        {
            int ai = offset * (2 * groupTID + 1) - 1;
            int bi = offset * (2 * groupTID + 2) - 1;
            temp[bi] += temp[ai];
        }

        offset *= 2;
    }

    if (groupTID == 0)
    {
        groupSumArray[_groupID.x] = temp[numElements - 1];
        temp[numElements - 1] = 0;
    }

    for (d = 1; d < numElements; d *= 2)
    {
        offset >>= 1;

        GroupMemoryBarrierWithGroupSync();
        if (groupTID < d)
        {
            int ai = offset * (2 * groupTID + 1) - 1;
            int bi = offset * (2 * groupTID + 2) - 1;
            int t = temp[ai];
            temp[ai] = temp[bi];
            temp[bi] += t;
        }
    }

    GroupMemoryBarrierWithGroupSync();

    scanBuffer[2 * tid] = temp[2 * groupTID];
    scanBuffer[2 * tid + 1] = temp[2 * groupTID + 1];
}

[numthreads(1024, 1, 1)]
void ScanGroupSums(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
                   uint3 groupThreadID : SV_GROUPTHREADID)
{
    int tid = (int)id.x;
    int groupTID = (int)groupThreadID.x;
    int groupID = (int)_groupID.x;

    int offset = 1;
    grouptemp[2 * groupTID] = groupSumArrayIn[2 * tid];
    grouptemp[2 * groupTID + 1] = groupSumArrayIn[2 * tid + 1];
    int d;

    for (d = numOfGroups >> 1; d > 0; d >>= 1)
    {
        GroupMemoryBarrierWithGroupSync();

        if (groupTID < d)
        {
            int ai = offset * (2 * groupTID + 1) - 1;
            int bi = offset * (2 * groupTID + 2) - 1;
            grouptemp[bi] += grouptemp[ai];
        }

        offset *= 2;
    }

    if (tid == 0)
        grouptemp[numOfGroups - 1] = 0;

    for (d = 1; d < numOfGroups; d *= 2)
    {
        offset >>= 1;

        GroupMemoryBarrierWithGroupSync();
        if (tid < d)
        {
            int ai = offset * (2 * groupTID + 1) - 1;
            int bi = offset * (2 * groupTID + 2) - 1;
            int t = grouptemp[ai];
            grouptemp[ai] = grouptemp[bi];
            grouptemp[bi] += t;
        }
    }

    GroupMemoryBarrierWithGroupSync();

    groupSumArrayOut[2 * tid] = grouptemp[2 * tid];
    groupSumArrayOut[2 * tid + 1] = grouptemp[2 * tid + 1];
}

[numthreads(128, 1, 1)]
void Compact(uint3 id : SV_DISPATCHTHREADID, uint groupIndex : SV_GROUPINDEX, uint3 _groupID : SV_GROUPID,
             uint3 groupThreadID : SV_GROUPTHREADID)
{
    uint tid = id.x;
    uint groupID = _groupID.x;
    uint groupSum = groupID.x > 0 ? groupSumArray[groupID.x] : 0;
    bool inCamera = voteBuffer[id.x];

    if (inCamera == 1)
    {
        InterlockedAdd(argsBuffer[1], 1);
        resultBuffer[scanBuffer[tid] + groupSum] = drawDataBuffer[tid];
    }
}

[numthreads(1, 1, 1)]
void ResetArgs(uint3 id : SV_DISPATCHTHREADID)
{
    argsBuffer[1] = (uint)0;
}

Video: https://youtu.be/K53G4NpnYxY

\$\endgroup\$
2
  • 1
    \$\begingroup\$ This looks like it could be an edit to your previous question, since it's about the same topic of "how do I correctly implement frustum culling for instanced meshes when using a Unity compute shader" \$\endgroup\$ Commented Mar 3, 2024 at 1:42
  • \$\begingroup\$ It's not an edit, it's a new question because a guy in comments said I should be asking differently, so I'm asking differently. If you can see, I've made quite a progress to a frustum culling and it now works but wrongly. :) \$\endgroup\$ Commented Mar 3, 2024 at 9:40

0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.