Skip to content

TestDelayed.test_delayed_subsample__memusage randomly fails in the CI #692

Open
@vschaffn

Description

@vschaffn

The failure only occurs on the Ubuntu-latest CI runner.

chunksizes_in_mem = (1000, 1000), subsample_size = 100
cluster = LocalCluster(45e6d9e3, 'tcp://127.0.0.1:38789', workers=1, threads=1, memory=15.61 GiB)

    @pytest.mark.parametrize("fn", [fn_large])  # type: ignore
    @pytest.mark.parametrize("chunksizes_in_mem", [(1000, 1000), (2500, 2500)])  # type: ignore
    @pytest.mark.parametrize("subsample_size", [100, 100000])  # type: ignore
    def test_delayed_subsample__memusage(
        self, fn: str, chunksizes_in_mem: tuple[int, int], subsample_size: int, cluster: Any
    ):
        """
        Checks for delayed subsampling function for memory usage on big file.
        (and also runs output checks as not long or too memory intensive in this case)
        Variables that influence memory usage are:
        - Subsample sizes,
        - Chunksizes in memory.
        """
    
        # Only check on linux
        if sys.platform == "linux":
    
            # 0/ Open dataset with chunks
            ds = xr.open_dataset(fn, chunks={"x": chunksizes_in_mem[0], "y": chunksizes_in_mem[1]})
            darr = ds["test"].data
    
            # 1/ Estimation of theoretical memory usage of the subsampling script
    
            max_op_memusage = _estimate_subsample_memusage(
                darr=darr, chunksizes_in_mem=chunksizes_in_mem, subsample_size=subsample_size
            )
    
            # 2/ Run delayed subsample with dask memory usage monitoring
    
            # Derive subsample from delayed function
            # (passed to wrapper function to measure memory usage during execution)
            sub, measured_op_memusage = _run_dask_measuring_memusage(
                cluster, delayed_subsample, darr, subsample=subsample_size, random_state=42
            )
    
            # Check the measured memory usage is smaller than the maximum estimated one
>           assert measured_op_memusage < max_op_memusage
E           assert np.float64(148.85546875) < np.float64(102.48738861083984)

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions