0

I have this with node 25.0.0 and Windows and puppeteer. Basically the get_card_search_results is called and it loads a page that features lazy loading. It will "scroll" the page down to ensure that the entire page loads.

This part works with no issues:

const puppeteer = require('puppeteer');
var browser;

var puppeteer_options = {
    headless: true, 
    ignoreHTTPSErrors: true, 
    args: ['--disable-dev-shm-usage', '--shm-size=4gb'],
    defaultViewport: {width: 1920, height: 1080},
    protocolTimeout: 1500000, //https://www.puppeteersharp.com/api/PuppeteerSharp.LaunchOptions.html
}

try
{
    browser = await puppeteer.launch(puppeteer_options);

    var page = await browser.newPage();
}
catch (err) {
    console.log(err);
}

async function get_card_search_results(options)
{
   await scroll_current_page({url:url, gp_args:gp_args})
}

async function scroll_current_page(options)
{
    await page.goto(url, {waitUntil: 'networkidle2'}); //, {waitUntil: 'networkidle2'}

    await page.setDefaultNavigationTimeout(0)

    cli_log({msg: 'starting scroll', log_name: log_name})

    //enable logging inside page.evaluate
        await page.exposeFunction('logInNodeJs', (value) => cli_log({msg: value, log_name: log_name}));

    //scroll down the page to get all html
        await page.evaluate(async ([gp_args]) => {
            var retval = true
            await new Promise((resolve) => {
                var retval = {}
                
                var el = document.documentElement

                logInNodeJs('scrollHeight: ' + el.scrollHeight)

                var cur_scroll_top = el.scrollTop
                logInNodeJs('cur_scroll_top start: ' + cur_scroll_top)
                var prev_scroll_top = cur_scroll_top
                logInNodeJs('prev_scroll_top start: ' + prev_scroll_top)

                scroll_page({
                    el: el, 
                    cur_scroll_top: cur_scroll_top, 
                    prev_scroll_top: prev_scroll_top,
                    safety: 0,
                })

                function scroll_page(options)
                {
                    var el = options.el
                    var cur_scroll_top = options.cur_scroll_top
                    var prev_scroll_top = options.prev_scroll_top
                    var safety = options.safety
                    
                    el.scrollTop += gp_args['scroll_step']
                    
                    
                    var cur_scroll_top = el.scrollTop
                    //ProtocolError: Runtime.callFunctionOn timed out. Increase the 'protocolTimeout' setting in launch/connect calls for a higher timeout if needed.
                    logInNodeJs('cur_scroll_top: ' + cur_scroll_top + ' previous: ' + prev_scroll_top)
                    
                    var max_scroll = false
                    if(gp_args['max_scroll'])
                    {
                        max_scroll = (cur_scroll_top >= gp_args['max_scroll'])
                    }
                    
                    if((cur_scroll_top == prev_scroll_top) || max_scroll)
                    {
                        logInNodeJs('end reached!')
                        resolve(true);
                    }
                    else
                    {
                        var prev_scroll_top = cur_scroll_top
                        logInNodeJs('prev_scroll_top: ' + prev_scroll_top)
                        
                        setTimeout(function(){scroll_page({
                            el: el, 
                            cur_scroll_top: cur_scroll_top, 
                            prev_scroll_top: prev_scroll_top,
                            safety: safety,
                        })}, 1375)

                    }
                }
                
                return true;
            })
            
            return retval
        }, [gp_args])
    
    return page
}

I get output like:

cl: starting scroll
cl: scrollHeight: 4907
cl: cur_scroll_top start: 0
cl: prev_scroll_top start: 0
cl: cur_scroll_top: 500 previous: 0
cl: prev_scroll_top: 500
cl: cur_scroll_top: 1000 previous: 500
cl: prev_scroll_top: 1000
cl: cur_scroll_top: 1500 previous: 1000
cl: prev_scroll_top: 1500
cl: cur_scroll_top: 2000 previous: 1500
cl: prev_scroll_top: 2000
cl: cur_scroll_top: 2500 previous: 2000
cl: prev_scroll_top: 2500
cl: cur_scroll_top: 3000 previous: 2500
cl: prev_scroll_top: 3000
cl: cur_scroll_top: 3500 previous: 3000
cl: prev_scroll_top: 3500
cl: cur_scroll_top: 3825 previous: 3500
cl: prev_scroll_top: 3825
cl: cur_scroll_top: 3825 previous: 3825
cl: end reached!
cl: scroll ended

However, if I modify the get_card_search_results function like this and add this second part to extract some information from the page:

async function get_card_search_results(options)
{
    await scroll_current_page({url:url, gp_args:gp_args})

    var total_pages = await page.evaluate(([gp_args]) => {
        var retval = false
        await new Promise((resolve) => {
            var el = document.querySelector('section[data-testid="paginationWrapper"] section article:nth-child(2)')
                
            var retval = el.childNodes.length
        })
    return retval

    }, [gp_args])
}

the entire script hangs at this point with this:

TimeoutError: Navigation timeout of 30000 ms exceeded
    at new Deferred (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:60:34)
    at Deferred.create (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\Deferred.js:21:16)
    at new LifecycleWatcher (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\LifecycleWatcher.js:73:60)
    at CdpFrame.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js:149:29)
    at CdpFrame.<anonymous> (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\util\decorators.js:109:27)
    at CdpPage.goto (c:\code\get_the_data\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js:572:43)
    at scroll_current_page (c:\code\get_the_data\get_the_data.js:831:14)
    at get_card_search_results (c:\code\get_the_data\get_the_data.js:804:10)
    at c:\code\get_the_data\get_the_data.js:279:22

I haven't been able to figure out why adding this second page.evaluate causes it to hang.

3
  • Can you share a minimal reproducible example? Much of these funcs are undefined and there is no URL, so I can't run this to repro the issue. What info do you want to get on the page, exactly? Please provide all details! Thanks. Commented Nov 9, 2025 at 2:42
  • 1
    In var total_pages, you're not resolving the promise. I also think you'll be fine without Promise(). Commented Nov 9, 2025 at 11:03
  • @ninadepina Good point, but I don't think that matches OP's navigation error. Commented Nov 9, 2025 at 15:55

0

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.