Puppeteer Scrape Webpage

Kevin FOO
2 min read · Jun 8, 2020

--

There are several ways to interact with a page and later scrape its results with Puppeteer. In my example, I’ll be querying Google to check for the price of Bitcoin.

Javascript method

Copy the contents below and paste them into your script. This method runs Javascript inside the page with page.evaluate and looks up each element with document.querySelector.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin price" using in-page Javascript, logs the text
 * of the price element and saves a screenshot to `screenshot.png`.
 *
 * The browser is always closed, even when a step fails, so a failed run does
 * not leave a browser process behind.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws {Error} If the price element cannot be found on the results page,
 *   or if any Puppeteer call (navigation, selector wait, screenshot) rejects.
 */
async function run () {
  const browser = await puppeteer.launch({
    defaultViewport: {width: 1920, height: 1080}
  });

  try {
    //reuse the blank tab opened during puppeteer launch
    const pages = await browser.pages();
    const page = pages[0];
    await page.goto('https://www.google.com');

    //wait for search box
    await page.waitForSelector('input[name="q"]');
    await page.evaluate(() => {
      const input = document.querySelector('input[name="q"]');
      input.value = 'bitcoin price';
      const search = document.querySelector('input[name="btnK"]');
      search.click();
    });

    //wait for results
    await page.waitForSelector('div[id="result-stats"]');
    const price = await page.evaluate(() => {
      // NOTE(review): this matches the full class attribute exactly, so it
      // breaks as soon as the page adds, removes or reorders a class name.
      const div = document.querySelector('div[class="dDoNo ikb4Bb vk_bk gsrt gzfeS"]');
      // Return null instead of dereferencing a missing element so the caller
      // can report a meaningful error.
      return div ? div.textContent : null;
    });
    if (price === null) {
      throw new Error('Price element not found on the results page');
    }

    console.log(price);
    await page.screenshot({path: 'screenshot.png'});
  } finally {
    // Await the close so the process does not exit (or leak) mid-shutdown.
    await browser.close();
  }
}
run();

CSS selector method

This method sets the value of the text box in the same way as the Javascript method, but instead of running a block of code inside page.evaluate, it targets each element directly with a CSS selector using page.$eval.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin price" by addressing each element directly
 * with a CSS selector through `page.$eval`, logs the text of the price
 * element and saves a screenshot to `screenshot.png`.
 *
 * The browser is always closed, even when a step fails, so a failed run does
 * not leave a browser process behind.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws {Error} If any Puppeteer call rejects (navigation, selector wait,
 *   `$eval` on a selector that matches nothing, screenshot).
 */
async function run () {
  const browser = await puppeteer.launch({
    defaultViewport: {width: 1920, height: 1080}
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com');

    //wait for search box
    await page.waitForSelector('input[name="q"]');
    // The search text is passed as an extra argument because the callback
    // runs in the page and cannot see variables from this script.
    await page.$eval('input[name="q"]', (el, q) => { el.value = q; }, 'bitcoin price');
    await page.$eval('input[name="btnK"]', el => el.click());

    //wait for results
    await page.waitForSelector('div[id="result-stats"]');
    // NOTE(review): this matches the full class attribute exactly, so it
    // breaks as soon as the page adds, removes or reorders a class name.
    const price = await page.$eval('div[class="dDoNo vk_bk gsrt gzfeS"]', el => el.textContent);
    console.log(price);
    await page.screenshot({path: 'screenshot.png'});
  } finally {
    // Await the close so the process does not exit (or leak) mid-shutdown.
    await browser.close();
  }
}
run();

Keyboard type method

I have encountered pages that do not accept text box values that were set using Javascript. The workaround is to simulate keyboard typing.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin price" by focusing the search box and
 * simulating key presses, logs the text of the price element and saves a
 * screenshot to `screenshot.png`.
 *
 * Use this variant for pages that ignore input values assigned from
 * Javascript. The browser is always closed, even when a step fails, so a
 * failed run does not leave a browser process behind.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws {Error} If any Puppeteer call rejects (navigation, selector wait,
 *   `$eval` on a selector that matches nothing, screenshot).
 */
async function run () {
  const browser = await puppeteer.launch({
    defaultViewport: {width: 1920, height: 1080}
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com');

    //wait for search box
    await page.waitForSelector('input[name="q"]');
    // Keyboard input goes to the focused element, so focus the box first.
    await page.$eval('input[name="q"]', el => el.focus());
    // Same query as the other two methods; 100 ms between key presses.
    await page.keyboard.type('bitcoin price', {delay: 100});
    await page.$eval('input[name="btnK"]', el => el.click());

    //wait for results
    await page.waitForSelector('div[id="result-stats"]');
    // NOTE(review): this matches the full class attribute exactly, so it
    // breaks as soon as the page adds, removes or reorders a class name.
    const price = await page.$eval('div[class="dDoNo vk_bk gsrt gzfeS"]', el => el.textContent);
    console.log(price);
    await page.screenshot({path: 'screenshot.png'});
  } finally {
    // Await the close so the process does not exit (or leak) mid-shutdown.
    await browser.close();
  }
}
run();

< Back to all the stories I have written

--

--

Kevin FOO
Kevin FOO

Written by Kevin FOO

A software engineer, a rock climbing, inline skating enthusiast, a husband, a father.

No responses yet