Web Scraping Google Shopping Product Online Sellers with Nodejs

What will be scraped

Using Google Product Online Sellers API from SerpApi

This section is to show the comparison between the DIY solution and our solution.

The biggest difference is that you don't need to create the parser from scratch and maintain it.

There's also a chance that the request might be blocked by Google at some point. We handle that on our backend, so there's no need to figure out how to do it yourself or which CAPTCHA-solving or proxy provider to use.

First, we need to install google-search-results-nodejs:

npm i google-search-results-nodejs

Here's the full code example, if you don't need an explanation:

const SerpApi = require("google-search-results-nodejs");
// Search client; the API key from serpapi.com is read from the API_KEY environment variable.
const search = new SerpApi.GoogleSearch(process.env.API_KEY);

// Request parameters handed to search.json().
const params = {
  product_id: "8757849604759505625", // ID of the product to get the results for
  engine: "google_product", // search engine
  device: "desktop", // device used to get the results: "desktop" (default), "tablet", or "mobile"
  hl: "en", // language to use for the Google search
  gl: "us", // country to use for the Google search
  offers: true, // fetch the offers results
};

/**
 * Runs the search with `params` and exposes the callback result as a promise.
 *
 * @returns {Promise<*>} resolves with whatever `search.json` passes to its callback
 */
const getJson = () =>
  new Promise((onResults) => {
    search.json(params, (results) => onResults(results));
  });

/**
 * Fetches the search response and reshapes it: the `product_results` fields are
 * copied to the top level, and the sellers / related products lists are attached
 * under `onlineSellers` and `reletedProducts`.
 *
 * @returns {Promise<object>} the flattened result object
 */
const getResults = async () => {
  const { product_results, sellers_results, related_products } = await getJson();
  return {
    ...product_results,
    onlineSellers: sellers_results?.online_sellers,
    reletedProducts: related_products?.different_brand,
  };
};

// Run the search and print the full result (depth: null prints nested objects completely).
getResults()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    // Report the failure explicitly instead of leaving the rejection unhandled.
    console.error(error);
    process.exitCode = 1;
  });

Code explanation

First, we need to declare SerpApi from the google-search-results-nodejs library and define a new search instance with your API key from SerpApi:

const SerpApi = require("google-search-results-nodejs");
const search = new SerpApi.GoogleSearch(process.env.API_KEY); // your API key from serpapi.com, read from the API_KEY environment variable

Next, we write the necessary parameters for making a request:

// Request parameters handed to the search.
const params = {
  product_id: "8757849604759505625", // ID of the product to get the results for
  engine: "google_product", // search engine
  device: "desktop", // device used to get the results: "desktop" (default), "tablet", or "mobile"
  hl: "en", // language to use for the Google search
  gl: "us", // country to use for the Google search
  offers: true, // fetch the offers results
};

Next, we wrap the search method from the SerpApi library in a promise to further work with the search results:

// Adapts the callback-based search.json() call to a promise.
const getJson = () =>
  new Promise((onResults) => {
    search.json(params, (results) => onResults(results));
  });

And finally, we declare the function getResults that gets data from the page and returns it:

const getResults = async () => {
  ...
};

In this function, we get the JSON with the results and return an object built from the received JSON using spread syntax:

const json = await getJson();
return { 
  ...json.product_results,
  onlineSellers: json.sellers_results?.online_sellers,
  reletedProducts: json.related_products?.different_brand
};

After, we run the getResults function and print all the received information in the console with the console.dir method, which allows you to use an object with the necessary parameters to change default output options:

getResults()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    // Report the failure explicitly instead of leaving the rejection unhandled.
    console.error(error);
    process.exitCode = 1;
  });

Output

{
   "product_id":8757849604759506000,
   "title":"Apple iPhone 14 Pro Max - 128 GB - Space Black - Unlocked",
   "reviews":748,
   "rating":4.5,
   "onlineSellers":[
      {
         "position":1,
         "name":"Apple",
         "link":"https://www.google.com/url?q=https://www.apple.com/us/shop/go/product/MQ8N3%3Fcppart%3DUNLOCKED%26cid%3Daos-us-seo-pla&sa=U&ved=0ahUKEwiMl-6jip_7AhUZLUQIHTI4DPoQ2ykIJQ&usg=AOvVaw1NkUFFfa7AWk6BcJQut1jp",
         "base_price":"$1,099.00",
         "additional_price":{
            "shipping":"$0.00",
            "tax":"$85.17"
         },
         "total_price":"$1,184.17"
      },
    ... and other sellers
   ],
   "reletedProducts":[
      {
         "title":"iPhone 13 Pro Max 128GB Sierra ...",
         "link":"https://www.google.com/shopping/product/10665434407022887951?hl=en&gl=us&ie=UTF-8&prds=epd:17054172175953313994,oid:17054172175953313994,pid:8842852891481692870,rsk:PC_8217023720749633348&sa=X&ved=0ahUKEwiMl-6jip_7AhUZLUQIHTI4DPoQrhIIaA",
         "price":"$0.00"
      },
      ... and other products
   ]
}

DIY Code

If you don't need an explanation, have a look at the full code example in the online IDE

const cheerio = require("cheerio");
const axios = require("axios");

// ID of the product to get the results for.
const productId = "8757849604759505625";

// Options passed along with the axios request: HTTP headers and request parameters.
const AXIOS_OPTIONS = {
  headers: {
    // A browser User-Agent is sent as one way to prevent the request from being blocked.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36",
  },
  params: {
    hl: "en", // language to use for the Google search
    gl: "us", // country to use for the Google search
  },
};

/**
 * Requests the Google Shopping offers page for `productId` and extracts the
 * product summary, its online sellers and the related products from the HTML.
 *
 * @returns {Promise<object>} resolves with `title`, `reviews`, `rating`,
 *   `onlineSellers` and `reletedProducts` read from the page
 */
function getSellersInfo() {
  return axios.get(`https://www.google.com/shopping/product/${productId}/offers`, AXIOS_OPTIONS).then(function ({ data }) {
    const $ = cheerio.load(data);

    return {
      title: $(".BvQan")?.text().trim(),
      // Remove every thousands separator (not only the first) and parse in base 10; 0 when the label is missing.
      reviews: parseInt($(".HiT7Id > span")?.attr("aria-label")?.replace(/,/g, "") || 0, 10),
      rating: parseFloat($(".UzThIf")?.attr("aria-label")),
      onlineSellers: Array.from($(".sh-osd__offer-row")).map((el) => ({
        // The second selector is the fallback for rows without a ".b5ycib" element.
        name: $(el).find(".b5ycib")?.text().trim() || $(el).find(".kjM2Bf")?.text().trim(),
        link: `https://www.google.com${$(el).find(".b5ycib")?.attr("href") || $(el).find(".pCKrrc > a")?.attr("href")}`,
        basePrice: $(el).find(".g9WBQb")?.text().trim(),
        additionalPrice: {
          shipping: $(el).find(".SuutWb tr:nth-child(2) td:last-child")?.text().trim(),
          tax: $(el).find(".SuutWb tr:nth-child(3) td:last-child")?.text().trim(),
        },
        totalPrice: $(el).find(".SuutWb tr:last-child td:last-child")?.text().trim(),
        condition: $(el).find(".Yy9sbf")?.text().trim() || "New",
      })),
      reletedProducts: Array.from($(".xyjbB")).map((el) => ({
        title: $(el).find(".YTkbnd")?.text().trim(),
        link: `https://www.google.com${$(el).find(".YTkbnd")?.attr("href")}`,
        price: $(el).find(".vzbr7d")?.text().trim(),
        // Same separator/radix handling as above; falls back to a placeholder string when nothing parses.
        reviews: parseInt($(el).find(".HiT7Id span")?.attr("aria-label")?.replace(/,/g, ""), 10) || "No reviews",
        rating: parseFloat($(el).find(".UzThIf")?.attr("aria-label")) || "No rating",
      })),
    };
  });
}

// Run the scraper and print the full result (depth: null prints nested objects completely).
getSellersInfo()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    // A failed request or parsing step is reported instead of leaving the rejection unhandled.
    console.error(error);
    process.exitCode = 1;
  });

Preparation

First, we need to create a Node.js* project and add npm packages cheerio to parse parts of the HTML markup, and axios to make a request to a website.

To do this, in the directory with our project, open the command line and enter:

$ npm init -y

And then:

$ npm i cheerio axios

*If you don't have Node.js installed, you can download it from nodejs.org and follow the installation documentation.

Process

First of all, we need to extract data from HTML elements. The process of getting the right CSS selectors is fairly easy via the SelectorGadget Chrome extension, which enables us to grab CSS selectors by clicking on the desired element in the browser. However, it does not always work perfectly, especially when the website relies heavily on JavaScript.

We have a dedicated Web Scraping with CSS Selectors blog post at SerpApi if you want to know a little bit more about them.

The Gif below illustrates the approach of selecting different parts of the results.

Code explanation

Declare constants from cheerio and axios libraries:

const cheerio = require("cheerio");
const axios = require("axios");

Next, we write product ID, the request options: HTTP headers with User-Agent which is used to act as a "real" user visit, and the necessary parameters for making a request:

// ID of the product to get the results for.
const productId = "8757849604759505625";

// Options passed along with the axios request: HTTP headers and request parameters.
const AXIOS_OPTIONS = {
  headers: {
    // A browser User-Agent is sent as one way to prevent the request from being blocked.
    "User-Agent": 
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36",
  },
  params: {
    hl: "en", // language to use for the Google search
    gl: "us", // country to use for the Google search
  },
};

📌Note: The default axios request user-agent is axios/<axios_version>, so websites understand that it's a script that sends the request and might block it. Check what your user-agent is.

Next, we write a function that makes the request and returns the received data. The response from the axios request has a data key, which we destructure and parse with cheerio:

function getSellersInfo() {
  return axios
    .get(`https://www.google.com/shopping/product/${productId}/offers`, AXIOS_OPTIONS)
    .then(function ({ data }) {
      let $ = cheerio.load(data);
    ...
  })
}

Next, we need to get the different parts of the page using the following methods:

      title: $(".BvQan")?.text().trim(),
      reviews: parseInt($(".HiT7Id > span")?.attr("aria-label")?.replace(",", "") || 0),
      rating: parseFloat($(".UzThIf")?.attr("aria-label")),
      onlineSellers: Array.from($(".sh-osd__offer-row")).map((el) => ({
        name: 
          $(el).find(".b5ycib")?.text().trim() ||
          $(el).find(".kjM2Bf")?.text().trim(),
        link: 
          `https://www.google.com${$(el).find(".b5ycib")?.attr("href") ||
          $(el).find(".pCKrrc > a")?.attr("href")}`,
        basePrice: $(el).find(".g9WBQb")?.text().trim(),
        additionalPrice: {
          shipping: $(el).find(".SuutWb tr:nth-child(2) td:last-child")?.text().trim(),
          tax: $(el).find(".SuutWb tr:nth-child(3) td:last-child")?.text().trim(),
        },
        totalPrice: $(el).find(".SuutWb tr:last-child td:last-child")?.text().trim(),
        condition: $(el).find(".Yy9sbf")?.text().trim() || "New",
      })),
      reletedProducts: Array.from($(".xyjbB")).map((el) => ({
        title: $(el).find(".YTkbnd")?.text().trim(),
        link: `https://www.google.com${$(el).find(".YTkbnd")?.attr("href")}`,
        price: $(el).find(".vzbr7d")?.text().trim(),
        reviews: 
          parseInt($(el).find(".HiT7Id span")?.attr("aria-label")?.replace(",", "")) ||
          "No reviews",
        rating: parseFloat($(el).find(".UzThIf")?.attr("aria-label")) || "No rating",
      })),

Now we can launch our parser:

$ node YOUR_FILE_NAME # YOUR_FILE_NAME is the name of your .js file

Output

{
   "title":"Apple iPhone 14 Pro Max - 128 GB - Space Black - Unlocked",
   "reviews":748,
   "rating":4.5,
   "onlineSellers":[
      {
         "name":"AppleOpens in a new window",
         "link":"https://www.google.com/url?q=https://www.apple.com/us/shop/go/product/MQ8N3%3Fcppart%3DUNLOCKED%26cid%3Daos-us-seo-pla&sa=U&ved=0ahUKEwj18pvWgp_7AhW6BjQIHasACEEQ2ykIJQ&usg=AOvVaw22XYRR7KYv5JrrFHZOKXPK",
         "basePrice":"$1,099.00",
         "additionalPrice":{
            "shipping":"$0.00",
            "tax":"$97.54"
         },
         "totalPrice":"$1,196.54",
         "condition":"New"
      },
      ... and other sellers
   ],
   "reletedProducts":[
      {
         "title":"iPhone 13 Pro Max 128GB Sierra ...",
         "link":"https://www.google.com/shopping/product/10665434407022887951?hl=en&gl=us&prds=epd:17054172175953313994,oid:17054172175953313994,pid:8842852891481692870,rsk:PC_8217023720749633348&sa=X&ved=0ahUKEwj18pvWgp_7AhW6BjQIHasACEEQrRIIbQ",
         "price":"$0.00",
         "reviews":11327,
         "rating":4.5
      },
    ... and other products
   ]
}

If you want to see some projects made with SerpApi, write me a message.


Join us on Twitter | YouTube

Add a Feature Request💫 or a Bug🐞