The reason this is not working for you is that the HTML of the page you want is dynamic: it is generated on the client side by JavaScript code, so a plain HTTP request only returns the initial, unrendered markup.
We can still scrape the data, but we must use something like Puppeteer (Zombie.js or another headless browser might work too.) I'll use Puppeteer for this example though.
We load the page you want in the headless browser, then parse the resulting HTML in much the same way you did before.
I'm also using the user-agents package to generate a random User-Agent string, which helps avoid triggering a CAPTCHA.
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const userAgent = require('user-agents');
/**
 * Loads a page in a Puppeteer-launched browser and returns its HTML after
 * client-side scripts have run.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string|null>} The `outerHTML` of the document's first
 *   element, or `null` if launching, navigating, or evaluating failed (the
 *   error is written to `console.error`).
 */
async function getDynamicPageHtml(url) {
  try {
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      // `userAgent` is the class exported by user-agents: it has to be
      // instantiated, otherwise toString() yields the class source text
      // instead of a User-Agent string.
      await page.setUserAgent(new userAgent().toString());
      await page.goto(url, { waitUntil: 'networkidle0' });
      return await page.evaluate(() => document.querySelector('*').outerHTML);
    } finally {
      // Always release the browser process, even when navigation fails.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
    return null;
  }
}
/**
 * Scrapes the text of every `span.oopStage-variantThumbnailsFromPrice`
 * element from a rendered product page.
 *
 * @param {string} [url] - Page to scrape; defaults to the idealo.es listing
 *   this function was originally hard-wired to.
 * @returns {Promise<string[]>} Trimmed text of each matching element, in
 *   document order (empty when nothing matches).
 * @throws {Error} If the page HTML could not be retrieved.
 */
async function iniciar(
  url = 'https://www.idealo.es/precios/4102124/the-north-face-men-s-mcmurdo-parka-tnf-black.html',
) {
  const html = await getDynamicPageHtml(url);
  // getDynamicPageHtml reports failure by returning null; stop here with a
  // clear message instead of handing null to cheerio.
  if (html == null) {
    throw new Error(`iniciar: could not retrieve HTML for ${url}`);
  }

  const $ = cheerio.load(html);
  const price = $('span.oopStage-variantThumbnailsFromPrice')
    .map((index, element) => $(element).text().trim())
    .toArray();

  console.log('iniciar: price:', price);
  return price;
}
// Public API of this module: only the scraping entry point is exported.
module.exports = { iniciar };
I'm getting the below output when I call iniciar:
iniciar: price: [ '294,99 €' ]
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…