/**
 * Used temporarily until I get access to Amazon Product API.
 */
import cheerio, { type CheerioAPI } from 'cheerio';
import { type AmazonProductDetails } from '../old-data/products/amazon-product-details';
import { type ProductAttribute } from '../old-data/products/product-attribute';
import { parseNumberFromSelector } from './utils';

/**
 * CSS selectors for the product details.
 * Feel free to figure out different variations of these selectors.
 *
 * `ATTRIBUTES_LABEL` and `ATTRIBUTES_VALUE` are applied relative to each row
 * matched by `PRODUCT_ATTRIBUTE_ROWS`, not to the whole document.
 */
const SELECTORS = {
  TITLE: 'span#productTitle',
  PRICE: 'span.priceToPay',
  // LIST_PRICE: 'span.basisPrice .a-offscreen',
  FEATURE_BULLETS: '#feature-bullets li span.a-list-item',
  DESCRIPTION: '#productDescription',
  REVIEW_RATING: '#acrPopover a > span',
  REVIEW_COUNT: '#acrCustomerReviewText',
  IMAGES: '#altImages .item img',
  PRODUCT_ATTRIBUTE_ROWS: '#productOverview_feature_div tr',
  ATTRIBUTES_LABEL: 'td:nth-of-type(1) span',
  ATTRIBUTES_VALUE: 'td:nth-of-type(2) span',
} as const;

/**
 * Extracts the product image URLs from the given Cheerio object.
 * - We have to iterate over the image elements and extract the `src` attribute.
 *
 * @param $ - Cheerio instance loaded with the product page markup.
 * @returns The `src` value of every element matched by `SELECTORS.IMAGES`.
 */
const extractImageUrls = ($: CheerioAPI): string[] => {
  const imageUrls = $(SELECTORS.IMAGES)
    // Cheerio's `map()` drops `null`/`undefined` results, so images without a `src` are skipped.
    .map((_, imageEl) => $(imageEl).attr('src'))
    // `get()` converts the Cheerio collection into a plain array.
    .get();

  return imageUrls;
};

/**
 * Extracts the product attributes from the given Cheerio object.
 * - We have to iterate over the attribute rows and extract both label and value for each row.
 *
 * @param $ - Cheerio instance loaded with the product page markup.
 * @returns One `{ label, value }` pair per row matched by `SELECTORS.PRODUCT_ATTRIBUTE_ROWS`.
 */
const extractProductAttributes = ($: CheerioAPI): ProductAttribute[] => {
  const attributeRowEls = $(SELECTORS.PRODUCT_ATTRIBUTE_ROWS).get();

  const attributeRows = attributeRowEls.map((rowEl) => {
    const row = $(rowEl);

    // Trim surrounding whitespace so attributes are normalized the same way as
    // the title, description and feature bullets.
    const label = row.find(SELECTORS.ATTRIBUTES_LABEL).text().trim();
    const value = row.find(SELECTORS.ATTRIBUTES_VALUE).text().trim();

    return { label, value };
  });

  return attributeRows;
};

/**
 * Extracts the feature bullets from the given Cheerio object.
 * - We have to iterate over the li elements and extract the text.
 *
 * @param $ - Cheerio instance loaded with the product page markup.
 * @returns The trimmed text of every element matched by `SELECTORS.FEATURE_BULLETS`.
 */
const extractFeatureBullets = ($: CheerioAPI): string[] => {
  const featureBullets = $(SELECTORS.FEATURE_BULLETS)
    .map((_, featureBulletEl) => $(featureBulletEl).text().trim())
    // `get()` converts the Cheerio collection into a plain array.
    .get();

  return featureBullets;
};

/**
 * Scrapes the product details from the given Cheerio object.
 *
 * @param $ - Cheerio instance loaded with the product page markup.
 * @returns The scraped details. Text fields are trimmed; `price`, `reviewRating`
 *   and `reviewCount` are whatever `parseNumberFromSelector` (from `./utils`)
 *   yields for their selectors.
 */
export const extractProductDetails = ($: CheerioAPI): AmazonProductDetails => {
  const title = $(SELECTORS.TITLE).text().trim();
  // `text()` always returns a string, so no non-null assertion is needed here.
  const description = $(SELECTORS.DESCRIPTION).text().trim();

  const price = parseNumberFromSelector($, SELECTORS.PRICE);
  // const listPrice = parseNumberFromSelector($, SELECTORS.LIST_PRICE);
  const reviewRating = parseNumberFromSelector($, SELECTORS.REVIEW_RATING);
  const reviewCount = parseNumberFromSelector($, SELECTORS.REVIEW_COUNT);

  const imageUrls = extractImageUrls($);
  const attributes = extractProductAttributes($);
  const featureBullets = extractFeatureBullets($);

  return {
    title,
    description,
    featureBullets,
    price,
    /*listPrice,*/
    reviewCount,
    reviewRating,
    imageUrls,
    attributes,
  };
};