], }, { id: "podcasts", terms: ["podcast", "podcasts"], domains: [ "podcasts.apple.com", "overcast.fm", "pocketcasts.com", "castbox.fm", ], }, { id: "papers_research", terms: [ "paper", "papers", "research paper", "research papers", "academic paper", "academic papers", "journal", "journals", "study", "studies", "publication", "publications", ], domains: [ "scholar.google.com", "arxiv.org", "semanticscholar.org", "pubmed.ncbi.nlm.nih.gov", "researchgate.net", "ieeexplore.ieee.org", "dl.acm.org", "springer.com", "nature.com", "science.org", ], }, { id: "tech_news", terms: ["tech news", "technology news", "startup news"], domains: [ "theverge.com", "techcrunch.com", "wired.com", "arstechnica.com", "engadget.com", ], }, { id: "finance_news", terms: ["finance news", "business news", "market news", "stock news"], domains: [ "bloomberg.com", "wsj.com", "ft.com", "reuters.com", "cnbc.com", ], }, { id: "news", terms: [ "news", "headline", "headlines", "breaking news", "world news", "latest news", ], domains: [ "reuters.com", "apnews.com", "bbc.com", "cnn.com", "nytimes.com", "theguardian.com", "washingtonpost.com", "aljazeera.com", "npr.org", "wsj.com", "bloomberg.com", "ft.com", ], }, { id: "recipes", terms: [ "recipe", "recipes", "cooking", "food", "dinner ideas", "meal prep", ], domains: [ "allrecipes.com", "seriouseats.com", "foodnetwork.com", "bbcgoodfood.com", "epicurious.com", "nytcooking.com", ], }, { id: "travel", terms: ["travel", "hotels", "places", "destinations", "things to do"], domains: [ "tripadvisor.com", "booking.com", "expedia.com", "airbnb.com", "lonelyplanet.com", ], }, ], }; /** * Normalizes a query string into a lowercase, space-separated form suitable for matching * and comparison. * * @param {string} s * @returns {string} */ function normalizeQuery(s) { return (s || "") .toLowerCase() .replace(/[^\p{L}\p{N}]+/gu, " ") .replace(/\s+/g, " ") .trim(); } /** * Returns the matched category domains if searchTerm looks like a general category query. * Uses phrase matching on normalized query string. * * @param {string} searchTerm * @param {object} [categoriesJson=CATEGORIES_JSON] * @returns {string[]|null} */ export function matchDomains(searchTerm, categoriesJson = CATEGORIES_JSON) { const q = ` ${normalizeQuery(searchTerm)} `; if (!q.trim()) { return null; } for (const cat of categoriesJson.categories) { for (const t of cat.terms) { // Pad with spaces to enable whole-token phrase matching via includes. const tt = ` ${normalizeQuery(t)} `; if (tt.trim() && q.includes(tt)) { return cat.domains; } } } return null; } /** * Builds a SQL WHERE clause for matching `http`/`https` URLs belonging * to the given root domains and their `www` variants. * * @param {string[]} domains * @returns {{ where: string, params: object }} */ function buildDomainUrlWhere(domains) { const clauses = []; const params = {}; let i = 0; for (const raw of domains || []) { const d = String(raw).toLowerCase(); if (!d) { continue; } // - https://domain/... // - https://www.domain/... params[`d${i}`] = `%://${d}/%`; clauses.push(`lower(url) LIKE :d${i++}`); params[`d${i}`] = `%://www.${d}/%`; clauses.push(`lower(url) LIKE :d${i++}`); } return { where: clauses.length ? `(${clauses.join(" OR ")})` : "0", params, }; } /** * Domain-filtered moz_places query (time-windowed). * * @param {object} params * @param {object} params.conn * @param {string[]} params.domains * @param {number|null} params.startTs * @param {number|null} params.endTs * @param {number} params.historyLimit * @param {Function} params.buildHistoryRow * @returns {Promise} */ export async function searchByDomains({ conn, domains, startTs, endTs, historyLimit, buildHistoryRow, }) { if (!conn || !Array.isArray(domains) || !domains.length) { return []; } const { where, params } = buildDomainUrlWhere(domains); const results = await conn.executeCached( ` SELECT id, title, url, NULL AS distance, visit_count, frecency, last_visit_date, preview_image_url FROM moz_places WHERE frecency <> 0 AND (:startTs IS NULL OR last_visit_date >= :startTs) AND (:endTs IS NULL OR last_visit_date <= :endTs) AND ${where} ORDER BY last_visit_date DESC, frecency DESC LIMIT :limit `, { startTs, endTs, limit: historyLimit, ...params, } ); const rows = []; for (const row of results) { rows.push(await buildHistoryRow(row)); } return rows; } /** * Merge two result lists, keeping `primary` order, then topping up from `secondary`, * while de-duping by url (fallback to id). * * @param {object[]} primary * @param {object[]} secondary * @param {number} limit * @returns {object[]} */ export function mergeDedupe(primary, secondary, limit) { const seen = new Set(); const out = []; const keyOf = r => r?.url || r?.id; for (const r of primary || []) { const k = keyOf(r); if (!seen.has(k)) { seen.add(k); out.push(r); if (out.length >= limit) { return out; } } } for (const r of secondary || []) { const k = keyOf(r); if (!seen.has(k)) { seen.add(k); out.push(r); if (out.length >= limit) { return out; } } } return out; } export const SearchBrowsingHistoryDomainBoost = Object.freeze({ matchDomains, searchByDomains, mergeDedupe, }); PK