end detective", "pretend ghost", "pretend pirate", "pretend robot", "pretend superhero", "pretend teacher", "pretend wizard", "random fact", "random number", "roll dice", "goodbye", "simulate chat", "simulate future", "simulate past", "sing like robot", "sing lullaby", "sing rap", "sup", "surprise me", "teach me", "tell bedtime story", "tell fortune", "tell joke", "tell prophecy", "tell riddle", "tell story", "what is art", "what is beauty", "what is death", "what is freedom", "what is justice", "what is love", "what is mind", "what is reality", "what is right", "what is self", "what is soul", "what is time", "what is truth", "what is wrong", "what model are you", "what version", "what’s up", "which model are you", "who am i", "who are you", "who made you", "why are we", "write a poem", "write a song", "write haiku", "write quote", "your model is", ]; export function normalizeTextForChatAllowlist(s) { return s.toLowerCase().normalize("NFKC").replace(/\s+/g, " ").trim(); } // Split on non-word chars; letters/numbers/_ are "word" characters export function tokenizeTextForChatAllowlist(s) { return normalizeTextForChatAllowlist(s) .split(/[^\p{L}\p{N}_]+/u) .filter(Boolean); } export function buildChatAllowlist(phrases) { const byLen = new Map(); // len -> Set("tok tok ...") for (const p of phrases) { const key = tokenizeTextForChatAllowlist(p).join(" "); if (!key) { continue; } const k = key.split(" ").length; if (!byLen.has(k)) { byLen.set(k, new Set()); } byLen.get(k).add(key); } return byLen; } // Factory: returns a fast checker for “does query contain any isolated phrase?” export function makeIsolatedPhraseChecker(phrases) { const byLen = buildChatAllowlist(phrases); const cache = new Map(); return function containsIsolatedPhrase(query) { const qNorm = normalizeTextForChatAllowlist(query); if (cache.has(qNorm)) { return cache.get(qNorm); } const toks = qNorm.split(/[^\p{L}\p{N}_]+/u).filter(Boolean); for (const [k, set] of byLen) { for (let i = 0; i + k <= toks.length; i++) { if (set.has(toks.slice(i, i + k).join(" "))) { cache.set(qNorm, true); return true; } } } cache.set(qNorm, false); return false; }; } /** * Intent Classifier Engine */ export const IntentClassifier = { /** * Exposing createEngine for testing purposes. */ _createEngine: createEngine, /** * Initialize forced-chat checker at module load. * Keeping it as a property ensures easy stubbing in tests. */ _isForcedChat: makeIsolatedPhraseChecker(FORCED_CHAT_PHRASES), /** * Gets the intent of the prompt using a text classification model. * * @param {string} prompt * @returns {string} "search" | "chat" */ async getPromptIntent(query) { try { const cleanedQuery = this._preprocessQuery(query); if (this._isForcedChat(cleanedQuery)) { return "chat"; } const engine = await this._createEngine({ featureId: "smart-intent", modelId: "mozilla/mobilebert-query-intent-detection", modelRevision: "v0.2.0", taskName: "text-classification", }); const threshold = 0.8; const resp = await engine.run({ args: [[cleanedQuery]] }); // resp example: [{ label: "chat", score: 0.95 }, { label: "search", score: 0.04 }] if ( resp[0].label.toLowerCase() === "chat" && resp[0].score >= threshold ) { return "chat"; } return "search"; } catch (error) { console.error("Error using intent detection model:", error); throw error; } }, // Helper function for preprocessing text input _preprocessQuery(query) { if (typeof query !== "string") { throw new TypeError( `Expected a string for query preprocessing, but received ${typeof query}` ); } return query.replace(/\?/g, "").trim(); }, }; PK