CH_MODES.map(mode => {
        let name = lazy.UrlbarUtils.getResultSourceName(mode.source);
        return { id: `urlbar-search-mode-${name}` };
      })
    );

    let englishSearchStrings = new Localization([
      "preview/enUS-searchFeatures.ftl",
    ]);
    let englishKeywords = await englishSearchStrings.formatValues(
      lazy.UrlbarUtils.LOCAL_SEARCH_MODES.map(mode => {
        let name = lazy.UrlbarUtils.getResultSourceName(mode.source);
        return { id: `urlbar-search-mode-${name}-en` };
      })
    );

    for (let { restrict } of lazy.UrlbarUtils.LOCAL_SEARCH_MODES) {
      let uniqueKeywords = [
        ...new Set([l10nKeywords.shift(), englishKeywords.shift()]),
      ];
      tokenToKeywords.set(restrict, uniqueKeywords);
    }
  },

  /**
   * Gets the cached localized restrict keywords. If the keywords are not
   * cached yet, fetches the localized keywords first and then returns them.
   *
   * @returns {Map} A map from restriction token to its localized keywords.
   */
  async getL10nRestrictKeywords() {
    if (tokenToKeywords.size === 0) {
      await this.loadL10nRestrictKeywords();
    }
    return tokenToKeywords;
  },

  /**
   * Tokenizes the searchString from a UrlbarQueryContext.
   *
   * @param {object} context
   * @param {string} context.searchString
   * @param {string} [context.searchMode]
   * @param {string} context.trimmedSearchString
   * @returns {UrlbarSearchStringTokenData[]}
   *   The tokens associated with the query.
   */
  tokenize(context) {
    lazy.logger.debug("Tokenizing search string", {
      searchString: context.searchString,
    });
    if (!context.trimmedSearchString) {
      return [];
    }
    let unfiltered = splitString(context);
    return filterTokens(unfiltered);
  },

  /**
   * Given a token, tells whether it's a restriction token.
   *
   * @param {object} token
   *   The token to check.
   * @returns {boolean} Whether the token is a restriction character.
   */
  isRestrictionToken(token) {
    return (
      token &&
      token.type >= this.TYPE.RESTRICT_HISTORY &&
      token.type <= this.TYPE.RESTRICT_URL
    );
  },
};

const CHAR_TO_TYPE_MAP = new Map(
  Object.entries(UrlbarTokenizer.RESTRICT).map(([type, char]) => [
    char,
    UrlbarTokenizer.TYPE[`RESTRICT_${type}`],
  ])
);

/**
 * Given a queryContext object, splits its searchString into string tokens.
 *
 * @param {object} context
 * @param {string} context.searchString
 * @param {string} [context.searchMode]
 * @returns {string[]} An array of string tokens.
 */
function splitString({ searchString, searchMode }) {
  // The first step is splitting on Unicode whitespace. We skip the splitting
  // when the search string starts with "data:", to better support web
  // developers and for compatibility with other browsers.
  let trimmed = searchString.trim();
  let tokens;
  if (trimmed.startsWith("data:")) {
    tokens = [trimmed];
  } else if (trimmed.length < 500) {
    tokens = trimmed.split(lazy.UrlUtils.REGEXP_SPACES);
  } else {
    // If the string is very long, tokenizing all of it would be expensive, so
    // we only tokenize the first part and let the last token become a
    // catch-all for the rest.
    tokens = trimmed.substring(0, 500).split(lazy.UrlUtils.REGEXP_SPACES);
    tokens[tokens.length - 1] += trimmed.substring(500);
  }
  if (!tokens.length) {
    return tokens;
  }

  // If there is no separate restriction token, we may have to split a token:
  // either the first one, if it includes a leading restriction char, or the
  // last one, if it includes a trailing restriction char. This way the user
  // is not required to add artificial whitespace to enforce restrictions; for
  // example, typing a question ending in "?" still restricts to search
  // results.
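  // For instance (illustrative), a search string like "?weather" is split
  // below into ["?", "weather"], so that filterTokens() can later recognize
  // the leading "?" as a search restriction token.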
  const hasRestrictionToken = tokens.some(t => CHAR_TO_TYPE_MAP.has(t));
  const firstToken = tokens[0];
  const isFirstTokenAKeyword =
    !Object.values(UrlbarTokenizer.RESTRICT).includes(firstToken) &&
    lazy.PlacesUtils.keywords.isKeywordFromCache(firstToken);

  if (hasRestrictionToken || isFirstTokenAKeyword) {
    return tokens;
  }

  // Check for an unambiguous restriction char at the beginning of the first
  // token.
  if (
    CHAR_TO_TYPE_MAP.has(firstToken[0]) &&
    !lazy.UrlUtils.REGEXP_PERCENT_ENCODED_START.test(firstToken) &&
    !searchMode
  ) {
    tokens[0] = firstToken.substring(1);
    tokens.splice(0, 0, firstToken[0]);
    return tokens;
  }

  return tokens;
}

/**
 * Given an array of unfiltered tokens, this function filters them and converts
 * them to token objects with a type.
 *
 * @param {Array} tokens
 *   An array of strings, representing search tokens.
 * @returns {Array} An array of token objects.
 * Note: restriction characters are only considered if they appear at the start
 *       or at the end of the tokens list. When restriction characters
 *       conflict, the outermost ones win, and leading ones win over trailing
 *       ones. Discarded restriction characters are treated as text.
 */
function filterTokens(tokens) {
  let filtered = [];
  let restrictions = [];
  const isFirstTokenAKeyword =
    !Object.values(UrlbarTokenizer.RESTRICT).includes(tokens[0]) &&
    lazy.PlacesUtils.keywords.isKeywordFromCache(tokens[0]);

  for (let i = 0; i < tokens.length; ++i) {
    let token = tokens[i];
    let tokenObj = {
      value: token,
      lowerCaseValue: token.toLocaleLowerCase(),
      type: UrlbarTokenizer.TYPE.TEXT,
    };
    // For privacy reasons, we don't want to send a data: (or other kind of)
    // URI to a search engine, so we still want to parse a single long token
    // below; only when there are multiple tokens do we skip parsing a very
    // long one.
    if (tokens.length > 1 && token.length > 500) {
      filtered.push(tokenObj);
      break;
    }
    if (isFirstTokenAKeyword) {
      filtered.push(tokenObj);
      continue;
    }

    let restrictionType = CHAR_TO_TYPE_MAP.get(token);
    if (restrictionType) {
      restrictions.push({ index: i, type: restrictionType });
    } else {
      let looksLikeOrigin = lazy.UrlUtils.looksLikeOrigin(token);
      if (
        looksLikeOrigin == lazy.UrlUtils.LOOKS_LIKE_ORIGIN.OTHER &&
        lazy.UrlbarPrefs.get("allowSearchSuggestionsForSimpleOrigins")
      ) {
        tokenObj.type =
          UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED;
      } else if (looksLikeOrigin != lazy.UrlUtils.LOOKS_LIKE_ORIGIN.NONE) {
        tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN;
      } else if (lazy.UrlUtils.looksLikeUrl(token, { requirePath: true })) {
        tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_URL;
      }
    }
    filtered.push(tokenObj);
  }

  // Handle restriction characters.
  if (restrictions.length) {
    // We can apply two kinds of restrictions: type (bookmark, search, ...) and
    // matching (url, title). These kinds can be combined, but we can only have
    // one restriction per kind.
    let matchingRestrictionFound = false;
    let typeRestrictionFound = false;
    // Marks the token at r.index as a restriction of its kind, unless a
    // restriction of the same kind was already assigned. Returns whether the
    // assignment happened.
    function assignRestriction(r) {
      if (r && !(matchingRestrictionFound && typeRestrictionFound)) {
        if (
          [
            UrlbarTokenizer.TYPE.RESTRICT_TITLE,
            UrlbarTokenizer.TYPE.RESTRICT_URL,
          ].includes(r.type)
        ) {
          if (!matchingRestrictionFound) {
            matchingRestrictionFound = true;
            filtered[r.index].type = r.type;
            return true;
          }
        } else if (!typeRestrictionFound) {
          typeRestrictionFound = true;
          filtered[r.index].type = r.type;
          return true;
        }
      }
      return false;
    }

    // Look at the first token.
    let found = assignRestriction(restrictions.find(r => r.index == 0));
    if (found) {
      // If the first token was assigned, look at the next one.
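      // A second leading restriction is only assigned when it is of the other
      // kind, e.g. a matching restriction (title/url) combined with a type
      // restriction (history, bookmarks, ...); assignRestriction() ignores a
      // second restriction of the same kind.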
      assignRestriction(restrictions.find(r => r.index == 1));
    }
    // Then look at the last token.
    let lastIndex = tokens.length - 1;
    found = assignRestriction(restrictions.find(r => r.index == lastIndex));
    if (found) {
      // If the last token was assigned, look at the previous one.
      assignRestriction(restrictions.find(r => r.index == lastIndex - 1));
    }
  }

  lazy.logger.info("Filtered Tokens", filtered);
  return filtered;
}