# frozen_string_literal: true module Capybara module Node ## # # {Capybara::Node::WhitespaceNormalizer} provides methods that # help to normalize the spacing of text content inside of # {Capybara::Node::Element}s by removing various unicode # spacing and directional markings. # module WhitespaceNormalizer # Unicode for NBSP, or   NON_BREAKING_SPACE = "\u00a0" LINE_SEPERATOR = "\u2028" PARAGRAPH_SEPERATOR = "\u2029" # All spaces except for NBSP BREAKING_SPACES = "[[:space:]&&[^#{NON_BREAKING_SPACE}]]" # Whitespace we want to substitute with plain spaces SQUEEZED_SPACES = " \n\f\t\v#{LINE_SEPERATOR}#{PARAGRAPH_SEPERATOR}" # Any whitespace at the front of text LEADING_SPACES = /\A#{BREAKING_SPACES}+/.freeze # Any whitespace at the end of text TRAILING_SPACES = /#{BREAKING_SPACES}+\z/.freeze # "Invisible" space character ZERO_WIDTH_SPACE = "\u200b" # Signifies text is read left to right LEFT_TO_RIGHT_MARK = "\u200e" # Signifies text is read right to left RIGHT_TO_LEFT_MARK = "\u200f" # Characters we want to truncate from text REMOVED_CHARACTERS = [ZERO_WIDTH_SPACE, LEFT_TO_RIGHT_MARK, RIGHT_TO_LEFT_MARK].join # Matches multiple empty lines EMPTY_LINES = /[\ \n]*\n[\ \n]*/.freeze ## # # Normalizes the spacing of a node's text to be similar to # what matchers might expect. # # @param text [String] # @return [String] # def normalize_spacing(text) text .delete(REMOVED_CHARACTERS) .tr(SQUEEZED_SPACES, ' ') .squeeze(' ') .sub(LEADING_SPACES, '') .sub(TRAILING_SPACES, '') .tr(NON_BREAKING_SPACE, ' ') end ## # # Variant on {Capybara::Node::Normalizer#normalize_spacing} that # targets the whitespace of visible elements only. # # @param text [String] # @return [String] # def normalize_visible_spacing(text) text .squeeze(' ') .gsub(EMPTY_LINES, "\n") .sub(LEADING_SPACES, '') .sub(TRAILING_SPACES, '') .tr(NON_BREAKING_SPACE, ' ') end end end end