module JosieHealth::LUT
  # Outcome of a fuzzy lookup against an alias table.
  #
  # * `canonical`     - the canonical name; when the closest aliases map to
  #                     different canonical names, all of them joined with ", ".
  # * `distance`      - edit distance between the cleaned input and the matched
  #                     alias (0 for an exact match).
  # * `matched_alias` - the alias that matched; on a tie, the first one found.
  # * `auto_correct`  - whether the match is considered safe to apply without
  #                     asking the user.
  record FuzzyResult,
    canonical : String,
    distance : Int32,
    matched_alias : String,
    auto_correct : Bool

  # Edit-distance primitives and the matching policy shared by the lookup tables.
  module Fuzzy
    # Returns the edit distance between *a* and *b*, counting insertions,
    # deletions, substitutions and swaps of two adjacent characters as one
    # edit each (the "optimal string alignment" form of Damerau-Levenshtein:
    # a swapped pair is not edited again afterwards).
    def self.damerau_distance(a : String, b : String) : Int32
      # Decode once up front: indexing a String by character position is
      # linear for non-ASCII text, which would otherwise be paid per cell.
      left = a.chars
      right = b.chars
      n = left.size
      m = right.size
      return m if n == 0
      return n if m == 0

      # d[i][j] = distance between the first i chars of a and first j chars of b.
      d = Array.new(n + 1) { Array.new(m + 1, 0) }
      (0..n).each { |i| d[i][0] = i }
      (0..m).each { |j| d[0][j] = j }

      (1..n).each do |i|
        (1..m).each do |j|
          cost = left[i - 1] == right[j - 1] ? 0 : 1
          best = {
            d[i - 1][j] + 1,        # deletion
            d[i][j - 1] + 1,        # insertion
            d[i - 1][j - 1] + cost, # substitution (or match)
          }.min

          # Adjacent transposition. It always costs one edit; when the four
          # characters are all equal the diagonal above is already cheaper.
          if i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1]
            best = {best, d[i - 2][j - 2] + 1}.min
          end

          d[i][j] = best
        end
      end

      d[n][m]
    end

    # Largest edit distance that is still accepted as a match for an input of
    # *length* characters. Inputs of up to 2 characters are never fuzzy-matched.
    def self.max_distance(length : Int32) : Int32
      case length
      when ..2  then 0
      when 3..5 then 1
      when 6..8 then 2
      else           3
      end
    end

    # Whether a match at *distance* may be applied automatically for an input
    # of *length* characters. This is stricter than `max_distance`: inputs of
    # 6..8 characters match up to distance 2 but auto-correct only at 1, and
    # longer inputs match up to 3 but auto-correct only up to 2.
    def self.auto_correct?(length : Int32, distance : Int32) : Bool
      case length
      when ..2  then false
      when 3..5 then distance <= 1
      when 6..8 then distance <= 1
      else           distance <= 2
      end
    end

    # :nodoc:
    #
    # Scans *entries* (any collection yielding `{alias, canonical}` pairs) and
    # returns `{best_distance, matches}`, where `matches` holds every pair whose
    # alias is at the smallest non-zero distance from *input* that does not
    # exceed *max_dist*, in iteration order. When nothing qualifies, `matches`
    # is empty and `best_distance` is `max_dist + 1`.
    def self.closest_matches(input : String, entries, max_dist : Int32) : Tuple(Int32, Array(Tuple(String, String)))
      input_size = input.size
      best_distance = max_dist + 1
      matches = [] of {String, String}

      entries.each do |alias_name, canonical|
        # The length difference is a lower bound on the distance, so this
        # skips hopeless candidates without filling a matrix.
        next if (alias_name.size - input_size).abs > max_dist

        dist = damerau_distance(input, alias_name)
        # Distance 0 is an exact match, which callers handle before scanning.
        next if dist > max_dist || dist == 0

        if dist < best_distance
          best_distance = dist
          matches = [{alias_name, canonical}]
        elsif dist == best_distance
          matches << {alias_name, canonical}
        end
      end

      {best_distance, matches}
    end

    # :nodoc:
    #
    # Builds the result for the tied best *matches* (`{alias, canonical}`
    # pairs), or returns nil when there are none. If the pairs map to more than
    # one canonical name, all names are reported joined with ", " and
    # auto-correction is disabled regardless of *auto_correct*.
    def self.build_result(matches : Array(Tuple(String, String)), distance : Int32, auto_correct : Bool) : FuzzyResult?
      return nil if matches.empty?

      canonicals = matches.map(&.[1]).uniq
      unambiguous = canonicals.size == 1

      FuzzyResult.new(
        canonical: canonicals.join(", "),
        distance: distance,
        matched_alias: matches[0][0],
        auto_correct: unambiguous && auto_correct
      )
    end
  end

  module DrugLUT
    # Aliases grouped by their first two characters. Aliases shorter than two
    # characters are not indexed, so every key is exactly two characters long.
    private PREFIX_INDEX = build_prefix_index

    # Builds the two-character prefix index over the keys of
    # `ALIAS_TO_CANONICAL` (defined elsewhere in this module).
    def self.build_prefix_index : Hash(String, Array(String))
      index = Hash(String, Array(String)).new { |h, k| h[k] = [] of String }
      ALIAS_TO_CANONICAL.each_key do |alias_name|
        next if alias_name.size < 2
        index[alias_name[0, 2]] << alias_name
      end
      index
    end

    # Resolves *input* to a canonical name, tolerating typos.
    #
    # The input is stripped and downcased first. Returns nil for inputs shorter
    # than 3 characters and when no alias is close enough. An exact hit from
    # `normalize` (defined elsewhere in this module) is returned with distance
    # 0 and `auto_correct: true`. Otherwise only aliases sharing the input's
    # first two characters (first character only, for 3-character inputs) are
    # compared, within `Fuzzy.max_distance` of the input length.
    def self.fuzzy_normalize(input : String) : FuzzyResult?
      clean = input.strip.downcase
      return nil if clean.size < 3

      if exact = normalize(clean)
        return FuzzyResult.new(canonical: exact, distance: 0, matched_alias: clean, auto_correct: true)
      end

      max_dist = Fuzzy.max_distance(clean.size)
      return nil if max_dist == 0

      # Each alias lives in exactly one index bucket, so concatenating buckets
      # cannot introduce duplicates.
      candidates = [] of String
      if clean.size == 3
        # Three characters is too short to pin two of them: accept every
        # bucket whose key starts with the same first character.
        first_char = clean[0]
        PREFIX_INDEX.each do |prefix, aliases|
          candidates.concat(aliases) if prefix.starts_with?(first_char)
        end
      elsif aliases = PREFIX_INDEX[clean[0, 2]]?
        candidates.concat(aliases)
      end

      entries = candidates.map { |alias_name| {alias_name, ALIAS_TO_CANONICAL[alias_name]} }
      best_distance, matches = Fuzzy.closest_matches(clean, entries, max_dist)

      # Ties across different substances are reported together (canonical names
      # joined with ", ") and are never auto-corrected.
      Fuzzy.build_result(matches, best_distance, Fuzzy.auto_correct?(clean.size, best_distance))
    end
  end

  module RouteLUT
    # Resolves *input* to a canonical route, tolerating a single typo.
    #
    # The input is stripped and downcased first. Returns nil for inputs shorter
    # than 3 characters and when no alias is within distance 1. An exact hit
    # from `normalize` (defined elsewhere in this module) is returned with
    # distance 0. A unique fuzzy match is always marked `auto_correct: true`;
    # a tie across different routes reports all of them joined with ", " and
    # is not auto-corrected.
    def self.fuzzy_normalize(input : String) : FuzzyResult?
      clean = input.strip.downcase
      return nil if clean.size < 3

      if exact = normalize(clean)
        return FuzzyResult.new(canonical: exact, distance: 0, matched_alias: clean, auto_correct: true)
      end

      # Routes are capped at distance 1, and every alias is scanned (no prefix
      # index is used here).
      best_distance, matches = Fuzzy.closest_matches(clean, ALIAS_TO_CANONICAL, 1)
      Fuzzy.build_result(matches, best_distance, true)
    end
  end
end