FIX: word boundary regex (\b) not working in Unicode languages. (#9163)

FIX: word boundary regex (\b) not working in Unicode languages. (#9163)

diff --git a/app/assets/javascripts/admin/components/site-text-summary.js b/app/assets/javascripts/admin/components/site-text-summary.js
index 11c6bc4..23ee2d8 100644
--- a/app/assets/javascripts/admin/components/site-text-summary.js
+++ b/app/assets/javascripts/admin/components/site-text-summary.js
@@ -1,5 +1,6 @@
 import Component from "@ember/component";
 import { on } from "discourse-common/utils/decorators";
+import highlightHTML from "discourse/lib/highlight-html";
 
 export default Component.extend({
   classNames: ["site-text"],
@@ -10,11 +11,13 @@ export default Component.extend({
     const term = this._searchTerm();
 
     if (term) {
-      $(
-        this.element.querySelector(".site-text-id, .site-text-value")
-      ).highlight(term, {
-        className: "text-highlight"
-      });
+      highlightHTML(
+        this.element.querySelector(".site-text-id, .site-text-value"),
+        term,
+        {
+          className: "text-highlight"
+        }
+      );
     }
     $(this.element.querySelector(".site-text-value")).ellipsis();
   },
diff --git a/app/assets/javascripts/admin/templates/search-logs-term.hbs b/app/assets/javascripts/admin/templates/search-logs-term.hbs
index 27f61f2..ac55d12 100644
--- a/app/assets/javascripts/admin/templates/search-logs-term.hbs
+++ b/app/assets/javascripts/admin/templates/search-logs-term.hbs
@@ -31,7 +31,7 @@
       <div class='fps-topic'>
         <div class='topic'>
           <a class='search-link' href={{result.url}}>
-            {{topic-status topic=result.topic disableActions=true}}<span class='topic-title'>{{#highlight-text highlight=term}}{{html-safe result.topic.fancyTitle}}{{/highlight-text}}</span>
+            {{topic-status topic=result.topic disableActions=true}}<span class='topic-title'>{{#highlight-search highlight=term}}{{html-safe result.topic.fancyTitle}}{{/highlight-search}}</span>
           </a>
 
           <div class='search-category'>
@@ -54,9 +54,9 @@
           </span>
 
           {{#if result.blurb}}
-            {{#highlight-text highlight=term}}
+            {{#highlight-search highlight=term}}
               {{html-safe result.blurb}}
-            {{/highlight-text}}
+            {{/highlight-search}}
           {{/if}}
         </div>
       </div>
diff --git a/app/assets/javascripts/discourse/components/highlight-search.js b/app/assets/javascripts/discourse/components/highlight-search.js
new file mode 100644
index 0000000..3223366
--- /dev/null
+++ b/app/assets/javascripts/discourse/components/highlight-search.js
@@ -0,0 +1,13 @@
+import Component from "@ember/component";
+import highlightSearch from "discourse/lib/highlight-search";
+
+// Span component that runs highlightSearch on its own element for `highlight`.
+export default Component.extend({
+  tagName: "span",
+  // Runs when the element is inserted and again whenever `highlight` changes.
+  _highlightOnInsert: function() {
+    highlightSearch($(this.element), this.highlight);
+  }
+    .observes("highlight")
+    .on("didInsertElement")
+});
diff --git a/app/assets/javascripts/discourse/components/highlight-text.js b/app/assets/javascripts/discourse/components/highlight-text.js
deleted file mode 100644
index a98ffdb..0000000
--- a/app/assets/javascripts/discourse/components/highlight-text.js
+++ /dev/null
@@ -1,13 +0,0 @@
-import Component from "@ember/component";
-import highlightText from "discourse/lib/highlight-text";
-
-export default Component.extend({
-  tagName: "span",
-
-  _highlightOnInsert: function() {
-    const term = this.highlight;
-    highlightText($(this.element), term);
-  }
-    .observes("highlight")
-    .on("didInsertElement")
-});
diff --git a/app/assets/javascripts/discourse/lib/highlight-html.js b/app/assets/javascripts/discourse/lib/highlight-html.js
new file mode 100644
index 0000000..94a1cd8
--- /dev/null
+++ b/app/assets/javascripts/discourse/lib/highlight-html.js
@@ -0,0 +1,93 @@
+/**
+ * Wraps text matching `pattern` in `<nodeName class="className">` elements.
+ * Child nodes are walked recursively; SCRIPT, STYLE and wrappers are skipped.
+ * @param {Node} node - element or text node to process
+ * @param {RegExp} pattern - expression matched against text node data
+ * @param {string} nodeName - wrapper tag name, upper-cased like `tagName`
+ * @param {string} className - class set on the wrapper element
+ * @returns {number} 1 if a wrapper was inserted after `node`, otherwise 0
+ */
+function highlight(node, pattern, nodeName, className) {
+  if (node.nodeType === Node.TEXT_NODE) {
+    const match = node.data.match(pattern);
+    if (!match) {
+      return 0;
+    }
+    const element = document.createElement(nodeName);
+    element.className = className;
+    // textContent, unlike innerText, never turns line breaks into <br> nodes.
+    element.textContent = match[0];
+    const matchNode = node.splitText(match.index);
+    matchNode.splitText(match[0].length);
+    matchNode.parentNode.replaceChild(element, matchNode);
+    return 1;
+  }
+  const isWrapper = node.tagName === nodeName && node.className === className;
+  const walkable = node.nodeType === Node.ELEMENT_NODE && !isWrapper;
+  if (walkable && !["SCRIPT", "STYLE"].includes(node.tagName)) {
+    // A return of 1 means a wrapper now follows the child; step over it.
+    for (let i = 0; i < node.childNodes.length; i++) {
+      i += highlight(node.childNodes[i], pattern, nodeName, className);
+    }
+  }
+  return 0;
+}
+
+/**
+ * Wraps text matching any of `words` under `node` in highlight elements.
+ * @param {Node} node - root DOM node to search
+ * @param {string|string[]} words - literal term(s); regex syntax is escaped
+ * @param {Object} [opts] - overrides for nodeName ("span"), className
+ *   ("highlighted"), wholeWord (false) and matchCase (false)
+ * @returns {Node} the `node` argument
+ */
+export default function(node, words, opts = {}) {
+  const settings = {
+    nodeName: "span",
+    className: "highlighted",
+    wholeWord: false,
+    matchCase: false
+  };
+  Object.assign(settings, opts);
+  const terms = (typeof words === "string" ? [words] : words)
+    .filter(Boolean)
+    .map(word => word.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&"));
+  if (!terms.length) return node;
+  let pattern = `(${terms.join("|")})`;
+  if (settings.wholeWord) {
+    // The backslash must be doubled: a single one before "b" is a backspace.
+    const hasUnicode = terms.some(word => {
+      return !word.match(new RegExp(`\\b${word}\\b`));
+    });
+    // \b only sees ASCII word characters, so other terms use lookarounds.
+    pattern = hasUnicode
+      ? `(?<=[\\s,.:;"']|^)${pattern}(?=[\\s,.:;"']|$)`
+      : `\\b${pattern}\\b`;
+  }
+  // Matching ignores case unless the caller asks for matchCase.
+  const flag = settings.matchCase ? "" : "i";
+  highlight(
+    node,
+    new RegExp(pattern, flag),
+    settings.nodeName.toUpperCase(),
+    settings.className
+  );
+  return node;
+}
+
+/**
+ * Swaps every highlight wrapper in the document for its first child node,
+ * then normalizes the parent so adjacent text nodes merge again.
+ * @param {Object} [opts] - nodeName ("span") and className ("highlighted")
+ */
+export function unhighlightHTML(opts = {}) {
+  const { nodeName, className } = Object.assign(
+    { nodeName: "span", className: "highlighted" },
+    opts
+  );
+  document.querySelectorAll(`${nodeName}.${className}`).forEach(wrapper => {
+    const parent = wrapper.parentNode;
+    parent.replaceChild(wrapper.firstChild, wrapper);
+    parent.normalize();
+  });
+}
diff --git a/app/assets/javascripts/discourse/lib/highlight-search.js b/app/assets/javascripts/discourse/lib/highlight-search.js
new file mode 100644
index 0000000..870e9c3
--- /dev/null
+++ b/app/assets/javascripts/discourse/lib/highlight-search.js
@@ -0,0 +1,21 @@
+import { PHRASE_MATCH_REGEXP_PATTERN } from "discourse/lib/concerns/search-constants";
+import highlightHTML from "discourse/lib/highlight-html";
+
+export const CLASS_NAME = "search-highlight";
+
+// Highlights the tokens of search `term` in every element of the array-like
+// `$elem`, using CLASS_NAME unless `opts.defaultClassName` is set. Surrounding
+// double quotes are stripped from each token before matching.
+export default function($elem, term, opts = {}) {
+  if (_.isEmpty(term)) return;
+  // special case ignore "l" which is used for magic sorting
+  const words = _.reject(
+    term.match(new RegExp(`${PHRASE_MATCH_REGEXP_PATTERN}|[^\\s]+`, "g")),
+    t => t === "l"
+  ).map(w => w.replace(/^"(.*)"$/, "$1"));
+  const highlightOpts = { wholeWord: true };
+  if (!opts.defaultClassName) highlightOpts.className = CLASS_NAME;
+  for (let i = 0; i < $elem.length; i++) {
+    highlightHTML($elem[i], words, highlightOpts);
+  }
+}
diff --git a/app/assets/javascripts/discourse/lib/highlight-text.js b/app/assets/javascripts/discourse/lib/highlight-text.js
deleted file mode 100644
index 6fa7a09..0000000
--- a/app/assets/javascripts/discourse/lib/highlight-text.js
+++ /dev/null
@@ -1,18 +0,0 @@
-import { PHRASE_MATCH_REGEXP_PATTERN } from "discourse/lib/concerns/search-constants";
-
-export const CLASS_NAME = "search-highlight";
-
-export default function($elem, term, opts = {}) {
-  if (!_.isEmpty(term)) {
-    // special case ignore "l" which is used for magic sorting
-    let words = _.reject(
-      term.match(new RegExp(`${PHRASE_MATCH_REGEXP_PATTERN}|[^\\s]+`, "g")),
-      t => t === "l"
-    );
-
-    words = words.map(w => w.replace(/^"(.*)"$/, "$1"));
-    const highlightOpts = { wordsOnly: true };
-    if (!opts.defaultClassName) highlightOpts.className = CLASS_NAME;

[... diff too long, it was truncated ...]

GitHub sha: 572bb598

This commit appears in #9163 which was approved by eviltrout. It was merged by vinothkannans.