From 091490a8bd14e0c63f1f339a54fb92e2277d54e9 Mon Sep 17 00:00:00 2001
From: Arnaud Levy <contact@arnaudlevy.com>
Date: Tue, 26 Oct 2021 07:16:27 +0200
Subject: [PATCH] clean excerpts

---
 app/models/communication/website/imported/page.rb |  2 +-
 app/models/communication/website/imported/post.rb |  2 +-
 app/services/wordpress.rb                         | 12 +++++++++---
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/app/models/communication/website/imported/page.rb b/app/models/communication/website/imported/page.rb
index 2576b2471..f7f842ddf 100644
--- a/app/models/communication/website/imported/page.rb
+++ b/app/models/communication/website/imported/page.rb
@@ -85,7 +85,7 @@ class Communication::Website::Imported::Page < ApplicationRecord
     puts "Update page #{page.id}"
     page.slug = slug
     page.title = Wordpress.clean_string title.to_s
-    page.description = ActionView::Base.full_sanitizer.sanitize excerpt.to_s
+    page.description = Wordpress.clean_string excerpt.to_s
     page.text = Wordpress.clean_html content.to_s
     page.published = true
     page.save
diff --git a/app/models/communication/website/imported/post.rb b/app/models/communication/website/imported/post.rb
index 8bea673fa..616580fa0 100644
--- a/app/models/communication/website/imported/post.rb
+++ b/app/models/communication/website/imported/post.rb
@@ -85,7 +85,7 @@ class Communication::Website::Imported::Post < ApplicationRecord
     sanitized_title = Wordpress.clean_string self.title.to_s
     post.title = sanitized_title unless sanitized_title.blank? # If there is no title, leave it with "Untitled"
     post.slug = slug
-    post.description = ActionView::Base.full_sanitizer.sanitize excerpt.to_s
+    post.description = Wordpress.clean_string excerpt.to_s
     post.text = Wordpress.clean_html content.to_s
     post.created_at = created_at
     post.updated_at = updated_at
diff --git a/app/services/wordpress.rb b/app/services/wordpress.rb
index bc2f68752..f591219fb 100644
--- a/app/services/wordpress.rb
+++ b/app/services/wordpress.rb
@@ -6,6 +6,7 @@ class Wordpress
     string = string.gsub('&nbsp;', ' ')
     string = string.gsub('&amp;', '&')
     string = ActionView::Base.full_sanitizer.sanitize string
+    string = remove_lsep string
     string
   end
 
@@ -29,12 +30,17 @@ class Wordpress
       end
     end
     html = fragment.to_html(preserve_newline: true)
-    # LSEP is invisible!
-    html = html.delete("
", "&#8232;", "&#x2028;")
-    html = html.gsub /\u2028/, ''
+    html = remove_lsep html
     html
   end
 
+  # Removes the invisible LSEP (U+2028 LINE SEPARATOR), raw or as an HTML entity.
+  def self.remove_lsep(string)
+    # String#delete would intersect multiple arguments and remove nothing,
+    # so match the raw character and both entity spellings with one regexp.
+    string.gsub(/\u2028|&#8232;|&#x2028;/i, '')
+  end
+
   def initialize(domain)
     @domain = domain
   end
-- 
GitLab