Fixed bug when an external post's title is composed of non-ASCII chars

Fixed a bug in external-posts.rb when a post's title is composed of non-ASCII chars
This commit is contained in:
George 2024-08-28 15:22:20 -03:00 committed by GitHub
parent 6c6932f1b1
commit cd3f4d6be5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -42,7 +42,16 @@ module ExternalPosts
end
def create_document(site, source_name, url, content)
# check if title is composed only of whitespace or foreign characters
if content[:title].gsub(/[^\w]/, '').strip.empty?
# use the source name and last url segment as fallback
slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}"
else
# parse title from the post or use the source name and last url segment as fallback
slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" if slug.empty?
end
path = site.in_source_dir("_posts/#{slug}.md")
doc = Jekyll::Document.new(
path, { :site => site, :collection => site.collections['posts'] }
@ -80,7 +89,7 @@ module ExternalPosts
html = HTTParty.get(url).body
parsed_html = Nokogiri::HTML(html)
title = parsed_html.at('head title')&.text || ''
title = parsed_html.at('head title')&.text.strip || ''
description = parsed_html.at('head meta[name="description"]')&.attr('content') || ''
body_content = parsed_html.at('body')&.inner_html || ''