class Roadie::MarkupImprover
@api private
Class that improves the markup of an HTML DOM tree.
This class will improve the following aspects of the DOM:
- An HTML5 doctype will be added if missing; other doctypes will be left as-is.
- Basic HTML elements will be added if missing:
  - `<html>`
  - `<head>`
  - `<body>`
  - `<meta>` declaring charset and content-type (text/html)
-
Attributes
dom[R]
Public Class Methods
new(dom, original_html)
click to toggle source
The original HTML must also be passed in so that doctypes can be handled correctly: a Nokogiri::HTML::Document will always have a doctype, regardless of whether the original source had one. Reading the raw HTML is the only way to determine whether an HTML5 doctype should be added.
# File lib/roadie/markup_improver.rb, line 19
# Builds an improver for an already-parsed document.
#
# @param dom the parsed document; stored in @dom
# @param original_html the raw markup the document was parsed from; stored
#   in @html so the source text can still be inspected after parsing
def initialize(dom, original_html)
  @html = original_html
  @dom = dom
end
Public Instance Methods
improve()
click to toggle source
@return [nil] passed DOM will be mutated
# File lib/roadie/markup_improver.rb, line 25
# Applies every markup fix-up to the DOM, in this order: doctype, <html>
# element, <head> element, content-type <meta> element.
#
# @return [nil] the DOM is mutated in place; nothing useful is returned
def improve
  ensure_doctype_present
  ensure_html_element_present
  head = ensure_head_element_present
  ensure_declared_charset head
  # Return nil explicitly so the result of the last step (possibly a newly
  # added node) does not leak out as an accidental return value.
  nil
end
Private Instance Methods
content_type_meta_element_missing?()
click to toggle source
# File lib/roadie/markup_improver.rb, line 76
# Reports whether html/head has no <meta> element whose http-equiv attribute
# equals "content-type" (the attribute value is downcased before comparing).
#
# @return [Boolean] true when no such <meta> element is found
def content_type_meta_element_missing?
  head_metas = dom.xpath("html/head/meta")
  !head_metas.any? { |element| element["http-equiv"].to_s.downcase == "content-type" }
end
create_head_element(parent)
click to toggle source
# File lib/roadie/markup_improver.rb, line 59
# Creates a <head> node and makes it the first child of +parent+.
#
# @param parent the node that should contain the new <head>
# @return the newly created <head> node
def create_head_element(parent)
  new_head = Nokogiri::XML::Node.new("head", dom)
  siblings = parent.children
  if siblings.empty?
    # Inserting before the child set crashes when no children are present,
    # so an empty parent simply gets the node appended.
    parent << new_head
  else
    siblings.before new_head
  end
  new_head
end
ensure_declared_charset(parent)
click to toggle source
# File lib/roadie/markup_improver.rb, line 70
# Appends a content-type <meta> element to +parent+ unless the document
# already has one.
#
# @param parent the node that receives the new <meta> element
def ensure_declared_charset(parent)
  return unless content_type_meta_element_missing?

  parent.add_child(make_content_type_element)
end
ensure_doctype_present()
click to toggle source
# File lib/roadie/markup_improver.rb, line 38
# Gives the document an HTML5 doctype when the original markup declared none.
# A doctype that is already present in the raw HTML is left untouched.
def ensure_doctype_present
  # The keyword is matched case-insensitively and may be followed by any
  # whitespace, so "<!doctype html>" counts as an existing doctype too.
  return if @html =~ /<!doctype\s/i

  # Nokogiri adds a "default" doctype to the DOM, which we will remove
  dom.internal_subset&.remove
  dom.create_internal_subset "html", nil, nil
end
ensure_head_element_present()
click to toggle source
# File lib/roadie/markup_improver.rb, line 51
# Looks up the html/head element, creating one under the <html> element when
# it does not exist yet.
#
# @return the existing or newly created <head> node
def ensure_head_element_present
  dom.at_xpath("html/head") || create_head_element(dom.at_xpath("html"))
end
ensure_html_element_present()
click to toggle source
# File lib/roadie/markup_improver.rb, line 45
# Appends an <html> element to the document when it has none.
def ensure_html_element_present
  unless dom.at_xpath("html")
    dom << Nokogiri::XML::Node.new("html", dom)
  end
end
make_content_type_element()
click to toggle source
# File lib/roadie/markup_improver.rb, line 82
# Builds a detached <meta http-equiv="Content-Type"> node declaring
# text/html with the UTF-8 charset.
#
# @return the new <meta> node (not yet attached to any parent)
def make_content_type_element
  Nokogiri::XML::Node.new("meta", dom).tap do |element|
    element["http-equiv"] = "Content-Type"
    element["content"] = "text/html; charset=UTF-8"
  end
end