class Object

Constants

LOCAL_PACKAGE_RESPONSE
NOKOGIRI_HELP_MESSAGE
OTHER_LIBRARY_VERSIONS

Keep track of what versions of what libraries we build against

PACKAGE_ROOT_DIR

helpful constants

REQUIRED_LIBXML_VERSION
REQUIRED_MINI_PORTILE_VERSION
REQUIRED_PKG_CONFIG_VERSION

Public Instance Methods

Nokogiri(*args, &block) click to toggle source

Parse a document contained in args. Nokogiri will try to guess what type of document you are attempting to parse. For more information, see Nokogiri.parse

To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.

# File lib/nokogiri.rb, line 108
# Convenience entry point. Given a block, hands it to
# Nokogiri::HTML4::Builder and returns the root of the built document;
# otherwise forwards every argument to Nokogiri.parse.
def Nokogiri(*args, &block)
  return Nokogiri.parse(*args) unless block

  Nokogiri::HTML4::Builder.new(&block).doc.root
end
abort_could_not_find_library(lib) click to toggle source
# File ext/nokogiri/extconf.rb, line 309
# Aborts the process with a framed message naming the missing library,
# preceded by up to two frames of the caller's backtrace.
def abort_could_not_find_library(lib)
  backtrace = caller(1..2).join("\n")
  report = "-----\n#{backtrace}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----"
  abort(report)
end
aix?() click to toggle source
# File ext/nokogiri/extconf.rb, line 194
# True when the target_os recorded in RbConfig::CONFIG contains "aix".
def aix?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("aix")
end
chdir_for_build(&block) click to toggle source
# File ext/nokogiri/extconf.rb, line 314
# Runs the block with the working directory switched to the build directory
# and returns whatever Dir.chdir returns for that block.
#
# When using rake-compiler-dock on Windows, the underlying Virtualbox shared
# folders don't support symlinks, but libiconv expects it for a build on
# Linux. We work around this limitation by using the temp dir for cooking.
def chdir_for_build(&block)
  host_platform = ENV["RCD_HOST_RUBY_PLATFORM"].to_s
  build_dir = host_platform.match?(/mingw|mswin|cygwin/) ? "/tmp" : "."
  Dir.chdir(build_dir, &block)
end
concat_flags(*args) click to toggle source
# File ext/nokogiri/extconf.rb, line 206
# Joins the given flag strings with single spaces, ignoring nil entries.
def concat_flags(*args)
  present = args.reject(&:nil?)
  present.join(" ")
end
config_clean?() click to toggle source

utility functions

# File ext/nokogiri/extconf.rb, line 159
# Asks enable_config for the "clean" option, with true as the default.
def config_clean?
  clean_by_default = true
  enable_config("clean", clean_by_default)
end
config_cross_build?() click to toggle source
# File ext/nokogiri/extconf.rb, line 168
# Asks enable_config for the "cross-build" option (no explicit default).
def config_cross_build?
  option = "cross-build"
  enable_config(option)
end
config_static?() click to toggle source
# File ext/nokogiri/extconf.rb, line 163
# Asks enable_config for the "static" option. The default is true unless
# truffle? reports true.
def config_static?
  enable_config("static", !truffle?)
end
config_system_libraries?() click to toggle source
# File ext/nokogiri/extconf.rb, line 172
# Asks enable_config for "system-libraries". The default is whether the
# NOKOGIRI_USE_SYSTEM_LIBRARIES environment variable is present; the block
# given to enable_config consults the --use-system-libraries argument with
# the default it is handed.
def config_system_libraries?
  env_default = ENV.key?("NOKOGIRI_USE_SYSTEM_LIBRARIES")
  enable_config("system-libraries", env_default) do |_, fallback|
    arg_config("--use-system-libraries", fallback)
  end
end
copy_packaged_libraries_headers(to_path:, from_recipes:) click to toggle source
# File ext/nokogiri/extconf.rb, line 547
# Recreates to_path from scratch and copies into it everything found under
# "include/" of each recipe's path.
#
# to_path      - destination directory; removed (if present) and re-created.
# from_recipes - enumerable of objects responding to #path.
def copy_packaged_libraries_headers(to_path:, from_recipes:)
  FileUtils.rm_rf(to_path, secure: true)
  FileUtils.mkdir(to_path)

  from_recipes.each do |recipe|
    header_glob = File.join(recipe.path, "include/*")
    packaged_headers = Dir[header_glob]
    FileUtils.cp_r(packaged_headers, to_path)
  end
end
darwin?() click to toggle source
# File ext/nokogiri/extconf.rb, line 186
# True when the target_os recorded in RbConfig::CONFIG contains "darwin".
def darwin?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("darwin")
end
do_clean() click to toggle source
# File ext/nokogiri/extconf.rb, line 560
# Deletes files that are only needed during the build, then ends the process
# with exit!(0). Nothing is deleted when PACKAGE_ROOT_DIR contains a ".git"
# entry (a development work tree).
def do_clean
  package_root = Pathname(PACKAGE_ROOT_DIR)
  build_pwd = Pathname(Dir.pwd)
  development_tree = (package_root + ".git").exist?

  unless development_tree
    message("Cleaning files only used during build.\n")

    # (package_root + 'tmp') cannot be removed at this stage because
    # nokogiri.so is yet to be copied to lib.

    # clean the ports build directory
    Pathname.glob(build_pwd.join("tmp", "*", "ports")).each do |ports_build_dir|
      FileUtils.rm_rf(ports_build_dir, verbose: true)
    end

    # ports installation can be safely removed if statically linked;
    # otherwise only the "archives" subdirectory is removed.
    disposable = config_static? ? package_root + "ports" : package_root + "ports" + "archives"
    FileUtils.rm_rf(disposable, verbose: true)
  end

  exit!(0)
end
do_help() click to toggle source
# File ext/nokogiri/extconf.rb, line 555
# Writes NOKOGIRI_HELP_MESSAGE to standard output, then ends the process
# with exit!(0).
def do_help
  $stdout.print(NOKOGIRI_HELP_MESSAGE)
  exit!(0)
end
ensure_func(func, headers = nil) click to toggle source
# File ext/nokogiri/extconf.rb, line 298
# Returns the truthy result of have_func(func, headers); when that result is
# falsy, hands func to abort_could_not_find_library instead.
def ensure_func(func, headers = nil)
  found = have_func(func, headers)
  return found if found

  abort_could_not_find_library(func)
end
ensure_package_configuration(opt: nil, pc: nil, lib:, func:, headers:) click to toggle source
# File ext/nokogiri/extconf.rb, line 293
# Forwards all keyword arguments to have_package_configuration and returns
# its truthy result; when the result is falsy, hands lib to
# abort_could_not_find_library instead.
def ensure_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  configured = have_package_configuration(opt: opt, pc: pc, lib: lib, func: func, headers: headers)
  return configured if configured

  abort_could_not_find_library(lib)
end
gnome_source() click to toggle source
# File ext/nokogiri/extconf.rb, line 224
# Base URL used for GNOME downloads. The canonical host is used only when the
# NOKOGIRI_USE_CANONICAL_GNOME_SOURCE environment variable is set; otherwise a
# mirror is returned.
#
# As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home,
# but whatever host is resolved on the github actions workers see an expired cert.
#
# See https://github.com/sparklemotion/nokogiri/runs/5266206403?check_suite_focus=true
def gnome_source
  use_canonical = ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"]
  return "https://download.gnome.org" if use_canonical

  "https://muug.ca/mirror/gnome" # old reliable
end
have_libxml_headers?(version = nil) click to toggle source
# File ext/nokogiri/extconf.rb, line 351
# Passes a small C preprocessor snippet to try_cpp and returns its result.
# The snippet always includes <libxml/xmlversion.h>. When version (a dotted
# "major.minor.patch" string) is given, the snippet also raises a
# preprocessor #error if LIBXML_VERSION is below that version.
def have_libxml_headers?(version = nil)
  lines = ["#include <libxml/xmlversion.h>"]

  unless version.nil?
    # "2.9.2" becomes "20902", the integer layout LIBXML_VERSION is compared against.
    minimum = format("%d%2.2d%2.2d", *version.split("."))
    lines.push(
      "#if LIBXML_VERSION < #{minimum}",
      "#  error libxml2 is older than #{version}",
      "#endif",
    )
  end

  try_cpp(lines.map { |line| "#{line}\n" }.join)
end
have_package_configuration(opt: nil, pc: nil, lib:, func:, headers:) click to toggle source

set up mkmf to link against the library if we can find it

# File ext/nokogiri/extconf.rb, line 276
# Tries to make the library usable, returning a truthy value on success.
#
# opt     - when given, dir_config is run for this name and for "opt".
# pc      - when given, passed to try_package_configuration if the first
#           library check did not succeed (or was skipped).
# lib, func, headers - forwarded to local_have_library.
#
# Setting the NOKOGIRI_TEST_PKG_CONFIG environment variable skips the first
# library check so the package-configuration path is always exercised.
def have_package_configuration(opt: nil, pc: nil, lib:, func:, headers:)
  [opt, "opt"].each { |name| dir_config(name) } if opt

  # see if we have enough path info to do this without trying any harder
  force_pkg_config = ENV.key?("NOKOGIRI_TEST_PKG_CONFIG")
  return true if !force_pkg_config && local_have_library(lib, func, headers)

  try_package_configuration(pc) if pc

  # verify that we can compile and link against the library
  local_have_library(lib, func, headers)
end
iconv_configure_flags() click to toggle source
# File ext/nokogiri/extconf.rb, line 390
# Works out how iconv can be found and returns the matching flags as an Array
# of Strings; every successful result starts with "--with-iconv=yes".
#
# Three strategies are tried in order:
#   1. the dir_config settings for "iconv", then for "opt";
#   2. a plain try_link_iconv with no extra settings;
#   3. pkg_config("libiconv").
# If none of them succeeds, the method ends in
# abort_could_not_find_library("libiconv").
#
# NOTE(review): try_link_iconv is defined outside this section; presumably it
# runs the optional block and then test-links against iconv — confirm.
def iconv_configure_flags
  # give --with-iconv-dir and --with-opt-dir first priority
  ["iconv", "opt"].each do |target|
    # Probe dir_config inside preserving_globals; only its return value is kept here.
    config = preserving_globals { dir_config(target) }
    # Skip this target when dir_config returned nothing truthy or the link test fails.
    next unless config.any? && try_link_iconv("--with-#{target}-* flags") { dir_config(target) }

    # Split each non-nil entry on File::PATH_SEPARATOR into individual
    # directories; a nil entry stays nil so its flag is omitted below.
    idirs, ldirs = config.map do |dirs|
      Array(dirs).flat_map do |dir|
        dir.split(File::PATH_SEPARATOR)
      end if dirs
    end

    # Splatting nil contributes no element, so CPPFLAGS/LDFLAGS only appear
    # when the corresponding directory list exists.
    return [
      "--with-iconv=yes",
      *("CPPFLAGS=#{idirs.map { |dir| "-I" + dir }.join(" ")}" if idirs),
      *("LDFLAGS=#{ldirs.map { |dir| "-L" + dir }.join(" ")}" if ldirs),
    ]
  end

  # Second strategy: no extra settings at all.
  if try_link_iconv
    return ["--with-iconv=yes"]
  end

  # Third strategy: pkg_config, again probed inside preserving_globals first.
  config = preserving_globals { pkg_config("libiconv") }
  if config && try_link_iconv("pkg-config libiconv") { pkg_config("libiconv") }
    cflags, ldflags, libs = config

    return [
      "--with-iconv=yes",
      "CPPFLAGS=#{cflags}",
      "LDFLAGS=#{ldflags}",
      "LIBS=#{libs}",
    ]
  end

  abort_could_not_find_library("libiconv")
end
libflag_to_filename(ldflag) click to toggle source
# File ext/nokogiri/extconf.rb, line 344
# Turns a "-lNAME" linker flag into "libNAME.<$LIBEXT>". Returns nil for
# anything that does not start with "-l" followed by at least one character.
def libflag_to_filename(ldflag)
  # Regexp#=== (as used by case/when) simply answers false for non-strings.
  return unless /\A-l(.+)/ === ldflag

  "lib#{Regexp.last_match(1)}.#{$LIBEXT}"
end
local_have_library(lib, func = nil, headers = nil) click to toggle source
# File ext/nokogiri/extconf.rb, line 210
# Checks for the library with have_library under the given name and, if that
# is falsy, again under the name prefixed with "lib". Returns the first truthy
# result, or the result of the second check.
def local_have_library(lib, func = nil, headers = nil)
  found = have_library(lib, func, headers)
  return found if found

  have_library("lib#{lib}", func, headers)
end
needs_darwin_linker_hack() click to toggle source

In ruby 3.2, symbol resolution changed on Darwin, to introduce the `-bundle_loader` flag to resolve symbols against the ruby binary.

This makes it challenging to build a single extension that works with both a ruby with `--enable-shared` and one with `--disable-shared`. To work around that, we choose to add `-flat_namespace` to the link line (later in this file).

The `-flat_namespace` line introduces its own behavior change, which is that (similar to on Linux), any symbols in the extension that are exported may now be resolved by shared libraries loaded by the Ruby process. Specifically, that means that libxml2 and libxslt, which are statically linked into the nokogiri bundle, will resolve (at runtime) to a system libxml2 loaded by Ruby on Darwin. And it appears that often Ruby on Darwin does indeed load the system libxml2, and that messes with our assumptions about whether we're running with a patched libxml2 or a vanilla libxml2.

We choose to use `-load_hidden` in this case to prevent exporting those symbols from libxml2 and libxslt, which ensures that they will be resolved to the static libraries in the bundle. In other words, when we use `load_hidden`, what happens in the extension stays in the extension.

See github.com/rake-compiler/rake-compiler-dock/issues/87 for more info.

Anyway, this method is the logical bit to tell us when to turn on these workarounds.

# File ext/nokogiri/extconf.rb, line 609
# Truthy only when all three hold, checked in this order: config_cross_build?,
# darwin?, and the RbConfig "ruby_version" (ignoring anything after a "+")
# satisfies the "~> 3.2" requirement. A falsy earlier check is returned as-is.
def needs_darwin_linker_hack
  cross_build = config_cross_build?
  return cross_build unless cross_build

  on_darwin = darwin?
  return on_darwin unless on_darwin

  ruby_version = Gem::Version.new(RbConfig::CONFIG["ruby_version"].split("+").first)
  Gem::Requirement.new("~> 3.2").satisfied_by?(ruby_version)
end
nix?() click to toggle source
# File ext/nokogiri/extconf.rb, line 198
# True when none of windows?, solaris? or darwin? is truthy.
def nix?
  !windows? && !solaris? && !darwin?
end
openbsd?() click to toggle source
# File ext/nokogiri/extconf.rb, line 190
# True when the target_os recorded in RbConfig::CONFIG contains "openbsd".
def openbsd?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("openbsd")
end
preserving_globals() { || ... } click to toggle source
# File ext/nokogiri/extconf.rb, line 302
# Yields to the block and afterwards (even if the block raises) resets
# $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH
# and $libs to copies taken before the block ran. Returns the block's value.
def preserving_globals
  snapshot = [
    $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS,
    $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs,
  ].map { |setting| setting.dup }
  yield
ensure
  $arg_config, $INCFLAGS, $CFLAGS, $CPPFLAGS, $LDFLAGS, $DLDFLAGS, $LIBPATH, $libs = snapshot
end
process_recipe(name, version, static_p, cross_p, cacheable_p = true) { |recipe| ... } click to toggle source
# File ext/nokogiri/extconf.rb, line 428
# Builds (or reuses) a packaged copy of a dependency through MiniPortile and
# activates it.
#
# name, version - passed to MiniPortile.new; any name other than "libxml2" or
#                 "libxslt" is also recorded in OTHER_LIBRARY_VERSIONS.
# static_p      - truthy: configure with --disable-shared/--enable-static and
#                 append -fPIC to CFLAGS; falsy: --enable-shared/--disable-static.
# cross_p       - truthy: add --target and --host, both set to recipe.host.
# cacheable_p   - truthy (default): set recipe.target to PACKAGE_ROOT_DIR/ports.
#
# Yields the recipe so the caller can customise it before the configure
# options are finalised. Returns the recipe.
def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
  require "rubygems"
  gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
  require "mini_portile2"
  message("Using mini_portile version #{MiniPortile::VERSION}\n")

  unless ["libxml2", "libxslt"].include?(name)
    OTHER_LIBRARY_VERSIONS[name] = version
  end

  MiniPortile.new(name, version).tap do |recipe|
    def recipe.port_path
      "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}"
    end

    # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
    # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
    recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
    recipe.host = recipe.host.gsub(/i386/, "i686")

    recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
    recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"

    yield recipe

    # Reading a key that has not been set yet seeds it from the process
    # environment (as a String, "" when unset).
    env = Hash.new do |hash, key|
      hash[key] = (ENV[key]).to_s
    end

    recipe.configure_options.flatten!

    # Options of the form KEY=value are pulled out of configure_options and
    # merged into env; they are appended back as KEY=value further down.
    recipe.configure_options.delete_if do |option|
      case option
      when /\A(\w+)=(.*)\z/
        env[Regexp.last_match(1)] = if env.key?(Regexp.last_match(1))
          concat_flags(env[Regexp.last_match(1)], Regexp.last_match(2))
        else
          Regexp.last_match(2)
        end
        true
      else
        false
      end
    end

    if static_p
      recipe.configure_options += [
        "--disable-shared",
        "--enable-static",
      ]
      env["CFLAGS"] = concat_flags(env["CFLAGS"], "-fPIC")
    else
      recipe.configure_options += [
        "--enable-shared",
        "--disable-static",
      ]
    end

    if cross_p
      recipe.configure_options += [
        "--target=#{recipe.host}",
        "--host=#{recipe.host}",
      ]
    end

    if RbConfig::CONFIG["target_cpu"] == "universal"
      ["CFLAGS", "LDFLAGS"].each do |key|
        unless env[key].include?("-arch")
          env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"])
        end
      end
    end

    recipe.configure_options += env.map do |key, value|
      "#{key}=#{value.strip}"
    end

    # The checkpoint file marks a completed build for this platform; it is
    # only honoured when the recipe has no source_directory.
    checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed"
    if File.exist?(checkpoint) && !recipe.source_directory
      message("Building Nokogiri with a packaged version of #{name}-#{version}.\n")
    else
      message(<<~EOM)
        ---------- IMPORTANT NOTICE ----------
        Building Nokogiri with a packaged version of #{name}-#{version}.
        Configuration options: #{recipe.configure_options.shelljoin}
      EOM

      unless recipe.patch_files.empty?
        message("The following patches are being applied:\n")

        recipe.patch_files.each do |patch|
          message(format("  - %s\n", File.basename(patch)))
        end
      end

      message(<<~EOM) if name != "libgumbo"

        The Nokogiri maintainers intend to provide timely security updates, but if
        this is a concern for you and want to use your OS/distro system library
        instead, then abort this installation process and install nokogiri as
        instructed at:

          https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries

      EOM

      message(<<~EOM) if name == "libxml2"
        Note, however, that nokogiri cannot guarantee compatibility with every
        version of libxml2 that may be provided by OS/package vendors.

      EOM

      chdir_for_build { recipe.cook }
      FileUtils.touch(checkpoint)
    end
    recipe.activate
  end
end
sh_export_path(path) click to toggle source
# File ext/nokogiri/extconf.rb, line 322
# Converts a Windows drive path into an sh-compatible one; on any other
# platform, or when the path has no upper-case drive prefix, the path is
# returned unchanged.
#
# because libxslt 1.1.29 configure.in uses AC_PATH_TOOL which treats ":"
# as a $PATH separator, we need to convert windows paths from
#
#   C:/path/to/foo
#
# to
#
#   /C/path/to/foo
#
# which is sh-compatible, in order to find things properly during
# configuration
def sh_export_path(path)
  return path unless windows?

  drive_match = %r{^([A-Z]):(/.*)}.match(path)
  return path unless drive_match && drive_match.length == 3

  File.join("/", drive_match[1], drive_match[2])
end
solaris?() click to toggle source
# File ext/nokogiri/extconf.rb, line 182
# True when the target_os recorded in RbConfig::CONFIG contains "solaris".
def solaris?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.include?("solaris")
end
truffle?() click to toggle source
# File ext/nokogiri/extconf.rb, line 202
# True when the running Ruby engine identifies itself as "truffleruby".
def truffle?
  engine = RUBY_ENGINE
  engine == "truffleruby"
end
try_package_configuration(pc) click to toggle source

wrapper around MakeMakefile#pkg_config and the PKGConfig gem

# File ext/nokogiri/extconf.rb, line 242
# Looks up package configuration for pc, first with MakeMakefile#pkg_config
# and then with the pkg-config gem.
#
# Returns nil as soon as the pkg_config check is truthy. Otherwise returns
# the result of the gem-based check, or, when loading the gem raises
# LoadError, the result of printing an installation hint via message.
# Setting NOKOGIRI_TEST_PKG_CONFIG_GEM skips the pkg_config attempt.
def try_package_configuration(pc)
  use_system_tool = !ENV.key?("NOKOGIRI_TEST_PKG_CONFIG_GEM")
  if use_system_tool
    # try MakeMakefile#pkg_config, which uses the system utility `pkg-config`.
    found = checking_for("#{pc} using `pkg_config`", LOCAL_PACKAGE_RESPONSE) { pkg_config(pc) }
    return if found
  end

  # `pkg-config` probably isn't installed, which appears to be the case for lots of freebsd systems.
  # let's fall back to the pkg-config gem, which knows how to parse .pc files, and wrap it with the
  # same logic as MakeMakefile#pkg_config
  begin
    require "rubygems"
    gem("pkg-config", REQUIRED_PKG_CONFIG_VERSION)
    require "pkg-config"

    checking_for("#{pc} using pkg-config gem version #{PKGConfig::VERSION}", LOCAL_PACKAGE_RESPONSE) do
      next unless PKGConfig.have_package(pc)

      flags = [PKGConfig.cflags(pc), PKGConfig.libs_only_L(pc), PKGConfig.libs_only_l(pc)]

      Logging.message("pkg-config gem found package configuration for %s\n", pc)
      Logging.message("cflags: %s\nldflags: %s\nlibs: %s\n\n", *flags)

      flags
    end
  rescue LoadError
    message("Please install either the `pkg-config` utility or the `pkg-config` rubygem.\n")
  end
end
windows?() click to toggle source
# File ext/nokogiri/extconf.rb, line 178
# True when the target_os recorded in RbConfig::CONFIG mentions "mingw" or
# "mswin".
def windows?
  target_os = RbConfig::CONFIG["target_os"]
  target_os.match?(/mingw|mswin/)
end
zlib_source(version_string) click to toggle source
# File ext/nokogiri/extconf.rb, line 214
# Download URL for the zlib tarball of the given version. zlib.net is used
# only when the NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE environment variable is
# set; otherwise the GitHub release URL is returned.
#
# As of 2022-12, I'm starting to see failed downloads often enough from zlib.net that I want to
# change the default to github.
def zlib_source(version_string)
  use_canonical = ENV["NOKOGIRI_USE_CANONICAL_ZLIB_SOURCE"]
  return "https://zlib.net/fossils/zlib-#{version_string}.tar.gz" if use_canonical

  "https://github.com/madler/zlib/releases/download/v#{version_string}/zlib-#{version_string}.tar.gz"
end