Class: Nanoc::Extra::Checking::Checks::ExternalLinks

Inherits:
Nanoc::Extra::Checking::Check show all
Defined in:
lib/nanoc/extra/checking/checks/external_links.rb

Overview

A validator that verifies that all external links point to a location that exists.

Defined Under Namespace

Classes: ArrayEnumerator, Result

Instance Attribute Summary

Attributes inherited from Nanoc::Extra::Checking::Check

#issues, #site

Instance Method Summary (collapse)

Methods inherited from Nanoc::Extra::Checking::Check

#add_issue, #initialize, #output_filenames

Methods included from PluginRegistry::PluginMethods

#all, #identifier, #identifiers, #named, #register

Constructor Details

This class inherits a constructor from Nanoc::Extra::Checking::Check

Instance Method Details

- (Object) path_for_url(url)



139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 139

# Builds the request target (path plus optional query string) for a URL.
#
# An absent or empty path is treated as `/`. The URL object itself is never
# modified, so the method can be called repeatedly on the same URL.
#
# @param url [URI::Generic] the parsed URL; only `#path` and `#query` are read
#
# @return [String] the path, followed by `?` and the query when one is present
def path_for_url(url)
  path = url.path
  path = '/' if path.nil? || path.empty?

  # Build a new string instead of appending with `<<`: `url.path` hands back
  # the URI's own string, so appending in place would add the query to the
  # URI's path and a later call would return it twice.
  url.query ? "#{path}?#{url.query}" : path
end

- (Object) request_url_once(url, req_method = Net::HTTP::Head)



153
154
155
156
157
158
159
160
161
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 153

# Sends a single HTTP request for the given URL and returns the response.
# Redirects are not followed here.
#
# @param url [URI::Generic] the URL to request; host, port, path and query
#   are read from it
# @param req_method [Class] the request class to instantiate with the
#   request path (defaults to `Net::HTTP::Head`)
#
# @return [Object] whatever `Net::HTTP#request` returns for the request
def request_url_once(url, req_method = Net::HTTP::Head)
  request = req_method.new(path_for_url(url))
  client = Net::HTTP.new(url.host, url.port)

  # HTTPS URLs are requested over TLS, but the peer certificate is
  # deliberately not verified (VERIFY_NONE).
  secure = url.instance_of?(URI::HTTPS)
  client.use_ssl = true if secure
  client.verify_mode = OpenSSL::SSL::VERIFY_NONE if secure

  client.request(request)
end

- (Object) run



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 16

# Collects the external links found in the HTML output files, validates
# them, and records one issue per (broken link, file containing it) pair.
def run
  # TODO de-duplicate this (duplicated in internal links check)
  html_files = output_filenames.select { |f| File.extname(f) == '.html' }
  collector = ::Nanoc::Extra::LinkCollector.new(html_files, :external)
  files_by_href = collector.filenames_per_href

  # Each invalid result is reported once for every file that links to it
  select_invalid(files_by_href.keys).each do |result|
    message = "broken reference to #{result.href}: #{result.explanation}"
    files_by_href[result.href].each do |file|
      add_issue(message, :subject => file)
    end
  end
end

- (Object) select_invalid(hrefs)



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 63

# Validates the given hrefs using ten worker threads and collects the
# failures.
#
# @param hrefs [Array<String>] the hrefs to check; they are handed to the
#   workers in sorted order
#
# @return [Set] the non-nil results returned by {#validate}
def select_invalid(hrefs)
  # NOTE(review): the workers share this enumerator without a lock, so
  # ArrayEnumerator#next is presumably thread-safe and returns nil once
  # exhausted -- confirm against its definition.
  pending = ArrayEnumerator.new(hrefs.sort)
  lock = Mutex.new
  broken = Set.new

  workers = Array.new(10) do
    Thread.new do
      loop do
        href = pending.next
        break if href.nil?

        result = validate(href)
        next unless result

        # The result set is shared between workers; guard every write
        lock.synchronize { broken << result }
      end
    end
  end
  workers.each(&:join)

  broken
end

- (Object) validate(href)



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 88

# Checks whether a single href points to a reachable HTTP(S) resource.
#
# The URL is requested with HEAD (falling back to GET when the server
# answers 405). Redirects are followed, with at most five requests in total,
# each limited to ten seconds.
#
# @param href [String] the link target as it was found in the output
#
# @return [Result, nil] nil when the link is fine (final status 200) or
#   cannot be checked here (non-HTTP scheme); otherwise a Result that holds
#   the href and an explanation (status code, error message, ...)
def validate(href)
  # Parse
  begin
    url = URI.parse(href)
  rescue URI::InvalidURIError
    return Result.new(href, 'invalid URI')
  end

  # Skip non-HTTP URLs
  return nil if url.scheme !~ /\Ahttps?\z/

  # Get status
  5.times do |i|
    res = nil
    begin
      Timeout.timeout(10) do
        res = request_url_once(url)
        # Some servers reject HEAD; retry once with GET
        res = request_url_once(url, Net::HTTP::Get) if res.code == '405'
      end
    rescue => e
      return Result.new(href, e.message)
    end

    case res.code
    when '200'
      return nil
    when /^3..$/
      return Result.new(href, 'too many redirects') if i == 4

      # A redirect without a target cannot be followed; report it instead of
      # failing on a nil location.
      location = res['Location']
      if location.nil? || location.empty?
        return Result.new(href, 'redirect without Location header')
      end

      # Resolve the target against the current URL. URI#merge handles
      # absolute, protocol-relative (//host/path), root-relative (/path) and
      # path-relative (path) locations.
      begin
        url = url.merge(location)
      rescue URI::Error
        return Result.new(href, 'invalid redirect location')
      end

      # A redirect that leaves HTTP(S) cannot be checked here; treat it like
      # any other non-HTTP URL.
      return nil if url.scheme !~ /\Ahttps?\z/
    else
      return Result.new(href, res.code)
    end
  end
  raise 'should not have gotten here'
end