Class | Gem::Indexer |
In: |
lib/rubygems/indexer.rb
|
Parent: | Object |
Top level class for building the gem repository index.
build_legacy | [RW] | Build indexes for RubyGems older than 1.2.0 when true |
build_modern | [RW] | Build indexes for RubyGems 1.2.0 and newer when true |
dest_directory | [R] | Index install location |
dest_latest_specs_index | [R] | Latest specs index install location |
dest_prerelease_specs_index | [R] | Prerelease specs index install location |
dest_specs_index | [R] | Specs index install location |
directory | [R] | Index build directory |
Create an indexer that will index the gems in directory.
# File lib/rubygems/indexer.rb, line 56
#
# Sets up an indexer for the gem repository rooted at +directory+.
#
# +directory+ becomes the install location (@dest_directory); all index files
# are first generated under a per-process directory inside Dir.tmpdir.
#
# Recognised +options+ keys:
# :build_legacy::  build the legacy indexes (defaults to true)
# :build_modern::  build the modern indexes (defaults to true)
# :rss_title::     title for the RSS feed
# :rss_host::      host used in the RSS feed links
# :rss_gems_host:: host used in the RSS enclosure URLs
#
# Raises a RuntimeError when Builder::XChar is not defined.
def initialize(directory, options = {})
  require 'fileutils'
  require 'tmpdir'
  require 'zlib'

  unless defined?(Builder::XChar)
    raise "Gem::Indexer requires that the XML Builder library be installed:" \
          "\n\tgem install builder"
  end

  # Caller-supplied values win over the defaults, even when they are nil.
  settings = { :build_legacy => true, :build_modern => true }.merge(options)

  @build_legacy  = settings[:build_legacy]
  @build_modern  = settings[:build_modern]

  @rss_title     = settings[:rss_title]
  @rss_host      = settings[:rss_host]
  @rss_gems_host = settings[:rss_gems_host]

  @dest_directory = directory
  @directory      = File.join(Dir.tmpdir, "gem_generate_index_#{Process.pid}")

  version      = Gem.marshal_version
  marshal_name = "Marshal.#{version}"

  @master_index  = File.join(@directory, 'yaml')
  @marshal_index = File.join(@directory, marshal_name)

  @quick_dir         = File.join(@directory, 'quick')
  @quick_marshal_dir = File.join(@quick_dir, marshal_name)
  # Same location as @quick_marshal_dir, but relative to the index root.
  @quick_marshal_dir_base = File.join('quick', marshal_name)

  @quick_index  = File.join(@quick_dir, 'index')
  @latest_index = File.join(@quick_dir, 'latest_index')

  specs_name      = "specs.#{version}"
  latest_name     = "latest_specs.#{version}"
  prerelease_name = "prerelease_specs.#{version}"

  # Build-directory copies of the modern indexes ...
  @specs_index            = File.join(@directory, specs_name)
  @latest_specs_index     = File.join(@directory, latest_name)
  @prerelease_specs_index = File.join(@directory, prerelease_name)

  # ... and their install locations.
  @dest_specs_index            = File.join(@dest_directory, specs_name)
  @dest_latest_specs_index     = File.join(@dest_directory, latest_name)
  @dest_prerelease_specs_index = File.join(@dest_directory, prerelease_name)

  @rss_index = File.join(@directory, 'index.rss')

  @files = []
end
Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.
# File lib/rubygems/indexer.rb, line 113
#
# Empties the bulky list fields of +spec+ in place (files, test files, rdoc
# options, extra rdoc files and the certificate chain) and returns the same
# +spec+ object.
def abbreviate(spec)
  spec.tap do |trimmed|
    trimmed.files            = []
    trimmed.test_files       = []
    trimmed.rdoc_options     = []
    trimmed.extra_rdoc_files = []
    trimmed.cert_chain       = []
  end
end
Builds the various indices.
# File lib/rubygems/indexer.rb, line 125
#
# Drives a full index build: registers the specs of every gem returned by
# gem_file_list with Gem::Specification, writes the Marshal quick-index
# gemspecs, the legacy and/or modern indexes (depending on @build_legacy and
# @build_modern) and the RSS feed, then compresses the results.
def build_indicies
  # Marshal gemspecs are used by both modern and legacy RubyGems, so the spec
  # list is reset and repopulated before anything is generated.
  Gem::Specification.dirs = []
  indexed_specs = map_gems_to_specs(gem_file_list)
  Gem::Specification.add_specs(*indexed_specs)

  build_marshal_gemspecs

  if @build_legacy
    build_legacy_indicies
  end

  if @build_modern
    build_modern_indicies
  end

  build_rss

  compress_indicies
end
Builds indices for RubyGems older than 1.2.x.
# File lib/rubygems/indexer.rb, line 142
#
# Writes the dump of the source index returned by collect_specs to
# @marshal_index, then records both that file and its ".Z" companion in
# @files. Returns @files.
def build_legacy_indicies
  source_index = collect_specs

  say "Generating Marshal master index"

  Gem.time('Generated Marshal master index') do
    open(@marshal_index, 'wb') { |out| out.write source_index.dump }
  end

  # The ".Z" file itself is produced later, by compress_indicies.
  @files.push(@marshal_index, "#{@marshal_index}.Z")
end
Builds Marshal quick index gemspecs.
# File lib/rubygems/indexer.rb, line 160
#
# Writes one deflated, Marshal-dumped gemspec per known specification into
# @quick_marshal_dir (named "<original_name>.gemspec.rz"), reporting progress
# through the UI. Adds @quick_marshal_dir to @files and returns the list of
# files written.
def build_marshal_gemspecs
  total = Gem::Specification.count
  reporter = ui.progress_reporter(total,
                                  "Generating Marshal quick index gemspecs for #{total} gems",
                                  "Complete")

  written = []

  Gem.time('Generated Marshal quick index gemspecs') do
    Gem::Specification.each do |spec|
      target  = File.join(@quick_marshal_dir, "#{spec.original_name}.gemspec.rz")
      payload = Gem.deflate(Marshal.dump(spec))

      open(target, 'wb') { |out| out.write payload }

      written << target

      reporter.updated spec.original_name
    end

    reporter.done
  end

  # The directory as a whole (not the individual files) is what gets installed.
  @files << @quick_marshal_dir

  written
end
Build a single index for RubyGems 1.2 and newer
# File lib/rubygems/indexer.rb, line 192
#
# Writes a single modern index to +file+.
#
# index:: the specs to index; each element is either a spec or a collection
#         whose last (flattened) element is the spec
# file::  path of the index file to write
# name::  human-readable index name used in progress messages
#
# Each spec becomes a [name, version, platform] tuple. Specs whose
# original_platform is not a String are skipped with a warning, and an empty
# platform is replaced by Gem::Platform::RUBY. The tuples are passed through
# compact_specs and Marshal-dumped.
def build_modern_index(index, file, name)
  say "Generating #{name} index"

  Gem.time "Generated #{name} index" do
    # File.open rather than Kernel#open: +file+ is always a plain path.
    File.open(file, 'wb') do |io|
      tuples = index.map do |*entry|
        # We have to splat here because latest_specs is an array, while the
        # others are hashes.
        spec = entry.flatten.last
        platform = spec.original_platform

        # win32-api-1.0.4-x86-mswin32-60
        unless String === platform then
          alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
          next
        end

        # A nil platform cannot reach this point (it is not a String), so only
        # the empty case needs defaulting.
        platform = Gem::Platform::RUBY if platform.empty?
        [spec.name, spec.version, platform]
      end

      # Skipped specs left nil in the mapped list; drop them so they do not
      # end up as [nil, nil, nil] tuples in the dumped index.
      specs = compact_specs(tuples.compact)
      Marshal.dump(specs, io)
    end
  end
end
Builds indices for RubyGems 1.2 and newer. Handles the full, latest, and prerelease indices.
# File lib/rubygems/indexer.rb, line 222
#
# Builds the three modern indexes (released specs, latest specs and
# prerelease specs) from Gem::Specification, each from a sorted spec list,
# and appends every index file plus its ".gz" companion to @files.
def build_modern_indicies
  prerelease, released = Gem::Specification.partition do |spec|
    spec.version.prerelease?
  end
  latest = Gem::Specification.latest_specs

  [
    [released,   @specs_index,            'specs'],
    [latest,     @latest_specs_index,     'latest specs'],
    [prerelease, @prerelease_specs_index, 'prerelease specs'],
  ].each do |specs, path, label|
    build_modern_index(specs.sort, path, label)
  end

  # The ".gz" files themselves are produced later, by compress_indicies.
  generated = []
  [@specs_index, @latest_specs_index, @prerelease_specs_index].each do |path|
    generated << path << "#{path}.gz"
  end

  @files += generated
end
Builds an RSS feed of the gems released in the past two days, according to each gem's date.
# File lib/rubygems/indexer.rb, line 245
#
# Writes an RSS 2.0 feed to @rss_index describing every known gem whose date
# lies between yesterday and today (inclusive), newest first, and records
# @rss_index in @files.
#
# Does nothing when @rss_host or @rss_gems_host is nil; in that case a
# warning is emitted only when the configuration is "really verbose".
def build_rss
  if @rss_host.nil? or @rss_gems_host.nil? then
    if Gem.configuration.really_verbose then
      alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
    end
    return
  end

  require 'cgi'
  require 'time' # Time.parse, Date and #rfc2822 are all used below
  require 'rubygems/text'

  extend Gem::Text

  Gem.time 'Generated rss' do
    File.open @rss_index, 'wb' do |io|
      rss_host = CGI.escapeHTML @rss_host
      rss_title = CGI.escapeHTML(@rss_title || 'gems')

      # The values must be interpolated; an escaped "\#{...}" would be written
      # to the feed verbatim.
      io.puts "<?xml version=\"1.0\"?>\n" \
              "<rss version=\"2.0\">\n" \
              "<channel>\n" \
              "<title>#{rss_title}</title>\n" \
              "<link>http://#{rss_host}</link>\n" \
              "<description>Recently released gems from http://#{rss_host}</description>\n" \
              "<generator>RubyGems v#{Gem::VERSION}</generator>\n" \
              "<docs>http://cyber.law.harvard.edu/rss/rss.html</docs>\n"

      today = Gem::Specification::TODAY
      yesterday = today - 86400

      recent = Gem::Specification.select do |spec|
        spec_date = spec.date
        # TODO: remove this and make YAML based specs properly normalized
        spec_date = Time.parse(spec_date.to_s) if Date === spec_date

        spec_date >= yesterday && spec_date <= today
      end

      recent.sort_by { |spec| [-spec.date.to_i, spec] }.each do |spec|
        file_name = File.basename spec.cache_file
        gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{file_name}"
        size = File.size(spec.loaded_from)

        description = spec.description || spec.summary || ''
        author_names = Array spec.authors
        emails = Array spec.email

        # Pair each e-mail with its author name. An e-mail without a usable
        # name is kept as-is instead of being dropped from the list.
        author_line = emails.zip(author_names).map do |email, author|
          if author and not author.empty? then
            "#{email} (#{author})"
          else
            email
          end
        end.join ', '

        # Re-wrap each paragraph separately so blank lines survive.
        description = description.split(/\n\n+/).map do |chunk|
          format_text chunk, 78
        end.join "\n\n"

        item = "<item>\n" \
               "<title>#{CGI.escapeHTML(spec.full_name)}</title>\n" \
               "<description>\n" \
               "<pre>#{CGI.escapeHTML(description.chomp)}</pre>\n" \
               "</description>\n" \
               "<author>#{CGI.escapeHTML(author_line)}</author>\n" \
               "<guid>#{CGI.escapeHTML(spec.full_name)}</guid>\n" \
               "<enclosure url=\"#{gem_path}\"\n" \
               "length=\"#{size}\" type=\"application/octet-stream\" />\n" \
               "<pubDate>#{spec.date.rfc2822}</pubDate>\n"

        item << "<link>#{CGI.escapeHTML(spec.homepage)}</link>\n" if spec.homepage

        item << "</item>\n"

        io.puts item
      end

      io.puts "</channel>\n</rss>\n"
    end
  end

  @files << @rss_index
end
Collect specifications from .gem files from the gem directory.
# File lib/rubygems/indexer.rb, line 384
#
# Builds a Gem::SourceIndex from the specs of the given gem files (by default
# those returned by gem_file_list), keyed by each spec's original name, and
# returns it. Runs inside Gem::Deprecate.skip_during.
def collect_specs(gems = gem_file_list)
  Gem::Deprecate.skip_during do
    source_index = Gem::SourceIndex.new

    map_gems_to_specs(gems).each do |spec|
      source_index.add_spec(spec, spec.original_name)
    end

    source_index
  end
end
Compacts Marshal output for the specs index data source by using identical objects as much as possible.
# File lib/rubygems/indexer.rb, line 422
#
# Returns a copy of +specs+ (a list of [name, version, platform] tuples) in
# which equal names, versions and platforms are all represented by the first
# object seen for that value, so that Marshal can emit back-references
# instead of repeated copies.
def compact_specs(specs)
  # Each pool remembers the first object seen for a given value.
  names     = Hash.new { |pool, value| pool[value] = value }
  versions  = Hash.new { |pool, value| pool[value] = value }
  platforms = Hash.new { |pool, value| pool[value] = value }

  specs.map do |(name, version, platform)|
    [names[name], versions[version], platforms[platform]]
  end
end
Compresses filename, writing the deflated data to a new file named filename with extension appended.
# File lib/rubygems/indexer.rb, line 439
#
# Deflates the contents of +filename+ and writes the result to
# "<filename>.<extension>". The source file is read and compressed before the
# target is opened, and is left in place.
def compress(filename, extension)
  deflated = Gem.deflate(Gem.read_binary(filename))

  open("#{filename}.#{extension}", 'wb') { |out| out.write deflated }
end
Compresses indices on disk.
# File lib/rubygems/indexer.rb, line 401
#
# Compresses the generated index files: the legacy Marshal index is deflated
# to a ".Z" file and verified with paranoid when @build_legacy is set, and the
# three modern indexes are gzipped when @build_modern is set.
def compress_indicies
  say "Compressing indicies"

  Gem.time('Compressed indicies') do
    if @build_legacy
      compress(@marshal_index, 'Z')
      paranoid(@marshal_index, 'Z') # re-read and check the compressed copy
    end

    if @build_modern
      gzip(@specs_index)
      gzip(@latest_specs_index)
      gzip(@prerelease_specs_index)
    end
  end
end
List of gem file names to index.
# File lib/rubygems/indexer.rb, line 452
#
# Returns the paths of all "*.gem" files directly inside the "gems"
# subdirectory of @dest_directory.
def gem_file_list
  pattern = File.join(@dest_directory, 'gems', '*.gem')
  Dir.glob(pattern)
end
Builds and installs indices.
# File lib/rubygems/indexer.rb, line 459
#
# Runs a complete index generation: creates the temporary build directories,
# builds every index and installs the results. The temporary build directory
# is removed afterwards whether or not the build succeeded.
def generate_index
  begin
    make_temp_directories
    build_indicies
    install_indicies
  rescue SignalException
    # A signal aborts the build quietly; cleanup below still runs.
  ensure
    FileUtils.rm_rf(@directory)
  end
end
Zlib::GzipWriter wrapper that gzips filename on disk.
# File lib/rubygems/indexer.rb, line 471
#
# Writes a gzip-compressed copy of +filename+ to "<filename>.gz", leaving the
# original file in place.
def gzip(filename)
  Zlib::GzipWriter.open("#{filename}.gz") do |gz|
    gz.write(Gem.read_binary(filename))
  end
end
Installs the generated indices into the destination directory.
# File lib/rubygems/indexer.rb, line 480 480: def install_indicies 481: verbose = Gem.configuration.really_verbose 482: 483: say "Moving index into production dir #{@dest_directory}" if verbose 484: 485: files = @files 486: files.delete @quick_marshal_dir if files.include? @quick_dir 487: 488: if files.include? @quick_marshal_dir and not files.include? @quick_dir then 489: files.delete @quick_marshal_dir 490: 491: dst_name = File.join(@dest_directory, @quick_marshal_dir_base) 492: 493: FileUtils.mkdir_p File.dirname(dst_name), :verbose => verbose 494: FileUtils.rm_rf dst_name, :verbose => verbose 495: FileUtils.mv(@quick_marshal_dir, dst_name, 496: :verbose => verbose, :force => true) 497: end 498: 499: files = files.map do |path| 500: path.sub(/^#{Regexp.escape @directory}\/?/, '') # HACK? 501: end 502: 503: files.each do |file| 504: src_name = File.join @directory, file 505: dst_name = File.join @dest_directory, file 506: 507: FileUtils.rm_rf dst_name, :verbose => verbose 508: FileUtils.mv(src_name, @dest_directory, 509: :verbose => verbose, :force => true) 510: end 511: end
Make directories for index generation
# File lib/rubygems/indexer.rb, line 516
#
# Recreates the build directory from scratch: any existing @directory is
# removed, then @directory (mode 0700) and @quick_marshal_dir are created.
def make_temp_directories
  FileUtils.rm_rf(@directory)

  FileUtils.mkdir_p(@directory, :mode => 0700)
  FileUtils.mkdir_p(@quick_marshal_dir)
end
# File lib/rubygems/indexer.rb, line 344
#
# Loads the specification of each gem file in +gems+ and returns the list of
# abbreviated, sanitized specs, each with loaded_from set to its gem file.
#
# Zero-length gem files are skipped with a warning. A gem that fails to load
# is reported through alert_error and left out of the result; a signal is
# reported and then re-raised.
def map_gems_to_specs(gems)
  specs = gems.map do |gem_path|
    if File.size(gem_path).zero?
      alert_warning "Skipping zero-length gem: #{gem_path}"
      next
    end

    begin
      spec = Gem::Format.from_file_by_path(gem_path).spec
      spec.loaded_from = gem_path

      # NOTE: a check that skipped gems whose file name differs from
      # spec.original_name used to live here; it is disabled because it broke
      # the tests that use pl1.

      abbreviate spec
      sanitize spec

      spec
    rescue SignalException
      alert_error "Received signal, exiting"
      raise
    rescue Exception => error
      # Deliberately broad: one unreadable gem must not stop the whole run.
      alert_error "Unable to process #{gem_path}\n" \
                  "#{error.message} (#{error.class})\n" \
                  "\t#{error.backtrace.join("\n\t")}"
    end
  end

  # Skipped and failed gems produced nil entries.
  specs.compact
end
Ensures that the contents of path match the decompressed contents of path with extension appended.
# File lib/rubygems/indexer.rb, line 525
#
# Verifies that "<path>.<extension>" inflates to exactly the contents of
# +path+.
#
# Returns nil when the two match and raises a RuntimeError naming both files
# when they do not.
def paranoid(path, extension)
  # Named once so the read and the error message always refer to the same
  # file.
  compressed_path = "#{path}.#{extension}"

  data = Gem.read_binary path
  compressed_data = Gem.read_binary compressed_path

  unless data == Gem.inflate(compressed_data) then
    raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
  end
end
Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.
# File lib/rubygems/indexer.rb, line 539
#
# Runs the summary, description, post-install message and every author of
# +spec+ through sanitize_string, storing the results back on +spec+.
# Returns +spec+.
def sanitize(spec)
  spec.summary              = sanitize_string(spec.summary)
  spec.description          = sanitize_string(spec.description)
  spec.post_install_message = sanitize_string(spec.post_install_message)
  spec.authors              = spec.authors.map { |author| sanitize_string(author) }

  spec
end
Sanitize a single string.
# File lib/rubygems/indexer.rb, line 551
#
# Returns +string+ encoded by Builder::XChar.encode, falling back to
# String#to_xs when that call raises a NameError. A nil or false argument is
# returned unchanged.
def sanitize_string(string)
  return string if !string

  # HACK the #to_s is in here because RSpec has an Array of Arrays of
  # Strings for authors. Need a way to disallow bad values on gemspec
  # generation. (Probably won't happen.)
  text = string.to_s

  begin
    Builder::XChar.encode(text)
  rescue NameError # NoMethodError is a NameError, so it is covered as well
    text.to_xs
  end
end
Performs an in-place update of the repository from newly added gems. Only works for modern indices, and sets build_legacy to false when run.
# File lib/rubygems/indexer.rb, line 570
#
# Updates the installed modern indexes in place with the gems whose files are
# at least as new as the installed specs index. Sets @build_legacy to false.
#
# When no such gems exist, says so and calls terminate_interaction with
# status 0. Otherwise the updated index files are moved into @dest_directory
# and stamped with the modification time of the newest gem file.
#
# The temporary build directory is removed on every exit path.
def update_index
  @build_legacy = false

  make_temp_directories

  specs_mtime = File.stat(@dest_specs_index).mtime
  newest_mtime = Time.at 0

  # Select the gems touched since the specs index was last written, while
  # tracking the newest modification time over all gem files.
  updated_gems = gem_file_list.select do |gem_file|
    gem_mtime = File.stat(gem_file).mtime
    newest_mtime = gem_mtime if gem_mtime > newest_mtime
    gem_mtime >= specs_mtime
  end

  if updated_gems.empty? then
    say 'No new gems'
    terminate_interaction 0
  end

  specs = map_gems_to_specs updated_gems
  prerelease, released = specs.partition { |s| s.version.prerelease? }

  files = build_marshal_gemspecs

  Gem.time 'Updated indexes' do
    update_specs_index released, @dest_specs_index, @specs_index
    update_specs_index released, @dest_latest_specs_index, @latest_specs_index
    update_specs_index(prerelease,
                       @dest_prerelease_specs_index,
                       @prerelease_specs_index)
  end

  compress_indicies

  verbose = Gem.configuration.really_verbose

  say "Updating production dir #{@dest_directory}" if verbose

  files << @specs_index
  files << "#{@specs_index}.gz"
  files << @latest_specs_index
  files << "#{@latest_specs_index}.gz"
  files << @prerelease_specs_index
  files << "#{@prerelease_specs_index}.gz"

  # Strip the build-directory prefix so each entry is relative to the index
  # root.
  files = files.map do |path|
    path.sub(/^#{Regexp.escape @directory}\/?/, '') # HACK?
  end

  files.each do |file|
    src_name = File.join @directory, file
    dst_name = File.join @dest_directory, file # REFACTOR: duped above

    FileUtils.mv src_name, dst_name, :verbose => verbose,
                                     :force => true

    # Stamp installed files with the newest gem's mtime; the next run
    # compares gem mtimes against the specs index.
    File.utime newest_mtime, newest_mtime, dst_name
  end
ensure
  # Mirror generate_index: never leave the temporary build directory behind,
  # including when there is nothing to do or an error is raised.
  FileUtils.rm_rf @directory
end
Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.
# File lib/rubygems/indexer.rb, line 634
#
# Merges the specs in +index+ into the Marshal-dumped tuple list stored in
# +source+ and writes the combined list to +dest+.
#
# Each spec contributes a [name, version, platform] tuple, with a nil or empty
# platform replaced by Gem::Platform::RUBY. Duplicates are removed and the
# list is sorted and passed through compact_specs before being dumped.
def update_specs_index(index, source, dest)
  entries = Marshal.load(Gem.read_binary(source))

  index.each do |spec|
    platform = spec.original_platform
    platform = Gem::Platform::RUBY if platform.nil? || platform.empty?

    entries.push [spec.name, spec.version, platform]
  end

  merged = compact_specs(entries.uniq.sort)

  open(dest, 'wb') { |out| Marshal.dump(merged, out) }
end