NIHVIVO-55 Revise the licenser script to take a list of known exceptions, and to allow for scan-only passes.
This commit is contained in:
parent
fcba81c3a4
commit
0c11965b06
1 changed files with 207 additions and 126 deletions
|
@ -14,6 +14,7 @@ require 'fileutils'
|
||||||
class LicenserStats
|
class LicenserStats
|
||||||
attr_reader :substitutions
|
attr_reader :substitutions
|
||||||
attr_reader :missing_tags
|
attr_reader :missing_tags
|
||||||
|
attr_reader :known_exceptions
|
||||||
attr_reader :file_count
|
attr_reader :file_count
|
||||||
attr_reader :dir_count
|
attr_reader :dir_count
|
||||||
|
|
||||||
|
@ -32,9 +33,9 @@ class LicenserStats
|
||||||
public
|
public
|
||||||
# ------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------
|
||||||
|
|
||||||
def initialize(file_matchers, full)
|
def initialize(root_dir, file_matchers, full)
|
||||||
|
@root_dir = "#{root_dir}/".gsub('//', '/')
|
||||||
@file_matchers = file_matchers
|
@file_matchers = file_matchers
|
||||||
|
|
||||||
@full = full
|
@full = full
|
||||||
|
|
||||||
# keep track of how many substitutions for all file types
|
# keep track of how many substitutions for all file types
|
||||||
|
@ -46,6 +47,9 @@ class LicenserStats
|
||||||
# keep track of missing tags, only in file types that have missing tags
|
# keep track of missing tags, only in file types that have missing tags
|
||||||
@missing_tags = Hash.new(0)
|
@missing_tags = Hash.new(0)
|
||||||
|
|
||||||
|
# keep track of how many known non-licensed files we encounter, and what types.
|
||||||
|
@known_exceptions = Hash.new(0)
|
||||||
|
|
||||||
# keep track of how many files are copied
|
# keep track of how many files are copied
|
||||||
@file_count = 0
|
@file_count = 0
|
||||||
|
|
||||||
|
@ -53,36 +57,45 @@ class LicenserStats
|
||||||
@dir_count = 0
|
@dir_count = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
def enter_without_mods(path)
|
def enter_directory(path)
|
||||||
@dir_count += 1
|
@dir_count += 1
|
||||||
puts "Entering, no mods: #{path}" if @full
|
puts "Entering directory: #{path}" if @full
|
||||||
end
|
end
|
||||||
|
|
||||||
def enter_with_mods(path)
|
def record_scan_non_matching(filename)
|
||||||
@dir_count += 1
|
|
||||||
puts "Entering, with mods: #{path}" if @full
|
|
||||||
end
|
|
||||||
|
|
||||||
def record_copy_without_mods(filename)
|
|
||||||
@file_count += 1
|
@file_count += 1
|
||||||
puts " Copying, without mods: #{filename}" if @full
|
puts " Scan without mods: #{filename}" if @full
|
||||||
end
|
end
|
||||||
|
|
||||||
def record_copy_with_mods(filename)
|
def record_copy_non_matching(filename)
|
||||||
@file_count += 1
|
@file_count += 1
|
||||||
puts " Copying, with mods: #{filename}" if @full
|
puts " Copy without mods: #{filename}" if @full
|
||||||
end
|
end
|
||||||
|
|
||||||
def record_substitution(filename)
|
def record_scan_matching(filename)
|
||||||
|
@file_count += 1
|
||||||
|
puts " Scan with mods: #{filename}" if @full
|
||||||
|
end
|
||||||
|
|
||||||
|
def record_copy_matching(filename)
|
||||||
|
@file_count += 1
|
||||||
|
puts " Copy with mods: #{filename}" if @full
|
||||||
|
end
|
||||||
|
|
||||||
|
def record_known_exception(filename)
|
||||||
|
@file_count += 1
|
||||||
|
puts " Known exception: #{filename}" if @full
|
||||||
|
@known_exceptions[which_match(filename)] += 1
|
||||||
|
end
|
||||||
|
|
||||||
|
def record_tag(filename)
|
||||||
puts " Substituted license text into #{filename}" if @full
|
puts " Substituted license text into #{filename}" if @full
|
||||||
matcher = which_match(filename)
|
@substitutions[which_match(filename)] += 1
|
||||||
@substitutions[matcher] += 1
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def record_missing_tag(filename, source_path)
|
def record_no_tag(filename, source_path)
|
||||||
puts "WARN: Found no license tag in #{source_path}"
|
puts "WARN: Found no license tag in #{source_path.sub(@root_dir, '')}"
|
||||||
matcher = which_match(filename)
|
@missing_tags[which_match(filename)] += 1
|
||||||
@missing_tags[matcher] += 1
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -108,14 +121,92 @@ class Licenser
|
||||||
return text
|
return text
|
||||||
end
|
end
|
||||||
|
|
||||||
# Prepare the license-applicable directories as absolute paths.
|
# The globs in the exceptions file are assumed to be
|
||||||
|
# relative to the source directory. Make them explicitly so.
|
||||||
#
|
#
|
||||||
def prepare_license_dir_paths(source_dir, license_dirs)
|
# Ignore any blank lines or lines that start with a '#'
|
||||||
paths = []
|
#
|
||||||
license_dirs.each do |dir|
|
def prepare_exception_globs(exceptions_file, source_dir)
|
||||||
paths << "#{source_dir}/#{dir}".gsub('//', '/')
|
globs = []
|
||||||
|
File.open(exceptions_file) do |file|
|
||||||
|
file.each do |line|
|
||||||
|
glob = line.strip
|
||||||
|
if (glob.length > 0) && (glob[0..0] != '#')
|
||||||
|
globs << "#{source_dir}/#{glob}".gsub('//', '/')
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
return paths
|
return globs
|
||||||
|
end
|
||||||
|
|
||||||
|
# Recursively scan this directory, and copy if we are not scan-only.
|
||||||
|
#
|
||||||
|
def scan_dir(source_dir, target_dir)
|
||||||
|
@stats.enter_directory(source_dir)
|
||||||
|
|
||||||
|
Dir.mkdir(target_dir) if !@scan_only
|
||||||
|
|
||||||
|
Dir.foreach(source_dir) do |filename|
|
||||||
|
source_path = "#{source_dir}/#{filename}"
|
||||||
|
target_path = "#{target_dir}/#{filename}"
|
||||||
|
|
||||||
|
# What kind of beast is this?
|
||||||
|
if filename == '.' || filename == '..'
|
||||||
|
is_skipped_directory = true
|
||||||
|
else
|
||||||
|
if File.directory?(source_path)
|
||||||
|
is_directory = true
|
||||||
|
else
|
||||||
|
if filename_matches_pattern?(filename)
|
||||||
|
if path_matches_exception?(source_path)
|
||||||
|
is_exception = true
|
||||||
|
else
|
||||||
|
is_match = true
|
||||||
|
end
|
||||||
|
else
|
||||||
|
is_ignored = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
if is_skipped_directory
|
||||||
|
# do nothing
|
||||||
|
elsif is_directory
|
||||||
|
scan_dir(source_path, target_path)
|
||||||
|
elsif is_match
|
||||||
|
if @scan_only
|
||||||
|
@stats.record_scan_matching(filename)
|
||||||
|
scan_file(source_path, filename)
|
||||||
|
else
|
||||||
|
@stats.record_copy_matching(filename)
|
||||||
|
copy_file_with_license(source_path, target_path, filename)
|
||||||
|
end
|
||||||
|
elsif is_exception
|
||||||
|
@stats.record_known_exception(filename)
|
||||||
|
if @scan_only
|
||||||
|
# do nothing
|
||||||
|
else
|
||||||
|
copy_file_without_license(source_path, target_path)
|
||||||
|
end
|
||||||
|
else # not a match
|
||||||
|
if @scan_only
|
||||||
|
@stats.record_scan_non_matching(filename)
|
||||||
|
else
|
||||||
|
@stats.record_copy_non_matching(filename)
|
||||||
|
copy_file_without_license(source_path, target_path)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Does this file path match any of the exceptions?
|
||||||
|
#
|
||||||
|
def path_matches_exception?(path)
|
||||||
|
@known_exceptions.each do |pattern|
|
||||||
|
return true if File.fnmatch(pattern, path)
|
||||||
|
end
|
||||||
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
# Does this filename match any of the patterns?
|
# Does this filename match any of the patterns?
|
||||||
|
@ -127,67 +218,31 @@ class Licenser
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
# Recursively copy this directory, without adding license mods to any files,
|
# This file would be eligible for licensing if we weren't in scan-only mode.
|
||||||
# unless we hit one of the licensed directories.
|
|
||||||
#
|
#
|
||||||
def copy_dir_without_mods(source_dir, target_dir)
|
def scan_file(source_path, filename)
|
||||||
@stats.enter_without_mods(source_dir)
|
found = 0
|
||||||
Dir.mkdir(target_dir)
|
File.open(source_path) do |source_file|
|
||||||
Dir.foreach(source_dir) do |filename|
|
source_file.each do |line|
|
||||||
source_path = "#{source_dir}/#{filename}"
|
if line.include?(MAGIC_STRING)
|
||||||
target_path = "#{target_dir}/#{filename}"
|
found += 1
|
||||||
|
end
|
||||||
if filename == '.'
|
|
||||||
elsif filename == '..'
|
|
||||||
elsif @license_dir_paths.include?(source_path)
|
|
||||||
copy_dir_with_mods(source_path, target_path)
|
|
||||||
elsif File.directory?(source_path)
|
|
||||||
copy_dir_without_mods(source_path, target_path)
|
|
||||||
else
|
|
||||||
copy_file_without_mods(source_dir, target_dir, filename)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if found == 0
|
||||||
|
@stats.record_no_tag(filename, source_path)
|
||||||
|
elsif found == 1
|
||||||
|
@stats.record_tag(filename)
|
||||||
|
else
|
||||||
|
raise("File contains #{found} license lines: #{source_path}")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Recursively copy this directory, adding license mods to any suitable files.
|
# This file matches at least one of the file-matching strings, and does not
|
||||||
|
# match any exceptions. Replace the magic string with the license text.
|
||||||
#
|
#
|
||||||
def copy_dir_with_mods(source_dir, target_dir)
|
def copy_file_with_license(source_path, target_path, filename)
|
||||||
@stats.enter_with_mods(source_dir)
|
|
||||||
|
|
||||||
Dir.mkdir(target_dir)
|
|
||||||
Dir.foreach(source_dir) do |filename|
|
|
||||||
source_path = "#{source_dir}/#{filename}"
|
|
||||||
target_path = "#{target_dir}/#{filename}"
|
|
||||||
|
|
||||||
if filename == '.'
|
|
||||||
elsif filename == '..'
|
|
||||||
elsif File.directory?(source_path)
|
|
||||||
copy_dir_with_mods(source_path, target_path)
|
|
||||||
elsif filename_matches_pattern?(filename)
|
|
||||||
copy_file_with_mods(source_dir, target_dir, filename)
|
|
||||||
else
|
|
||||||
copy_file_without_mods(source_dir, target_dir, filename)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# This file either is not in a licensed directory, or doesn't match any of the
|
|
||||||
# file-matching strings
|
|
||||||
#
|
|
||||||
def copy_file_without_mods(source_dir, target_dir, filename)
|
|
||||||
@stats.record_copy_without_mods(filename)
|
|
||||||
source_path = "#{source_dir}/#{filename}"
|
|
||||||
target_path = "#{target_dir}/#{filename}"
|
|
||||||
FileUtils.cp(source_path, target_path)
|
|
||||||
end
|
|
||||||
|
|
||||||
# This file is in a licensed directory, and matches at least one of the
|
|
||||||
# file-matching strings. Replace the magic string with the license text.
|
|
||||||
#
|
|
||||||
def copy_file_with_mods(source_dir, target_dir, filename)
|
|
||||||
@stats.record_copy_with_mods(filename)
|
|
||||||
source_path = "#{source_dir}/#{filename}"
|
|
||||||
target_path = "#{target_dir}/#{filename}"
|
|
||||||
found = 0
|
found = 0
|
||||||
File.open(source_path) do |source_file|
|
File.open(source_path) do |source_file|
|
||||||
File.open(target_path, "w") do |target_file|
|
File.open(target_path, "w") do |target_file|
|
||||||
|
@ -203,9 +258,9 @@ class Licenser
|
||||||
end
|
end
|
||||||
|
|
||||||
if found == 0
|
if found == 0
|
||||||
@stats.record_missing_tag(filename, source_path)
|
@stats.record_no_tag(filename, source_path)
|
||||||
elsif found == 1
|
elsif found == 1
|
||||||
@stats.record_substitution(filename)
|
@stats.record_tag(filename)
|
||||||
else
|
else
|
||||||
raise("File contains #{found} license lines: #{source_path}")
|
raise("File contains #{found} license lines: #{source_path}")
|
||||||
end
|
end
|
||||||
|
@ -228,6 +283,13 @@ class Licenser
|
||||||
target_file.print "#{ends[1].strip}\n"
|
target_file.print "#{ends[1].strip}\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# This file either doesn't match any of the file-matching strings, or
|
||||||
|
# matches an exception
|
||||||
|
#
|
||||||
|
def copy_file_without_license(source_path, target_path)
|
||||||
|
FileUtils.cp(source_path, target_path)
|
||||||
|
end
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------
|
||||||
public
|
public
|
||||||
# ------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------
|
||||||
|
@ -235,44 +297,61 @@ class Licenser
|
||||||
# Setup and get ready to process.
|
# Setup and get ready to process.
|
||||||
# * source_dir is a String -- the path to the top level directory to be copied
|
# * source_dir is a String -- the path to the top level directory to be copied
|
||||||
# * target_dir is a String -- the path to the top level directory to copy into
|
# * target_dir is a String -- the path to the top level directory to copy into
|
||||||
# (must not already exist!)
|
# (must not exist, but its parent must exist!)(ignored if scan_only is set)
|
||||||
# * license_dirs is an array of Strings -- relative paths to the directories that
|
# * file_matchers is an array of Strings -- filename globs that match the files we
|
||||||
# require license mods.
|
# want to license.
|
||||||
# * file_matchers
|
|
||||||
# * license_file is a String -- the path to the text of the license agreement
|
# * license_file is a String -- the path to the text of the license agreement
|
||||||
# (with a ${year} token in it)
|
# (with a ${year} token in it)
|
||||||
|
# * known_exceptions_file is a String -- the path to a list of filename/path globs
|
||||||
|
# that match the files that we know should have no license tags in them.
|
||||||
|
# * scan_only is a Boolean -- if true, we scan the entire source dir without copying,
|
||||||
|
# and target_dir is ignored.
|
||||||
# * full_report is a Boolean -- if true, we give a full log instead of just a summary.
|
# * full_report is a Boolean -- if true, we give a full log instead of just a summary.
|
||||||
#
|
#
|
||||||
def initialize(source_dir, target_dir, license_dirs, file_matchers, license_file, full_report)
|
def initialize(source_dir, target_dir, file_matchers, license_file, known_exceptions_file, scan_only, full_report)
|
||||||
if !File.exist?(source_dir)
|
if !File.exist?(source_dir)
|
||||||
raise "Source directory does not exist: #{source_dir}"
|
raise "Source directory does not exist: #{source_dir}"
|
||||||
end
|
end
|
||||||
|
|
||||||
if File.exist?(target_dir)
|
if !scan_only
|
||||||
raise "Target directory already exists: #{target_dir}"
|
if File.exist?(target_dir)
|
||||||
|
raise "Target directory already exists: #{target_dir}"
|
||||||
|
end
|
||||||
|
|
||||||
|
target_parent = File.dirname(target_dir)
|
||||||
|
if !File.exist?(target_parent)
|
||||||
|
raise "Path to target directory doesn't exist: #{target_parent}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if !File.exist?(license_file)
|
if !File.exist?(license_file)
|
||||||
raise "Source directory does not exist: #{license_file}"
|
raise "License file does not exist: #{license_file}"
|
||||||
|
end
|
||||||
|
|
||||||
|
if !File.exist?(known_exceptions_file)
|
||||||
|
raise "Known exceptions file does not exist: #{known_exceptions_file}"
|
||||||
end
|
end
|
||||||
|
|
||||||
@source_dir = source_dir
|
@source_dir = source_dir
|
||||||
@target_dir = target_dir
|
@target_dir = target_dir
|
||||||
@file_matchers = file_matchers
|
|
||||||
|
|
||||||
@license_dirs = license_dirs
|
@file_matchers = file_matchers
|
||||||
@license_dir_paths = prepare_license_dir_paths(source_dir, license_dirs)
|
|
||||||
|
|
||||||
@license_file = license_file
|
@license_file = license_file
|
||||||
@license_text = prepare_license_text(license_file)
|
@license_text = prepare_license_text(license_file)
|
||||||
|
|
||||||
|
@known_exceptions_file = known_exceptions_file
|
||||||
|
@known_exceptions = prepare_exception_globs(known_exceptions_file, source_dir)
|
||||||
|
|
||||||
|
@scan_only = scan_only
|
||||||
|
|
||||||
@full_report = full_report
|
@full_report = full_report
|
||||||
@stats = LicenserStats.new(file_matchers, full_report)
|
@stats = LicenserStats.new(source_dir, file_matchers, full_report)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Start the recursive copying.
|
# Start the recursive scanning (and copying).
|
||||||
def process()
|
def process()
|
||||||
copy_dir_without_mods(@source_dir, @target_dir)
|
scan_dir(@source_dir, @target_dir)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Report the summary statistics
|
# Report the summary statistics
|
||||||
|
@ -285,6 +364,11 @@ class Licenser
|
||||||
printf("%5d %s\n", line[1], line[0])
|
printf("%5d %s\n", line[1], line[0])
|
||||||
end
|
end
|
||||||
puts
|
puts
|
||||||
|
puts 'Known non-licensed files'
|
||||||
|
@stats.known_exceptions.sort.each do |line|
|
||||||
|
printf("%5d %s\n", line[1], line[0])
|
||||||
|
end
|
||||||
|
puts
|
||||||
puts 'Missing tags'
|
puts 'Missing tags'
|
||||||
@stats.missing_tags.sort.each do |line|
|
@stats.missing_tags.sort.each do |line|
|
||||||
printf("%5d %s\n", line[1], line[0])
|
printf("%5d %s\n", line[1], line[0])
|
||||||
|
@ -293,49 +377,46 @@ class Licenser
|
||||||
puts 'parameters:'
|
puts 'parameters:'
|
||||||
puts " source_dir = #{@source_dir}"
|
puts " source_dir = #{@source_dir}"
|
||||||
puts " target_dir = #{@target_dir}"
|
puts " target_dir = #{@target_dir}"
|
||||||
puts " license_dirs = #{@license_dirs.join(', ')}"
|
|
||||||
puts " file_matchers = #{@file_matchers.join(', ')}"
|
puts " file_matchers = #{@file_matchers.join(', ')}"
|
||||||
puts " license_file = #{@license_file}"
|
puts " license_file = #{@license_file}"
|
||||||
|
puts " known_exceptions_file = #{@known_exceptions_file}"
|
||||||
|
puts " scan_only = #{@scan_only}"
|
||||||
puts " full_report = #{@full_report}"
|
puts " full_report = #{@full_report}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
# ------------------------------------------------------------------------
|
||||||
# BOGUS test harness
|
# Main routine
|
||||||
# ------------------------------------------------------------------------
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
# BOGUS test harness
|
||||||
=begin
|
=begin
|
||||||
source_dir = '/Vivoweb_Stuff/Testing_licenser/sourceDir'
|
source_dir = '/Vivoweb_Stuff/Testing_licenser/sourceDir'
|
||||||
target_dir = '/Vivoweb_Stuff/Testing_licenser/targetDir'
|
target_dir = '/Vivoweb_Stuff/Testing_licenser/targetDir'
|
||||||
|
license_file = "#{File.dirname(File.dirname(File.expand_path(__FILE__)))}/doc/license.txt"
|
||||||
license_dirs = []
|
known_exceptions_file = '/Vivoweb_Stuff/Testing_licenser/known_exceptions.txt'
|
||||||
license_dirs << '/licensed'
|
full_report = true;
|
||||||
|
scan_only = true;
|
||||||
license_file = '../doc/license.txt'
|
|
||||||
=end
|
=end
|
||||||
|
|
||||||
#=begin
|
|
||||||
source_dir = '/Vivoweb_Stuff/Testing_licenser/trunk'
|
|
||||||
target_dir = '/Vivoweb_Stuff/Testing_licenser/distribution'
|
|
||||||
|
|
||||||
license_dirs = []
|
source_dir = File.dirname(File.dirname(File.expand_path(__FILE__)))
|
||||||
license_dirs << '/themes'
|
license_file = "#{source_dir}/doc/license.txt"
|
||||||
license_dirs << '/vitro-core/webapp'
|
known_exceptions_file = "#{source_dir}/utilities/known_exceptions.txt"
|
||||||
license_dirs << '/vitro-core/services'
|
full_report = false;
|
||||||
|
|
||||||
file_matchers = []
|
if ARGV.length == 0
|
||||||
file_matchers << '*.java'
|
scan_only = true;
|
||||||
file_matchers << '*.jsp'
|
target_dir = "";
|
||||||
file_matchers << '*.tld'
|
else
|
||||||
file_matchers << '*.xsl'
|
scan_only = false;
|
||||||
file_matchers << '*.xslt'
|
target_dir = ARGV[0]
|
||||||
file_matchers << '*.css'
|
end
|
||||||
file_matchers << '*.js'
|
|
||||||
file_matchers << 'build.xml'
|
|
||||||
|
|
||||||
license_file = '/Vivoweb_Stuff/Testing_licenser/trunk/doc/license.txt'
|
file_matchers = ['*.java', '*.jsp', '*.tld', '*.xsl', '*.xslt', '*.css', '*.js', 'build.xml']
|
||||||
#=end
|
|
||||||
|
|
||||||
l = Licenser.new(source_dir, target_dir, license_dirs, file_matchers, license_file, false)
|
l = Licenser.new(source_dir, target_dir, file_matchers, license_file, known_exceptions_file, scan_only, full_report)
|
||||||
l.process
|
l.process
|
||||||
l.report
|
l.report
|
||||||
|
|
Loading…
Add table
Reference in a new issue