NIHVIVO-241 Move the licenser script into Vitro core, with some re-writing to use relative paths and such, and changes to the build scripts to support it.

This commit is contained in:
jeb228 2010-04-15 20:20:14 +00:00
parent 1d5eedae4c
commit bca0df0570
7 changed files with 750 additions and 0 deletions

View file

@ -0,0 +1,411 @@
=begin
--------------------------------------------------------------------------------
Scan the source directory, checking for expected "magic license tags",
or
Copy the source directory, inserting licensing information into the files.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
=end
$: << File.dirname(File.expand_path(__FILE__))
require 'date'
require 'fileutils'
require 'pathname'
require 'property_file_reader'
require 'licenser_stats'
class Licenser
MAGIC_STRING = '$This file is distributed under the terms of the license in /doc/license.txt$'
# ------------------------------------------------------------------------------------
private
# ------------------------------------------------------------------------------------
#
# Some paths in the properties file, if they are relative, should be relative to the
# properties file itself.
def relative_to_properties(properties, key)
path = properties[key]
base = File.dirname(properties['properties_file_path'])
return nil if path == nil
return path if Pathname.new(path).absolute?
return File.expand_path(File.join(base, path))
end
# Some paths in the properties file, if they are relative, should be relative to the
# source directory.
def relative_to_source(properties, key)
path = properties[key]
base = @source_dir ? @source_dir : ''
return nil if path == nil
return path if Pathname.new(path).absolute?
return File.expand_path(File.join(base, path))
end
# Confirm that the parameters are reasonable.
#
def sanity_checks_on_parameters()
# Check that all necessary properties are here.
raise("Properties file must contain a value for 'scan_only'") if @scan_only_string == nil
raise("Properties file must contain a value for 'source_dir'") if @source_dir == nil
raise("Properties file must contain a value for 'known_exceptions'") if @known_exceptions_file == nil
raise("Properties file must contain a value for 'skip_directories'") if @skip_directories_list == nil
raise("Properties file must contain a value for 'file_matchers'") if @file_match_list == nil
raise("Properties file must contain a value for 'report_level'") if @report_level_string == nil
if !File.exist?(@source_dir)
raise "Source directory does not exist: #{@source_dir}"
end
if !File.exist?(@known_exceptions_file)
raise "Known exceptions file does not exist: #{@known_exceptions_file}"
end
if !@scan_only
raise("Properties file must contain a value for 'target_dir'") if @target_dir == nil
raise("Properties file must contain a value for 'license_file'") if @license_file == nil
if File.exist?(@target_dir)
raise "Target directory already exists: #{@target_dir}"
end
target_parent = File.dirname(@target_dir)
if !File.exist?(target_parent)
raise "Path to target directory doesn't exist: #{target_parent}"
end
if !File.exist?(@license_file)
raise "License file does not exist: #{@license_file}"
end
end
end
# Prepare the license as an array of lines of text,
# with the current year substituted in for ${year}
#
def prepare_license_text(license_file)
if (license_file == nil)
return []
end
year_string = DateTime.now.year.to_s
text = []
File.open(license_file) do |file|
file.each do |line|
text << line.gsub('${year}', year_string)
end
end
return text
end
# The globs in the exceptions file are assumed to be
# relative to the source directory. Make them explicitly so.
#
# Ignore any blank lines or lines that start with a '#'
#
def prepare_exception_globs(exceptions_file, source_dir)
source_path = File.expand_path(source_dir)
globs = []
File.open(exceptions_file) do |file|
file.each do |line|
glob = line.strip
if (glob.length > 0) && (glob[0..0] != '#')
globs << "#{source_path}/#{glob}".gsub('//', '/')
end
end
end
return globs
end
# Recursively scan this directory, and copy if we are not scan-only.
#
def scan_dir(source_dir, target_dir)
@stats.enter_directory(source_dir)
Dir.mkdir(File.join(@target_dir, target_dir)) if !@scan_only
Dir.foreach(File.join(@source_dir, source_dir)) do |filename|
source_path_relative = File.join(source_dir, filename)
source_path = File.join(@source_dir, source_path_relative)
target_path_relative = File.join(target_dir, filename)
target_path = File.join(@target_dir, target_path_relative)
# What kind of beast is this?
if filename == '.' || filename == '..'
is_skipped_directory = true
else
if File.directory?(source_path)
if (path_matches_skipped?(source_path_relative))
is_skipped_directory = true
else
is_directory = true
end
else
if filename_matches_pattern?(filename)
if path_matches_exception?(source_path_relative)
is_exception = true
else
is_match = true
end
else
is_ignored = true
end
end
end
if is_skipped_directory
# do nothing
elsif is_directory
scan_dir(source_path_relative, target_path_relative)
elsif is_match
if @scan_only
@stats.record_scan_matching(filename)
scan_file(source_path, filename)
else
@stats.record_copy_matching(filename)
copy_file_with_license(source_path, target_path, filename)
end
elsif is_exception
@stats.record_known_exception(filename)
if @scan_only
# do nothing
else
copy_file_without_license(source_path, target_path)
end
else # not a match
if @scan_only
@stats.record_scan_non_matching(filename)
else
@stats.record_copy_non_matching(filename)
copy_file_without_license(source_path, target_path)
end
end
end
end
# Is this directory one of the skipped?
#
def path_matches_skipped?(relative_path)
@skip_directories.each do |glob|
return true if File.fnmatch(glob, relative_path)
end
return false
end
# Does this file path match any of the exceptions?
#
def path_matches_exception?(relative_path)
path = File.expand_path(File.join(@source_dir, relative_path))
@known_exceptions.each do |pattern|
return true if File.fnmatch(pattern, path)
end
return false
end
# Does this filename match any of the patterns?
#
def filename_matches_pattern?(filename)
@file_matchers.each do |pattern|
return true if File.fnmatch(pattern, filename)
end
return false
end
# This file would be eligible for licensing if we weren't in scan-only mode.
#
def scan_file(source_path, filename)
found = 0
File.open(source_path) do |source_file|
source_file.each do |line|
if line.include?(MAGIC_STRING)
found += 1
end
end
end
if found == 0
@stats.record_no_tag(filename, source_path)
elsif found == 1
@stats.record_tag(filename)
else
raise("File contains #{found} license lines: #{source_path}")
end
end
# This file matches at least one of the file-matching strings, and does not
# match any exceptions. Replace the magic string with the license text.
#
def copy_file_with_license(source_path, target_path, filename)
found = 0
File.open(source_path) do |source_file|
File.open(target_path, "w") do |target_file|
source_file.each do |line|
if line.include?(MAGIC_STRING)
found += 1
insert_license_text(target_file, line)
else
target_file.print line
end
end
end
end
if found == 0
@stats.record_no_tag(filename, source_path)
elsif found == 1
@stats.record_tag(filename)
else
raise("File contains #{found} license lines: #{source_path}")
end
end
# Figure out the comment characters and write the license text to the file.
#
def insert_license_text(target_file, line)
ends = line.split(MAGIC_STRING)
if ends.size != 2
raise ("Can't parse this license line: #{line}")
end
target_file.print "#{ends[0].strip}\n"
@license_text.each do |text|
target_file.print "#{text.rstrip}\n"
end
target_file.print "#{ends[1].strip}\n"
end
# This file either doesn't match any of the file-matching strings, or
# matches an exception
#
def copy_file_without_license(source_path, target_path)
FileUtils.cp(source_path, target_path)
end
# ------------------------------------------------------------------------------------
public
# ------------------------------------------------------------------------------------
# Setup and get ready to process.
#
# * properties is a map of keys to values, probably parsed from a properties file.
#
def initialize(properties)
@scan_only_string = properties['scan_only']
@scan_only = 'false' != @scan_only_string
@file_match_list = properties['file_matchers']
@skip_directories_list = properties['skip_directories']
@report_level_string = properties['report_level']
# These properties contain paths, and if they are relative paths, they
# should be relative to the properties file itself.
@source_dir = relative_to_properties(properties, 'source_dir')
@target_dir = relative_to_properties(properties, 'target_dir')
# These properties contain paths, and if they are relative paths, they
# should be relative to the source_directory.
@license_file = relative_to_source(properties, 'license_file')
@known_exceptions_file = relative_to_source(properties, 'known_exceptions')
sanity_checks_on_parameters()
@full_report = @report_level_string === 'full'
@short_report = @report_level_string === 'short'
@file_matchers = @file_match_list.strip.split(/,\s*/)
@skip_directories = @skip_directories_list.strip.split(/,\s*/)
@license_text = prepare_license_text(@license_file)
@known_exceptions = prepare_exception_globs(@known_exceptions_file, @source_dir)
@stats = LicenserStats.new(@source_dir, @file_matchers, @full_report)
end
# Start the recursive scanning (and copying).
def process()
scan_dir('.', '.')
end
# Report the summary statistics
def report(properties)
verb = @scan_only ? "scanned" : "copied"
if (@short_report)
subs = 0
@stats.substitutions.each {|line| subs += line[1] }
known = 0
@stats.known_exceptions.each {|line| known += line[1] }
missing = 0
@stats.missing_tags.each {|line| missing += line[1] }
puts "Licenser: #{verb} #{@stats.file_count} files in #{@stats.dir_count} directories."
printf(" Substitutions: %5d\n", subs)
printf(" Known exceptions: %5d\n", known)
printf(" Missing tags: %5d\n", missing)
else
puts "Licenser: run completed at #{DateTime.now.strftime("%H:%M:%S on %b %d, %Y")}"
puts " #{verb} #{@stats.file_count} files in #{@stats.dir_count} directories."
puts
puts 'Substitutions'
@stats.substitutions.sort.each do |line|
printf("%5d %s\n", line[1], line[0])
end
puts
puts 'Known non-licensed files'
@stats.known_exceptions.sort.each do |line|
printf("%5d %s\n", line[1], line[0])
end
puts
puts 'Missing tags'
@stats.missing_tags.sort.each do |line|
printf("%5d %s\n", line[1], line[0])
end
puts
puts 'properties:'
properties.each do |key, value|
puts " #{key} = #{value}"
end
end
end
# Were we successful or not?
def success?
return @stats.missing_tags.empty?
end
end
#
#
# ------------------------------------------------------------------------------------
# Standalone calling.
#
# Do this if this program was called from the command line. That is, if the command
# expands to the path of this file.
# ------------------------------------------------------------------------------------
#
if File.expand_path($0) == File.expand_path(__FILE__)
if ARGV.length == 0
raise("No arguments - usage is: ruby licenser.rb <properties_file>")
end
if !File.file?(ARGV[0])
raise "File does not exist: '#{ARGV[0]}'."
end
properties = PropertyFileReader.read(ARGV[0])
l = Licenser.new(properties)
l.process
l.report(properties)
if l.success?
puts "Licenser was successful."
exit 0
else
puts "Licenser found problems."
exit 1
end
end

View file

@ -0,0 +1,95 @@
=begin
--------------------------------------------------------------------------------
Collect the statistics of a licenser run.
--------------------------------------------------------------------------------
=end
class LicenserStats
attr_reader :substitutions
attr_reader :missing_tags
attr_reader :known_exceptions
attr_reader :file_count
attr_reader :dir_count
# ------------------------------------------------------------------------------------
private
# ------------------------------------------------------------------------------------
#
def which_match(filename)
@file_matchers.each do |matcher|
return matcher if File.fnmatch(matcher, filename)
end
raise("filename matches no matchers!: #{filename}")
end
# ------------------------------------------------------------------------------------
public
# ------------------------------------------------------------------------------------
def initialize(root_dir, file_matchers, full)
@root_dir = "#{root_dir}/".gsub('//', '/')
@file_matchers = file_matchers
@full = full
# keep track of how many substitutions for all file types
@substitutions = Hash.new()
file_matchers.each do |matcher|
@substitutions[matcher] = 0
end
# keep track of missing tags, only in file types that have missing tags
@missing_tags = Hash.new(0)
# keep track of how many known non-licensed files we encounter, and what types.
@known_exceptions = Hash.new(0)
# keep track of how many files are copied
@file_count = 0
#keep track of how many directories are copied
@dir_count = 0
end
def enter_directory(path)
@dir_count += 1
puts "Entering directory: #{path}" if @full
end
def record_scan_non_matching(filename)
@file_count += 1
puts " Scan without mods: #{filename}" if @full
end
def record_copy_non_matching(filename)
@file_count += 1
puts " Copy without mods: #{filename}" if @full
end
def record_scan_matching(filename)
@file_count += 1
puts " Scan with mods: #{filename}" if @full
end
def record_copy_matching(filename)
@file_count += 1
puts " Copy with mods: #{filename}" if @full
end
def record_known_exception(filename)
@file_count += 1
puts " Known exception: #{filename}" if @full
@known_exceptions[which_match(filename)] += 1
end
def record_tag(filename)
puts " Substituted license text into #{filename}" if @full
@substitutions[which_match(filename)] += 1
end
def record_no_tag(filename, source_path)
puts "ERROR: Found no license tag in #{source_path.sub(@root_dir, '')}"
@missing_tags[which_match(filename)] += 1
end
end

View file

@ -0,0 +1,39 @@
=begin
--------------------------------------------------------------------------------
A utility class that reads a properties file and returns a hash containing the
properties.
--------------------------------------------------------------------------------
=end
class PropertyFileReader
# Read a properties file and return a hash.
#
# Parameters: the path to the properties file
#
# The hash includes the special property "properties_file_path", which holds
# the path to the properties file.
#
def self.read(file_path)
properties = {}
properties["properties_file_path"] = File.expand_path(file_path)
File.open(file_path) do |file|
file.each_line do |line|
line.strip!
if line.length == 0 || line[0] == ?# || line[0] == ?!
# ignore blank lines, and lines starting with '#' or '!'.
elsif line =~ /(.*?)\s*[=:]\s*(.*)/
# key and value are separated by '=' or ':' and optional whitespace.
properties[$1.strip] = $2
else
# No '=' or ':' means that the value is empty.
properties[line] = ''
end
end
end
return properties
end
end

View file

@ -264,4 +264,23 @@ deploy - Deploy the application directly into the Tomcat webapps directory.
</sync> </sync>
</target> </target>
<!-- =================================
target: licenser
In regular use, checks that all appropriate source files have license tags.
At release time, applies license text to source files.
================================= -->
<target name="licenser" description="--> Check source files for licensing tags">
<property name="licenser.properties.file"
location="${webapp.dir}/config/licenser/licenser.properties" />
<property name="licenser.label" value="Vitro core" />
<echo message="Checking license tags on ${licenser.label}" />
<exec executable="ruby" dir="${webapp.dir}/../utilities/licenser" failonerror="true">
<arg value="licenser.rb" />
<arg value="${licenser.properties.file}" />
<redirector outputproperty="licenser.test.output" alwayslog="true" />
</exec>
</target>
</project> </project>

View file

@ -0,0 +1,131 @@
#
# A list of files and directories that are known exceptions to the
# license-insertion process.
#
# Files will only be altered if they contain a "magic" license place-holder,
# but if they match one of the file-matchers and don't contain a place-holder,
# the process will write a warning.
#
# File-matchers are:
# '*.java', '*.jsp', '*.tld', '*.xsl', '*.xslt', '*.css', '*.js', 'build.xml'
#
# Known exceptions listed here produce no warnings.
#
# ContentType from Google gdata API, Apache 2.0 license included in file.
webapp/src/edu/cornell/mannlib/vitro/webapp/web/ContentType.java
# The JSON Java source files from json.org
webapp/src/org/json/*
# TinyMCE from tinymce.moxiecode.com
webapp/web/js/tiny_mce/*
webapp/web/js/tiny_mce/**/*
# From apache.org under the Apache license.
webapp/web/admin/axis/SOAPMonitorApplet.java
services/additions/admin/axis/SOAPMonitorApplet.java
# JQuery from jquery.com
webapp/web/js/jquery.js
webapp/web/js/jquery_plugins/*
# From pajhome.org.uk
webapp/web/js/md5.js
# From dojotoolkit.org
webapp/web/dojo.js
webapp/web/src/AdapterRegistry.js
webapp/web/src/animation/*
webapp/web/src/animation.js
webapp/web/src/behavior.js
webapp/web/src/bootstrap1.js
webapp/web/src/bootstrap2.js
webapp/web/src/browser_debug.js
webapp/web/src/collections/*
webapp/web/src/compat/*
webapp/web/src/crypto/*
webapp/web/src/crypto.js
webapp/web/src/data/*
webapp/web/src/data.js
webapp/web/src/date.js
webapp/web/src/debug/*
webapp/web/src/debug.js
webapp/web/src/Deferred.js
webapp/web/src/dnd/*
webapp/web/src/doc.js
webapp/web/src/dom.js
webapp/web/src/event/*
webapp/web/src/event.js
webapp/web/src/experimental.js
webapp/web/src/flash.js
webapp/web/src/fx/*
webapp/web/src/graphics/*
webapp/web/src/hostenv_adobesvg.js
webapp/web/src/hostenv_browser.js
webapp/web/src/hostenv_dashboard.js
webapp/web/src/hostenv_jsc.js
webapp/web/src/hostenv_rhino.js
webapp/web/src/hostenv_spidermonkey.js
webapp/web/src/hostenv_svg.js
webapp/web/src/hostenv_wsh.js
webapp/web/src/html/*
webapp/web/src/html.js
webapp/web/src/i18n/*
webapp/web/src/iCalendar.js
webapp/web/src/io/*
webapp/web/src/io.js
webapp/web/src/json.js
webapp/web/src/lang/*
webapp/web/src/lang.js
webapp/web/src/lfx/*
webapp/web/src/loader.js
webapp/web/src/loader_xd.js
webapp/web/src/logging/*
webapp/web/src/math/*
webapp/web/src/math.js
webapp/web/src/profile.js
webapp/web/src/reflect/*
webapp/web/src/regexp.js
webapp/web/src/rpc/*
webapp/web/src/selection/*
webapp/web/src/storage/*
webapp/web/src/storage.js
webapp/web/src/string/*
webapp/web/src/string.js
webapp/web/src/style.js
webapp/web/src/svg.js
webapp/web/src/text/*
webapp/web/src/undo/*
webapp/web/src/uri/*
webapp/web/src/uuid/*
webapp/web/src/validate/*
webapp/web/src/validate.js
webapp/web/src/widget/*
webapp/web/src/widget/**/*
webapp/web/src/xml/*
# PROBLEM? Third-party tlds - where do they come from?
webapp/config/tlds/c.tld
webapp/config/tlds/fn.tld
webapp/config/tlds/sparqltag.tld
webapp/config/tlds/taglibs-mailer.tld
webapp/config/tlds/taglibs-random.tld
webapp/config/tlds/taglibs-string.tld
# PROBLEM: It appears that these files require a LICENSE file to accompany them.
webapp/web/themes/enhanced/css/blueprint/grid.css
webapp/web/themes/enhanced/css/blueprint/ie.css
# PROBLEM: This is from www.involutionstudios.com, not blueprint. What are the license restrictions?
webapp/web/themes/enhanced/css/blueprint/liquid.css
# PROBLEM? from simon.incutio.com, but what are the license restrictions?
webapp/web/js/betterDateInput.js
# PROBLEM? from wrox.com, but what are the license restrictions?
webapp/web/js/detect.js
# PROBLEM? perhaps from www.dannyg.com, but what are the license restrictions?
webapp/web/toggle.js
webapp/web/js/toggle.js

View file

@ -0,0 +1,37 @@
# --------------------------------------------------------------------------
# Properties for running the licenser utility in Vitro core.
# --------------------------------------------------------------------------
# The path to the top level directory to be scanned or copied
# (if relative, then relative to this file)
source_dir = ../../../
# The path to the top level directory to copy into (ignored if only scanning)
# (if relative, then relative to this file)
target_dir =
# A list of filename globs that match the files we want to license,
# delimited by commas with optional white-space.
file_matchers = *.java, *.jsp, *.tld, *.xsl, *.xslt, *.css, *.js, build.xml
# "globs" that describe paths that we won't follow for scanning OR FOR COPYING.
# (relative to the source_dir)
skip_directories = ./bin, ./.svn, ./**/.svn, ./webapp/.build
# The path to a file containing filename/path globs that match the files that
# we know should have no license tags in them.
# The file contains one glob per line; blank lines and comments ("#") are ignored.
# (if relative, then relative to the source directory)
known_exceptions = webapp/config/licenser/known_exceptions.txt
# The path to the text of the license agreement (ignored if only scanning)
# If the agreement contains a ${year} token, the current year will be substituted.
# (if relative, then relative to the source directory)
license_file = doc/license.txt
# Set to 'full' for a full report, 'short' for a brief statment, or to anything
# else for a medium-length summary.
report_level = short
# if true, we are just scanning, not copying.
scan_only = true

View file

@ -174,5 +174,23 @@ deploy - Deploy the application directly into the Tomcat webapps directory.
</java> </java>
</target> </target>
<!-- =================================
target: licenser
In regular use, checks that all appropriate source files have license tags.
At release time, applies license text to source files.
================================= -->
<target name="licenser" description="--> Check source files for licensing tags">
<!-- Once for the product... -->
<antcall target="vitroCore.licenser">
<param name="licenser.properties.file" value="${licenser.product.properties.file}" />
<param name="licenser.label" value="${ant.project.name}" />
</antcall>
<!-- ...and once for the core. -->
<condition property="licenser.properties.file" value="${licenser.core.properties.file}">
<isset property="licenser.core.properties.file"/>
</condition>
<antcall target="vitroCore.licenser" />
</target>
</project> </project>