From bca0df05703c76c2c2cc6043b004cc8108aac864 Mon Sep 17 00:00:00 2001 From: jeb228 Date: Thu, 15 Apr 2010 20:20:14 +0000 Subject: [PATCH] NIHVIVO-241 Move the licenser script into Vitro core, with some re-writing to use relative paths and such, and changes to the build scripts to support it. --- utilities/licenser/licenser.rb | 411 ++++++++++++++++++++ utilities/licenser/licenser_stats.rb | 95 +++++ utilities/licenser/property_file_reader.rb | 39 ++ webapp/build.xml | 19 + webapp/config/licenser/known_exceptions.txt | 131 +++++++ webapp/config/licenser/licenser.properties | 37 ++ webapp/product-build.xml | 18 + 7 files changed, 750 insertions(+) create mode 100644 utilities/licenser/licenser.rb create mode 100644 utilities/licenser/licenser_stats.rb create mode 100644 utilities/licenser/property_file_reader.rb create mode 100644 webapp/config/licenser/known_exceptions.txt create mode 100644 webapp/config/licenser/licenser.properties diff --git a/utilities/licenser/licenser.rb b/utilities/licenser/licenser.rb new file mode 100644 index 000000000..3f1ac6abc --- /dev/null +++ b/utilities/licenser/licenser.rb @@ -0,0 +1,411 @@ +=begin +-------------------------------------------------------------------------------- + +Scan the source directory, checking for expected "magic license tags", + or +Copy the source directory, inserting licensing information into the files. 
+ +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +=end +$: << File.dirname(File.expand_path(__FILE__)) +require 'date' +require 'fileutils' +require 'pathname' +require 'property_file_reader' +require 'licenser_stats' + +class Licenser + + MAGIC_STRING = '$This file is distributed under the terms of the license in /doc/license.txt$' + + # ------------------------------------------------------------------------------------ + private + # ------------------------------------------------------------------------------------ + # + # Some paths in the properties file, if they are relative, should be relative to the + # properties file itself. + def relative_to_properties(properties, key) + path = properties[key] + base = File.dirname(properties['properties_file_path']) + + return nil if path == nil + return path if Pathname.new(path).absolute? + return File.expand_path(File.join(base, path)) + end + + # Some paths in the properties file, if they are relative, should be relative to the + # source directory. + def relative_to_source(properties, key) + path = properties[key] + base = @source_dir ? @source_dir : '' + + return nil if path == nil + return path if Pathname.new(path).absolute? + return File.expand_path(File.join(base, path)) + end + + # Confirm that the parameters are reasonable. + # + def sanity_checks_on_parameters() + # Check that all necessary properties are here. 
+ raise("Properties file must contain a value for 'scan_only'") if @scan_only_string == nil + raise("Properties file must contain a value for 'source_dir'") if @source_dir == nil + raise("Properties file must contain a value for 'known_exceptions'") if @known_exceptions_file == nil + raise("Properties file must contain a value for 'skip_directories'") if @skip_directories_list == nil + raise("Properties file must contain a value for 'file_matchers'") if @file_match_list == nil + raise("Properties file must contain a value for 'report_level'") if @report_level_string == nil + + if !File.exist?(@source_dir) + raise "Source directory does not exist: #{@source_dir}" + end + + if !File.exist?(@known_exceptions_file) + raise "Known exceptions file does not exist: #{@known_exceptions_file}" + end + + if !@scan_only + raise("Properties file must contain a value for 'target_dir'") if @target_dir == nil + raise("Properties file must contain a value for 'license_file'") if @license_file == nil + + if File.exist?(@target_dir) + raise "Target directory already exists: #{@target_dir}" + end + + target_parent = File.dirname(@target_dir) + if !File.exist?(target_parent) + raise "Path to target directory doesn't exist: #{target_parent}" + end + + if !File.exist?(@license_file) + raise "License file does not exist: #{@license_file}" + end + end + end + + # Prepare the license as an array of lines of text, + # with the current year substituted in for ${year} + # + def prepare_license_text(license_file) + if (license_file == nil) + return [] + end + + year_string = DateTime.now.year.to_s + + text = [] + File.open(license_file) do |file| + file.each do |line| + text << line.gsub('${year}', year_string) + end + end + return text + end + + # The globs in the exceptions file are assumed to be + # relative to the source directory. Make them explicitly so. 
+ # + # Ignore any blank lines or lines that start with a '#' + # + def prepare_exception_globs(exceptions_file, source_dir) + source_path = File.expand_path(source_dir) + globs = [] + File.open(exceptions_file) do |file| + file.each do |line| + glob = line.strip + if (glob.length > 0) && (glob[0..0] != '#') + globs << "#{source_path}/#{glob}".gsub('//', '/') + end + end + end + return globs + end + + # Recursively scan this directory, and copy if we are not scan-only. + # + def scan_dir(source_dir, target_dir) + @stats.enter_directory(source_dir) + + Dir.mkdir(File.join(@target_dir, target_dir)) if !@scan_only + + Dir.foreach(File.join(@source_dir, source_dir)) do |filename| + source_path_relative = File.join(source_dir, filename) + source_path = File.join(@source_dir, source_path_relative) + target_path_relative = File.join(target_dir, filename) + target_path = File.join(@target_dir, target_path_relative) + + # What kind of beast is this? + if filename == '.' || filename == '..' + is_skipped_directory = true + else + if File.directory?(source_path) + if (path_matches_skipped?(source_path_relative)) + is_skipped_directory = true + else + is_directory = true + end + else + if filename_matches_pattern?(filename) + if path_matches_exception?(source_path_relative) + is_exception = true + else + is_match = true + end + else + is_ignored = true + end + end + end + + if is_skipped_directory + # do nothing + elsif is_directory + scan_dir(source_path_relative, target_path_relative) + elsif is_match + if @scan_only + @stats.record_scan_matching(filename) + scan_file(source_path, filename) + else + @stats.record_copy_matching(filename) + copy_file_with_license(source_path, target_path, filename) + end + elsif is_exception + @stats.record_known_exception(filename) + if @scan_only + # do nothing + else + copy_file_without_license(source_path, target_path) + end + else # not a match + if @scan_only + @stats.record_scan_non_matching(filename) + else + 
@stats.record_copy_non_matching(filename) + copy_file_without_license(source_path, target_path) + end + end + end + end + + # Is this directory one of the skipped? + # + def path_matches_skipped?(relative_path) + @skip_directories.each do |glob| + return true if File.fnmatch(glob, relative_path) + end + return false + end + + # Does this file path match any of the exceptions? + # + def path_matches_exception?(relative_path) + path = File.expand_path(File.join(@source_dir, relative_path)) + @known_exceptions.each do |pattern| + return true if File.fnmatch(pattern, path) + end + return false + end + + # Does this filename match any of the patterns? + # + def filename_matches_pattern?(filename) + @file_matchers.each do |pattern| + return true if File.fnmatch(pattern, filename) + end + return false + end + + # This file would be eligible for licensing if we weren't in scan-only mode. + # + def scan_file(source_path, filename) + found = 0 + File.open(source_path) do |source_file| + source_file.each do |line| + if line.include?(MAGIC_STRING) + found += 1 + end + end + end + + if found == 0 + @stats.record_no_tag(filename, source_path) + elsif found == 1 + @stats.record_tag(filename) + else + raise("File contains #{found} license lines: #{source_path}") + end + end + + # This file matches at least one of the file-matching strings, and does not + # match any exceptions. Replace the magic string with the license text. 
+ # + def copy_file_with_license(source_path, target_path, filename) + found = 0 + File.open(source_path) do |source_file| + File.open(target_path, "w") do |target_file| + source_file.each do |line| + if line.include?(MAGIC_STRING) + found += 1 + insert_license_text(target_file, line) + else + target_file.print line + end + end + end + end + + if found == 0 + @stats.record_no_tag(filename, source_path) + elsif found == 1 + @stats.record_tag(filename) + else + raise("File contains #{found} license lines: #{source_path}") + end + end + + # Figure out the comment characters and write the license text to the file. + # + def insert_license_text(target_file, line) + ends = line.split(MAGIC_STRING) + if ends.size != 2 + raise ("Can't parse this license line: #{line}") + end + + target_file.print "#{ends[0].strip}\n" + + @license_text.each do |text| + target_file.print "#{text.rstrip}\n" + end + + target_file.print "#{ends[1].strip}\n" + end + + # This file either doesn't match any of the file-matching strings, or + # matches an exception + # + def copy_file_without_license(source_path, target_path) + FileUtils.cp(source_path, target_path) + end + + # ------------------------------------------------------------------------------------ + public + # ------------------------------------------------------------------------------------ + + # Setup and get ready to process. + # + # * properties is a map of keys to values, probably parsed from a properties file. + # + def initialize(properties) + @scan_only_string = properties['scan_only'] + @scan_only = 'false' != @scan_only_string + + @file_match_list = properties['file_matchers'] + @skip_directories_list = properties['skip_directories'] + @report_level_string = properties['report_level'] + + # These properties contain paths, and if they are relative paths, they + # should be relative to the properties file itself. 
+ @source_dir = relative_to_properties(properties, 'source_dir') + @target_dir = relative_to_properties(properties, 'target_dir') + + # These properties contain paths, and if they are relative paths, they + # should be relative to the source_directory. + @license_file = relative_to_source(properties, 'license_file') + @known_exceptions_file = relative_to_source(properties, 'known_exceptions') + + sanity_checks_on_parameters() + + @full_report = @report_level_string === 'full' + @short_report = @report_level_string === 'short' + @file_matchers = @file_match_list.strip.split(/,\s*/) + @skip_directories = @skip_directories_list.strip.split(/,\s*/) + @license_text = prepare_license_text(@license_file) + @known_exceptions = prepare_exception_globs(@known_exceptions_file, @source_dir) + + @stats = LicenserStats.new(@source_dir, @file_matchers, @full_report) + end + + # Start the recursive scanning (and copying). + def process() + scan_dir('.', '.') + end + + # Report the summary statistics + def report(properties) + verb = @scan_only ? "scanned" : "copied" + if (@short_report) + subs = 0 + @stats.substitutions.each {|line| subs += line[1] } + known = 0 + @stats.known_exceptions.each {|line| known += line[1] } + missing = 0 + @stats.missing_tags.each {|line| missing += line[1] } + + puts "Licenser: #{verb} #{@stats.file_count} files in #{@stats.dir_count} directories." + printf(" Substitutions: %5d\n", subs) + printf(" Known exceptions: %5d\n", known) + printf(" Missing tags: %5d\n", missing) + else + puts "Licenser: run completed at #{DateTime.now.strftime("%H:%M:%S on %b %d, %Y")}" + puts " #{verb} #{@stats.file_count} files in #{@stats.dir_count} directories." 
+ puts + puts 'Substitutions' + @stats.substitutions.sort.each do |line| + printf("%5d %s\n", line[1], line[0]) + end + puts + puts 'Known non-licensed files' + @stats.known_exceptions.sort.each do |line| + printf("%5d %s\n", line[1], line[0]) + end + puts + puts 'Missing tags' + @stats.missing_tags.sort.each do |line| + printf("%5d %s\n", line[1], line[0]) + end + puts + puts 'properties:' + properties.each do |key, value| + puts " #{key} = #{value}" + end + end + end + + # Were we successful or not? + def success? + return @stats.missing_tags.empty? + end +end + +# +# +# ------------------------------------------------------------------------------------ +# Standalone calling. +# +# Do this if this program was called from the command line. That is, if the command +# expands to the path of this file. +# ------------------------------------------------------------------------------------ +# + +if File.expand_path($0) == File.expand_path(__FILE__) + if ARGV.length == 0 + raise("No arguments - usage is: ruby licenser.rb ") + end + if !File.file?(ARGV[0]) + raise "File does not exist: '#{ARGV[0]}'." + end + + properties = PropertyFileReader.read(ARGV[0]) + + l = Licenser.new(properties) + l.process + l.report(properties) + + if l.success? + puts "Licenser was successful." + exit 0 + else + puts "Licenser found problems." + exit 1 + end +end diff --git a/utilities/licenser/licenser_stats.rb b/utilities/licenser/licenser_stats.rb new file mode 100644 index 000000000..cac01b42a --- /dev/null +++ b/utilities/licenser/licenser_stats.rb @@ -0,0 +1,95 @@ +=begin +-------------------------------------------------------------------------------- + +Collect the statistics of a licenser run. 
+ +-------------------------------------------------------------------------------- +=end + +class LicenserStats + attr_reader :substitutions + attr_reader :missing_tags + attr_reader :known_exceptions + attr_reader :file_count + attr_reader :dir_count + + # ------------------------------------------------------------------------------------ + private + # ------------------------------------------------------------------------------------ + # + def which_match(filename) + @file_matchers.each do |matcher| + return matcher if File.fnmatch(matcher, filename) + end + raise("filename matches no matchers!: #{filename}") + end + + # ------------------------------------------------------------------------------------ + public + # ------------------------------------------------------------------------------------ + + def initialize(root_dir, file_matchers, full) + @root_dir = "#{root_dir}/".gsub('//', '/') + @file_matchers = file_matchers + @full = full + + # keep track of how many substitutions for all file types + @substitutions = Hash.new() + file_matchers.each do |matcher| + @substitutions[matcher] = 0 + end + + # keep track of missing tags, only in file types that have missing tags + @missing_tags = Hash.new(0) + + # keep track of how many known non-licensed files we encounter, and what types. 
+ @known_exceptions = Hash.new(0) + + # keep track of how many files are copied + @file_count = 0 + + #keep track of how many directories are copied + @dir_count = 0 + end + + def enter_directory(path) + @dir_count += 1 + puts "Entering directory: #{path}" if @full + end + + def record_scan_non_matching(filename) + @file_count += 1 + puts " Scan without mods: #{filename}" if @full + end + + def record_copy_non_matching(filename) + @file_count += 1 + puts " Copy without mods: #{filename}" if @full + end + + def record_scan_matching(filename) + @file_count += 1 + puts " Scan with mods: #{filename}" if @full + end + + def record_copy_matching(filename) + @file_count += 1 + puts " Copy with mods: #{filename}" if @full + end + + def record_known_exception(filename) + @file_count += 1 + puts " Known exception: #{filename}" if @full + @known_exceptions[which_match(filename)] += 1 + end + + def record_tag(filename) + puts " Substituted license text into #{filename}" if @full + @substitutions[which_match(filename)] += 1 + end + + def record_no_tag(filename, source_path) + puts "ERROR: Found no license tag in #{source_path.sub(@root_dir, '')}" + @missing_tags[which_match(filename)] += 1 + end +end diff --git a/utilities/licenser/property_file_reader.rb b/utilities/licenser/property_file_reader.rb new file mode 100644 index 000000000..8b3c29b5e --- /dev/null +++ b/utilities/licenser/property_file_reader.rb @@ -0,0 +1,39 @@ +=begin +-------------------------------------------------------------------------------- + +A utility class that reads a properties file and returns a hash containing the +properties. + +-------------------------------------------------------------------------------- +=end + +class PropertyFileReader + # Read a properties file and return a hash. + # + # Parameters: the path to the properties file + # + # The hash includes the special property "properties_file_path", which holds + # the path to the properties file. 
+ # + def self.read(file_path) + properties = {} + properties["properties_file_path"] = File.expand_path(file_path) + + File.open(file_path) do |file| + file.each_line do |line| + line.strip! + if line.length == 0 || line[0] == ?# || line[0] == ?! + # ignore blank lines, and lines starting with '#' or '!'. + elsif line =~ /(.*?)\s*[=:]\s*(.*)/ + # key and value are separated by '=' or ':' and optional whitespace. + properties[$1.strip] = $2 + else + # No '=' or ':' means that the value is empty. + properties[line] = '' + end + end + end + + return properties + end +end diff --git a/webapp/build.xml b/webapp/build.xml index 0e2314611..c92a3cc6c 100644 --- a/webapp/build.xml +++ b/webapp/build.xml @@ -264,4 +264,23 @@ deploy - Deploy the application directly into the Tomcat webapps directory. + + + + + + + + + + + + + diff --git a/webapp/config/licenser/known_exceptions.txt b/webapp/config/licenser/known_exceptions.txt new file mode 100644 index 000000000..338bbf48d --- /dev/null +++ b/webapp/config/licenser/known_exceptions.txt @@ -0,0 +1,131 @@ +# +# A list of files and directories that are known exceptions to the +# license-insertion process. +# +# Files will only be altered if they contain a "magic" license place-holder, +# but if they match one of the file-matchers and don't contain a place-holder, +# the process will write a warning. +# +# File-matchers are: +# '*.java', '*.jsp', '*.tld', '*.xsl', '*.xslt', '*.css', '*.js', 'build.xml' +# +# Known exceptions listed here produce no warnings. +# + +# ContentType from Google gdata API, Apache 2.0 license included in file. +webapp/src/edu/cornell/mannlib/vitro/webapp/web/ContentType.java + +# The JSON Java source files from json.org +webapp/src/org/json/* + +# TinyMCE from tinymce.moxiecode.com +webapp/web/js/tiny_mce/* +webapp/web/js/tiny_mce/**/* + +# From apache.org under the Apache license. 
+webapp/web/admin/axis/SOAPMonitorApplet.java +services/additions/admin/axis/SOAPMonitorApplet.java + +# JQuery from jquery.com +webapp/web/js/jquery.js +webapp/web/js/jquery_plugins/* + +# From pajhome.org.uk +webapp/web/js/md5.js + +# From dojotoolkit.org +webapp/web/dojo.js +webapp/web/src/AdapterRegistry.js +webapp/web/src/animation/* +webapp/web/src/animation.js +webapp/web/src/behavior.js +webapp/web/src/bootstrap1.js +webapp/web/src/bootstrap2.js +webapp/web/src/browser_debug.js +webapp/web/src/collections/* +webapp/web/src/compat/* +webapp/web/src/crypto/* +webapp/web/src/crypto.js +webapp/web/src/data/* +webapp/web/src/data.js +webapp/web/src/date.js +webapp/web/src/debug/* +webapp/web/src/debug.js +webapp/web/src/Deferred.js +webapp/web/src/dnd/* +webapp/web/src/doc.js +webapp/web/src/dom.js +webapp/web/src/event/* +webapp/web/src/event.js +webapp/web/src/experimental.js +webapp/web/src/flash.js +webapp/web/src/fx/* +webapp/web/src/graphics/* +webapp/web/src/hostenv_adobesvg.js +webapp/web/src/hostenv_browser.js +webapp/web/src/hostenv_dashboard.js +webapp/web/src/hostenv_jsc.js +webapp/web/src/hostenv_rhino.js +webapp/web/src/hostenv_spidermonkey.js +webapp/web/src/hostenv_svg.js +webapp/web/src/hostenv_wsh.js +webapp/web/src/html/* +webapp/web/src/html.js +webapp/web/src/i18n/* +webapp/web/src/iCalendar.js +webapp/web/src/io/* +webapp/web/src/io.js +webapp/web/src/json.js +webapp/web/src/lang/* +webapp/web/src/lang.js +webapp/web/src/lfx/* +webapp/web/src/loader.js +webapp/web/src/loader_xd.js +webapp/web/src/logging/* +webapp/web/src/math/* +webapp/web/src/math.js +webapp/web/src/profile.js +webapp/web/src/reflect/* +webapp/web/src/regexp.js +webapp/web/src/rpc/* +webapp/web/src/selection/* +webapp/web/src/storage/* +webapp/web/src/storage.js +webapp/web/src/string/* +webapp/web/src/string.js +webapp/web/src/style.js +webapp/web/src/svg.js +webapp/web/src/text/* +webapp/web/src/undo/* +webapp/web/src/uri/* +webapp/web/src/uuid/* 
+webapp/web/src/validate/* +webapp/web/src/validate.js +webapp/web/src/widget/* +webapp/web/src/widget/**/* +webapp/web/src/xml/* + +# PROBLEM? Third-party tlds - where do they come from? +webapp/config/tlds/c.tld +webapp/config/tlds/fn.tld +webapp/config/tlds/sparqltag.tld +webapp/config/tlds/taglibs-mailer.tld +webapp/config/tlds/taglibs-random.tld +webapp/config/tlds/taglibs-string.tld + +# PROBLEM: It appears that these files require a LICENSE file to accompany them. +webapp/web/themes/enhanced/css/blueprint/grid.css +webapp/web/themes/enhanced/css/blueprint/ie.css + +# PROBLEM: This is from www.involutionstudios.com, not blueprint. What are the license restrictions? +webapp/web/themes/enhanced/css/blueprint/liquid.css + +# PROBLEM? from simon.incutio.com, but what are the license restrictions? +webapp/web/js/betterDateInput.js + +# PROBLEM? from wrox.com, but what are the license restrictions? +webapp/web/js/detect.js + +# PROBLEM? perhaps from www.dannyg.com, but what are the license restrictions? +webapp/web/toggle.js +webapp/web/js/toggle.js diff --git a/webapp/config/licenser/licenser.properties b/webapp/config/licenser/licenser.properties new file mode 100644 index 000000000..8b2a2dcc5 --- /dev/null +++ b/webapp/config/licenser/licenser.properties @@ -0,0 +1,37 @@ +# -------------------------------------------------------------------------- +# Properties for running the licenser utility in Vitro core. +# -------------------------------------------------------------------------- + +# The path to the top level directory to be scanned or copied +# (if relative, then relative to this file) +source_dir = ../../../ + +# The path to the top level directory to copy into (ignored if only scanning) +# (if relative, then relative to this file) +target_dir = + +# A list of filename globs that match the files we want to license, +# delimited by commas with optional white-space. 
+file_matchers = *.java, *.jsp, *.tld, *.xsl, *.xslt, *.css, *.js, build.xml + +# "globs" that describe paths that we won't follow for scanning OR FOR COPYING. +# (relative to the source_dir) +skip_directories = ./bin, ./.svn, ./**/.svn, ./webapp/.build + +# The path to a file containing filename/path globs that match the files that +# we know should have no license tags in them. +# The file contains one glob per line; blank lines and comments ("#") are ignored. +# (if relative, then relative to the source directory) +known_exceptions = webapp/config/licenser/known_exceptions.txt + +# The path to the text of the license agreement (ignored if only scanning) +# If the agreement contains a ${year} token, the current year will be substituted. +# (if relative, then relative to the source directory) +license_file = doc/license.txt + +# Set to 'full' for a full report, 'short' for a brief statement, or to anything +# else for a medium-length summary. +report_level = short + +# if true, we are just scanning, not copying. +scan_only = true diff --git a/webapp/product-build.xml b/webapp/product-build.xml index 96ca569f2..79d9acede 100644 --- a/webapp/product-build.xml +++ b/webapp/product-build.xml @@ -174,5 +174,23 @@ deploy - Deploy the application directly into the Tomcat webapps directory. + + + + + + + + + + + + +