NIHVIVO-4011 Create a utility scanner that will look for I18n problems in templates and properties files.

This commit is contained in:
j2blake 2013-01-15 15:37:53 -05:00
parent 912a063a4b
commit aab76220c7
3 changed files with 539 additions and 0 deletions

View file

@ -0,0 +1,143 @@
=begin
--------------------------------------------------------------------------------
A utility routine that scans I18n-related properties files and Freemarker
templates for obvious issues.
Properties files:
Warn if a specialized file has no default version.
Warn about duplicate keys, keys with empty values.
Warn about file reference values with no corresponding file
Warn about keys that do not appear in the default version.
If the "complete" flag is set,
Warn if the default version is not found.
Warn about missing keys, compared to the default version.
Freemarker templates:
Warn about visible text that contains other than blank space or Freemarker expressions.
Visible text is:
Anything that is not inside a tag and not between <script> or <style> tags
title="" attributes on any tags
alert="" attributes on <img> tags
alt="" attributes on <img> tags
value="" attributes on <input> tags with submit attributes
On the command line, provide a file "glob" (don't allow the shell to interpret it)
and optional "complete" or "summary" flags. E.g.:
i18nChecker.rb '../../themes/wilma/**/*' complete
--------------------------------------------------------------------------------
=end
$: << File.dirname(File.expand_path(__FILE__))
require 'properties_file_checker'
require 'template_file_checker'
#
# Command-line driver for the I18n scan. Given a file glob and optional flags, it
# finds the *.properties and *.ftl files that match, runs the appropriate checker
# on each one, and keeps running totals of files examined and warnings produced.
#
class I18nChecker
  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # Parse the arguments and complain if they don't make sense.
  #
  # args is an array of strings: the file glob, optionally followed by "complete"
  # and/or "summary" (case-insensitive, in any order).
  #
  # Returns [file_spec, complete, summary]. Raises a RuntimeError if args is
  # empty or contains an unrecognized flag.
  #
  def sanity_check_arguments(args)
    # Inspect the array we were handed, not the global ARGV, so that the class
    # behaves the same whether it is run from the command line or created by
    # other code.
    if args.nil? || args.empty?
      raise("No arguments - usage is: ruby i18nChecker.rb <file_spec_glob> [complete] [summary]")
    end

    file_spec = args[0]
    complete = false
    summary = false

    args[1..-1].each do |arg|
      case arg.downcase
      when "complete"
        complete = true
      when "summary"
        summary = true
      else
        raise("'#{arg}' is an invalid argument")
      end
    end

    puts "file_spec = '#{file_spec}', complete = #{complete}, summary = #{summary}"
    return file_spec, complete, summary
  end

  #
  # Go through the specified files and pick out the *.properties and *.ftl files.
  #
  # Records the combined count in @total_files and returns
  # [properties_paths, template_paths].
  #
  def get_file_paths(file_spec)
    properties = []
    templates = []

    Dir.glob(file_spec) do |path|
      case File.extname(path)
      when '.properties'
        properties << path
      when '.ftl'
        templates << path
      end
    end

    @total_files = properties.size + templates.size
    puts "Found #{properties.size} property files, #{templates.size} templates."
    return properties, templates
  end

  #
  # Build a checker for every properties file, let each one look for its root
  # (default) file among the others, then report and add up the warnings.
  #
  def process_properties_files(paths, complete, summary)
    checkers = paths.map { |path| PropertiesFileChecker.new(path) }

    # Every checker is offered every other checker as a possible root; the
    # checker itself decides whether the candidate is the one it wants.
    checkers.each do |child|
      checkers.each do |root|
        child.try_to_set_root(root)
      end
    end

    checkers.each do |checker|
      checker.report(complete, summary)
      @total_warnings += checker.warnings.size
    end
  end

  #
  # Check each template file, report on it, and add up the warnings.
  #
  def process_template_files(paths, summary)
    paths.each do |path|
      tf = TemplateFileChecker.new(path)
      tf.report(summary)
      @total_warnings += tf.warnings.size
    end
  end

  # ------------------------------------------------------------------------------------
  public
  # ------------------------------------------------------------------------------------

  #
  # Run the whole scan. args has the same form as the command line arguments:
  # a file glob, optionally followed by "complete" and/or "summary".
  #
  def initialize(args)
    @total_files = 0
    @total_warnings = 0

    file_spec, complete, summary = sanity_check_arguments(args)
    properties, templates = get_file_paths(file_spec)
    process_properties_files(properties, complete, summary)
    process_template_files(templates, summary)
  end

  #
  # Print the grand totals for the scan.
  #
  def summarize()
    puts "Found #{@total_warnings} warnings in #{@total_files} files."
  end
end
#
#
# ------------------------------------------------------------------------------------
# Standalone calling.
#
# Do this if this program was called from the command line. That is, if the command
# expands to the path of this file.
# ------------------------------------------------------------------------------------
#
# Run the scan only when this file is the program being executed, not when it
# has merely been loaded by another script.
this_file = File.expand_path(__FILE__)
invoked_as = File.expand_path($0)
I18nChecker.new(ARGV).summarize() if invoked_as == this_file

View file

@ -0,0 +1,220 @@
=begin
--------------------------------------------------------------------------------
Read and interpret a properties file. Accept a default version, if appropriate.
Warn if a specialized file has no default version.
Warn about duplicate keys, keys with empty values.
Warn about file reference values with no corresponding file
Warn about keys that do not appear in the default version.
If the "complete" flag is set,
Warn if the default version is not found.
Warn about missing keys, compared to the default version.
--------------------------------------------------------------------------------
=end
# Ruby 2.4 and later define a core module named Warning, and "class Warning"
# then fails with "TypeError: Warning is not a class". Drop the core constant so
# that this script's own class can take the name. Ruby's built-in warning
# machinery does not look the constant up by name, but Ruby code that calls
# Warning.warn or Warning[] after this point will not find the core module.
# TODO: rename this class (in every file that uses it) and remove this guard.
if defined?(::Warning) && !::Warning.is_a?(Class)
  Object.send(:remove_const, :Warning)
end

#
# A single problem found in a file: the line number it applies to, and a
# message describing it.
#
class Warning
  attr_reader :line
  attr_reader :message

  def initialize(line, message)
    @line = line
    @message = message
  end
end
#
# One entry read from a properties file: the key, its value, and the number of
# the line on which it was found.
#
class Property
  attr_reader :line, :key, :value

  def initialize(line, key, value)
    @line, @key, @value = line, key, value
  end
end
#
# Reads one properties file, records the properties it contains, and collects
# warnings about likely I18n problems. A locale-specific file can be paired with
# its root (default) file so that the two sets of keys can be compared.
#
class PropertiesFileChecker
  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # What path would represent the default version of this property file?
  #
  # Everything from the first underscore in the file name up to the extension is
  # dropped. A name with no underscore is its own root. Raises a RuntimeError if
  # the file name holds more than two underscores.
  #
  def figure_rootpath()
    name = File.basename(@path)
    dirname = File.dirname(@path)
    extname = File.extname(@path)
    raise("Invalid property file name: '#{@path}': too many underscores.") if name.count("_") > 2

    first_underscore = name.index('_')
    if first_underscore
      @rootpath = File.join(dirname, name[0, first_underscore] + extname)
    else
      @rootpath = @path
    end
  end

  #
  # Is this a default version or a locale-specific version?
  #
  def is_root?()
    @rootpath == @path
  end

  #
  # Warn about lines where a backslash is followed only by spaces, and return
  # the lines with those spaces (and the line terminator) removed, so that each
  # such line is treated as the continuation that was presumably intended.
  #
  def check_for_faux_continuations(lines)
    ln = 0
    lines.map do |line|
      ln += 1
      m = /(\\) +$/.match(line)
      if m
        @warnings << Warning.new(ln, "On a continuation line, the \\ must not be followed by spaces.")
        m.pre_match + m[1]
      else
        line
      end
    end
  end

  #
  # Fold each continued line into the line that ends with the backslash. The
  # absorbed line is left in place as an empty string so that line numbers
  # still match the file.
  #
  def join_continuation_lines(lines)
    # Work from the bottom up so that a run of several continuation lines
    # collapses into the first line of the run.
    (lines.size - 1).downto(0) do |i|
      m = /(.*)\\$/.match(lines[i])
      next unless m

      if i + 1 < lines.size
        lines[i] = m[1] + lines[i + 1].lstrip
        lines[i + 1] = ''
      else
        # A backslash on the last line of the file has nothing to absorb.
        lines[i] = m[1]
      end
    end
    return lines
  end

  #
  # Break each line into a key and a value, and store them in @properties.
  # Warn about any key that has already been seen; the first occurrence wins.
  #
  def read_properties(lines)
    lines.each_with_index do |raw, index|
      ln = index + 1
      line = raw.strip

      # ignore blank lines, and lines starting with '#' or '!'.
      next if line.empty? || line.start_with?('#', '!')

      m = /(.*?)\s*[=:]\s*(.*)/.match(line)
      if m
        # key and value are separated by '=' or ':' and optional whitespace.
        key = m[1].strip
        value = m[2]
      else
        # No '=' or ':' means that the value is empty.
        key = line
        value = ''
      end

      dupe = @properties[key]
      if dupe
        @warnings << Warning.new(ln, "Key '#{key}' is duplicated on line #{dupe.line}")
      else
        @properties[key] = Property.new(ln, key, value)
      end
    end
  end

  #
  # Warn if this is a locale-specific file and no root file was supplied.
  #
  def check_for_root_file()
    @warnings << Warning.new(0, "Found no root file '#{File.basename(@rootpath)}'") if !is_root?() && !@root
  end

  #
  # Warn about every property whose value is the empty string.
  #
  def scan_for_empty_values()
    @properties.values.each do |p|
      @warnings << Warning.new(p.line, "Value for '#{p.key}' is empty.") if p.value.empty?
    end
  end

  #
  # Warn about every "@@file <name>" value whose file does not exist. The name
  # is resolved relative to the directory that holds this properties file.
  #
  def scan_for_invalid_file_references()
    dir = File.dirname(@path)
    @properties.values.each do |p|
      m = /@@file\s+(.*)/.match(p.value)
      next unless m

      referenced = File.join(dir, m[1].strip)
      unless File.file?(referenced)
        @warnings << Warning.new(p.line, "Invalid file reference '#{p.value}': file not found")
      end
    end
  end

  #
  # Warn about keys in this file that the root file does not have.
  # Requires @root to be set.
  #
  def scan_for_properties_not_present_in_root()
    root_name = File.basename(@root.path)
    extra_keys = @properties.keys - @root.properties.keys
    extra_keys.each do |key|
      p = @properties[key]
      @warnings << Warning.new(p.line, "Property '#{key}' is not present in root file '#{root_name}'")
    end
  end

  #
  # Warn about keys in the root file that this file does not override.
  # Requires @root to be set. The warnings are attributed to the line after
  # the last property in this file, or to line 1 if this file has none.
  #
  def scan_for_properties_not_present_in_derived
    root_name = File.basename(@root.path)
    last_line = @properties.values.map { |p| p.line }.max || 0
    next_line = last_line + 1
    missing_keys = @root.properties.keys - @properties.keys
    missing_keys.sort.each do |key|
      @warnings << Warning.new(next_line, "No value to override '#{key}' in the root file '#{root_name}'")
    end
  end

  # ------------------------------------------------------------------------------------
  public
  # ------------------------------------------------------------------------------------

  # path is the file that was read; properties maps each key to its Property;
  # warnings is the list of Warning objects collected so far.
  attr_reader :path, :properties, :warnings

  #
  # Read and parse the properties file at the given path. Warnings about bad
  # continuation lines and duplicate keys are recorded right away; the rest are
  # added by report().
  #
  def initialize(path)
    @path = path
    @root = nil
    @rootpath = nil
    @warnings = []
    @properties = {}

    figure_rootpath()

    lines = IO.readlines(@path)
    lines = check_for_faux_continuations(lines)
    lines = join_continuation_lines(lines)
    read_properties(lines)
  end

  #
  # Adopt the given checker as this file's root, if this is a locale-specific
  # file and the candidate's path is the one we expect for our root.
  #
  def try_to_set_root(root)
    return if is_root?
    @root = root if root.path == @rootpath
  end

  #
  # Analyze the properties, and say what we found.
  #
  # If complete is true, also warn about a missing root file and about root
  # keys that this file does not override. If summary is true, print only the
  # number of warnings instead of each warning.
  #
  def report(complete, summary)
    check_for_root_file() if complete
    scan_for_empty_values()
    scan_for_invalid_file_references()
    scan_for_properties_not_present_in_root() if @root
    scan_for_properties_not_present_in_derived() if complete && @root

    puts " Properties file '#{@path}', #{@properties.size()} properties"
    return if @warnings.empty?

    if summary
      puts " #{@warnings.size} warnings."
    else
      @warnings.sort! {|a, b| a.line <=> b.line}
      @warnings.each do |w|
        puts " line #{w.line}: #{w.message}"
      end
    end
  end
end

View file

@ -0,0 +1,176 @@
=begin
--------------------------------------------------------------------------------
Read a Freemarker template and look for likely i18n problems.
Warn about visible text that contains other than blank space or Freemarker expressions.
Visible text is:
Anything that is not inside a tag and not between <script> or <style> tags
title="" attributes on any tags
alert="" attributes on <img> tags
alt="" attributes on <img> tags
value="" attributes on <input> tags with submit attributes
A Freemarker expression is enclosed in ${}
--------------------------------------------------------------------------------
=end
# Ruby 2.4 and later define a core module named Warning, and "class Warning"
# then fails with "TypeError: Warning is not a class". Drop the core constant so
# that this script's own class can take the name. Ruby's built-in warning
# machinery does not look the constant up by name, but Ruby code that calls
# Warning.warn or Warning[] after this point will not find the core module.
# TODO: rename this class (in every file that uses it) and remove this guard.
if defined?(::Warning) && !::Warning.is_a?(Class)
  Object.send(:remove_const, :Warning)
end

#
# A single problem found in a file: the line number it applies to, and a
# message describing it.
#
class Warning
  attr_reader :line
  attr_reader :message

  def initialize(line, message)
    @line = line
    @message = message
  end
end
#
# Reads one Freemarker template and collects warnings about visible text that
# contains words instead of Freemarker expressions.
#
# The template is first "laundered": comments, Freemarker tags and expressions,
# and <script> and <style> sections are overwritten with spaces. Because the
# replacement is always the same length as the text it replaces, an offset into
# the laundered text is also an offset into the original, which is how the line
# numbers are found.
#
class TemplateFileChecker
  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  # A string of spaces, of the given length.
  def blanks(length)
    ' ' * length
  end

  # Blank out HTML comments.
  def replace_comments(raw)
    raw.gsub(/<!--.*?-->/m) {|s| blanks(s.size)}
  end

  # Blank out <script> sections, including the tags themselves.
  def replace_script_tags(raw)
    raw.gsub(/<script.*?>.*?<\/script>/m) {|s| blanks(s.size)}
  end

  # Blank out <style> sections, including the tags themselves.
  def replace_style_tags(raw)
    raw.gsub(/<style.*?>.*?<\/style>/m) {|s| blanks(s.size)}
  end

  # Blank out Freemarker comments.
  def replace_freemarker_comments(raw)
    raw.gsub(/<#--.*?-->/m) {|s| blanks(s.size)}
  end

  # Blank out Freemarker tags: anything from "<#" to the next ">".
  def replace_freemarker_tags(raw)
    raw.gsub(/<#.*?>/m) {|s| blanks(s.size)}
  end

  #
  # Blank out Freemarker expressions. The pattern only matches an expression
  # with no braces inside it, so keep making passes until nothing changes:
  # each pass clears the innermost expressions and exposes the ones around them.
  #
  def replace_freemarker_expressions(raw)
    dirty = raw.dup
    loop do
      changed = dirty.gsub!(/\$\{[^\{\}]*\}/m) {|s| blanks(s.size)}
      break unless changed
    end
    dirty
  end

  # Remove named character entities, such as &nbsp;
  def remove_entities(raw)
    raw.gsub(/&[a-zA-Z]+;/, '')
  end

  #
  # Blank out everything that is never visible text. The Freemarker comments
  # are handled first and the <script> sections last.
  #
  def launder(raw)
    replace_script_tags(
      replace_style_tags(
        replace_freemarker_expressions(
          replace_freemarker_tags(
            replace_comments(
              replace_freemarker_comments(raw))))))
  end

  #
  # Read the template, launder it, and record the offset at which each line
  # starts.
  #
  def load(path)
    # Let File.open own the descriptor and close it exactly once.
    @contents = File.open(path) {|f| f.read()}
    @clean_contents = launder(@contents)

    pos = 0
    while found = @contents.index(/\n/, pos)
      pos = found + 1
      @line_offsets << pos
    end
  end

  #
  # The 1-based number of the line that contains the given offset.
  #
  def line_number(offset)
    return @line_offsets.find_index() {|o| o > offset} || @line_offsets.size
  end

  #
  # Put the text on a single line, and cut it down to 50 characters plus "..."
  # if it is longer than that.
  #
  def text_for_display(raw)
    clean = raw.gsub(/[\n\r]/m, ' ').strip
    if clean.size <= 50
      clean
    else
      clean[0, 50] + "..."
    end
  end

  #
  # Find every match of the pattern in the laundered text, and add a warning
  # for each one whose capture group (numbered group_index) contains words.
  #
  def scan(regexp, group_index, message)
    @clean_contents.scan(regexp) do
      m = Regexp.last_match
      value = m[group_index]
      if contains_words?(value)
        @warnings << Warning.new(line_number(m.begin(group_index)), "#{message}: '#{text_for_display(value)}'")
      end
    end
  end

  # Is there at least one ASCII letter in the text, not counting entities?
  def contains_words?(raw)
    remove_entities(raw).count('a-zA-Z') > 0
  end

  def scan_for_words_outside_of_tags()
    scan(/>\s*([^><]+)\s*</m, 1, "Words found outside of tags")
  end

  def scan_for_title_attributes()
    scan(/<[^>]*title=(["'])\s*([^>]+?)\s*\1.*?>/mi, 2, "Words found in title attribute of an HTML tag")
  end

  def scan_for_alert_attributes()
    scan(/<img\b[^>]*alert=(["'])\s*([^>]+?)\s*\1.*?>/mi, 2, "Words found in alert attribute of <img> tag")
  end

  def scan_for_alt_attributes()
    scan(/<img\b[^>]*alt=(["'])\s*([^>]+?)\s*\1.*?>/mi, 2, "Words found in alt attribute of <img> tag")
  end

  # Only matches when the type attribute comes before the value attribute.
  def scan_for_value_attributes_on_submit_tags()
    scan(/<input\b[^>]*type=["']submit["'][^>]*value=(["'])\s*([^'">]+?)\s*\1.*?>/mi, 2, "Words found in value attribute of <input type='submit'> tag")
  end

  # ------------------------------------------------------------------------------------
  public
  # ------------------------------------------------------------------------------------

  # The list of Warning objects found in the template.
  attr_reader :warnings

  #
  # Read the template at the given path and run every scan on it.
  #
  def initialize(path)
    @path = path
    @contents = ''
    @clean_contents = ''
    @line_offsets = [0]
    @warnings = []

    load(path)

    scan_for_words_outside_of_tags()
    scan_for_title_attributes()
    scan_for_alert_attributes()
    scan_for_alt_attributes()
    scan_for_value_attributes_on_submit_tags()
  end

  #
  # Say what we found. If summary is true, print only the number of warnings
  # instead of each warning.
  #
  def report(summary)
    puts " Template file '#{@path}'"
    return if @warnings.empty?

    if summary
      puts " #{@warnings.size} warnings."
    else
      @warnings.sort! {|a, b| a.line <=> b.line}
      @warnings.each do |w|
        puts " line #{w.line}: #{w.message}"
      end
    end
  end
end