Maven migration (first draft)

This commit is contained in:
Graham Triggs 2015-11-19 23:47:41 +00:00
parent da79ac3e1d
commit fee48b0b50
1711 changed files with 662 additions and 0 deletions

View file

@ -0,0 +1,16 @@
Ruby scripts for analyzing the SPARQL queries that were written to the log by RDFServiceLogger.
Run VIVO, enable logging of SPARQL queries, through the Developer panel, load the page of interest.
Then, run the script to summarize the results.
Run it like this:
./scan_query_times.rb partitions /Users/jeb228/Development/Performance/Weill/tomcat/logs/vivo.all.log overlap unmatched
where:
"partitions" is the file that defines how the timings will be broken down. Check the "partitions"
file in this directory for an example
"......vivo.all.log" is the VIVO log file to be analyzed.
"overlap" is the file where the queries are written to, if they match more than one criterion.
"unmatched" is the file where queries are written if they match no criteria.
Read the comments in scan_query_times.rb for more information.

View file

@ -0,0 +1,171 @@
class Criterion
attr_accessor :expression
attr_accessor :group_name
attr_accessor :count
attr_accessor :total_time
def initialize(expr, current_group_name='')
expr = expr.gsub(/\s+/, ' ')
@expression = Regexp.new(expr, Regexp::MULTILINE)
@group_name = current_group_name || ''
@count = 0
@total_time = 0.0
end
def add(log_record)
@count += 1
@total_time += log_record.time
end
def to_s()
format("%s: count=%5d, total_time=%9.3f, expression='%s'", self.class, @count, @total_time, @expression.source)
end
end
class QueryCriterion < Criterion
def test(log_record)
if expression.match(log_record.query)
add(log_record)
return true
end
end
end
class StackCriterion < Criterion
def test(log_record)
if expression.match(log_record.stack.join())
add(log_record)
return true
end
end
end
class WritingCriterion < Criterion
def initialize(expr, filename)
super(expr)
@file = File.new(filename, "w")
end
def add(log_record)
super
@file.write("#{@expression}\n\n--------------------------\n\n")
@file.write(log_record.to_s)
@file.write("\n--------------------------\n\n")
end
end
class UnmatchedCriterion < WritingCriterion
def initialize(filename)
super("UNMATCHED", filename)
end
end
class OverlapCriterion < WritingCriterion
def initialize(filename)
super("OVERLAP", filename)
end
end
class GroupList
def initialize(criteria)
@list = []
criteria.each do |c|
add_to_group(c)
end
end
def add_to_group(c)
@list.each do |g|
if g.name == c.group_name
g.add(c)
return
end
end
@list << Group.new(c)
end
def to_s()
total_count = @list.inject(0) {|sum, g| sum + g.total_count}
total_time = @list.inject(0.0) {|sum, g| sum + g.total_time}
format("%s \nALL GROUPS: total count = %5d, total time = %9.3f seconds \n", @list.join("\n"), total_count, total_time)
end
class Group
attr :name
attr :criteria
attr :total_count
attr :total_time
def initialize(c)
@name = c.group_name
@criteria = []
@total_count = 0
@total_time = 0.0
add(c)
end
def add(c)
@criteria << c
@total_count += c.count
@total_time += c.total_time
end
def to_s()
format("GROUP '%s' \n %s\n %20s total count = %5d, total time = %9.3f seconds \n", @name, @criteria.join("\n "), ' ', @total_count, @total_time)
end
end
end
class Partition
# ------------------------------------------------------------------------------------
private
# ------------------------------------------------------------------------------------
#
def parse_partition_file(partition_file)
@criteria = []
current_group_name = ''
File.open(partition_file).each() do |line|
next if line.strip().empty? || line.start_with?('#')
if line.start_with?("QUERY ")
@criteria << QueryCriterion.new(line.slice(6..-1).strip, current_group_name)
elsif line.start_with?("STACK ")
@criteria << StackCriterion.new(line.slice(6..-1).strip, current_group_name)
elsif line.start_with?("GROUP ")
current_group_name = line.slice(6..-1).strip
elsif line.start_with?("GROUP")
current_group_name = ''
else
raise "Invalid line in partition file: '#{line}'"
end
end
end
def match_against_criteria(log_record)
matches = 0
@criteria.each do |c|
matches += c.test(log_record) ? 1 : 0
end
if matches == 0
@unmatched.add(log_record)
elsif matches > 1
@overlap.add(log_record)
end
end
# ------------------------------------------------------------------------------------
public
# ------------------------------------------------------------------------------------
def initialize(partition_file, overlap_file, unmatched_file)
parse_partition_file(partition_file)
@overlap = OverlapCriterion.new(overlap_file)
@unmatched = UnmatchedCriterion.new(unmatched_file)
end
def accumulate(log_record)
match_against_criteria(log_record)
end
def report
puts "#{GroupList.new(@criteria)}\n#{@overlap}\n#{@unmatched}\n"
end
end

View file

@ -0,0 +1,10 @@
STACK AjaxVisualizationController
#STACK IndividualRequestAnalysisContextImpl
#STACK IndividualResponseBuilder
# STACK BaseIndividualTemplateModel
#STACK \WIndividualTemplateModel
#STACK FileServingServlet
#QUERY authorshipValue
#QUERY infoResourceValue
#QUERY editorObj
#QUERY IAO_0000030

View file

@ -0,0 +1,115 @@
#!/usr/bin/ruby
=begin
--------------------------------------------------------------------------------
A utility that reads query times from a VIVO log file, and sums them based on a
list of partitioning expressions.
1) The file of partitioning expressions is parsed and stored.
2) The log file is scanned for records from the RDFServiceLogger, which are
accumulated in the program storage.
3) The partitioning expressions are tested against each log record, and matches
are recorded.
Strings of white space are treated as single spaces, both in the
partitioning expressions and in the log records. This makes the
expressions easier to read, both in the partitions file and in the program
output.
4) For each partioning expression, report:
a) the expression
b) how many queries matched it
c) the total time spent in those queries
5) As errors, report:
a) how many queries (and how much time) matched no expression. Dump a few of
them to the error file.
b) how many queries (and how much time) matched multiple expressions. Dump a
few of them to the error file.
--------------------------------------------------------------------------------
Command line: scan_query_times.rb [partition_file] [log_file] [overlap_file] [unmatched_file]
If the wrong number of arguments, abort.
If either file does not exist, abort.
--------------------------------------------------------------------------------
Partitioning expressions: parse the lines in the file
If a line is empty, ignore it.
If a line begins with a #, ignore it.
If a line begins with the word "STACK ", then the remainder of the line
(trimmed) is a regular expression to be matched against the stack trace of
each log record.
If a line begins with the word "QUERY ", then the remainder of the line
(trimmed) is a regular expression to be matched against the query text of
each log record.
If a line begins with the word "GROUP ", then the remainder of the line
(trimmed) is the text name of the group. All matchers that follow are
included in that named group, for summary output purposes. Matcher found
before the first GROUP line are in the unnamed group, as are any that
follow a GROUP line with no name.
Otherwise, abort.
--------------------------------------------------------------------------------
A log record begins like this:
2014-04-29 14:26:00,798 INFO [RDFServiceLogger] 0.001 sparqlAskQuery [ASK {...
The timestamp is ignored.
The log leve is ignored.
The logging class must be "RDFServiceLogger"
The time is recorded.
The method name is ignored.
The remainder of the text on that line (trimmed) is recorded as the query.
Any subsequent lines are the stack trace.
The end of the log record is a line that does not begin with whitespace.
--------------------------------------------------------------------------------
=end
$: << File.dirname(File.expand_path(__FILE__))
require 'partition'
require 'scanner'
class UsageError < StandardError; end
#
# Parse the arguments and complain if they don't make sense.
#
def parse_args(args)
raise UsageError, "usage is: scan_query_times.rb <partition_file> <log_file> <overlap_file> <unmatched_file>" unless (args.length == 4)
partition_file = args[0]
raise UsageError, "File '#{partition_file}' does not exist." unless File.exist?(partition_file)
log_file = args[1]
raise UsageError, "File '#{log_file}' does not exist." unless File.exist?(log_file)
overlap_file = args[2]
raise UsageError, "File '#{overlap_file}' does not exist." unless File.exist?(overlap_file)
unmatched_file = args[3]
raise UsageError, "File '#{unmatched_file}' does not exist." unless File.exist?(unmatched_file)
return partition_file, log_file, overlap_file, unmatched_file
end
#
#
# ------------------------------------------------------------------------------------
# Standalone calling.
#
# Do this if this program was called from the command line. That is, if the command
# expands to the path of this file.
# ------------------------------------------------------------------------------------
#
if File.expand_path($0) == File.expand_path(__FILE__)
begin
partition_file, log_file, overlap_file, unmatched_file = parse_args(ARGV)
partition = Partition.new(partition_file, overlap_file, unmatched_file)
scanner = Scanner.new(partition)
scanner.process(log_file)
partition.report
rescue UsageError => e
puts "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n"
exit
end
end

View file

@ -0,0 +1,116 @@
class LogRecord
attr_accessor :time
attr_accessor :method
attr_accessor :query
attr_accessor :stack
def to_s
"LogRecord: time=#{@time}\n query: #{@query}\n stack - #{@stack.size} lines:\n #{@stack.join(" ")}"
end
end
class LogFileParser
# ------------------------------------------------------------------------------------
private
# ------------------------------------------------------------------------------------
#
def get_line()
if @line_buffer
line = @line_buffer
@line_buffer = nil
return line
elsif @file.eof?
return nil
else
@lines += 1
return @file.readline
end
end
def unget_line(line)
@line_buffer = line
end
def parse_first_line(line)
match = /\[RDFServiceLogger\]\s+([0-9.]+).*\[(\w*, )?(.+)\]\s*$/.match(line)
return nil unless match
log_record = LogRecord.new
log_record.time = match[1].to_f
log_record.query = match[3].strip.gsub(/\s+/, ' ')
log_record.stack = []
return log_record
end
def is_stack_line(line)
return false unless line
return line.start_with?(' ')
end
def next_record()
while true
first_line = get_line
return nil unless first_line
log_record = parse_first_line(first_line)
break if log_record
end
while true
stack_line = get_line
if is_stack_line(stack_line)
log_record.stack << stack_line
else
unget_line(stack_line)
return log_record
end
end
end
# ------------------------------------------------------------------------------------
public
# ------------------------------------------------------------------------------------
#
def parse_records()
@file = File.new(@log_file)
begin
while true
record = next_record()
break unless record
yield record
end
ensure
@file.close
end
end
def initialize(log_file)
@log_file = log_file
@line_buffer = nil
@lines = 0
end
def report()
puts "LogFileParser: read #{@lines} lines from '#{@log_file}'"
end
end
class Scanner
# ------------------------------------------------------------------------------------
private
# ------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------
public
# ------------------------------------------------------------------------------------
def initialize(partition)
@partition = partition
end
def process(log_file)
lfp = LogFileParser.new(log_file)
lfp.parse_records() do | log_record |
@partition.accumulate(log_record)
end
lfp.report
end
end