A utility script that will help to analyze SPARQL query timings.
This commit is contained in:
parent
51e315798c
commit
6b77c3b3dc
5 changed files with 362 additions and 0 deletions
16
utilities/performance-measurement/README.txt
Normal file
16
utilities/performance-measurement/README.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
Ruby scripts for analyzing the SPARQL queries that were written to the log by RDFServiceLogger.
|
||||
|
||||
Run VIVO, enable logging of SPARQL queries, through the Developer panel, load the page of interest.
|
||||
Then, run the script to summarize the results.
|
||||
|
||||
Run it like this:
|
||||
./scan_query_times.rb partitions /Users/jeb228/Development/Performance/Weill/tomcat/logs/vivo.all.log overlap unmatched
|
||||
where:
|
||||
"partitions" is the file that defines how the timings will be broken down. Check the "partitions"
|
||||
file in this directory for an example
|
||||
"......vivo.all.log" is the VIVO log file to be analyzed.
|
||||
"overlap" is the file where the queries are written to, if they match more than one criterion.
|
||||
"unmatched" is the file where queries are written if they match no criteria.
|
||||
|
||||
Read the comments in scan_query_times.rb for more information.
|
||||
|
114
utilities/performance-measurement/partition.rb
Normal file
114
utilities/performance-measurement/partition.rb
Normal file
|
@ -0,0 +1,114 @@
|
|||
class Criterion
|
||||
attr_accessor :expression
|
||||
attr_accessor :count
|
||||
attr_accessor :total_time
|
||||
def initialize(expr)
|
||||
@expression = Regexp.new(expr, Regexp::MULTILINE)
|
||||
@count = 0
|
||||
@total_time = 0.0
|
||||
end
|
||||
|
||||
def add(log_record)
|
||||
@count += 1
|
||||
@total_time += log_record.time
|
||||
end
|
||||
|
||||
def to_s()
|
||||
"#{self.class}: count=#{@count}, total_time=#{format("%.3f", @total_time)}, expression='#{@expression}'"
|
||||
end
|
||||
end
|
||||
|
||||
class QueryCriterion < Criterion
|
||||
def test(log_record)
|
||||
if expression.match(log_record.query)
|
||||
add(log_record)
|
||||
return true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class StackCriterion < Criterion
|
||||
def test(log_record)
|
||||
if expression.match(log_record.stack.join())
|
||||
add(log_record)
|
||||
return true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class WritingCriterion < Criterion
|
||||
def initialize(expr, filename)
|
||||
super(expr)
|
||||
@file = File.new(filename, "w")
|
||||
end
|
||||
|
||||
def add(log_record)
|
||||
super
|
||||
@file.write("#{@expression}\n\n--------------------------\n\n")
|
||||
@file.write(log_record.to_s)
|
||||
@file.write("\n--------------------------\n\n")
|
||||
end
|
||||
end
|
||||
|
||||
class UnmatchedCriterion < WritingCriterion
|
||||
def initialize(filename)
|
||||
super("UNMATCHED", filename)
|
||||
end
|
||||
end
|
||||
|
||||
class OverlapCriterion < WritingCriterion
|
||||
def initialize(filename)
|
||||
super("OVERLAP", filename)
|
||||
end
|
||||
end
|
||||
|
||||
class Partition
|
||||
# ------------------------------------------------------------------------------------
|
||||
private
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
def parse_partition_file(partition_file)
|
||||
@criteria = []
|
||||
File.open(partition_file).each() do |line|
|
||||
next if line.strip().empty? || line.start_with?('#')
|
||||
if line.start_with?("QUERY ")
|
||||
@criteria << QueryCriterion.new(line.slice(6..-1).strip)
|
||||
elsif line.start_with?("STACK ")
|
||||
@criteria << StackCriterion.new(line.slice(6..-1).strip)
|
||||
else
|
||||
raise "Invalid line in partition file: '#{line}'"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def match_against_criteria(log_record)
|
||||
matches = 0
|
||||
@criteria.each do |c|
|
||||
matches += c.test(log_record) ? 1 : 0
|
||||
end
|
||||
if matches == 0
|
||||
@unmatched.add(log_record)
|
||||
elsif matches > 1
|
||||
@overlap.add(log_record)
|
||||
end
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
public
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
||||
def initialize(partition_file, overlap_file, unmatched_file)
|
||||
parse_partition_file(partition_file)
|
||||
@overlap = OverlapCriterion.new(overlap_file)
|
||||
@unmatched = UnmatchedCriterion.new(unmatched_file)
|
||||
end
|
||||
|
||||
def accumulate(log_record)
|
||||
match_against_criteria(log_record)
|
||||
end
|
||||
|
||||
def report
|
||||
puts "#{@criteria.join("\n")}\n#{@overlap}\n#{@unmatched}"
|
||||
end
|
||||
|
||||
end
|
10
utilities/performance-measurement/partitions
Normal file
10
utilities/performance-measurement/partitions
Normal file
|
@ -0,0 +1,10 @@
|
|||
STACK AjaxVisualizationController
|
||||
#STACK IndividualRequestAnalysisContextImpl
|
||||
#STACK IndividualResponseBuilder
|
||||
# STACK BaseIndividualTemplateModel
|
||||
#STACK \WIndividualTemplateModel
|
||||
#STACK FileServingServlet
|
||||
#QUERY authorshipValue
|
||||
#QUERY infoResourceValue
|
||||
#QUERY editorObj
|
||||
#QUERY IAO_0000030
|
106
utilities/performance-measurement/scan_query_times.rb
Executable file
106
utilities/performance-measurement/scan_query_times.rb
Executable file
|
@ -0,0 +1,106 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
A utility that reads query times from a VIVO log file, and sums them based on a
|
||||
list of partitioning expressions.
|
||||
|
||||
1) The file of partitioning expressions is parsed and stored.
|
||||
2) The log file is scanned for records from the RDFServiceLogger, which are
|
||||
accumulated in the program storage.
|
||||
3) The partitioning expressions are tested against each log record, and matches
|
||||
are recorded.
|
||||
4) For each partioning expression, report:
|
||||
a) the expression
|
||||
b) how many queries matched it
|
||||
c) the total time spent in those queries
|
||||
5) As errors, report:
|
||||
a) how many queries (and how much time) matched no expression. Dump a few of
|
||||
them to the error file.
|
||||
b) how many queries (and how much time) matched multiple expressions. Dump a
|
||||
few of them to the error file.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Command line: scan_query_times.rb [partition_file] [log_file] [overlap_file] [unmatched_file]
|
||||
If the wrong number of arguments, abort.
|
||||
If either file does not exist, abort.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Partitioning expressions: parse the lines in the file
|
||||
If a line is empty, ignore it.
|
||||
If a line begins with a #, ignore it.
|
||||
If a line begins with the word "STACK ", then the remainder of the line
|
||||
(trimmed) is a regular expression to be matched against the stack trace of
|
||||
each log record.
|
||||
If a line begins with the word "QUERY ", then the remainder of the line
|
||||
(trimmed) is a regular expression to be matched against the query text of
|
||||
each log record.
|
||||
Otherwise, abort.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
A log record begins like this:
|
||||
2014-04-29 14:26:00,798 INFO [RDFServiceLogger] 0.001 sparqlAskQuery [ASK {...
|
||||
|
||||
The timestamp is ignored.
|
||||
The log leve is ignored.
|
||||
The logging class must be "RDFServiceLogger"
|
||||
The time is recorded.
|
||||
The method name is ignored.
|
||||
The remainder of the text on that line (trimmed) is recorded as the query.
|
||||
Any subsequent lines are the stack trace.
|
||||
|
||||
The end of the log record is a line that does not begin with whitespace.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
$: << File.dirname(File.expand_path(__FILE__))
|
||||
|
||||
require 'partition'
|
||||
require 'scanner'
|
||||
|
||||
class UsageError < StandardError; end
|
||||
|
||||
#
|
||||
# Parse the arguments and complain if they don't make sense.
|
||||
#
|
||||
def parse_args(args)
|
||||
raise UsageError, "usage is: scan_query_times.rb <partition_file> <log_file> <overlap_file> <unmatched_file>" unless (args.length == 4)
|
||||
|
||||
partition_file = args[0]
|
||||
raise UsageError, "File '#{partition_file}' does not exist." unless File.exist?(partition_file)
|
||||
log_file = args[1]
|
||||
raise UsageError, "File '#{log_file}' does not exist." unless File.exist?(log_file)
|
||||
overlap_file = args[2]
|
||||
raise UsageError, "File '#{overlap_file}' does not exist." unless File.exist?(overlap_file)
|
||||
unmatched_file = args[3]
|
||||
raise UsageError, "File '#{unmatched_file}' does not exist." unless File.exist?(unmatched_file)
|
||||
|
||||
return partition_file, log_file, overlap_file, unmatched_file
|
||||
end
|
||||
|
||||
#
|
||||
#
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Standalone calling.
|
||||
#
|
||||
# Do this if this program was called from the command line. That is, if the command
|
||||
# expands to the path of this file.
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
if File.expand_path($0) == File.expand_path(__FILE__)
|
||||
begin
|
||||
partition_file, log_file, overlap_file, unmatched_file = parse_args(ARGV)
|
||||
partition = Partition.new(partition_file, overlap_file, unmatched_file)
|
||||
scanner = Scanner.new(partition)
|
||||
scanner.process(log_file)
|
||||
partition.report
|
||||
rescue UsageError => e
|
||||
puts "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n"
|
||||
exit
|
||||
end
|
||||
end
|
116
utilities/performance-measurement/scanner.rb
Normal file
116
utilities/performance-measurement/scanner.rb
Normal file
|
@ -0,0 +1,116 @@
|
|||
class LogRecord
|
||||
attr_accessor :time
|
||||
attr_accessor :method
|
||||
attr_accessor :query
|
||||
attr_accessor :stack
|
||||
def to_s
|
||||
"LogRecord: time=#{@time}\n query: #{@query}\n stack - #{@stack.size} lines:\n #{@stack.join(" ")}"
|
||||
end
|
||||
end
|
||||
|
||||
class LogFileParser
|
||||
# ------------------------------------------------------------------------------------
|
||||
private
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
def get_line()
|
||||
if @line_buffer
|
||||
line = @line_buffer
|
||||
@line_buffer = nil
|
||||
return line
|
||||
elsif @file.eof?
|
||||
return nil
|
||||
else
|
||||
@lines += 1
|
||||
return @file.readline
|
||||
end
|
||||
end
|
||||
|
||||
def unget_line(line)
|
||||
@line_buffer = line
|
||||
end
|
||||
|
||||
def parse_first_line(line)
|
||||
match = /\[RDFServiceLogger\]\s+([0-9.]+).*\[(\w*, )?(.+)\]\s*$/.match(line)
|
||||
return nil unless match
|
||||
|
||||
log_record = LogRecord.new
|
||||
log_record.time = match[1].to_f
|
||||
log_record.query = match[3].strip
|
||||
log_record.stack = []
|
||||
return log_record
|
||||
end
|
||||
|
||||
def is_stack_line(line)
|
||||
return false unless line
|
||||
return line.start_with?(' ')
|
||||
end
|
||||
|
||||
def next_record()
|
||||
while true
|
||||
first_line = get_line
|
||||
return nil unless first_line
|
||||
log_record = parse_first_line(first_line)
|
||||
break if log_record
|
||||
end
|
||||
|
||||
while true
|
||||
stack_line = get_line
|
||||
if is_stack_line(stack_line)
|
||||
log_record.stack << stack_line
|
||||
else
|
||||
unget_line(stack_line)
|
||||
return log_record
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
public
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
def parse_records()
|
||||
@file = File.new(@log_file)
|
||||
begin
|
||||
while true
|
||||
record = next_record()
|
||||
break unless record
|
||||
yield record
|
||||
end
|
||||
ensure
|
||||
@file.close
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(log_file)
|
||||
@log_file = log_file
|
||||
@line_buffer = nil
|
||||
@lines = 0
|
||||
end
|
||||
|
||||
def report()
|
||||
puts "LogFileParser: read #{@lines} lines from '#{@log_file}'"
|
||||
end
|
||||
end
|
||||
|
||||
class Scanner
|
||||
# ------------------------------------------------------------------------------------
|
||||
private
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
public
|
||||
# ------------------------------------------------------------------------------------
|
||||
def initialize(partition)
|
||||
@partition = partition
|
||||
end
|
||||
|
||||
def process(log_file)
|
||||
lfp = LogFileParser.new(log_file)
|
||||
lfp.parse_records() do | log_record |
|
||||
@partition.accumulate(log_record)
|
||||
end
|
||||
lfp.report
|
||||
end
|
||||
end
|
Loading…
Add table
Add a link
Reference in a new issue