A utility script that will help to analyze SPARQL query timings.
This commit is contained in:
parent
51e315798c
commit
6b77c3b3dc
5 changed files with 362 additions and 0 deletions
16
utilities/performance-measurement/README.txt
Normal file
16
utilities/performance-measurement/README.txt
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
Ruby scripts for analyzing the SPARQL queries that were written to the log by RDFServiceLogger.
|
||||||
|
|
||||||
|
Run VIVO, enable logging of SPARQL queries, through the Developer panel, load the page of interest.
|
||||||
|
Then, run the script to summarize the results.
|
||||||
|
|
||||||
|
Run it like this:
|
||||||
|
./scan_query_times.rb partitions /Users/jeb228/Development/Performance/Weill/tomcat/logs/vivo.all.log overlap unmatched
|
||||||
|
where:
|
||||||
|
"partitions" is the file that defines how the timings will be broken down. Check the "partitions"
|
||||||
|
file in this directory for an example
|
||||||
|
"......vivo.all.log" is the VIVO log file to be analyzed.
|
||||||
|
"overlap" is the file where the queries are written to, if they match more than one criterion.
|
||||||
|
"unmatched" is the file where queries are written if they match no criteria.
|
||||||
|
|
||||||
|
Read the comments in scan_query_times.rb for more information.
|
||||||
|
|
114
utilities/performance-measurement/partition.rb
Normal file
114
utilities/performance-measurement/partition.rb
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
class Criterion
|
||||||
|
attr_accessor :expression
|
||||||
|
attr_accessor :count
|
||||||
|
attr_accessor :total_time
|
||||||
|
def initialize(expr)
|
||||||
|
@expression = Regexp.new(expr, Regexp::MULTILINE)
|
||||||
|
@count = 0
|
||||||
|
@total_time = 0.0
|
||||||
|
end
|
||||||
|
|
||||||
|
def add(log_record)
|
||||||
|
@count += 1
|
||||||
|
@total_time += log_record.time
|
||||||
|
end
|
||||||
|
|
||||||
|
def to_s()
|
||||||
|
"#{self.class}: count=#{@count}, total_time=#{format("%.3f", @total_time)}, expression='#{@expression}'"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class QueryCriterion < Criterion
|
||||||
|
def test(log_record)
|
||||||
|
if expression.match(log_record.query)
|
||||||
|
add(log_record)
|
||||||
|
return true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class StackCriterion < Criterion
|
||||||
|
def test(log_record)
|
||||||
|
if expression.match(log_record.stack.join())
|
||||||
|
add(log_record)
|
||||||
|
return true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class WritingCriterion < Criterion
|
||||||
|
def initialize(expr, filename)
|
||||||
|
super(expr)
|
||||||
|
@file = File.new(filename, "w")
|
||||||
|
end
|
||||||
|
|
||||||
|
def add(log_record)
|
||||||
|
super
|
||||||
|
@file.write("#{@expression}\n\n--------------------------\n\n")
|
||||||
|
@file.write(log_record.to_s)
|
||||||
|
@file.write("\n--------------------------\n\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class UnmatchedCriterion < WritingCriterion
|
||||||
|
def initialize(filename)
|
||||||
|
super("UNMATCHED", filename)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class OverlapCriterion < WritingCriterion
|
||||||
|
def initialize(filename)
|
||||||
|
super("OVERLAP", filename)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class Partition
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
private
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
def parse_partition_file(partition_file)
|
||||||
|
@criteria = []
|
||||||
|
File.open(partition_file).each() do |line|
|
||||||
|
next if line.strip().empty? || line.start_with?('#')
|
||||||
|
if line.start_with?("QUERY ")
|
||||||
|
@criteria << QueryCriterion.new(line.slice(6..-1).strip)
|
||||||
|
elsif line.start_with?("STACK ")
|
||||||
|
@criteria << StackCriterion.new(line.slice(6..-1).strip)
|
||||||
|
else
|
||||||
|
raise "Invalid line in partition file: '#{line}'"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def match_against_criteria(log_record)
|
||||||
|
matches = 0
|
||||||
|
@criteria.each do |c|
|
||||||
|
matches += c.test(log_record) ? 1 : 0
|
||||||
|
end
|
||||||
|
if matches == 0
|
||||||
|
@unmatched.add(log_record)
|
||||||
|
elsif matches > 1
|
||||||
|
@overlap.add(log_record)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
public
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def initialize(partition_file, overlap_file, unmatched_file)
|
||||||
|
parse_partition_file(partition_file)
|
||||||
|
@overlap = OverlapCriterion.new(overlap_file)
|
||||||
|
@unmatched = UnmatchedCriterion.new(unmatched_file)
|
||||||
|
end
|
||||||
|
|
||||||
|
def accumulate(log_record)
|
||||||
|
match_against_criteria(log_record)
|
||||||
|
end
|
||||||
|
|
||||||
|
def report
|
||||||
|
puts "#{@criteria.join("\n")}\n#{@overlap}\n#{@unmatched}"
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
10
utilities/performance-measurement/partitions
Normal file
10
utilities/performance-measurement/partitions
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
STACK AjaxVisualizationController
|
||||||
|
#STACK IndividualRequestAnalysisContextImpl
|
||||||
|
#STACK IndividualResponseBuilder
|
||||||
|
# STACK BaseIndividualTemplateModel
|
||||||
|
#STACK \WIndividualTemplateModel
|
||||||
|
#STACK FileServingServlet
|
||||||
|
#QUERY authorshipValue
|
||||||
|
#QUERY infoResourceValue
|
||||||
|
#QUERY editorObj
|
||||||
|
#QUERY IAO_0000030
|
106
utilities/performance-measurement/scan_query_times.rb
Executable file
106
utilities/performance-measurement/scan_query_times.rb
Executable file
|
@ -0,0 +1,106 @@
|
||||||
|
#!/usr/bin/ruby
|
||||||
|
=begin
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
A utility that reads query times from a VIVO log file, and sums them based on a
|
||||||
|
list of partitioning expressions.
|
||||||
|
|
||||||
|
1) The file of partitioning expressions is parsed and stored.
|
||||||
|
2) The log file is scanned for records from the RDFServiceLogger, which are
|
||||||
|
accumulated in the program storage.
|
||||||
|
3) The partitioning expressions are tested against each log record, and matches
|
||||||
|
are recorded.
|
||||||
|
4) For each partioning expression, report:
|
||||||
|
a) the expression
|
||||||
|
b) how many queries matched it
|
||||||
|
c) the total time spent in those queries
|
||||||
|
5) As errors, report:
|
||||||
|
a) how many queries (and how much time) matched no expression. Dump a few of
|
||||||
|
them to the error file.
|
||||||
|
b) how many queries (and how much time) matched multiple expressions. Dump a
|
||||||
|
few of them to the error file.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Command line: scan_query_times.rb [partition_file] [log_file] [overlap_file] [unmatched_file]
|
||||||
|
If the wrong number of arguments, abort.
|
||||||
|
If either file does not exist, abort.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Partitioning expressions: parse the lines in the file
|
||||||
|
If a line is empty, ignore it.
|
||||||
|
If a line begins with a #, ignore it.
|
||||||
|
If a line begins with the word "STACK ", then the remainder of the line
|
||||||
|
(trimmed) is a regular expression to be matched against the stack trace of
|
||||||
|
each log record.
|
||||||
|
If a line begins with the word "QUERY ", then the remainder of the line
|
||||||
|
(trimmed) is a regular expression to be matched against the query text of
|
||||||
|
each log record.
|
||||||
|
Otherwise, abort.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
A log record begins like this:
|
||||||
|
2014-04-29 14:26:00,798 INFO [RDFServiceLogger] 0.001 sparqlAskQuery [ASK {...
|
||||||
|
|
||||||
|
The timestamp is ignored.
|
||||||
|
The log leve is ignored.
|
||||||
|
The logging class must be "RDFServiceLogger"
|
||||||
|
The time is recorded.
|
||||||
|
The method name is ignored.
|
||||||
|
The remainder of the text on that line (trimmed) is recorded as the query.
|
||||||
|
Any subsequent lines are the stack trace.
|
||||||
|
|
||||||
|
The end of the log record is a line that does not begin with whitespace.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
=end
|
||||||
|
|
||||||
|
$: << File.dirname(File.expand_path(__FILE__))
|
||||||
|
|
||||||
|
require 'partition'
|
||||||
|
require 'scanner'
|
||||||
|
|
||||||
|
class UsageError < StandardError; end
|
||||||
|
|
||||||
|
#
|
||||||
|
# Parse the arguments and complain if they don't make sense.
|
||||||
|
#
|
||||||
|
def parse_args(args)
|
||||||
|
raise UsageError, "usage is: scan_query_times.rb <partition_file> <log_file> <overlap_file> <unmatched_file>" unless (args.length == 4)
|
||||||
|
|
||||||
|
partition_file = args[0]
|
||||||
|
raise UsageError, "File '#{partition_file}' does not exist." unless File.exist?(partition_file)
|
||||||
|
log_file = args[1]
|
||||||
|
raise UsageError, "File '#{log_file}' does not exist." unless File.exist?(log_file)
|
||||||
|
overlap_file = args[2]
|
||||||
|
raise UsageError, "File '#{overlap_file}' does not exist." unless File.exist?(overlap_file)
|
||||||
|
unmatched_file = args[3]
|
||||||
|
raise UsageError, "File '#{unmatched_file}' does not exist." unless File.exist?(unmatched_file)
|
||||||
|
|
||||||
|
return partition_file, log_file, overlap_file, unmatched_file
|
||||||
|
end
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
# Standalone calling.
|
||||||
|
#
|
||||||
|
# Do this if this program was called from the command line. That is, if the command
|
||||||
|
# expands to the path of this file.
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
if File.expand_path($0) == File.expand_path(__FILE__)
|
||||||
|
begin
|
||||||
|
partition_file, log_file, overlap_file, unmatched_file = parse_args(ARGV)
|
||||||
|
partition = Partition.new(partition_file, overlap_file, unmatched_file)
|
||||||
|
scanner = Scanner.new(partition)
|
||||||
|
scanner.process(log_file)
|
||||||
|
partition.report
|
||||||
|
rescue UsageError => e
|
||||||
|
puts "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n"
|
||||||
|
exit
|
||||||
|
end
|
||||||
|
end
|
116
utilities/performance-measurement/scanner.rb
Normal file
116
utilities/performance-measurement/scanner.rb
Normal file
|
@ -0,0 +1,116 @@
|
||||||
|
class LogRecord
|
||||||
|
attr_accessor :time
|
||||||
|
attr_accessor :method
|
||||||
|
attr_accessor :query
|
||||||
|
attr_accessor :stack
|
||||||
|
def to_s
|
||||||
|
"LogRecord: time=#{@time}\n query: #{@query}\n stack - #{@stack.size} lines:\n #{@stack.join(" ")}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class LogFileParser
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
private
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
def get_line()
|
||||||
|
if @line_buffer
|
||||||
|
line = @line_buffer
|
||||||
|
@line_buffer = nil
|
||||||
|
return line
|
||||||
|
elsif @file.eof?
|
||||||
|
return nil
|
||||||
|
else
|
||||||
|
@lines += 1
|
||||||
|
return @file.readline
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def unget_line(line)
|
||||||
|
@line_buffer = line
|
||||||
|
end
|
||||||
|
|
||||||
|
def parse_first_line(line)
|
||||||
|
match = /\[RDFServiceLogger\]\s+([0-9.]+).*\[(\w*, )?(.+)\]\s*$/.match(line)
|
||||||
|
return nil unless match
|
||||||
|
|
||||||
|
log_record = LogRecord.new
|
||||||
|
log_record.time = match[1].to_f
|
||||||
|
log_record.query = match[3].strip
|
||||||
|
log_record.stack = []
|
||||||
|
return log_record
|
||||||
|
end
|
||||||
|
|
||||||
|
def is_stack_line(line)
|
||||||
|
return false unless line
|
||||||
|
return line.start_with?(' ')
|
||||||
|
end
|
||||||
|
|
||||||
|
def next_record()
|
||||||
|
while true
|
||||||
|
first_line = get_line
|
||||||
|
return nil unless first_line
|
||||||
|
log_record = parse_first_line(first_line)
|
||||||
|
break if log_record
|
||||||
|
end
|
||||||
|
|
||||||
|
while true
|
||||||
|
stack_line = get_line
|
||||||
|
if is_stack_line(stack_line)
|
||||||
|
log_record.stack << stack_line
|
||||||
|
else
|
||||||
|
unget_line(stack_line)
|
||||||
|
return log_record
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
public
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
def parse_records()
|
||||||
|
@file = File.new(@log_file)
|
||||||
|
begin
|
||||||
|
while true
|
||||||
|
record = next_record()
|
||||||
|
break unless record
|
||||||
|
yield record
|
||||||
|
end
|
||||||
|
ensure
|
||||||
|
@file.close
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(log_file)
|
||||||
|
@log_file = log_file
|
||||||
|
@line_buffer = nil
|
||||||
|
@lines = 0
|
||||||
|
end
|
||||||
|
|
||||||
|
def report()
|
||||||
|
puts "LogFileParser: read #{@lines} lines from '#{@log_file}'"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class Scanner
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
private
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
public
|
||||||
|
# ------------------------------------------------------------------------------------
|
||||||
|
def initialize(partition)
|
||||||
|
@partition = partition
|
||||||
|
end
|
||||||
|
|
||||||
|
def process(log_file)
|
||||||
|
lfp = LogFileParser.new(log_file)
|
||||||
|
lfp.parse_records() do | log_record |
|
||||||
|
@partition.accumulate(log_record)
|
||||||
|
end
|
||||||
|
lfp.report
|
||||||
|
end
|
||||||
|
end
|
Loading…
Add table
Add a link
Reference in a new issue