From 6b77c3b3dc3ca372499fbf3ead78083315aa8059 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Fri, 2 May 2014 15:56:26 -0400 Subject: [PATCH] A utility script that will help to analyze SPARQL query timings. --- utilities/performance-measurement/README.txt | 16 +++ .../performance-measurement/partition.rb | 114 +++++++++++++++++ utilities/performance-measurement/partitions | 10 ++ .../scan_query_times.rb | 106 ++++++++++++++++ utilities/performance-measurement/scanner.rb | 116 ++++++++++++++++++ 5 files changed, 362 insertions(+) create mode 100644 utilities/performance-measurement/README.txt create mode 100644 utilities/performance-measurement/partition.rb create mode 100644 utilities/performance-measurement/partitions create mode 100755 utilities/performance-measurement/scan_query_times.rb create mode 100644 utilities/performance-measurement/scanner.rb diff --git a/utilities/performance-measurement/README.txt b/utilities/performance-measurement/README.txt new file mode 100644 index 00000000..4e914ed4 --- /dev/null +++ b/utilities/performance-measurement/README.txt @@ -0,0 +1,16 @@ +Ruby scripts for analyzing the SPARQL queries that were written to the log by RDFServiceLogger. + +Run VIVO, enable logging of SPARQL queries, through the Developer panel, load the page of interest. +Then, run the script to summarize the results. + +Run it like this: +./scan_query_times.rb partitions /Users/jeb228/Development/Performance/Weill/tomcat/logs/vivo.all.log overlap unmatched +where: + "partitions" is the file that defines how the timings will be broken down. Check the "partitions" + file in this directory for an example + "......vivo.all.log" is the VIVO log file to be analyzed. + "overlap" is the file where the queries are written to, if they match more than one criterion. + "unmatched" is the file where queries are written if they match no criteria. + +Read the comments in scan_query_times.rb for more information. + diff --git a/utilities/performance-measurement/partition.rb b/utilities/performance-measurement/partition.rb new file mode 100644 index 00000000..6fab25f8 --- /dev/null +++ b/utilities/performance-measurement/partition.rb @@ -0,0 +1,114 @@ +class Criterion + attr_accessor :expression + attr_accessor :count + attr_accessor :total_time + def initialize(expr) + @expression = Regexp.new(expr, Regexp::MULTILINE) + @count = 0 + @total_time = 0.0 + end + + def add(log_record) + @count += 1 + @total_time += log_record.time + end + + def to_s() + "#{self.class}: count=#{@count}, total_time=#{format("%.3f", @total_time)}, expression='#{@expression}'" + end +end + +class QueryCriterion < Criterion + def test(log_record) + if expression.match(log_record.query) + add(log_record) + return true + end + end +end + +class StackCriterion < Criterion + def test(log_record) + if expression.match(log_record.stack.join()) + add(log_record) + return true + end + end +end + +class WritingCriterion < Criterion + def initialize(expr, filename) + super(expr) + @file = File.new(filename, "w") + end + + def add(log_record) + super + @file.write("#{@expression}\n\n--------------------------\n\n") + @file.write(log_record.to_s) + @file.write("\n--------------------------\n\n") + end +end + +class UnmatchedCriterion < WritingCriterion + def initialize(filename) + super("UNMATCHED", filename) + end +end + +class OverlapCriterion < WritingCriterion + def initialize(filename) + super("OVERLAP", filename) + end +end + +class Partition + # ------------------------------------------------------------------------------------ + private + # ------------------------------------------------------------------------------------ + # + def parse_partition_file(partition_file) + @criteria = [] + File.open(partition_file).each() do |line| + next if line.strip().empty? || line.start_with?('#') + if line.start_with?("QUERY ") + @criteria << QueryCriterion.new(line.slice(6..-1).strip) + elsif line.start_with?("STACK ") + @criteria << StackCriterion.new(line.slice(6..-1).strip) + else + raise "Invalid line in partition file: '#{line}'" + end + end + end + + def match_against_criteria(log_record) + matches = 0 + @criteria.each do |c| + matches += c.test(log_record) ? 1 : 0 + end + if matches == 0 + @unmatched.add(log_record) + elsif matches > 1 + @overlap.add(log_record) + end + end + + # ------------------------------------------------------------------------------------ + public + # ------------------------------------------------------------------------------------ + + def initialize(partition_file, overlap_file, unmatched_file) + parse_partition_file(partition_file) + @overlap = OverlapCriterion.new(overlap_file) + @unmatched = UnmatchedCriterion.new(unmatched_file) + end + + def accumulate(log_record) + match_against_criteria(log_record) + end + + def report + puts "#{@criteria.join("\n")}\n#{@overlap}\n#{@unmatched}" + end + +end diff --git a/utilities/performance-measurement/partitions b/utilities/performance-measurement/partitions new file mode 100644 index 00000000..d7774007 --- /dev/null +++ b/utilities/performance-measurement/partitions @@ -0,0 +1,10 @@ +STACK AjaxVisualizationController +#STACK IndividualRequestAnalysisContextImpl +#STACK IndividualResponseBuilder +# STACK BaseIndividualTemplateModel +#STACK \WIndividualTemplateModel +#STACK FileServingServlet +#QUERY authorshipValue +#QUERY infoResourceValue +#QUERY editorObj +#QUERY IAO_0000030 diff --git a/utilities/performance-measurement/scan_query_times.rb b/utilities/performance-measurement/scan_query_times.rb new file mode 100755 index 00000000..9a2b4c12 --- /dev/null +++ b/utilities/performance-measurement/scan_query_times.rb @@ -0,0 +1,106 @@ +#!/usr/bin/ruby +=begin +-------------------------------------------------------------------------------- + +A utility that reads query times from a VIVO log file, and sums them based on a +list of partitioning expressions. + +1) The file of partitioning expressions is parsed and stored. +2) The log file is scanned for records from the RDFServiceLogger, which are + accumulated in the program storage. +3) The partitioning expressions are tested against each log record, and matches + are recorded. +4) For each partioning expression, report: + a) the expression + b) how many queries matched it + c) the total time spent in those queries +5) As errors, report: + a) how many queries (and how much time) matched no expression. Dump a few of + them to the error file. + b) how many queries (and how much time) matched multiple expressions. Dump a + few of them to the error file. + +-------------------------------------------------------------------------------- + +Command line: scan_query_times.rb [partition_file] [log_file] [overlap_file] [unmatched_file] + If the wrong number of arguments, abort. + If either file does not exist, abort. + +-------------------------------------------------------------------------------- + +Partitioning expressions: parse the lines in the file + If a line is empty, ignore it. + If a line begins with a #, ignore it. + If a line begins with the word "STACK ", then the remainder of the line + (trimmed) is a regular expression to be matched against the stack trace of + each log record. + If a line begins with the word "QUERY ", then the remainder of the line + (trimmed) is a regular expression to be matched against the query text of + each log record. + Otherwise, abort. + +-------------------------------------------------------------------------------- + +A log record begins like this: + 2014-04-29 14:26:00,798 INFO [RDFServiceLogger] 0.001 sparqlAskQuery [ASK {... + + The timestamp is ignored. + The log leve is ignored. + The logging class must be "RDFServiceLogger" + The time is recorded. + The method name is ignored. + The remainder of the text on that line (trimmed) is recorded as the query. + Any subsequent lines are the stack trace. + +The end of the log record is a line that does not begin with whitespace. + +-------------------------------------------------------------------------------- +=end + +$: << File.dirname(File.expand_path(__FILE__)) + +require 'partition' +require 'scanner' + +class UsageError < StandardError; end + +# +# Parse the arguments and complain if they don't make sense. +# +def parse_args(args) + raise UsageError, "usage is: scan_query_times.rb " unless (args.length == 4) + + partition_file = args[0] + raise UsageError, "File '#{partition_file}' does not exist." unless File.exist?(partition_file) + log_file = args[1] + raise UsageError, "File '#{log_file}' does not exist." unless File.exist?(log_file) + overlap_file = args[2] + raise UsageError, "File '#{overlap_file}' does not exist." unless File.exist?(overlap_file) + unmatched_file = args[3] + raise UsageError, "File '#{unmatched_file}' does not exist." unless File.exist?(unmatched_file) + + return partition_file, log_file, overlap_file, unmatched_file +end + +# +# +# ------------------------------------------------------------------------------------ +# Standalone calling. +# +# Do this if this program was called from the command line. That is, if the command +# expands to the path of this file. +# ------------------------------------------------------------------------------------ +# + +if File.expand_path($0) == File.expand_path(__FILE__) + begin + partition_file, log_file, overlap_file, unmatched_file = parse_args(ARGV) + partition = Partition.new(partition_file, overlap_file, unmatched_file) + scanner = Scanner.new(partition) + scanner.process(log_file) + partition.report + rescue UsageError => e + puts "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n" + exit + end +end diff --git a/utilities/performance-measurement/scanner.rb b/utilities/performance-measurement/scanner.rb new file mode 100644 index 00000000..bf70c0ca --- /dev/null +++ b/utilities/performance-measurement/scanner.rb @@ -0,0 +1,116 @@ +class LogRecord + attr_accessor :time + attr_accessor :method + attr_accessor :query + attr_accessor :stack + def to_s + "LogRecord: time=#{@time}\n query: #{@query}\n stack - #{@stack.size} lines:\n #{@stack.join(" ")}" + end +end + +class LogFileParser + # ------------------------------------------------------------------------------------ + private + # ------------------------------------------------------------------------------------ + # + def get_line() + if @line_buffer + line = @line_buffer + @line_buffer = nil + return line + elsif @file.eof? + return nil + else + @lines += 1 + return @file.readline + end + end + + def unget_line(line) + @line_buffer = line + end + + def parse_first_line(line) + match = /\[RDFServiceLogger\]\s+([0-9.]+).*\[(\w*, )?(.+)\]\s*$/.match(line) + return nil unless match + + log_record = LogRecord.new + log_record.time = match[1].to_f + log_record.query = match[3].strip + log_record.stack = [] + return log_record + end + + def is_stack_line(line) + return false unless line + return line.start_with?(' ') + end + + def next_record() + while true + first_line = get_line + return nil unless first_line + log_record = parse_first_line(first_line) + break if log_record + end + + while true + stack_line = get_line + if is_stack_line(stack_line) + log_record.stack << stack_line + else + unget_line(stack_line) + return log_record + end + end + end + + # ------------------------------------------------------------------------------------ + public + + # ------------------------------------------------------------------------------------ + # + def parse_records() + @file = File.new(@log_file) + begin + while true + record = next_record() + break unless record + yield record + end + ensure + @file.close + end + end + + def initialize(log_file) + @log_file = log_file + @line_buffer = nil + @lines = 0 + end + + def report() + puts "LogFileParser: read #{@lines} lines from '#{@log_file}'" + end +end + +class Scanner + # ------------------------------------------------------------------------------------ + private + # ------------------------------------------------------------------------------------ + + # ------------------------------------------------------------------------------------ + public + # ------------------------------------------------------------------------------------ + def initialize(partition) + @partition = partition + end + + def process(log_file) + lfp = LogFileParser.new(log_file) + lfp.parse_records() do | log_record | + @partition.accumulate(log_record) + end + lfp.report + end +end