VIVO-125 Create more "Google Spanish" labels
Add more labels by changing the filter in the label_stripper/label_inserter scripts. Improve the structure of the scripts to use an external filter.
This commit is contained in:
parent
b758f374ad
commit
c5acf46316
5 changed files with 589 additions and 423 deletions
|
@ -11,6 +11,9 @@ require 'rdf/ntriples'
|
|||
require 'rdf/n3'
|
||||
|
||||
include RDF
|
||||
|
||||
class UsageError < StandardError; end
|
||||
class FilterError < StandardError; end
|
||||
|
||||
class LabelCommon
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
@ -21,6 +24,13 @@ class LabelCommon
|
|||
public
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
||||
# Load a solution filter from a Ruby source file.
#
# The file is expected to contain Ruby code whose final expression is a
# callable (normally a lambda) that takes one query solution and returns
# true for the solutions to keep.
#
# filter_file:: path of the filter file, or nil when no filter was requested.
#
# Returns the callable produced by the file, or a lambda that accepts every
# solution when filter_file is nil.
#
# Raises FilterError when the file cannot be read, does not parse, fails
# while it is being evaluated, or does not evaluate to a callable.
#
# SECURITY: the file's contents are handed to eval and run with the full
# privileges of this script. Only load filter files from a trusted source.
def self.load_filter(filter_file)
  return lambda { |s| true } unless filter_file

  # Passing the path to eval makes parser messages and backtraces name the
  # filter file instead of "(eval)".
  filter = eval(IO.read(filter_file), binding, filter_file)
  unless filter.respond_to?(:call)
    raise FilterError, "'#{filter_file}' must evaluate to a lambda, not #{filter.class}"
  end
  filter
rescue FilterError
  raise
rescue StandardError, ScriptError => e
  # SyntaxError descends from ScriptError, not StandardError, so a bare
  # rescue would let a malformed filter file escape as a raw parser error.
  raise FilterError, e.message
end
|
||||
|
||||
def initialize(rdf_file, &filter)
|
||||
@filter = filter.nil? ? lambda{true} : filter
|
||||
@graph = Graph.load(rdf_file)
|
||||
|
|
|
@ -24,7 +24,7 @@ require 'label_common'
|
|||
|
||||
include RDF
|
||||
|
||||
class LabelStripper
|
||||
class LabelInserter
|
||||
# ------------------------------------------------------------------------------------
|
||||
private
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
@ -33,29 +33,32 @@ class LabelStripper
|
|||
# Parse the arguments and complain if they don't make sense.
|
||||
#
|
||||
# Validate the label_inserter command line and load the optional filter.
#
# args:: the command-line arguments, in the order
#        <rdf_file> <labels_input_file> <locale> [filter_file] <n3_output_file> [ok]
#        The array is not modified.
#
# Returns rdf_file, labels_input_file, locale, filter, n3_output_file, where
# filter is whatever LabelCommon.load_filter returns for the filter file
# (or for nil when no filter file was given).
#
# Raises UsageError when the argument count is wrong, an input file is
# missing, the locale is malformed, or the output file already exists and
# 'ok' was not given. Errors raised by LabelCommon.load_filter propagate.
def sanity_check_arguments(args)
  # Work on a copy so the caller's array (normally ARGV) is left intact.
  remaining = args.dup

  # nil.to_s is "", so an empty argument list falls through to the usage
  # message instead of failing on nil.downcase.
  ok = remaining.last.to_s.downcase == 'ok'
  remaining.pop if ok

  raise UsageError, "usage is: label_inserter.rb <rdf_file> <labels_input_file> <locale> [filter_file] <n3_output_file> [ok]" unless (4..5).include?(remaining.length)

  # The output file is always last; popping it leaves the optional filter
  # file, when present, at index 3.
  n3_output_file = remaining.pop
  raise UsageError, "File '#{n3_output_file}' already exists. specify 'ok' to overwrite it." if File.exist?(n3_output_file) && !ok

  rdf_file, labels_input_file, locale, filter_file = remaining

  raise UsageError, "File '#{rdf_file}' does not exist." unless File.exist?(rdf_file)
  raise UsageError, "File '#{labels_input_file}' does not exist." unless File.exist?(labels_input_file)

  # \A and \z anchor the whole string; ^ and $ would accept any value whose
  # first line happens to look like a locale.
  raise UsageError, "Locale should be like 'ab' or 'ab-CD'." unless /\A[a-z]{2}(-[A-Z]{2})?\z/ =~ locale

  raise UsageError, "File '#{filter_file}' does not exist." if filter_file && !File.exist?(filter_file)
  filter = LabelCommon.load_filter(filter_file)

  return rdf_file, labels_input_file, locale, filter, n3_output_file
end
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
@ -63,19 +66,23 @@ class LabelStripper
|
|||
# ------------------------------------------------------------------------------------
|
||||
|
||||
# Validate the command-line arguments and store the resulting settings.
#
# args:: the command-line arguments, as accepted by sanity_check_arguments.
#
# A UsageError or FilterError is not propagated: the message is written to
# standard error and the process exits with status 1, so that a calling
# shell or script can tell that nothing was produced.
def initialize(args)
  @rdf_file, @labels_input_file, @locale, @filter, @n3_output_file = sanity_check_arguments(args)
rescue UsageError => e
  warn "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n"
  exit 1
rescue FilterError => e
  warn "\n----------------\nFilter file is invalid\n----------------\n\n#{e}\n\n----------------\n\n"
  exit 1
end
|
||||
|
||||
def process(&filter)
|
||||
filter = filter || lambda{true}
|
||||
|
||||
def process()
|
||||
query = Query.new({
|
||||
:prop => {
|
||||
RDFS.label => :label,
|
||||
}
|
||||
})
|
||||
|
||||
solutions = LabelCommon.new(@rdf_file).process(query, &filter)
|
||||
solutions = LabelCommon.new(@rdf_file).process(query, &@filter)
|
||||
|
||||
labels = IO.readlines(@labels_input_file)
|
||||
|
||||
|
@ -105,10 +112,7 @@ end
|
|||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
#vivo_filter = lambda {|s| s.prop.start_with?("http://vivoweb.org/ontology/core#") && !s.label.to_s.strip.empty?}
|
||||
vivo_filter = lambda {|s| !s.label.to_s.strip.empty?}
|
||||
|
||||
if File.expand_path($0) == File.expand_path(__FILE__)
|
||||
stripper = LabelStripper.new(ARGV)
|
||||
stripper.process(&vivo_filter)
|
||||
inserter = LabelInserter.new(ARGV)
|
||||
inserter.process()
|
||||
end
|
||||
|
|
|
@ -32,23 +32,26 @@ class LabelStripper
|
|||
# Parse the arguments and complain if they don't make sense.
|
||||
#
|
||||
# Validate the label_stripper command line and load the optional filter.
#
# args:: the command-line arguments, in the order
#        <rdf_file> [filter_file] <labels_output_file> [ok]
#        The array is not modified.
#
# Returns rdf_file, filter, output_file, where filter is whatever
# LabelCommon.load_filter returns for the filter file (or for nil when no
# filter file was given).
#
# Raises UsageError when the argument count is wrong, the RDF file or the
# filter file is missing, or the output file already exists and 'ok' was
# not given. Errors raised by LabelCommon.load_filter propagate.
def sanity_check_arguments(args)
  # Work on a copy so the caller's array (normally ARGV) is left intact.
  remaining = args.dup

  # nil.to_s is "", so an empty argument list falls through to the usage
  # message instead of failing on nil.downcase.
  ok = remaining.last.to_s.downcase == 'ok'
  remaining.pop if ok

  raise UsageError, "usage is: label_stripper.rb <rdf_file> [filter_file] <labels_output_file> [ok]" unless (2..3).include?(remaining.length)

  # The output file is always last; popping it leaves the optional filter
  # file, when present, at index 1.
  output_file = remaining.pop
  raise UsageError, "File '#{output_file}' already exists. specify 'ok' to overwrite it." if File.exist?(output_file) && !ok

  rdf_file, filter_file = remaining

  raise UsageError, "File '#{rdf_file}' does not exist." unless File.exist?(rdf_file)

  raise UsageError, "File '#{filter_file}' does not exist." if filter_file && !File.exist?(filter_file)
  filter = LabelCommon.load_filter(filter_file)

  return rdf_file, filter, output_file
end
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
|
@ -56,19 +59,23 @@ class LabelStripper
|
|||
# ------------------------------------------------------------------------------------
|
||||
|
||||
# Validate the command-line arguments and store the resulting settings.
#
# args:: the command-line arguments, as accepted by sanity_check_arguments.
#
# A UsageError or FilterError is not propagated: the message is written to
# standard error and the process exits with status 1, so that a calling
# shell or script can tell that nothing was produced.
def initialize(args)
  @rdf_file, @filter, @labels_output_file = sanity_check_arguments(args)
rescue UsageError => e
  warn "\n----------------\nUsage error\n----------------\n\n#{e}\n\n----------------\n\n"
  exit 1
rescue FilterError => e
  warn "\n----------------\nFilter file is invalid\n----------------\n\n#{e}\n\n----------------\n\n"
  exit 1
end
|
||||
|
||||
def process(&filter)
|
||||
filter = filter || lambda{true}
|
||||
|
||||
def process()
|
||||
query = Query.new({
|
||||
:prop => {
|
||||
RDFS.label => :label,
|
||||
}
|
||||
})
|
||||
|
||||
solutions = LabelCommon.new(@rdf_file).process(query, &filter)
|
||||
solutions = LabelCommon.new(@rdf_file).process(query, &@filter)
|
||||
|
||||
File.open(@labels_output_file, 'w') do |f|
|
||||
solutions.each do |s|
|
||||
|
@ -88,10 +95,7 @@ end
|
|||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
#vivo_filter = lambda {|s| s.prop.start_with?("http://vivoweb.org/ontology/core#") && !s.label.to_s.strip.empty?}
|
||||
vivo_filter = lambda {|s| !s.label.to_s.strip.empty?}
|
||||
|
||||
if File.expand_path($0) == File.expand_path(__FILE__)
|
||||
stripper = LabelStripper.new(ARGV)
|
||||
stripper.process(&vivo_filter)
|
||||
stripper.process()
|
||||
end
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Solution filter: keep a solution only when its property lies in one of the
# namespaces listed below and its label is not blank.
#
# NOTE(review): presumably this file is loaded through
# LabelCommon.load_filter, which evaluates the file and uses the value of
# its last expression - so the lambda must stay the final expression here.
namespaces = [
  "http://vivoweb.org/ontology/core#",
  "http://vitro.mannlib.cornell.edu/ns/",
  "http://purl.org/ontology/bibo/",
  "http://xmlns.com/foaf/0.1/",
  "http://purl.org/dc/terms/",
  "http://purl.org/dc/elements/1.1/",
  "http://purl.org/NET/c4dm/event.owl#"
]

lambda { |s|
  # One prefix per start_with? call, exactly as before; the namespaces are
  # tried in the listed order and the search stops at the first match.
  namespaces.any? { |ns| s.prop.start_with?(ns) } &&
    !s.label.to_s.strip.empty?
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue