VIVO-75 Ruby scripts to help with good-enough translation.
One set will take an ontology file, extract the labels for Google translation, and create RDF for those translated labels. The other set does the same for a properties file.
This commit is contained in:
parent
2c49931672
commit
c3d7651c82
6 changed files with 575 additions and 0 deletions
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
require 'rubygems'
|
||||
require 'rdf'
|
||||
require 'rdf/rdfxml'
|
||||
require 'rdf/ntriples'
|
||||
|
||||
include RDF
|
||||
|
||||
#
# Shared helper for the label scripts: loads an RDF file into a graph, runs a
# query against it, filters the solutions and returns them ordered by :prop.
#
class LabelCommon
  # Accepts every solution. It takes one argument because the scripts' filters
  # are called with a solution; a zero-argument lambda would raise
  # ArgumentError as soon as it was called with one.
  ACCEPT_ALL = lambda { |_solution| true }

  #
  # rdf_file:: path of the RDF file to load into the graph.
  # filter::   optional block; used by #process when #process is called
  #            without a block of its own.
  #
  def initialize(rdf_file, &filter)
    @filter = filter || ACCEPT_ALL
    @graph = Graph.load(rdf_file)
  end

  #
  # Execute the query against the loaded graph, keep only the solutions that
  # the filter accepts, and return them ordered by their :prop binding.
  #
  # A block given here takes precedence over the one given to the constructor.
  #
  def process(query, &filter)
    solutions = query.execute(@graph)
    solutions.filter!(&(filter || @filter))
    solutions.order(:prop)
  end
end
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
A utility that reads an RDF file, builds a model, sorts all of the URIs that have
|
||||
labels, and associates those URIs with the labels in a supplied text file, one line
|
||||
per label.
|
||||
|
||||
These labels are assigned the language specified on the command line, and the
|
||||
resulting RDF statements are written to the specified output file as N-Triples.
|
||||
|
||||
On the command line provide the path to the RDF file, the path to the labels file,
|
||||
the desired language/locale, and the path to the output file. E.g.:
|
||||
|
||||
label_inserter.rb ../../vivo-core-1.5-annotations.rdf labels.file es-ES labels_es.n3
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
$: << File.dirname(File.expand_path(__FILE__))
|
||||
require 'rubygems'
|
||||
require 'rdf'
|
||||
require 'label_common'
|
||||
|
||||
include RDF
|
||||
|
||||
#
# Pairs the lines of a labels file, in order, with the labelled URIs found in
# an RDF file (sorted by URI), and writes one language-tagged rdfs:label
# statement per pair to an output file in N-Triples format.
#
# NOTE(review): this class is defined in label_inserter.rb but has the same
# name as the class in label_stripper.rb. The name is kept so that existing
# callers keep working; loading both files into one process would mix the two.
#
class LabelStripper
  # Two lower-case letters, optionally followed by '-' and two upper-case
  # letters. Anchored with \A and \z (not ^ and $) so that a value containing
  # an embedded newline is rejected.
  LOCALE_PATTERN = /\A[a-z]{2}(-[A-Z]{2})?\z/

  #
  # args:: the command-line arguments:
  #        <rdf_file> <labels_input_file> <locale> <n3_output_file> [ok]
  #
  # Raises RuntimeError if the arguments don't make sense.
  #
  def initialize(args)
    @rdf_file, @labels_input_file, @locale, @n3_output_file = sanity_check_arguments(args)
  end

  #
  # Build the label statements and write them to the output file.
  #
  # The optional block is called with each query solution and decides whether
  # that solution is kept. Without a block, every solution is kept.
  #
  # Raises RuntimeError if the number of lines in the labels file differs from
  # the number of solutions.
  #
  def process(&filter)
    # The default must accept one argument: it is called with the solution.
    filter ||= lambda { |_solution| true }

    query = Query.new({
      :prop => {
        RDFS.label => :label,
      }
    })

    solutions = LabelCommon.new(@rdf_file).process(query, &filter)

    labels = File.readlines(@labels_input_file)

    raise "Number of labels (#{labels.length}) doesn't match number of URIs (#{solutions.length})" unless labels.length == solutions.length

    graph = Graph.new
    solutions.zip(labels).each do |solution, label|
      translated = Literal.new(label.chomp, :language => @locale)
      graph << Statement.new(solution.prop, RDFS.label, translated)
    end

    File.open(@n3_output_file, 'w') do |f|
      f.puts graph.dump(:ntriples)
    end
  end

  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # Parse the arguments and complain if they don't make sense.
  #
  # Returns [rdf_file, labels_input_file, locale, n3_output_file].
  #
  def sanity_check_arguments(args)
    raise "usage is: label_inserter.rb <rdf_file> <labels_input_file> <locale> <n3_output_file> [ok]" unless (4..5).include?(args.length)

    # The optional fifth argument permits overwriting an existing output file.
    if args[4].nil?
      ok = false
    elsif args[4].downcase == 'ok'
      ok = true
    else
      raise "fifth argument, if present, must be 'ok'"
    end

    rdf_file = args[0]
    raise "File '#{rdf_file}' does not exist." unless File.exist?(rdf_file)

    labels_input_file = args[1]
    raise "File '#{labels_input_file}' does not exist." unless File.exist?(labels_input_file)

    locale = args[2]
    raise "Locale should be like 'ab' or 'ab-CD'." unless LOCALE_PATTERN =~ locale

    n3_output_file = args[3]
    raise "File '#{n3_output_file}' already exists. specify 'ok' to overwrite it." if File.exist?(n3_output_file) && !ok

    return rdf_file, labels_input_file, locale, n3_output_file
  end
end
|
||||
|
||||
#
|
||||
#
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Standalone calling.
|
||||
#
|
||||
# Do this if this program was called from the command line. That is, if the command
|
||||
# expands to the path of this file.
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
# Keep only solutions whose URI is in the VIVO core namespace and whose label
# is not blank.
vivo_filter = lambda do |solution|
  solution.prop.start_with?("http://vivoweb.org/ontology/core#") &&
    !solution.label.to_s.strip.empty?
end

# Run only when this file is the program being executed, not when it is loaded
# by another script.
LabelStripper.new(ARGV).process(&vivo_filter) if File.expand_path($0) == File.expand_path(__FILE__)
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
A utility that reads an RDF file, builds a model, sorts all of the URIs that have
|
||||
labels, and produces a file of those labels, one per line. The idea is that this
|
||||
file could be translated, and the result could be put into RDF by label_inserter.rb
|
||||
|
||||
This requires the RDF.rb gem: sudo gem install rdf
|
||||
|
||||
On the command line provide the path to the RDF file and the path to the labels output file. E.g.:
|
||||
|
||||
label_stripper.rb '../../vivo-core-1.5-annotations.rdf' labels.file
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
$: << File.dirname(File.expand_path(__FILE__))
|
||||
require 'rubygems'
|
||||
require 'rdf'
|
||||
require 'label_common'
|
||||
|
||||
include RDF
|
||||
|
||||
#
# Queries an RDF file for everything that has an rdfs:label and writes the
# labels, one per line in the order returned by LabelCommon#process, to an
# output file.
#
class LabelStripper
  #
  # args:: the command-line arguments: <rdf_file> <labels_output_file> [ok]
  #
  # Raises RuntimeError if the arguments don't make sense.
  #
  def initialize(args)
    @rdf_file, @labels_output_file = sanity_check_arguments(args)
  end

  #
  # Write the labels to the output file.
  #
  # The optional block is called with each query solution and decides whether
  # that solution is kept. Without a block, every solution is kept.
  #
  def process(&filter)
    # The default must accept one argument: it is called with the solution.
    filter ||= lambda { |_solution| true }

    query = Query.new({
      :prop => {
        RDFS.label => :label,
      }
    })

    solutions = LabelCommon.new(@rdf_file).process(query, &filter)

    File.open(@labels_output_file, 'w') do |f|
      solutions.each do |solution|
        f.puts solution.label
      end
    end
  end

  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # Parse the arguments and complain if they don't make sense.
  #
  # Returns [rdf_file, labels_output_file].
  #
  def sanity_check_arguments(args)
    raise "usage is: label_stripper.rb <rdf_file> <labels_output_file> [ok]" unless (2..3).include?(args.length)

    # The optional third argument permits overwriting an existing output file.
    if args[2].nil?
      ok = false
    elsif args[2].downcase == 'ok'
      ok = true
    else
      raise "third argument, if present, must be 'ok'"
    end

    rdf_file = args[0]
    raise "File '#{rdf_file}' does not exist." unless File.exist?(rdf_file)

    labels_output_file = args[1]
    raise "File '#{labels_output_file}' already exists. specify 'ok' to overwrite it." if File.exist?(labels_output_file) && !ok

    return rdf_file, labels_output_file
  end
end
|
||||
|
||||
#
|
||||
#
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Standalone calling.
|
||||
#
|
||||
# Do this if this program was called from the command line. That is, if the command
|
||||
# expands to the path of this file.
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
# Keep only solutions whose URI is in the VIVO core namespace and whose label
# is not blank.
vivo_filter = lambda do |solution|
  solution.prop.start_with?("http://vivoweb.org/ontology/core#") &&
    !solution.label.to_s.strip.empty?
end

# Run only when this file is the program being executed, not when it is loaded
# by another script.
LabelStripper.new(ARGV).process(&vivo_filter) if File.expand_path($0) == File.expand_path(__FILE__)
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Some common routines used both by property_stripper and property_inserter
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
#
# A problem noticed while reading a properties file, together with the number
# of the line on which it was noticed.
#
# Named PropertyWarning because Ruby 2.4 and later define a core module called
# Warning; "class Warning" then fails with "Warning is not a class".
#
class PropertyWarning
  attr_reader :line
  attr_reader :message

  def initialize(line, message)
    @line = line
    @message = message
  end
end

# Keep the old name available on Rubies that have no built-in Warning module.
Warning = PropertyWarning unless Object.const_defined?(:Warning)

#
# One key/value pair from a properties file, with the number of the line on
# which it was defined. The value may be replaced; line and key may not.
#
class Property
  attr_reader :line
  attr_reader :key
  attr_accessor :value

  def initialize(line, key, value)
    @line = line
    @key = key
    @value = value
  end
end

#
# Reads a properties file into a hash of key => Property (see #properties) and
# collects a PropertyWarning for each duplicated key (see #warnings).
#
class PropertiesFile
  attr_reader :properties
  attr_reader :warnings

  #
  # path:: path of the properties file to read.
  #
  def initialize(path)
    @properties = {}
    @warnings = []
    lines = File.readlines(path)
    lines = join_continuation_lines(lines)
    read_properties(lines)
  end

  #
  # Fold each line that ends with a backslash together with the line after it.
  # The absorbed line is replaced by an empty string rather than removed, so
  # the remaining lines keep their original line numbers.
  #
  # Works from the bottom up so that a run of several continuation lines
  # collapses into its first line. Modifies and returns the given array.
  #
  def join_continuation_lines(lines)
    (lines.size - 1).downto(0) do |i|
      next unless /(.*)\\$/.match(lines[i])

      # A backslash on the very last line has nothing to join with.
      following = lines[i + 1] || ''
      lines[i] = $1 + following.lstrip
      lines[i + 1] = '' if i + 1 < lines.size
    end
    return lines
  end

  #
  # Parse the (already joined) lines into @properties. The first definition of
  # a key wins; each later definition of the same key only adds a warning.
  #
  # Note that each line is stripped in place.
  #
  def read_properties(lines)
    lines.each_with_index do |line, index|
      ln = index + 1
      line.strip!

      # ignore blank lines, and lines starting with '#' or '!'.
      next if line.empty? || line.start_with?('#', '!')

      if line =~ /(.*?)\s*[=:]\s*(.*)/
        # key and value are separated by '=' or ':' and optional whitespace.
        key = $1.strip
        value = $2
      else
        # No '=' or ':' means that the value is empty.
        key = line
        value = ''
      end

      if (dupe = @properties[key])
        @warnings << PropertyWarning.new(ln, "Key '#{key}' is duplicated on line #{dupe.line}")
      else
        @properties[key] = Property.new(ln, key, value)
      end
    end
  end
end
|
165
utilities/languageSupport/translateValuesInPropertyFile/property_inserter.rb
Executable file
165
utilities/languageSupport/translateValuesInPropertyFile/property_inserter.rb
Executable file
|
@ -0,0 +1,165 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Builds a property file, using an existing property file as a template, but
|
||||
getting the property values from a text file of translated text
|
||||
(see property_stripper.rb) and optionally a partial file of translated properties.
|
||||
|
||||
So, if you have a template file of English-language properties (e.g. all.properties),
|
||||
this will read the file into a properties structure. The text file of translated
|
||||
values is presumed to have one value per line, associated with the alphabetized
|
||||
list of keys from the template file. The translated values will replace the original
|
||||
values, with the exception that any value that starts with @@file will not be
|
||||
replaced.
|
||||
|
||||
If a partially translated file is provided, it will be read and used to replace
|
||||
any translated values from the text file, which are assumed to be weaker. Note
|
||||
that this is true of @@file values as well, which are presumed to be corrected
|
||||
for the language.
|
||||
|
||||
Any @@file values that are not overridden by the partial translation will result in
|
||||
a warning to stderr.
|
||||
|
||||
Finally, the template file is processed again, replacing the existing values with
|
||||
the translated values, but keeping the same comment and spacing structure.
|
||||
|
||||
On the command line provide the path to the template file, the text file, and
|
||||
optionally the partial translation, followed by the output file. E.g.:
|
||||
|
||||
property_inserter.rb '../../all.properties' translated.txt '../../all_es.properties' 'all_es.properties'
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
$: << File.dirname(File.expand_path(__FILE__))
|
||||
require "property_common"
|
||||
|
||||
#
# Builds a translated properties file from a template properties file, a text
# file of translated values (one per line, in the order of the template's
# sorted keys) and, optionally, a partially translated properties file whose
# values override everything else.
#
class PropertyInserter
  #
  # args:: the command-line arguments:
  #        <template_file> <translated_values_file> [partial_translation] <output_file> [ok]
  #
  # Raises RuntimeError if the arguments don't make sense.
  #
  def initialize(args)
    @template_file, @translated_values_file, @partial_translation, @output_file = sanity_check_arguments(args)
  end

  #
  # Read the template, merge in the translated values and the partial
  # translation, and write the result to the output file.
  #
  def process
    @properties_map = read_template_file
    read_and_merge_translated_values
    read_and_merge_partial_translation
    write_result
    puts "Wrote #{@properties_map.length} values to '#{@output_file}'"
  end

  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # Parse the arguments and complain if they don't make sense.
  #
  # Returns [template_file, translated_values_file, partial_translation, output_file],
  # where partial_translation is nil if it was not supplied.
  #
  def sanity_check_arguments(args)
    usage = "usage is: property_inserter.rb <template_file> <translated_values_file> [partial_translation] <output_file> [ok]"
    raise usage unless (3..5).include?(args.length)

    # Work on a copy so that the caller's array (normally ARGV) is not modified.
    remaining = args.dup

    ok = remaining.last.downcase == 'ok'
    remaining.pop if ok

    # Once the optional 'ok' is gone, there must be three or four file names.
    raise usage unless (3..4).include?(remaining.length)

    output_file = remaining.pop
    raise "File '#{output_file}' already exists. specify 'ok' to overwrite it." if File.exist?(output_file) && !ok

    template_file, translated_values_file, partial_translation = remaining

    raise "File '#{template_file}' does not exist." unless File.exist?(template_file)
    raise "File '#{translated_values_file}' does not exist." unless File.exist?(translated_values_file)
    raise "File '#{partial_translation}' does not exist." if partial_translation && !File.exist?(partial_translation)

    return template_file, translated_values_file, partial_translation, output_file
  end

  # Read the template file into a hash of key => property.
  def read_template_file
    PropertiesFile.new(@template_file).properties
  end

  #
  # Replace the template values with the lines of the translated values file,
  # matching line N with the Nth key in sorted order. Values that start with
  # "@@file" are left alone.
  #
  # Raises RuntimeError if the number of lines differs from the number of
  # properties.
  #
  def read_and_merge_translated_values
    lines = File.readlines(@translated_values_file)
    raise "Number of lines in the translated values file (#{lines.size}) does not match the number of properties in the template file (#{@properties_map.size})." unless lines.size == @properties_map.size

    count = 0
    @properties_map.keys.sort.zip(lines) do |key, line|
      property = @properties_map[key]
      next if property.value.start_with?("@@file")

      # Drop the line terminator so that it doesn't become part of the value.
      property.value = line.chomp
      count += 1
    end
    puts "Merged #{count} translated values."
  end

  #
  # Override the merged values with those from the partial translation, if one
  # was supplied. Keys that are not in the template are reported on stderr and
  # skipped. "@@file" values that were not overridden are also reported on
  # stderr.
  #
  def read_and_merge_partial_translation
    count = 0
    if @partial_translation
      @partial_map = PropertiesFile.new(@partial_translation).properties
      @partial_map.keys.each do |key|
        target = @properties_map[key]
        if target.nil?
          $stderr.puts "Warning: key '#{key}' from the partial translation is not in the template file; ignored."
          next
        end
        target.value = @partial_map[key].value
        count += 1
      end
    end
    warn_about_untranslated_file_values
    puts "Overrode #{count} from partial translation."
  end

  # Report, on stderr, each "@@file" value that the partial translation did not override.
  def warn_about_untranslated_file_values
    overridden = @partial_map || {}
    @properties_map.keys.sort.each do |key|
      value = @properties_map[key].value
      next if overridden.key?(key) || !value.start_with?("@@file")
      $stderr.puts "Warning: value of '#{key}' (#{value}) was not overridden by a partial translation."
    end
  end

  #
  # Write the output file by walking through the template again: blank lines
  # and comments are copied, and each property line keeps its key and
  # separator but gets the merged value. A line whose key is not in the map is
  # copied unchanged.
  #
  def write_result
    template_lines = merge_continuation_lines(File.readlines(@template_file))
    File.open(@output_file, 'w') do |f|
      template_lines.each do |raw|
        # Remove the line terminator first, so the separator pattern can't swallow it.
        line = raw.chomp
        stripped = line.strip

        if stripped.empty? || stripped.start_with?('#', '!')
          # copy blank lines, and lines starting with '#' or '!'.
          f.puts line
        elsif line =~ /(.*?)(\s*[=:]\s*)(.*)/
          # key and value are separated by '=' or ':' and optional whitespace.
          prefix = $1
          separator = $2
          property = @properties_map[prefix.strip]
          f.puts(property ? "#{prefix}#{separator}#{property.value}" : line)
        else
          # No '=' or ':' means that the value was empty.
          property = @properties_map[stripped]
          f.puts(property ? "#{stripped} = #{property.value}" : line)
        end
      end
    end
  end

  #
  # Fold each line that ends with a backslash together with the line after it,
  # removing the absorbed line. Works from the bottom up so that a run of
  # several continuation lines collapses into its first line.
  # Modifies and returns the given array.
  #
  def merge_continuation_lines(lines)
    (lines.size - 1).downto(0) do |i|
      next unless /(.*)\\$/.match(lines[i])

      # A backslash on the very last line has nothing to join with.
      following = lines[i + 1] || ''
      lines[i] = $1 + following.lstrip
      lines.delete_at(i + 1)
    end
    return lines
  end
end
|
||||
|
||||
#
|
||||
#
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Standalone calling.
|
||||
#
|
||||
# Do this if this program was called from the command line. That is, if the command
|
||||
# expands to the path of this file.
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
# Run only when this file is the program being executed, not when it is loaded
# by another script.
PropertyInserter.new(ARGV).process() if File.expand_path($0) == File.expand_path(__FILE__)
|
89
utilities/languageSupport/translateValuesInPropertyFile/property_stripper.rb
Executable file
89
utilities/languageSupport/translateValuesInPropertyFile/property_stripper.rb
Executable file
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/ruby
|
||||
=begin
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Read a property file, sort its keys alphabetically and write their values to
|
||||
a text file, one value per line.
|
||||
|
||||
The idea is that this file could be translated and the result could be used to
|
||||
create a new property file with property_inserter.rb
|
||||
|
||||
On the command line provide the path to the properties file and the path to the output file. E.g.:
|
||||
|
||||
property_stripper.rb '../../all.properties' output_file
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
=end
|
||||
|
||||
$: << File.dirname(File.expand_path(__FILE__))
|
||||
require "property_common"
|
||||
|
||||
#
# Reads a properties file and writes its values to a text file, one value per
# line, in the order of the alphabetically sorted keys.
#
class PropertyStripper
  #
  # args:: the command-line arguments: <properties_file> <values_output_file> [ok]
  #
  # Raises RuntimeError if the arguments don't make sense.
  #
  def initialize(args)
    @properties_file, @values_output_file = sanity_check_arguments(args)
  end

  # Read the properties file, write the values file, and report how many were written.
  def process()
    @properties = read_properties_file(@properties_file)
    write_values(@values_output_file, @properties)
    puts "Wrote #{@properties.length} values to '#{@values_output_file}'"
  end

  # ------------------------------------------------------------------------------------
  private
  # ------------------------------------------------------------------------------------

  #
  # Validate the command-line arguments and return
  # [properties_file, values_output_file].
  #
  def sanity_check_arguments(args)
    unless args.length.between?(2, 3)
      raise "usage is: property_stripper.rb <properties_file> <values_output_file> [ok]"
    end

    # The optional third argument permits overwriting an existing output file.
    flag = args[2]
    overwrite =
      if flag.nil?
        false
      elsif flag.downcase == 'ok'
        true
      else
        raise "third argument, if present, must be 'ok'"
      end

    source = args[0]
    raise "File '#{source}' does not exist." unless File.exist?(source)

    destination = args[1]
    if File.exist?(destination) && !overwrite
      raise "File '#{destination}' already exists. specify 'ok' to overwrite it."
    end

    [source, destination]
  end

  # Parse the properties file into a hash of key => property.
  def read_properties_file(properties_file)
    parsed = PropertiesFile.new(properties_file)
    parsed.properties
  end

  # Write one value per line, ordered by key.
  def write_values(values_output_file, properties)
    File.open(values_output_file, 'w') do |out|
      properties.sort_by { |key, _property| key }.each do |_key, property|
        out.puts property.value
      end
    end
  end
end
|
||||
|
||||
#
|
||||
#
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Standalone calling.
|
||||
#
|
||||
# Do this if this program was called from the command line. That is, if the command
|
||||
# expands to the path of this file.
|
||||
# ------------------------------------------------------------------------------------
|
||||
#
|
||||
|
||||
# Run only when this file is the program being executed, not when it is loaded
# by another script.
PropertyStripper.new(ARGV).process() if File.expand_path($0) == File.expand_path(__FILE__)
|
Loading…
Add table
Reference in a new issue