Refine the script that scans for obsolete URIs

This commit is contained in:
j2blake 2013-11-04 12:55:02 -05:00
parent 9f854b6490
commit 50d788adb4
5 changed files with 64 additions and 37 deletions

View file

@ -10,6 +10,8 @@ class DirectoryWalker
if FileTest.directory?(path) if FileTest.directory?(path)
if File.basename(path).start_with?(".") if File.basename(path).start_with?(".")
Find.prune # Don't look any further into this directory. Find.prune # Don't look any further into this directory.
elsif @known_exceptions.skip?(path)
Find.prune
else else
next next
end end
@ -33,13 +35,13 @@ class DirectoryWalker
def scan_line(path, line_number, line) def scan_line(path, line_number, line)
@obsolete_uris.uris.each do |uri| @obsolete_uris.uris.each do |uri|
next if @known_exceptions.skip?(path, line_number, uri) # next if @known_exceptions.skip?(path, line_number, uri)
@report.add_event(Event.new(path, line_number, line, uri)) if line =~ Regexp.new("\\b#{Regexp.quote(uri)}\\b") @report.add_event(Event.new(path, line_number, line, uri)) if line =~ Regexp.new("\\b#{Regexp.quote(uri)}\\b")
end end
if @complete if @complete
@obsolete_uris.localnames.each do |localname| @obsolete_uris.localnames.each do |localname|
term = ":#{localname}" term = ":#{localname}"
next if @known_exceptions.skip?(path, line_number, term) # next if @known_exceptions.skip?(path, line_number, term)
@report.add_event(Event.new(path, line_number, line, term)) if line =~ Regexp.new("#{Regexp.quote(term)}\\b") @report.add_event(Event.new(path, line_number, line, term)) if line =~ Regexp.new("#{Regexp.quote(term)}\\b")
end end
end end

View file

@ -1,7 +1,7 @@
ruby obsoleteUriChecker.rb /Users/jeb228/git/VIVO \ ruby obsoleteUriChecker.rb /Users/jeb228/git/VIVO \
../../../productMods/WEB-INF/ontologies/update/diff.tab.txt \ ../../../productMods/WEB-INF/ontologies/update/diff.tab.txt \
vivo_known_exceptions_2.txt complete > scan_VIVO_maint_branch vivo_known_exceptions.txt complete > scan_VIVO_maint_branch
ruby obsoleteUriChecker.rb /Users/jeb228/git/Vitro \ ruby obsoleteUriChecker.rb /Users/jeb228/git/Vitro \
../../../productMods/WEB-INF/ontologies/update/diff.tab.txt \ ../../../productMods/WEB-INF/ontologies/update/diff.tab.txt \
vivo_known_exceptions_2.txt complete > scan_Vitro_maint_branch vivo_known_exceptions.txt complete > scan_Vitro_maint_branch

View file

@ -0,0 +1,2 @@
ruby obsoleteUriChecker.rb /Users/jeb228/Documents/Releases/VIVO\ 1.6/vivo-rel-1.6-rc1 ../../../productMods/WEB-INF/ontologies/update/diff.tab.txt vivo_known_exceptions.txt complete

View file

@ -1,17 +1,23 @@
.GIF
.as bin
.class utilities/ISF-transition/obsoleteUris
.fla
.gif *.GIF
.gz *.as
.ico *.class
.jar *.fla
.jpg *.gif
.psd *.gz
.png *.ico
.swf *.jar
.war *.jpg
.zip *.psd
*.png
*.swf
*.war
*.zip
**/.*
# #
# first_pass: no excluded files. everything was duplicated in the .bin directory, and # first_pass: no excluded files. everything was duplicated in the .bin directory, and
@ -39,3 +45,25 @@ productMods/WEB-INF/ontologies/update/oldVersion/vivo-bibo-1.5.owl
productMods/WEB-INF/ontologies/update/oldVersion/scires-1.5.owl productMods/WEB-INF/ontologies/update/oldVersion/scires-1.5.owl
productMods/WEB-INF/ontologies/update/oldVersion/vivo-dcterms-1.5.owl productMods/WEB-INF/ontologies/update/oldVersion/vivo-dcterms-1.5.owl
productMods/WEB-INF/ontologies/update/oldVersion/vivo-dcelements-1.5.owl productMods/WEB-INF/ontologies/update/oldVersion/vivo-dcelements-1.5.owl
#
# Exclude old performance tests
#
utilities/LoadTesting/distros/release1.4/deploy.properties.template
#
# Exclude the migration code itself
#
productMods/WEB-INF/ontologies/update/**/*
#
# The matching code in this file is commented out.
#
src/edu/cornell/mannlib/vitro/webapp/visualization/utilities/UtilitiesRequestHandler.java
#
# The URI is obsolete, but it has been replaced by another URI with the same localname.
# http://purl.org/dc/terms/publisher http://vivoweb.org/ontology/core#publisher
# TODO: How can the scanner catch this case?
#

View file

@ -1,22 +1,17 @@
.* .GIF
bin .as
.class
*.GIF .fla
*.as .gif
*.class .gz
*.fla .ico
*.gif .jar
*.gz .jpg
*.ico .psd
*.jar .png
*.jpg .swf
*.psd .war
*.png .zip
*.swf
*.war
*.zip
**/.*
# #
# first_pass: no excluded files. everything was duplicated in the .bin directory, and # first_pass: no excluded files. everything was duplicated in the .bin directory, and