diff --git a/solr/apache-solr-3.6.2.war b/solr/apache-solr-3.6.2.war
deleted file mode 100644
index 8144c4119..000000000
Binary files a/solr/apache-solr-3.6.2.war and /dev/null differ
diff --git a/solr/homeDirectoryTemplate/conf/admin-extra.html b/solr/homeDirectoryTemplate/conf/admin-extra.html
index 21b50901c..fecab2051 100644
--- a/solr/homeDirectoryTemplate/conf/admin-extra.html
+++ b/solr/homeDirectoryTemplate/conf/admin-extra.html
@@ -1,31 +1,24 @@
-
-
-
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/admin-extra.menu-bottom.html b/solr/homeDirectoryTemplate/conf/admin-extra.menu-bottom.html
new file mode 100644
index 000000000..3359a460a
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/admin-extra.menu-bottom.html
@@ -0,0 +1,25 @@
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/admin-extra.menu-top.html b/solr/homeDirectoryTemplate/conf/admin-extra.menu-top.html
new file mode 100644
index 000000000..0886cee37
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/admin-extra.menu-top.html
@@ -0,0 +1,25 @@
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/clustering/carrot2/kmeans-attributes.xml b/solr/homeDirectoryTemplate/conf/clustering/carrot2/kmeans-attributes.xml
new file mode 100644
index 000000000..d802465f6
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/clustering/carrot2/kmeans-attributes.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/clustering/carrot2/lingo-attributes.xml b/solr/homeDirectoryTemplate/conf/clustering/carrot2/lingo-attributes.xml
new file mode 100644
index 000000000..5febfc320
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/clustering/carrot2/lingo-attributes.xml
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/clustering/carrot2/stc-attributes.xml b/solr/homeDirectoryTemplate/conf/clustering/carrot2/stc-attributes.xml
new file mode 100644
index 000000000..c1bf110c8
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/clustering/carrot2/stc-attributes.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/currency.xml b/solr/homeDirectoryTemplate/conf/currency.xml
new file mode 100644
index 000000000..3a9c58afe
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/elevate.xml b/solr/homeDirectoryTemplate/conf/elevate.xml
index b91e75cec..25d5cebe4 100644
--- a/solr/homeDirectoryTemplate/conf/elevate.xml
+++ b/solr/homeDirectoryTemplate/conf/elevate.xml
@@ -1,36 +1,38 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/contractions_ca.txt b/solr/homeDirectoryTemplate/conf/lang/contractions_ca.txt
new file mode 100644
index 000000000..307a85f91
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/homeDirectoryTemplate/conf/lang/contractions_fr.txt b/solr/homeDirectoryTemplate/conf/lang/contractions_fr.txt
new file mode 100644
index 000000000..f1bba51b2
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/contractions_fr.txt
@@ -0,0 +1,15 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
+d
+c
+jusqu
+quoiqu
+lorsqu
+puisqu
diff --git a/solr/homeDirectoryTemplate/conf/lang/contractions_ga.txt b/solr/homeDirectoryTemplate/conf/lang/contractions_ga.txt
new file mode 100644
index 000000000..9ebe7fa34
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/homeDirectoryTemplate/conf/lang/contractions_it.txt b/solr/homeDirectoryTemplate/conf/lang/contractions_it.txt
new file mode 100644
index 000000000..cac040953
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/homeDirectoryTemplate/conf/lang/hyphenations_ga.txt b/solr/homeDirectoryTemplate/conf/lang/hyphenations_ga.txt
new file mode 100644
index 000000000..4d2642cc5
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/homeDirectoryTemplate/conf/lang/stemdict_nl.txt b/solr/homeDirectoryTemplate/conf/lang/stemdict_nl.txt
new file mode 100644
index 000000000..441072971
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the Dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/homeDirectoryTemplate/conf/lang/stoptags_ja.txt b/solr/homeDirectoryTemplate/conf/lang/stoptags_ja.txt
new file mode 100644
index 000000000..71b750845
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token whose part-of-speech tag exactly matches one of those defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#名詞
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#名詞-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#名詞-固有名詞
+#
+# noun-proper-misc: miscellaneous proper nouns
+#名詞-固有名詞-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#名詞-固有名詞-人名
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. お市の方
+#名詞-固有名詞-人名-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#名詞-固有名詞-人名-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#名詞-固有名詞-人名-名
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産省, NHK
+#名詞-固有名詞-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#名詞-固有名詞-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, バルセロナ, 京都
+#名詞-固有名詞-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#名詞-固有名詞-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#名詞-代名詞
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
+#名詞-代名詞-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
+#名詞-代名詞-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, 午後, 少量
+#名詞-副詞可能
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (する, できる, なさる, くださる)
+# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
+#名詞-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+# e.g. 健康, 安易, 駄目, だめ
+#名詞-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#名詞-数
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#名詞-非自立
+#
+# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
+# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
+# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
+# わり, 割り, 割, ん-口語/, もん-口語/
+#名詞-非自立-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
+# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
+# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
+# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
+# 儘, 侭, みぎり, 矢先
+#名詞-非自立-副詞可能
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よう(だ) ("you(da)").
+# e.g. よう, やう, 様 (よう)
+#名詞-非自立-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form な (aux "da").
+# e.g. みたい, ふう
+#名詞-非自立-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#名詞-特殊
+#
+# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. そう
+#名詞-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#名詞-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
+# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
+#名詞-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. 君, 様, 著
+#名詞-接尾-人名
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#名詞-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分け, 入り, 落ち, 買い
+#名詞-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. そう
+#名詞-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula だ ("da").
+# e.g. 的, げ, がち
+#名詞-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
+#名詞-接尾-副詞可能
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
+#名詞-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽し) さ, (考え) 方
+#名詞-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#名詞-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
+# semantically verb-like.
+# e.g. ごらん, ご覧, 御覧, 頂戴
+#名詞-動詞非自立的
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
+# is いわく ("iwaku").
+#名詞-引用文字列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+# behave like an adjective.
+# e.g. 申し訳, 仕方, とんでも, 違い
+#名詞-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接頭詞
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
+#接頭詞-名詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by なる/なさる/くださる.
+# e.g. お (読みなさい), お (座り)
+#接頭詞-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. お (寒いですねえ), バカ (でかい)
+#接頭詞-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. 約, およそ, 毎時
+#接頭詞-数接続
+#
+#####
+# verb: unclassified verbs
+#動詞
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#動詞-非自立
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-非自立
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. あいかわらず, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
+# な, する, だ, etc.
+# e.g. こんなに, そんなに, あんなに, なにか, なんでも
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
+# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
+# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. が, けれども, そして, じゃあ, それどころか
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-格助詞
+#
+# particle-case-misc: Case particles.
+# e.g. から, が, で, と, に, へ, より, を, の, にて
+助詞-格助詞-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person’s speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
+助詞-格助詞-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
+# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
+# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
+# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
+# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
+# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
+# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
+# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
+助詞-格助詞-連語
+#
+# particle-conjunctive:
+# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
+# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
+# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. こそ, さえ, しか, すら, は, も, ぞ
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
+# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
+# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
+# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
+# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (松島) や
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. と, たり, だの, だり, とか, なり, や, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
+# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
+# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
+# 「(祈りが届いたせい) か (, 試験に合格した.)」
+# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
+# e.g. か
+助詞-副助詞/並立助詞/終助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. に, と
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
+# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
+#感動詞
+#
+#####
+# symbol: unclassified Symbols.
+記号
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記号-一般
+#
+# symbol-comma: Commas
+# e.g. [,、]
+記号-読点
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記号-句点
+#
+# symbol-space: Full-width whitespace.
+記号-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『【]
+記号-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’”』」】]
+記号-括弧閉
+#
+# symbol-alphabetic:
+#記号-アルファベット
+#
+#####
+# other: unclassified other
+#その他
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (だ)ァ
+その他-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. あの, うんと, えと
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+非言語音
+#
+#####
+# fragment:
+#語断片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ar.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ar.txt
new file mode 100644
index 000000000..046829db6
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+في
+وفي
+فيها
+فيه
+و
+ف
+ثم
+او
+أو
+ب
+بها
+به
+ا
+أ
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+فما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+فان
+فأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+فهى
+فهي
+فهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+نحو
+بين
+بينما
+منذ
+ضمن
+حيث
+الان
+الآن
+خلال
+بعد
+قبل
+حتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_bg.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_bg.txt
new file mode 100644
index 000000000..1ae4ba2ae
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бяха
+в
+вас
+ваш
+ваша
+вероятно
+вече
+взема
+ви
+вие
+винаги
+все
+всеки
+всички
+всичко
+всяка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+досега
+доста
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+засега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иска
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+която
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+моля
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+нас
+не
+него
+нея
+ни
+ние
+никой
+нито
+но
+някои
+някой
+няма
+обаче
+около
+освен
+особено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+после
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+с
+са
+само
+се
+сега
+си
+скоро
+след
+сме
+според
+сред
+срещу
+сте
+съм
+със
+също
+т
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+трябва
+тук
+тъй
+тя
+тях
+у
+харесва
+ч
+че
+често
+чрез
+ще
+щом
+я
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ca.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ca.txt
new file mode 100644
index 000000000..3da65deaf
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+ací
+ah
+així
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allà
+allí
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquí
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+està
+estàvem
+estaven
+estàveu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ckb.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ckb.txt
new file mode 100644
index 000000000..87abf118f
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ckb.txt
@@ -0,0 +1,136 @@
+# set of Kurdish stopwords
+# note these have been normalized with our scheme (e represented with U+06D5, etc)
+# constructed from:
+# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
+# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
+# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
+
+# and
+و
+# which
+کە
+# of
+ی
+# made/did
+کرد
+# that/which
+ئەوەی
+# on/head
+سەر
+# two
+دوو
+# also
+هەروەها
+# from/that
+لەو
+# makes/does
+دەکات
+# some
+چەند
+# every
+هەر
+
+# demonstratives
+# that
+ئەو
+# this
+ئەم
+
+# personal pronouns
+# I
+من
+# we
+ئێمە
+# you
+تۆ
+# you
+ئێوە
+# he/she/it
+ئەو
+# they
+ئەوان
+
+# prepositions
+# to/with/by
+بە
+پێ
+# without
+بەبێ
+# along with/while/during
+بەدەم
+# in the opinion of
+بەلای
+# according to
+بەپێی
+# before
+بەرلە
+# in the direction of
+بەرەوی
+# in front of/toward
+بەرەوە
+# before/in the face of
+بەردەم
+# without
+بێ
+# except for
+بێجگە
+# for
+بۆ
+# on/in
+دە
+تێ
+# with
+دەگەڵ
+# after
+دوای
+# except for/aside from
+جگە
+# in/from
+لە
+لێ
+# in front of/before/because of
+لەبەر
+# between/among
+لەبەینی
+# concerning/about
+لەبابەت
+# concerning
+لەبارەی
+# instead of
+لەباتی
+# beside
+لەبن
+# instead of
+لەبرێتی
+# behind
+لەدەم
+# with/together with
+لەگەڵ
+# by
+لەلایەن
+# within
+لەناو
+# between/among
+لەنێو
+# for the sake of
+لەپێناوی
+# with respect to
+لەرەوی
+# by means of/for
+لەرێ
+# for the sake of
+لەرێگا
+# on/on top of/according to
+لەسەر
+# under
+لەژێر
+# between/among
+ناو
+# between/among
+نێوان
+# after
+پاش
+# before
+پێش
+# like
+وەک
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_cz.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_cz.txt
new file mode 100644
index 000000000..53c6097da
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tímto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proč
+máte
+tato
+kam
+tohoto
+kdo
+kteří
+mi
+nám
+tom
+tomuto
+mít
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tím
+takže
+svých
+její
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+či
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+článku
+články
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+první
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+není
+vás
+jen
+podle
+zde
+už
+být
+více
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+další
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+přičemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jí
+ji
+mě
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jíž
+jelikož
+jež
+jakož
+načež
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_da.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_da.txt
new file mode 100644
index 000000000..42e6145b9
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_da.txt
@@ -0,0 +1,110 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | infinitive of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | been
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_de.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_de.txt
new file mode 100644
index 000000000..86525e7ae
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_de.txt
@@ -0,0 +1,294 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_el.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_el.txt
new file mode 100644
index 000000000..232681f5b
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_en.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_en.txt
new file mode 100644
index 000000000..2c164c0b2
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_es.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_es.txt
new file mode 100644
index 000000000..487d78c8d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_es.txt
@@ -0,0 +1,356 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sí | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mí | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_eu.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_eu.txt
new file mode 100644
index 000000000..25f1db934
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of Basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_fa.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_fa.txt
new file mode 100644
index 000000000..723641c6d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_fi.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_fi.txt
new file mode 100644
index 000000000..4372c9a05
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_fi.txt
@@ -0,0 +1,97 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_fr.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_fr.txt
new file mode 100644
index 000000000..749abae68
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_fr.txt
@@ -0,0 +1,186 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | or (also matches unaccented "où", where)
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+cela | that
+celà | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ga.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ga.txt
new file mode 100644
index 000000000..9ff88d747
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_gl.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_gl.txt
new file mode 100644
index 000000000..d8760b12c
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_hi.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_hi.txt
new file mode 100644
index 000000000..86286bb08
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अभी
+आदि
+आप
+इत्यादि
+इन
+इनका
+इन्हीं
+इन्हें
+इन्हों
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उन्हीं
+उन्हें
+उन्हों
+उस
+उसके
+उसी
+उसे
+एक
+एवं
+एस
+ऐसे
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किन्हें
+किन्हों
+किया
+किर
+किस
+किसी
+किसे
+की
+कुछ
+कुल
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाँ
+जा
+जितना
+जिन
+जिन्हें
+जिन्हों
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिन्हें
+तिन्हों
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दुसरा
+दूसरे
+दो
+द्वारा
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहुत
+बाद
+बाला
+बिलकुल
+भी
+भीतर
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाँ
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱ्वासा
+लिए
+लिये
+लेकिन
+व
+वर्ग
+वह
+वह
+वहाँ
+वहीं
+वाले
+वुह
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सभी
+साथ
+साबुत
+साभ
+सारा
+से
+सो
+ही
+हुआ
+हुई
+हुए
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सभि
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अभि
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+एसे
+रवासा
+कोन
+निचे
+काफि
+उसि
+पुरा
+भितर
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हुइ
+कोनसा
+इसकि
+दुसरे
+जहां
+अप
+किंहों
+उनकि
+भि
+वरग
+हुअ
+जेसा
+नहिं
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_hu.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_hu.txt
new file mode 100644
index 000000000..37526da8a
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_hu.txt
@@ -0,0 +1,211 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_hy.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_hy.txt
new file mode 100644
index 000000000..60c1c50fb
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+այդ
+այլ
+այն
+այս
+դու
+դուք
+եմ
+են
+ենք
+ես
+եք
+է
+էի
+էին
+էինք
+էիր
+էիք
+էր
+ըստ
+թ
+ի
+ին
+իսկ
+իր
+կամ
+համար
+հետ
+հետո
+մենք
+մեջ
+մի
+ն
+նա
+նաև
+նրա
+նրանք
+որ
+որը
+որոնք
+որպես
+ու
+ում
+պիտի
+վրա
+և
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_id.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_id.txt
new file mode 100644
index 000000000..4617f83a5
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_it.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_it.txt
new file mode 100644
index 000000000..1219cc773
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_it.txt
@@ -0,0 +1,303 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ja.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ja.txt
new file mode 100644
index 000000000..d4321be6b
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+の
+に
+は
+を
+た
+が
+で
+て
+と
+し
+れ
+さ
+ある
+いる
+も
+する
+から
+な
+こと
+として
+い
+や
+れる
+など
+なっ
+ない
+この
+ため
+その
+あっ
+よう
+また
+もの
+という
+あり
+まで
+られ
+なる
+へ
+か
+だ
+これ
+によって
+により
+おり
+より
+による
+ず
+なり
+られる
+において
+ば
+なかっ
+なく
+しかし
+について
+せ
+だっ
+その後
+できる
+それ
+う
+ので
+なお
+のみ
+でき
+き
+つ
+における
+および
+いう
+さらに
+でも
+ら
+たり
+その他
+に関する
+たち
+ます
+ん
+なら
+に対して
+特に
+せる
+及び
+これら
+とき
+では
+にて
+ほか
+ながら
+うち
+そして
+とともに
+ただし
+かつて
+それぞれ
+または
+お
+ほど
+ものの
+に対する
+ほとんど
+と共に
+といった
+です
+とも
+ところ
+ここ
+##### End of file
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_lv.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_lv.txt
new file mode 100644
index 000000000..e21a23c06
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+ārpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pār
+pēc
+pie
+pirms
+pret
+priekš
+starp
+šaipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tādēļ
+tā
+ne
+tikvien
+vien
+kā
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taču
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekām
+iekāms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tālab
+tāpēc
+nekā
+itin
+jā
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijām
+bijāt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikām
+tikāt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapāt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvām
+kļuvāt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varējām
+varēšu
+varēsim
+var
+varēji
+varējāt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_nl.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_nl.txt
new file mode 100644
index 000000000..47a2aeacf
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_nl.txt
@@ -0,0 +1,119 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_no.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_no.txt
new file mode 100644
index 000000000..a7a2c28ba
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_no.txt
@@ -0,0 +1,194 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | oneself (reflexive)
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflexive)
+sjøl | self (reflexive)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+å | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_pt.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_pt.txt
new file mode 100644
index 000000000..acfeb01af
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_pt.txt
@@ -0,0 +1,253 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ro.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ro.txt
new file mode 100644
index 000000000..4fdee90a5
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_ru.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_ru.txt
new file mode 100644
index 000000000..55271400c
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_ru.txt
@@ -0,0 +1,243 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+я | i
+с | from
+со | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+все | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+ты | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+меня | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+если | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+вас | you accusative
+нибудь | indef. suffix preceded by hyphen
+опять | again
+уж | already, but homonym of `adder'
+вам | to you
+сказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+себя | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+есть | there is/are
+надо | got to, must
+ней | prepositional form of ей
+для | for
+мы | we
+тебя | thee
+их | them, their
+чем | than
+была | she was
+сам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+себе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifier particle `же'
+тогда | then
+кто | who
+этот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+этого | genitive form of `this'
+какой | which
+совсем | altogether
+ним | prepositional form of `его', `они'
+здесь | here
+этом | prepositional form of `этот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажется | it seems
+сейчас | now
+были | they were
+куда | where to
+зачем | why
+сказать | to say
+всех | all (acc., gen. preposn. plural)
+никогда | never
+сегодня | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+после | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+эти | these
+нас | us
+про | about
+всего | in all, only, of all
+них | prepositional form of `они' (they)
+какая | which, feminine
+много | lots
+разве | interrogative particle
+сказала | she said
+три | three
+эту | this, acc. fem. sing.
+моя | my, feminine
+впрочем | moreover, besides
+хорошо | good
+свою | ones own, acc. fem. sing.
+этой | oblique form of `эта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+нельзя | one must not
+такой | such a one
+им | to them
+более | more
+всегда | always
+конечно | of course
+всю | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | я меня мне мной [мною]
+ | ты тебя тебе тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее ей ею [нее, ней, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы нас нам нами
+ | вы вас вам вами
+ | они их им ими [них, ним, ними]
+ |
+ | себя себе собой [собою]
+ |
+ | demonstrative pronouns: этот (this), тот (that)
+ |
+ | этот эта это эти
+ | этого эту это эти
+ | этого этой этого этих
+ | этому этой этому этим
+ | этим этой этим [этою] этими
+ | этом этой этом этих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) весь (all)
+ |
+ | весь вся все все
+ | всего всю все все
+ | всего всей всего всех
+ | всему всей всему всем
+ | всем всей всем [всею] всеми
+ | всем всей всем всех
+ |
+ | (b) сам (himself etc)
+ |
+ | сам сама само сами
+ | самого саму само самих
+ | самого самой самого самих
+ | самому самой самому самим
+ | самим самой самим [самою] самими
+ | самом самой самом самих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв есть суть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзя
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_sv.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_sv.txt
new file mode 100644
index 000000000..096f87f67
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_sv.txt
@@ -0,0 +1,133 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | should, would (past tense of 'skola')
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_th.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_th.txt
new file mode 100644
index 000000000..07f0fabe6
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
diff --git a/solr/homeDirectoryTemplate/conf/lang/stopwords_tr.txt b/solr/homeDirectoryTemplate/conf/lang/stopwords_tr.txt
new file mode 100644
index 000000000..84d9408d4
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/homeDirectoryTemplate/conf/lang/userdict_ja.txt b/solr/homeDirectoryTemplate/conf/lang/userdict_ja.txt
new file mode 100644
index 000000000..6f0368e4d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/homeDirectoryTemplate/conf/mapping-FoldToASCII.txt b/solr/homeDirectoryTemplate/conf/mapping-FoldToASCII.txt
index 020f833b6..9a84b6eac 100644
--- a/solr/homeDirectoryTemplate/conf/mapping-FoldToASCII.txt
+++ b/solr/homeDirectoryTemplate/conf/mapping-FoldToASCII.txt
@@ -1,3813 +1,3813 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This map converts alphabetic, numeric, and symbolic Unicode characters
-# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
-# block) into their ASCII equivalents, if one exists.
-#
-# Characters from the following Unicode blocks are converted; however, only
-# those characters with reasonable ASCII alternatives are converted:
-#
-# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
-# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
-# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
-# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
-# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
-# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
-# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
-# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
-# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
-# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
-# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
-# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
-# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
-# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
-# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
-# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
-#
-# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
-#
-# The set of character conversions supported by this map is a superset of
-# those supported by the map represented by mapping-ISOLatin1Accent.txt.
-#
-# See the bottom of this file for the Perl script used to generate the contents
-# of this file (without this header) from ASCIIFoldingFilter.java.
-
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-
-# À [LATIN CAPITAL LETTER A WITH GRAVE]
-"\u00C0" => "A"
-
-# Á [LATIN CAPITAL LETTER A WITH ACUTE]
-"\u00C1" => "A"
-
-# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
-"\u00C2" => "A"
-
-# Ã [LATIN CAPITAL LETTER A WITH TILDE]
-"\u00C3" => "A"
-
-# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
-"\u00C4" => "A"
-
-# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
-"\u00C5" => "A"
-
-# Ā [LATIN CAPITAL LETTER A WITH MACRON]
-"\u0100" => "A"
-
-# Ă [LATIN CAPITAL LETTER A WITH BREVE]
-"\u0102" => "A"
-
-# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
-"\u0104" => "A"
-
-# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
-"\u018F" => "A"
-
-# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
-"\u01CD" => "A"
-
-# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DE" => "A"
-
-# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E0" => "A"
-
-# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FA" => "A"
-
-# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
-"\u0200" => "A"
-
-# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
-"\u0202" => "A"
-
-# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
-"\u0226" => "A"
-
-# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
-"\u023A" => "A"
-
-# ᴀ [LATIN LETTER SMALL CAPITAL A]
-"\u1D00" => "A"
-
-# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
-"\u1E00" => "A"
-
-# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
-"\u1EA0" => "A"
-
-# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
-"\u1EA2" => "A"
-
-# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA4" => "A"
-
-# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA6" => "A"
-
-# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA8" => "A"
-
-# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAA" => "A"
-
-# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAC" => "A"
-
-# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
-"\u1EAE" => "A"
-
-# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
-"\u1EB0" => "A"
-
-# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB2" => "A"
-
-# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
-"\u1EB4" => "A"
-
-# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB6" => "A"
-
-# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
-"\u24B6" => "A"
-
-# A [FULLWIDTH LATIN CAPITAL LETTER A]
-"\uFF21" => "A"
-
-# à [LATIN SMALL LETTER A WITH GRAVE]
-"\u00E0" => "a"
-
-# á [LATIN SMALL LETTER A WITH ACUTE]
-"\u00E1" => "a"
-
-# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
-"\u00E2" => "a"
-
-# ã [LATIN SMALL LETTER A WITH TILDE]
-"\u00E3" => "a"
-
-# ä [LATIN SMALL LETTER A WITH DIAERESIS]
-"\u00E4" => "a"
-
-# å [LATIN SMALL LETTER A WITH RING ABOVE]
-"\u00E5" => "a"
-
-# ā [LATIN SMALL LETTER A WITH MACRON]
-"\u0101" => "a"
-
-# ă [LATIN SMALL LETTER A WITH BREVE]
-"\u0103" => "a"
-
-# ą [LATIN SMALL LETTER A WITH OGONEK]
-"\u0105" => "a"
-
-# ǎ [LATIN SMALL LETTER A WITH CARON]
-"\u01CE" => "a"
-
-# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DF" => "a"
-
-# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E1" => "a"
-
-# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FB" => "a"
-
-# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
-"\u0201" => "a"
-
-# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
-"\u0203" => "a"
-
-# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
-"\u0227" => "a"
-
-# ɐ [LATIN SMALL LETTER TURNED A]
-"\u0250" => "a"
-
-# ə [LATIN SMALL LETTER SCHWA]
-"\u0259" => "a"
-
-# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
-"\u025A" => "a"
-
-# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
-"\u1D8F" => "a"
-
-# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
-"\u1D95" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH RING BELOW]
-"\u1E01" => "a"
-
-# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
-"\u1E9A" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
-"\u1EA1" => "a"
-
-# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
-"\u1EA3" => "a"
-
-# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA5" => "a"
-
-# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA7" => "a"
-
-# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA9" => "a"
-
-# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAB" => "a"
-
-# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAD" => "a"
-
-# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
-"\u1EAF" => "a"
-
-# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
-"\u1EB1" => "a"
-
-# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB3" => "a"
-
-# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
-"\u1EB5" => "a"
-
-# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB7" => "a"
-
-# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
-"\u2090" => "a"
-
-# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
-"\u2094" => "a"
-
-# ⓐ [CIRCLED LATIN SMALL LETTER A]
-"\u24D0" => "a"
-
-# ⱥ [LATIN SMALL LETTER A WITH STROKE]
-"\u2C65" => "a"
-
-# Ɐ [LATIN CAPITAL LETTER TURNED A]
-"\u2C6F" => "a"
-
-# a [FULLWIDTH LATIN SMALL LETTER A]
-"\uFF41" => "a"
-
-# Ꜳ [LATIN CAPITAL LETTER AA]
-"\uA732" => "AA"
-
-# Æ [LATIN CAPITAL LETTER AE]
-"\u00C6" => "AE"
-
-# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
-"\u01E2" => "AE"
-
-# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
-"\u01FC" => "AE"
-
-# ᴁ [LATIN LETTER SMALL CAPITAL AE]
-"\u1D01" => "AE"
-
-# Ꜵ [LATIN CAPITAL LETTER AO]
-"\uA734" => "AO"
-
-# Ꜷ [LATIN CAPITAL LETTER AU]
-"\uA736" => "AU"
-
-# Ꜹ [LATIN CAPITAL LETTER AV]
-"\uA738" => "AV"
-
-# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
-"\uA73A" => "AV"
-
-# Ꜽ [LATIN CAPITAL LETTER AY]
-"\uA73C" => "AY"
-
-# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
-"\u249C" => "(a)"
-
-# ꜳ [LATIN SMALL LETTER AA]
-"\uA733" => "aa"
-
-# æ [LATIN SMALL LETTER AE]
-"\u00E6" => "ae"
-
-# ǣ [LATIN SMALL LETTER AE WITH MACRON]
-"\u01E3" => "ae"
-
-# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
-"\u01FD" => "ae"
-
-# ᴂ [LATIN SMALL LETTER TURNED AE]
-"\u1D02" => "ae"
-
-# ꜵ [LATIN SMALL LETTER AO]
-"\uA735" => "ao"
-
-# ꜷ [LATIN SMALL LETTER AU]
-"\uA737" => "au"
-
-# ꜹ [LATIN SMALL LETTER AV]
-"\uA739" => "av"
-
-# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
-"\uA73B" => "av"
-
-# ꜽ [LATIN SMALL LETTER AY]
-"\uA73D" => "ay"
-
-# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
-"\u0181" => "B"
-
-# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
-"\u0182" => "B"
-
-# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
-"\u0243" => "B"
-
-# ʙ [LATIN LETTER SMALL CAPITAL B]
-"\u0299" => "B"
-
-# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
-"\u1D03" => "B"
-
-# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
-"\u1E02" => "B"
-
-# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
-"\u1E04" => "B"
-
-# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
-"\u1E06" => "B"
-
-# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
-"\u24B7" => "B"
-
-# B [FULLWIDTH LATIN CAPITAL LETTER B]
-"\uFF22" => "B"
-
-# ƀ [LATIN SMALL LETTER B WITH STROKE]
-"\u0180" => "b"
-
-# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
-"\u0183" => "b"
-
-# ɓ [LATIN SMALL LETTER B WITH HOOK]
-"\u0253" => "b"
-
-# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
-"\u1D6C" => "b"
-
-# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
-"\u1D80" => "b"
-
-# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
-"\u1E03" => "b"
-
-# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
-"\u1E05" => "b"
-
-# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
-"\u1E07" => "b"
-
-# ⓑ [CIRCLED LATIN SMALL LETTER B]
-"\u24D1" => "b"
-
-# b [FULLWIDTH LATIN SMALL LETTER B]
-"\uFF42" => "b"
-
-# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
-"\u249D" => "(b)"
-
-# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
-"\u00C7" => "C"
-
-# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
-"\u0106" => "C"
-
-# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
-"\u0108" => "C"
-
-# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
-"\u010A" => "C"
-
-# Č [LATIN CAPITAL LETTER C WITH CARON]
-"\u010C" => "C"
-
-# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
-"\u0187" => "C"
-
-# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
-"\u023B" => "C"
-
-# ʗ [LATIN LETTER STRETCHED C]
-"\u0297" => "C"
-
-# ᴄ [LATIN LETTER SMALL CAPITAL C]
-"\u1D04" => "C"
-
-# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E08" => "C"
-
-# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
-"\u24B8" => "C"
-
-# C [FULLWIDTH LATIN CAPITAL LETTER C]
-"\uFF23" => "C"
-
-# ç [LATIN SMALL LETTER C WITH CEDILLA]
-"\u00E7" => "c"
-
-# ć [LATIN SMALL LETTER C WITH ACUTE]
-"\u0107" => "c"
-
-# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
-"\u0109" => "c"
-
-# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
-"\u010B" => "c"
-
-# č [LATIN SMALL LETTER C WITH CARON]
-"\u010D" => "c"
-
-# ƈ [LATIN SMALL LETTER C WITH HOOK]
-"\u0188" => "c"
-
-# ȼ [LATIN SMALL LETTER C WITH STROKE]
-"\u023C" => "c"
-
-# ɕ [LATIN SMALL LETTER C WITH CURL]
-"\u0255" => "c"
-
-# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E09" => "c"
-
-# ↄ [LATIN SMALL LETTER REVERSED C]
-"\u2184" => "c"
-
-# ⓒ [CIRCLED LATIN SMALL LETTER C]
-"\u24D2" => "c"
-
-# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
-"\uA73E" => "c"
-
-# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
-"\uA73F" => "c"
-
-# c [FULLWIDTH LATIN SMALL LETTER C]
-"\uFF43" => "c"
-
-# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
-"\u249E" => "(c)"
-
-# Ð [LATIN CAPITAL LETTER ETH]
-"\u00D0" => "D"
-
-# Ď [LATIN CAPITAL LETTER D WITH CARON]
-"\u010E" => "D"
-
-# Đ [LATIN CAPITAL LETTER D WITH STROKE]
-"\u0110" => "D"
-
-# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
-"\u0189" => "D"
-
-# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
-"\u018A" => "D"
-
-# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
-"\u018B" => "D"
-
-# ᴅ [LATIN LETTER SMALL CAPITAL D]
-"\u1D05" => "D"
-
-# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
-"\u1D06" => "D"
-
-# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
-"\u1E0A" => "D"
-
-# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
-"\u1E0C" => "D"
-
-# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
-"\u1E0E" => "D"
-
-# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
-"\u1E10" => "D"
-
-# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E12" => "D"
-
-# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
-"\u24B9" => "D"
-
-# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
-"\uA779" => "D"
-
-# D [FULLWIDTH LATIN CAPITAL LETTER D]
-"\uFF24" => "D"
-
-# ð [LATIN SMALL LETTER ETH]
-"\u00F0" => "d"
-
-# ď [LATIN SMALL LETTER D WITH CARON]
-"\u010F" => "d"
-
-# đ [LATIN SMALL LETTER D WITH STROKE]
-"\u0111" => "d"
-
-# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
-"\u018C" => "d"
-
-# ȡ [LATIN SMALL LETTER D WITH CURL]
-"\u0221" => "d"
-
-# ɖ [LATIN SMALL LETTER D WITH TAIL]
-"\u0256" => "d"
-
-# ɗ [LATIN SMALL LETTER D WITH HOOK]
-"\u0257" => "d"
-
-# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
-"\u1D6D" => "d"
-
-# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
-"\u1D81" => "d"
-
-# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
-"\u1D91" => "d"
-
-# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
-"\u1E0B" => "d"
-
-# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
-"\u1E0D" => "d"
-
-# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
-"\u1E0F" => "d"
-
-# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
-"\u1E11" => "d"
-
-# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E13" => "d"
-
-# ⓓ [CIRCLED LATIN SMALL LETTER D]
-"\u24D3" => "d"
-
-# ꝺ [LATIN SMALL LETTER INSULAR D]
-"\uA77A" => "d"
-
-# d [FULLWIDTH LATIN SMALL LETTER D]
-"\uFF44" => "d"
-
-# DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
-"\u01C4" => "DZ"
-
-# DZ [LATIN CAPITAL LETTER DZ]
-"\u01F1" => "DZ"
-
-# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
-"\u01C5" => "Dz"
-
-# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
-"\u01F2" => "Dz"
-
-# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
-"\u249F" => "(d)"
-
-# ȸ [LATIN SMALL LETTER DB DIGRAPH]
-"\u0238" => "db"
-
-# dž [LATIN SMALL LETTER DZ WITH CARON]
-"\u01C6" => "dz"
-
-# dz [LATIN SMALL LETTER DZ]
-"\u01F3" => "dz"
-
-# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
-"\u02A3" => "dz"
-
-# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
-"\u02A5" => "dz"
-
-# È [LATIN CAPITAL LETTER E WITH GRAVE]
-"\u00C8" => "E"
-
-# É [LATIN CAPITAL LETTER E WITH ACUTE]
-"\u00C9" => "E"
-
-# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
-"\u00CA" => "E"
-
-# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
-"\u00CB" => "E"
-
-# Ē [LATIN CAPITAL LETTER E WITH MACRON]
-"\u0112" => "E"
-
-# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
-"\u0114" => "E"
-
-# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
-"\u0116" => "E"
-
-# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
-"\u0118" => "E"
-
-# Ě [LATIN CAPITAL LETTER E WITH CARON]
-"\u011A" => "E"
-
-# Ǝ [LATIN CAPITAL LETTER REVERSED E]
-"\u018E" => "E"
-
-# Ɛ [LATIN CAPITAL LETTER OPEN E]
-"\u0190" => "E"
-
-# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
-"\u0204" => "E"
-
-# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
-"\u0206" => "E"
-
-# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
-"\u0228" => "E"
-
-# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
-"\u0246" => "E"
-
-# ᴇ [LATIN LETTER SMALL CAPITAL E]
-"\u1D07" => "E"
-
-# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
-"\u1E14" => "E"
-
-# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
-"\u1E16" => "E"
-
-# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E18" => "E"
-
-# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
-"\u1E1A" => "E"
-
-# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1C" => "E"
-
-# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
-"\u1EB8" => "E"
-
-# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
-"\u1EBA" => "E"
-
-# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
-"\u1EBC" => "E"
-
-# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBE" => "E"
-
-# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC0" => "E"
-
-# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC2" => "E"
-
-# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC4" => "E"
-
-# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC6" => "E"
-
-# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
-"\u24BA" => "E"
-
-# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
-"\u2C7B" => "E"
-
-# E [FULLWIDTH LATIN CAPITAL LETTER E]
-"\uFF25" => "E"
-
-# è [LATIN SMALL LETTER E WITH GRAVE]
-"\u00E8" => "e"
-
-# é [LATIN SMALL LETTER E WITH ACUTE]
-"\u00E9" => "e"
-
-# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
-"\u00EA" => "e"
-
-# ë [LATIN SMALL LETTER E WITH DIAERESIS]
-"\u00EB" => "e"
-
-# ē [LATIN SMALL LETTER E WITH MACRON]
-"\u0113" => "e"
-
-# ĕ [LATIN SMALL LETTER E WITH BREVE]
-"\u0115" => "e"
-
-# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
-"\u0117" => "e"
-
-# ę [LATIN SMALL LETTER E WITH OGONEK]
-"\u0119" => "e"
-
-# ě [LATIN SMALL LETTER E WITH CARON]
-"\u011B" => "e"
-
-# ǝ [LATIN SMALL LETTER TURNED E]
-"\u01DD" => "e"
-
-# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
-"\u0205" => "e"
-
-# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
-"\u0207" => "e"
-
-# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
-"\u0229" => "e"
-
-# ɇ [LATIN SMALL LETTER E WITH STROKE]
-"\u0247" => "e"
-
-# ɘ [LATIN SMALL LETTER REVERSED E]
-"\u0258" => "e"
-
-# ɛ [LATIN SMALL LETTER OPEN E]
-"\u025B" => "e"
-
-# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
-"\u025C" => "e"
-
-# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
-"\u025D" => "e"
-
-# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
-"\u025E" => "e"
-
-# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
-"\u029A" => "e"
-
-# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
-"\u1D08" => "e"
-
-# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
-"\u1D92" => "e"
-
-# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
-"\u1D93" => "e"
-
-# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
-"\u1D94" => "e"
-
-# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
-"\u1E15" => "e"
-
-# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
-"\u1E17" => "e"
-
-# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E19" => "e"
-
-# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
-"\u1E1B" => "e"
-
-# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1D" => "e"
-
-# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
-"\u1EB9" => "e"
-
-# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
-"\u1EBB" => "e"
-
-# ẽ [LATIN SMALL LETTER E WITH TILDE]
-"\u1EBD" => "e"
-
-# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBF" => "e"
-
-# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC1" => "e"
-
-# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC3" => "e"
-
-# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC5" => "e"
-
-# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC7" => "e"
-
-# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
-"\u2091" => "e"
-
-# ⓔ [CIRCLED LATIN SMALL LETTER E]
-"\u24D4" => "e"
-
-# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
-"\u2C78" => "e"
-
-# e [FULLWIDTH LATIN SMALL LETTER E]
-"\uFF45" => "e"
-
-# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
-"\u24A0" => "(e)"
-
-# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
-"\u0191" => "F"
-
-# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
-"\u1E1E" => "F"
-
-# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
-"\u24BB" => "F"
-
-# ꜰ [LATIN LETTER SMALL CAPITAL F]
-"\uA730" => "F"
-
-# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
-"\uA77B" => "F"
-
-# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
-"\uA7FB" => "F"
-
-# F [FULLWIDTH LATIN CAPITAL LETTER F]
-"\uFF26" => "F"
-
-# ƒ [LATIN SMALL LETTER F WITH HOOK]
-"\u0192" => "f"
-
-# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
-"\u1D6E" => "f"
-
-# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
-"\u1D82" => "f"
-
-# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
-"\u1E1F" => "f"
-
-# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
-"\u1E9B" => "f"
-
-# ⓕ [CIRCLED LATIN SMALL LETTER F]
-"\u24D5" => "f"
-
-# ꝼ [LATIN SMALL LETTER INSULAR F]
-"\uA77C" => "f"
-
-# f [FULLWIDTH LATIN SMALL LETTER F]
-"\uFF46" => "f"
-
-# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
-"\u24A1" => "(f)"
-
-# ff [LATIN SMALL LIGATURE FF]
-"\uFB00" => "ff"
-
-# ffi [LATIN SMALL LIGATURE FFI]
-"\uFB03" => "ffi"
-
-# ffl [LATIN SMALL LIGATURE FFL]
-"\uFB04" => "ffl"
-
-# fi [LATIN SMALL LIGATURE FI]
-"\uFB01" => "fi"
-
-# fl [LATIN SMALL LIGATURE FL]
-"\uFB02" => "fl"
-
-# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
-"\u011C" => "G"
-
-# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
-"\u011E" => "G"
-
-# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
-"\u0120" => "G"
-
-# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
-"\u0122" => "G"
-
-# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
-"\u0193" => "G"
-
-# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
-"\u01E4" => "G"
-
-# ǥ [LATIN SMALL LETTER G WITH STROKE]
-"\u01E5" => "G"
-
-# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
-"\u01E6" => "G"
-
-# ǧ [LATIN SMALL LETTER G WITH CARON]
-"\u01E7" => "G"
-
-# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
-"\u01F4" => "G"
-
-# ɢ [LATIN LETTER SMALL CAPITAL G]
-"\u0262" => "G"
-
-# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
-"\u029B" => "G"
-
-# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
-"\u1E20" => "G"
-
-# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
-"\u24BC" => "G"
-
-# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
-"\uA77D" => "G"
-
-# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
-"\uA77E" => "G"
-
-# G [FULLWIDTH LATIN CAPITAL LETTER G]
-"\uFF27" => "G"
-
-# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
-"\u011D" => "g"
-
-# ğ [LATIN SMALL LETTER G WITH BREVE]
-"\u011F" => "g"
-
-# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
-"\u0121" => "g"
-
-# ģ [LATIN SMALL LETTER G WITH CEDILLA]
-"\u0123" => "g"
-
-# ǵ [LATIN SMALL LETTER G WITH ACUTE]
-"\u01F5" => "g"
-
-# ɠ [LATIN SMALL LETTER G WITH HOOK]
-"\u0260" => "g"
-
-# ɡ [LATIN SMALL LETTER SCRIPT G]
-"\u0261" => "g"
-
-# ᵷ [LATIN SMALL LETTER TURNED G]
-"\u1D77" => "g"
-
-# ᵹ [LATIN SMALL LETTER INSULAR G]
-"\u1D79" => "g"
-
-# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
-"\u1D83" => "g"
-
-# ḡ [LATIN SMALL LETTER G WITH MACRON]
-"\u1E21" => "g"
-
-# ⓖ [CIRCLED LATIN SMALL LETTER G]
-"\u24D6" => "g"
-
-# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
-"\uA77F" => "g"
-
-# g [FULLWIDTH LATIN SMALL LETTER G]
-"\uFF47" => "g"
-
-# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
-"\u24A2" => "(g)"
-
-# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
-"\u0124" => "H"
-
-# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
-"\u0126" => "H"
-
-# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
-"\u021E" => "H"
-
-# ʜ [LATIN LETTER SMALL CAPITAL H]
-"\u029C" => "H"
-
-# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
-"\u1E22" => "H"
-
-# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
-"\u1E24" => "H"
-
-# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
-"\u1E26" => "H"
-
-# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
-"\u1E28" => "H"
-
-# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
-"\u1E2A" => "H"
-
-# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
-"\u24BD" => "H"
-
-# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
-"\u2C67" => "H"
-
-# Ⱶ [LATIN CAPITAL LETTER HALF H]
-"\u2C75" => "H"
-
-# H [FULLWIDTH LATIN CAPITAL LETTER H]
-"\uFF28" => "H"
-
-# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
-"\u0125" => "h"
-
-# ħ [LATIN SMALL LETTER H WITH STROKE]
-"\u0127" => "h"
-
-# ȟ [LATIN SMALL LETTER H WITH CARON]
-"\u021F" => "h"
-
-# ɥ [LATIN SMALL LETTER TURNED H]
-"\u0265" => "h"
-
-# ɦ [LATIN SMALL LETTER H WITH HOOK]
-"\u0266" => "h"
-
-# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
-"\u02AE" => "h"
-
-# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
-"\u02AF" => "h"
-
-# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
-"\u1E23" => "h"
-
-# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
-"\u1E25" => "h"
-
-# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
-"\u1E27" => "h"
-
-# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
-"\u1E29" => "h"
-
-# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
-"\u1E2B" => "h"
-
-# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
-"\u1E96" => "h"
-
-# ⓗ [CIRCLED LATIN SMALL LETTER H]
-"\u24D7" => "h"
-
-# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
-"\u2C68" => "h"
-
-# ⱶ [LATIN SMALL LETTER HALF H]
-"\u2C76" => "h"
-
-# h [FULLWIDTH LATIN SMALL LETTER H]
-"\uFF48" => "h"
-
-# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
-"\u01F6" => "HV"
-
-# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
-"\u24A3" => "(h)"
-
-# ƕ [LATIN SMALL LETTER HV]
-"\u0195" => "hv"
-
-# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
-"\u00CC" => "I"
-
-# Í [LATIN CAPITAL LETTER I WITH ACUTE]
-"\u00CD" => "I"
-
-# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
-"\u00CE" => "I"
-
-# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
-"\u00CF" => "I"
-
-# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
-"\u0128" => "I"
-
-# Ī [LATIN CAPITAL LETTER I WITH MACRON]
-"\u012A" => "I"
-
-# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
-"\u012C" => "I"
-
-# Į [LATIN CAPITAL LETTER I WITH OGONEK]
-"\u012E" => "I"
-
-# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
-"\u0130" => "I"
-
-# Ɩ [LATIN CAPITAL LETTER IOTA]
-"\u0196" => "I"
-
-# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
-"\u0197" => "I"
-
-# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
-"\u01CF" => "I"
-
-# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
-"\u0208" => "I"
-
-# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
-"\u020A" => "I"
-
-# ɪ [LATIN LETTER SMALL CAPITAL I]
-"\u026A" => "I"
-
-# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
-"\u1D7B" => "I"
-
-# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
-"\u1E2C" => "I"
-
-# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2E" => "I"
-
-# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
-"\u1EC8" => "I"
-
-# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
-"\u1ECA" => "I"
-
-# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
-"\u24BE" => "I"
-
-# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
-"\uA7FE" => "I"
-
-# I [FULLWIDTH LATIN CAPITAL LETTER I]
-"\uFF29" => "I"
-
-# ì [LATIN SMALL LETTER I WITH GRAVE]
-"\u00EC" => "i"
-
-# í [LATIN SMALL LETTER I WITH ACUTE]
-"\u00ED" => "i"
-
-# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
-"\u00EE" => "i"
-
-# ï [LATIN SMALL LETTER I WITH DIAERESIS]
-"\u00EF" => "i"
-
-# ĩ [LATIN SMALL LETTER I WITH TILDE]
-"\u0129" => "i"
-
-# ī [LATIN SMALL LETTER I WITH MACRON]
-"\u012B" => "i"
-
-# ĭ [LATIN SMALL LETTER I WITH BREVE]
-"\u012D" => "i"
-
-# į [LATIN SMALL LETTER I WITH OGONEK]
-"\u012F" => "i"
-
-# ı [LATIN SMALL LETTER DOTLESS I]
-"\u0131" => "i"
-
-# ǐ [LATIN SMALL LETTER I WITH CARON]
-"\u01D0" => "i"
-
-# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
-"\u0209" => "i"
-
-# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
-"\u020B" => "i"
-
-# ɨ [LATIN SMALL LETTER I WITH STROKE]
-"\u0268" => "i"
-
-# ᴉ [LATIN SMALL LETTER TURNED I]
-"\u1D09" => "i"
-
-# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
-"\u1D62" => "i"
-
-# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
-"\u1D7C" => "i"
-
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
-"\u1D96" => "i"
-
-# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
-"\u1E2D" => "i"
-
-# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2F" => "i"
-
-# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
-"\u1EC9" => "i"
-
-# ị [LATIN SMALL LETTER I WITH DOT BELOW]
-"\u1ECB" => "i"
-
-# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
-"\u2071" => "i"
-
-# ⓘ [CIRCLED LATIN SMALL LETTER I]
-"\u24D8" => "i"
-
-# i [FULLWIDTH LATIN SMALL LETTER I]
-"\uFF49" => "i"
-
-# IJ [LATIN CAPITAL LIGATURE IJ]
-"\u0132" => "IJ"
-
-# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
-"\u24A4" => "(i)"
-
-# ij [LATIN SMALL LIGATURE IJ]
-"\u0133" => "ij"
-
-# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
-"\u0134" => "J"
-
-# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
-"\u0248" => "J"
-
-# ᴊ [LATIN LETTER SMALL CAPITAL J]
-"\u1D0A" => "J"
-
-# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
-"\u24BF" => "J"
-
-# J [FULLWIDTH LATIN CAPITAL LETTER J]
-"\uFF2A" => "J"
-
-# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
-"\u0135" => "j"
-
-# ǰ [LATIN SMALL LETTER J WITH CARON]
-"\u01F0" => "j"
-
-# ȷ [LATIN SMALL LETTER DOTLESS J]
-"\u0237" => "j"
-
-# ɉ [LATIN SMALL LETTER J WITH STROKE]
-"\u0249" => "j"
-
-# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
-"\u025F" => "j"
-
-# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
-"\u0284" => "j"
-
-# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
-"\u029D" => "j"
-
-# ⓙ [CIRCLED LATIN SMALL LETTER J]
-"\u24D9" => "j"
-
-# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
-"\u2C7C" => "j"
-
-# j [FULLWIDTH LATIN SMALL LETTER J]
-"\uFF4A" => "j"
-
-# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
-"\u24A5" => "(j)"
-
-# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
-"\u0136" => "K"
-
-# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
-"\u0198" => "K"
-
-# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
-"\u01E8" => "K"
-
-# ᴋ [LATIN LETTER SMALL CAPITAL K]
-"\u1D0B" => "K"
-
-# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
-"\u1E30" => "K"
-
-# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
-"\u1E32" => "K"
-
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
-"\u1E34" => "K"
-
-# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
-"\u24C0" => "K"
-
-# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
-"\u2C69" => "K"
-
-# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
-"\uA740" => "K"
-
-# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
-"\uA742" => "K"
-
-# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA744" => "K"
-
-# K [FULLWIDTH LATIN CAPITAL LETTER K]
-"\uFF2B" => "K"
-
-# ķ [LATIN SMALL LETTER K WITH CEDILLA]
-"\u0137" => "k"
-
-# ƙ [LATIN SMALL LETTER K WITH HOOK]
-"\u0199" => "k"
-
-# ǩ [LATIN SMALL LETTER K WITH CARON]
-"\u01E9" => "k"
-
-# ʞ [LATIN SMALL LETTER TURNED K]
-"\u029E" => "k"
-
-# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
-"\u1D84" => "k"
-
-# ḱ [LATIN SMALL LETTER K WITH ACUTE]
-"\u1E31" => "k"
-
-# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
-"\u1E33" => "k"
-
-# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
-"\u1E35" => "k"
-
-# ⓚ [CIRCLED LATIN SMALL LETTER K]
-"\u24DA" => "k"
-
-# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
-"\u2C6A" => "k"
-
-# ꝁ [LATIN SMALL LETTER K WITH STROKE]
-"\uA741" => "k"
-
-# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
-"\uA743" => "k"
-
-# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA745" => "k"
-
-# k [FULLWIDTH LATIN SMALL LETTER K]
-"\uFF4B" => "k"
-
-# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
-"\u24A6" => "(k)"
-
-# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
-"\u0139" => "L"
-
-# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
-"\u013B" => "L"
-
-# Ľ [LATIN CAPITAL LETTER L WITH CARON]
-"\u013D" => "L"
-
-# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
-"\u013F" => "L"
-
-# Ł [LATIN CAPITAL LETTER L WITH STROKE]
-"\u0141" => "L"
-
-# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
-"\u023D" => "L"
-
-# ʟ [LATIN LETTER SMALL CAPITAL L]
-"\u029F" => "L"
-
-# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
-"\u1D0C" => "L"
-
-# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
-"\u1E36" => "L"
-
-# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E38" => "L"
-
-# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
-"\u1E3A" => "L"
-
-# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3C" => "L"
-
-# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
-"\u24C1" => "L"
-
-# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
-"\u2C60" => "L"
-
-# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
-"\u2C62" => "L"
-
-# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
-"\uA746" => "L"
-
-# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
-"\uA748" => "L"
-
-# Ꞁ [LATIN CAPITAL LETTER TURNED L]
-"\uA780" => "L"
-
-# L [FULLWIDTH LATIN CAPITAL LETTER L]
-"\uFF2C" => "L"
-
-# ĺ [LATIN SMALL LETTER L WITH ACUTE]
-"\u013A" => "l"
-
-# ļ [LATIN SMALL LETTER L WITH CEDILLA]
-"\u013C" => "l"
-
-# ľ [LATIN SMALL LETTER L WITH CARON]
-"\u013E" => "l"
-
-# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
-"\u0140" => "l"
-
-# ł [LATIN SMALL LETTER L WITH STROKE]
-"\u0142" => "l"
-
-# ƚ [LATIN SMALL LETTER L WITH BAR]
-"\u019A" => "l"
-
-# ȴ [LATIN SMALL LETTER L WITH CURL]
-"\u0234" => "l"
-
-# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
-"\u026B" => "l"
-
-# ɬ [LATIN SMALL LETTER L WITH BELT]
-"\u026C" => "l"
-
-# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
-"\u026D" => "l"
-
-# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
-"\u1D85" => "l"
-
-# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
-"\u1E37" => "l"
-
-# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E39" => "l"
-
-# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
-"\u1E3B" => "l"
-
-# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3D" => "l"
-
-# ⓛ [CIRCLED LATIN SMALL LETTER L]
-"\u24DB" => "l"
-
-# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
-"\u2C61" => "l"
-
-# ꝇ [LATIN SMALL LETTER BROKEN L]
-"\uA747" => "l"
-
-# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
-"\uA749" => "l"
-
-# ꞁ [LATIN SMALL LETTER TURNED L]
-"\uA781" => "l"
-
-# l [FULLWIDTH LATIN SMALL LETTER L]
-"\uFF4C" => "l"
-
-# LJ [LATIN CAPITAL LETTER LJ]
-"\u01C7" => "LJ"
-
-# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
-"\u1EFA" => "LL"
-
-# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
-"\u01C8" => "Lj"
-
-# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
-"\u24A7" => "(l)"
-
-# lj [LATIN SMALL LETTER LJ]
-"\u01C9" => "lj"
-
-# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
-"\u1EFB" => "ll"
-
-# ʪ [LATIN SMALL LETTER LS DIGRAPH]
-"\u02AA" => "ls"
-
-# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
-"\u02AB" => "lz"
-
-# Ɯ [LATIN CAPITAL LETTER TURNED M]
-"\u019C" => "M"
-
-# ᴍ [LATIN LETTER SMALL CAPITAL M]
-"\u1D0D" => "M"
-
-# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
-"\u1E3E" => "M"
-
-# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
-"\u1E40" => "M"
-
-# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
-"\u1E42" => "M"
-
-# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
-"\u24C2" => "M"
-
-# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
-"\u2C6E" => "M"
-
-# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
-"\uA7FD" => "M"
-
-# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
-"\uA7FF" => "M"
-
-# M [FULLWIDTH LATIN CAPITAL LETTER M]
-"\uFF2D" => "M"
-
-# ɯ [LATIN SMALL LETTER TURNED M]
-"\u026F" => "m"
-
-# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
-"\u0270" => "m"
-
-# ɱ [LATIN SMALL LETTER M WITH HOOK]
-"\u0271" => "m"
-
-# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
-"\u1D6F" => "m"
-
-# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
-"\u1D86" => "m"
-
-# ḿ [LATIN SMALL LETTER M WITH ACUTE]
-"\u1E3F" => "m"
-
-# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
-"\u1E41" => "m"
-
-# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
-"\u1E43" => "m"
-
-# ⓜ [CIRCLED LATIN SMALL LETTER M]
-"\u24DC" => "m"
-
-# m [FULLWIDTH LATIN SMALL LETTER M]
-"\uFF4D" => "m"
-
-# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
-"\u24A8" => "(m)"
-
-# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
-"\u00D1" => "N"
-
-# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
-"\u0143" => "N"
-
-# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
-"\u0145" => "N"
-
-# Ň [LATIN CAPITAL LETTER N WITH CARON]
-"\u0147" => "N"
-
-# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
-"\u014A" => "N"
-
-# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
-"\u019D" => "N"
-
-# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
-"\u01F8" => "N"
-
-# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
-"\u0220" => "N"
-
-# ɴ [LATIN LETTER SMALL CAPITAL N]
-"\u0274" => "N"
-
-# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
-"\u1D0E" => "N"
-
-# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
-"\u1E44" => "N"
-
-# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
-"\u1E46" => "N"
-
-# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
-"\u1E48" => "N"
-
-# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4A" => "N"
-
-# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
-"\u24C3" => "N"
-
-# N [FULLWIDTH LATIN CAPITAL LETTER N]
-"\uFF2E" => "N"
-
-# ñ [LATIN SMALL LETTER N WITH TILDE]
-"\u00F1" => "n"
-
-# ń [LATIN SMALL LETTER N WITH ACUTE]
-"\u0144" => "n"
-
-# ņ [LATIN SMALL LETTER N WITH CEDILLA]
-"\u0146" => "n"
-
-# ň [LATIN SMALL LETTER N WITH CARON]
-"\u0148" => "n"
-
-# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
-"\u0149" => "n"
-
-# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
-"\u014B" => "n"
-
-# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
-"\u019E" => "n"
-
-# ǹ [LATIN SMALL LETTER N WITH GRAVE]
-"\u01F9" => "n"
-
-# ȵ [LATIN SMALL LETTER N WITH CURL]
-"\u0235" => "n"
-
-# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
-"\u0272" => "n"
-
-# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
-"\u0273" => "n"
-
-# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
-"\u1D70" => "n"
-
-# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
-"\u1D87" => "n"
-
-# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
-"\u1E45" => "n"
-
-# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
-"\u1E47" => "n"
-
-# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
-"\u1E49" => "n"
-
-# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4B" => "n"
-
-# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
-"\u207F" => "n"
-
-# ⓝ [CIRCLED LATIN SMALL LETTER N]
-"\u24DD" => "n"
-
-# n [FULLWIDTH LATIN SMALL LETTER N]
-"\uFF4E" => "n"
-
-# NJ [LATIN CAPITAL LETTER NJ]
-"\u01CA" => "NJ"
-
-# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
-"\u01CB" => "Nj"
-
-# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
-"\u24A9" => "(n)"
-
-# nj [LATIN SMALL LETTER NJ]
-"\u01CC" => "nj"
-
-# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
-"\u00D2" => "O"
-
-# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
-"\u00D3" => "O"
-
-# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
-"\u00D4" => "O"
-
-# Õ [LATIN CAPITAL LETTER O WITH TILDE]
-"\u00D5" => "O"
-
-# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
-"\u00D6" => "O"
-
-# Ø [LATIN CAPITAL LETTER O WITH STROKE]
-"\u00D8" => "O"
-
-# Ō [LATIN CAPITAL LETTER O WITH MACRON]
-"\u014C" => "O"
-
-# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
-"\u014E" => "O"
-
-# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
-"\u0150" => "O"
-
-# Ɔ [LATIN CAPITAL LETTER OPEN O]
-"\u0186" => "O"
-
-# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
-"\u019F" => "O"
-
-# Ơ [LATIN CAPITAL LETTER O WITH HORN]
-"\u01A0" => "O"
-
-# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
-"\u01D1" => "O"
-
-# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
-"\u01EA" => "O"
-
-# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
-"\u01EC" => "O"
-
-# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
-"\u01FE" => "O"
-
-# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
-"\u020C" => "O"
-
-# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
-"\u020E" => "O"
-
-# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
-"\u022A" => "O"
-
-# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
-"\u022C" => "O"
-
-# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
-"\u022E" => "O"
-
-# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0230" => "O"
-
-# ᴏ [LATIN LETTER SMALL CAPITAL O]
-"\u1D0F" => "O"
-
-# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
-"\u1D10" => "O"
-
-# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
-"\u1E4C" => "O"
-
-# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4E" => "O"
-
-# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
-"\u1E50" => "O"
-
-# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
-"\u1E52" => "O"
-
-# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
-"\u1ECC" => "O"
-
-# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
-"\u1ECE" => "O"
-
-# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED0" => "O"
-
-# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED2" => "O"
-
-# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED4" => "O"
-
-# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED6" => "O"
-
-# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED8" => "O"
-
-# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
-"\u1EDA" => "O"
-
-# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
-"\u1EDC" => "O"
-
-# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDE" => "O"
-
-# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
-"\u1EE0" => "O"
-
-# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE2" => "O"
-
-# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
-"\u24C4" => "O"
-
-# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74A" => "O"
-
-# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
-"\uA74C" => "O"
-
-# O [FULLWIDTH LATIN CAPITAL LETTER O]
-"\uFF2F" => "O"
-
-# ò [LATIN SMALL LETTER O WITH GRAVE]
-"\u00F2" => "o"
-
-# ó [LATIN SMALL LETTER O WITH ACUTE]
-"\u00F3" => "o"
-
-# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
-"\u00F4" => "o"
-
-# õ [LATIN SMALL LETTER O WITH TILDE]
-"\u00F5" => "o"
-
-# ö [LATIN SMALL LETTER O WITH DIAERESIS]
-"\u00F6" => "o"
-
-# ø [LATIN SMALL LETTER O WITH STROKE]
-"\u00F8" => "o"
-
-# ō [LATIN SMALL LETTER O WITH MACRON]
-"\u014D" => "o"
-
-# ŏ [LATIN SMALL LETTER O WITH BREVE]
-"\u014F" => "o"
-
-# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
-"\u0151" => "o"
-
-# ơ [LATIN SMALL LETTER O WITH HORN]
-"\u01A1" => "o"
-
-# ǒ [LATIN SMALL LETTER O WITH CARON]
-"\u01D2" => "o"
-
-# ǫ [LATIN SMALL LETTER O WITH OGONEK]
-"\u01EB" => "o"
-
-# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
-"\u01ED" => "o"
-
-# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
-"\u01FF" => "o"
-
-# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
-"\u020D" => "o"
-
-# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
-"\u020F" => "o"
-
-# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
-"\u022B" => "o"
-
-# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
-"\u022D" => "o"
-
-# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
-"\u022F" => "o"
-
-# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0231" => "o"
-
-# ɔ [LATIN SMALL LETTER OPEN O]
-"\u0254" => "o"
-
-# ɵ [LATIN SMALL LETTER BARRED O]
-"\u0275" => "o"
-
-# ᴖ [LATIN SMALL LETTER TOP HALF O]
-"\u1D16" => "o"
-
-# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
-"\u1D17" => "o"
-
-# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
-"\u1D97" => "o"
-
-# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
-"\u1E4D" => "o"
-
-# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4F" => "o"
-
-# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
-"\u1E51" => "o"
-
-# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
-"\u1E53" => "o"
-
-# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
-"\u1ECD" => "o"
-
-# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
-"\u1ECF" => "o"
-
-# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED1" => "o"
-
-# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED3" => "o"
-
-# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED5" => "o"
-
-# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED7" => "o"
-
-# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED9" => "o"
-
-# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
-"\u1EDB" => "o"
-
-# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
-"\u1EDD" => "o"
-
-# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDF" => "o"
-
-# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
-"\u1EE1" => "o"
-
-# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE3" => "o"
-
-# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
-"\u2092" => "o"
-
-# ⓞ [CIRCLED LATIN SMALL LETTER O]
-"\u24DE" => "o"
-
-# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
-"\u2C7A" => "o"
-
-# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74B" => "o"
-
-# ꝍ [LATIN SMALL LETTER O WITH LOOP]
-"\uA74D" => "o"
-
-# o [FULLWIDTH LATIN SMALL LETTER O]
-"\uFF4F" => "o"
-
-# Œ [LATIN CAPITAL LIGATURE OE]
-"\u0152" => "OE"
-
-# ɶ [LATIN LETTER SMALL CAPITAL OE]
-"\u0276" => "OE"
-
-# Ꝏ [LATIN CAPITAL LETTER OO]
-"\uA74E" => "OO"
-
-# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
-"\u0222" => "OU"
-
-# ᴕ [LATIN LETTER SMALL CAPITAL OU]
-"\u1D15" => "OU"
-
-# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
-"\u24AA" => "(o)"
-
-# œ [LATIN SMALL LIGATURE OE]
-"\u0153" => "oe"
-
-# ᴔ [LATIN SMALL LETTER TURNED OE]
-"\u1D14" => "oe"
-
-# ꝏ [LATIN SMALL LETTER OO]
-"\uA74F" => "oo"
-
-# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
-"\u0223" => "ou"
-
-# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
-"\u01A4" => "P"
-
-# ᴘ [LATIN LETTER SMALL CAPITAL P]
-"\u1D18" => "P"
-
-# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
-"\u1E54" => "P"
-
-# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
-"\u1E56" => "P"
-
-# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
-"\u24C5" => "P"
-
-# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
-"\u2C63" => "P"
-
-# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA750" => "P"
-
-# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
-"\uA752" => "P"
-
-# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
-"\uA754" => "P"
-
-# P [FULLWIDTH LATIN CAPITAL LETTER P]
-"\uFF30" => "P"
-
-# ƥ [LATIN SMALL LETTER P WITH HOOK]
-"\u01A5" => "p"
-
-# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
-"\u1D71" => "p"
-
-# ᵽ [LATIN SMALL LETTER P WITH STROKE]
-"\u1D7D" => "p"
-
-# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
-"\u1D88" => "p"
-
-# ṕ [LATIN SMALL LETTER P WITH ACUTE]
-"\u1E55" => "p"
-
-# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
-"\u1E57" => "p"
-
-# ⓟ [CIRCLED LATIN SMALL LETTER P]
-"\u24DF" => "p"
-
-# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA751" => "p"
-
-# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
-"\uA753" => "p"
-
-# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
-"\uA755" => "p"
-
-# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
-"\uA7FC" => "p"
-
-# p [FULLWIDTH LATIN SMALL LETTER P]
-"\uFF50" => "p"
-
-# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
-"\u24AB" => "(p)"
-
-# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
-"\u024A" => "Q"
-
-# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
-"\u24C6" => "Q"
-
-# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA756" => "Q"
-
-# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
-"\uA758" => "Q"
-
-# Q [FULLWIDTH LATIN CAPITAL LETTER Q]
-"\uFF31" => "Q"
-
-# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
-"\u0138" => "q"
-
-# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
-"\u024B" => "q"
-
-# ʠ [LATIN SMALL LETTER Q WITH HOOK]
-"\u02A0" => "q"
-
-# ⓠ [CIRCLED LATIN SMALL LETTER Q]
-"\u24E0" => "q"
-
-# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA757" => "q"
-
-# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
-"\uA759" => "q"
-
-# q [FULLWIDTH LATIN SMALL LETTER Q]
-"\uFF51" => "q"
-
-# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
-"\u24AC" => "(q)"
-
-# ȹ [LATIN SMALL LETTER QP DIGRAPH]
-"\u0239" => "qp"
-
-# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
-"\u0154" => "R"
-
-# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
-"\u0156" => "R"
-
-# Ř [LATIN CAPITAL LETTER R WITH CARON]
-"\u0158" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
-"\u0210" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
-"\u0212" => "R"
-
-# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
-"\u024C" => "R"
-
-# ʀ [LATIN LETTER SMALL CAPITAL R]
-"\u0280" => "R"
-
-# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
-"\u0281" => "R"
-
-# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
-"\u1D19" => "R"
-
-# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
-"\u1D1A" => "R"
-
-# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
-"\u1E58" => "R"
-
-# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
-"\u1E5A" => "R"
-
-# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5C" => "R"
-
-# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
-"\u1E5E" => "R"
-
-# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
-"\u24C7" => "R"
-
-# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
-"\u2C64" => "R"
-
-# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
-"\uA75A" => "R"
-
-# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
-"\uA782" => "R"
-
-# R [FULLWIDTH LATIN CAPITAL LETTER R]
-"\uFF32" => "R"
-
-# ŕ [LATIN SMALL LETTER R WITH ACUTE]
-"\u0155" => "r"
-
-# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
-"\u0157" => "r"
-
-# ř [LATIN SMALL LETTER R WITH CARON]
-"\u0159" => "r"
-
-# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
-"\u0211" => "r"
-
-# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
-"\u0213" => "r"
-
-# ɍ [LATIN SMALL LETTER R WITH STROKE]
-"\u024D" => "r"
-
-# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
-"\u027C" => "r"
-
-# ɽ [LATIN SMALL LETTER R WITH TAIL]
-"\u027D" => "r"
-
-# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
-"\u027E" => "r"
-
-# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
-"\u027F" => "r"
-
-# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
-"\u1D63" => "r"
-
-# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
-"\u1D72" => "r"
-
-# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
-"\u1D73" => "r"
-
-# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
-"\u1D89" => "r"
-
-# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
-"\u1E59" => "r"
-
-# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
-"\u1E5B" => "r"
-
-# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5D" => "r"
-
-# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
-"\u1E5F" => "r"
-
-# ⓡ [CIRCLED LATIN SMALL LETTER R]
-"\u24E1" => "r"
-
-# ꝛ [LATIN SMALL LETTER R ROTUNDA]
-"\uA75B" => "r"
-
-# ꞃ [LATIN SMALL LETTER INSULAR R]
-"\uA783" => "r"
-
-# r [FULLWIDTH LATIN SMALL LETTER R]
-"\uFF52" => "r"
-
-# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
-"\u24AD" => "(r)"
-
-# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
-"\u015A" => "S"
-
-# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
-"\u015C" => "S"
-
-# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
-"\u015E" => "S"
-
-# Š [LATIN CAPITAL LETTER S WITH CARON]
-"\u0160" => "S"
-
-# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
-"\u0218" => "S"
-
-# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
-"\u1E60" => "S"
-
-# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
-"\u1E62" => "S"
-
-# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E64" => "S"
-
-# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E66" => "S"
-
-# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E68" => "S"
-
-# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
-"\u24C8" => "S"
-
-# ꜱ [LATIN LETTER SMALL CAPITAL S]
-"\uA731" => "S"
-
-# ꞅ [LATIN SMALL LETTER INSULAR S]
-"\uA785" => "S"
-
-# S [FULLWIDTH LATIN CAPITAL LETTER S]
-"\uFF33" => "S"
-
-# ś [LATIN SMALL LETTER S WITH ACUTE]
-"\u015B" => "s"
-
-# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
-"\u015D" => "s"
-
-# ş [LATIN SMALL LETTER S WITH CEDILLA]
-"\u015F" => "s"
-
-# š [LATIN SMALL LETTER S WITH CARON]
-"\u0161" => "s"
-
-# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
-"\u017F" => "s"
-
-# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
-"\u0219" => "s"
-
-# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
-"\u023F" => "s"
-
-# ʂ [LATIN SMALL LETTER S WITH HOOK]
-"\u0282" => "s"
-
-# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
-"\u1D74" => "s"
-
-# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
-"\u1D8A" => "s"
-
-# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
-"\u1E61" => "s"
-
-# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
-"\u1E63" => "s"
-
-# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E65" => "s"
-
-# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E67" => "s"
-
-# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E69" => "s"
-
-# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
-"\u1E9C" => "s"
-
-# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
-"\u1E9D" => "s"
-
-# ⓢ [CIRCLED LATIN SMALL LETTER S]
-"\u24E2" => "s"
-
-# Ꞅ [LATIN CAPITAL LETTER INSULAR S]
-"\uA784" => "s"
-
-# s [FULLWIDTH LATIN SMALL LETTER S]
-"\uFF53" => "s"
-
-# ẞ [LATIN CAPITAL LETTER SHARP S]
-"\u1E9E" => "SS"
-
-# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
-"\u24AE" => "(s)"
-
-# ß [LATIN SMALL LETTER SHARP S]
-"\u00DF" => "ss"
-
-# st [LATIN SMALL LIGATURE ST]
-"\uFB06" => "st"
-
-# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
-"\u0162" => "T"
-
-# Ť [LATIN CAPITAL LETTER T WITH CARON]
-"\u0164" => "T"
-
-# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
-"\u0166" => "T"
-
-# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
-"\u01AC" => "T"
-
-# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
-"\u01AE" => "T"
-
-# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
-"\u021A" => "T"
-
-# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
-"\u023E" => "T"
-
-# ᴛ [LATIN LETTER SMALL CAPITAL T]
-"\u1D1B" => "T"
-
-# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
-"\u1E6A" => "T"
-
-# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
-"\u1E6C" => "T"
-
-# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
-"\u1E6E" => "T"
-
-# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E70" => "T"
-
-# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
-"\u24C9" => "T"
-
-# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
-"\uA786" => "T"
-
-# T [FULLWIDTH LATIN CAPITAL LETTER T]
-"\uFF34" => "T"
-
-# ţ [LATIN SMALL LETTER T WITH CEDILLA]
-"\u0163" => "t"
-
-# ť [LATIN SMALL LETTER T WITH CARON]
-"\u0165" => "t"
-
-# ŧ [LATIN SMALL LETTER T WITH STROKE]
-"\u0167" => "t"
-
-# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
-"\u01AB" => "t"
-
-# ƭ [LATIN SMALL LETTER T WITH HOOK]
-"\u01AD" => "t"
-
-# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
-"\u021B" => "t"
-
-# ȶ [LATIN SMALL LETTER T WITH CURL]
-"\u0236" => "t"
-
-# ʇ [LATIN SMALL LETTER TURNED T]
-"\u0287" => "t"
-
-# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
-"\u0288" => "t"
-
-# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
-"\u1D75" => "t"
-
-# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
-"\u1E6B" => "t"
-
-# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
-"\u1E6D" => "t"
-
-# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
-"\u1E6F" => "t"
-
-# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E71" => "t"
-
-# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
-"\u1E97" => "t"
-
-# ⓣ [CIRCLED LATIN SMALL LETTER T]
-"\u24E3" => "t"
-
-# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
-"\u2C66" => "t"
-
-# t [FULLWIDTH LATIN SMALL LETTER T]
-"\uFF54" => "t"
-
-# Þ [LATIN CAPITAL LETTER THORN]
-"\u00DE" => "TH"
-
-# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA766" => "TH"
-
-# Ꜩ [LATIN CAPITAL LETTER TZ]
-"\uA728" => "TZ"
-
-# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
-"\u24AF" => "(t)"
-
-# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
-"\u02A8" => "tc"
-
-# þ [LATIN SMALL LETTER THORN]
-"\u00FE" => "th"
-
-# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
-"\u1D7A" => "th"
-
-# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA767" => "th"
-
-# ʦ [LATIN SMALL LETTER TS DIGRAPH]
-"\u02A6" => "ts"
-
-# ꜩ [LATIN SMALL LETTER TZ]
-"\uA729" => "tz"
-
-# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
-"\u00D9" => "U"
-
-# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
-"\u00DA" => "U"
-
-# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
-"\u00DB" => "U"
-
-# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
-"\u00DC" => "U"
-
-# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
-"\u0168" => "U"
-
-# Ū [LATIN CAPITAL LETTER U WITH MACRON]
-"\u016A" => "U"
-
-# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
-"\u016C" => "U"
-
-# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
-"\u016E" => "U"
-
-# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
-"\u0170" => "U"
-
-# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
-"\u0172" => "U"
-
-# Ư [LATIN CAPITAL LETTER U WITH HORN]
-"\u01AF" => "U"
-
-# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
-"\u01D3" => "U"
-
-# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D5" => "U"
-
-# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D7" => "U"
-
-# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
-"\u01D9" => "U"
-
-# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DB" => "U"
-
-# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
-"\u0214" => "U"
-
-# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
-"\u0216" => "U"
-
-# Ʉ [LATIN CAPITAL LETTER U BAR]
-"\u0244" => "U"
-
-# ᴜ [LATIN LETTER SMALL CAPITAL U]
-"\u1D1C" => "U"
-
-# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
-"\u1D7E" => "U"
-
-# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
-"\u1E72" => "U"
-
-# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
-"\u1E74" => "U"
-
-# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E76" => "U"
-
-# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
-"\u1E78" => "U"
-
-# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7A" => "U"
-
-# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
-"\u1EE4" => "U"
-
-# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
-"\u1EE6" => "U"
-
-# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
-"\u1EE8" => "U"
-
-# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
-"\u1EEA" => "U"
-
-# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EEC" => "U"
-
-# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
-"\u1EEE" => "U"
-
-# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF0" => "U"
-
-# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
-"\u24CA" => "U"
-
-# U [FULLWIDTH LATIN CAPITAL LETTER U]
-"\uFF35" => "U"
-
-# ù [LATIN SMALL LETTER U WITH GRAVE]
-"\u00F9" => "u"
-
-# ú [LATIN SMALL LETTER U WITH ACUTE]
-"\u00FA" => "u"
-
-# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
-"\u00FB" => "u"
-
-# ü [LATIN SMALL LETTER U WITH DIAERESIS]
-"\u00FC" => "u"
-
-# ũ [LATIN SMALL LETTER U WITH TILDE]
-"\u0169" => "u"
-
-# ū [LATIN SMALL LETTER U WITH MACRON]
-"\u016B" => "u"
-
-# ŭ [LATIN SMALL LETTER U WITH BREVE]
-"\u016D" => "u"
-
-# ů [LATIN SMALL LETTER U WITH RING ABOVE]
-"\u016F" => "u"
-
-# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
-"\u0171" => "u"
-
-# ų [LATIN SMALL LETTER U WITH OGONEK]
-"\u0173" => "u"
-
-# ư [LATIN SMALL LETTER U WITH HORN]
-"\u01B0" => "u"
-
-# ǔ [LATIN SMALL LETTER U WITH CARON]
-"\u01D4" => "u"
-
-# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D6" => "u"
-
-# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D8" => "u"
-
-# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
-"\u01DA" => "u"
-
-# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DC" => "u"
-
-# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
-"\u0215" => "u"
-
-# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
-"\u0217" => "u"
-
-# ʉ [LATIN SMALL LETTER U BAR]
-"\u0289" => "u"
-
-# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
-"\u1D64" => "u"
-
-# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
-"\u1D99" => "u"
-
-# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
-"\u1E73" => "u"
-
-# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
-"\u1E75" => "u"
-
-# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E77" => "u"
-
-# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
-"\u1E79" => "u"
-
-# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7B" => "u"
-
-# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
-"\u1EE5" => "u"
-
-# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
-"\u1EE7" => "u"
-
-# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
-"\u1EE9" => "u"
-
-# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
-"\u1EEB" => "u"
-
-# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EED" => "u"
-
-# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
-"\u1EEF" => "u"
-
-# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF1" => "u"
-
-# ⓤ [CIRCLED LATIN SMALL LETTER U]
-"\u24E4" => "u"
-
-# u [FULLWIDTH LATIN SMALL LETTER U]
-"\uFF55" => "u"
-
-# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
-"\u24B0" => "(u)"
-
-# ᵫ [LATIN SMALL LETTER UE]
-"\u1D6B" => "ue"
-
-# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
-"\u01B2" => "V"
-
-# Ʌ [LATIN CAPITAL LETTER TURNED V]
-"\u0245" => "V"
-
-# ᴠ [LATIN LETTER SMALL CAPITAL V]
-"\u1D20" => "V"
-
-# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
-"\u1E7C" => "V"
-
-# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
-"\u1E7E" => "V"
-
-# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
-"\u1EFC" => "V"
-
-# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
-"\u24CB" => "V"
-
-# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
-"\uA75E" => "V"
-
-# Ꝩ [LATIN CAPITAL LETTER VEND]
-"\uA768" => "V"
-
-# V [FULLWIDTH LATIN CAPITAL LETTER V]
-"\uFF36" => "V"
-
-# ʋ [LATIN SMALL LETTER V WITH HOOK]
-"\u028B" => "v"
-
-# ʌ [LATIN SMALL LETTER TURNED V]
-"\u028C" => "v"
-
-# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
-"\u1D65" => "v"
-
-# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
-"\u1D8C" => "v"
-
-# ṽ [LATIN SMALL LETTER V WITH TILDE]
-"\u1E7D" => "v"
-
-# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
-"\u1E7F" => "v"
-
-# ⓥ [CIRCLED LATIN SMALL LETTER V]
-"\u24E5" => "v"
-
-# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
-"\u2C71" => "v"
-
-# ⱴ [LATIN SMALL LETTER V WITH CURL]
-"\u2C74" => "v"
-
-# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
-"\uA75F" => "v"
-
-# v [FULLWIDTH LATIN SMALL LETTER V]
-"\uFF56" => "v"
-
-# Ꝡ [LATIN CAPITAL LETTER VY]
-"\uA760" => "VY"
-
-# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
-"\u24B1" => "(v)"
-
-# ꝡ [LATIN SMALL LETTER VY]
-"\uA761" => "vy"
-
-# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
-"\u0174" => "W"
-
-# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
-"\u01F7" => "W"
-
-# ᴡ [LATIN LETTER SMALL CAPITAL W]
-"\u1D21" => "W"
-
-# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
-"\u1E80" => "W"
-
-# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
-"\u1E82" => "W"
-
-# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
-"\u1E84" => "W"
-
-# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
-"\u1E86" => "W"
-
-# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
-"\u1E88" => "W"
-
-# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
-"\u24CC" => "W"
-
-# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
-"\u2C72" => "W"
-
-# W [FULLWIDTH LATIN CAPITAL LETTER W]
-"\uFF37" => "W"
-
-# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
-"\u0175" => "w"
-
-# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
-"\u01BF" => "w"
-
-# ʍ [LATIN SMALL LETTER TURNED W]
-"\u028D" => "w"
-
-# ẁ [LATIN SMALL LETTER W WITH GRAVE]
-"\u1E81" => "w"
-
-# ẃ [LATIN SMALL LETTER W WITH ACUTE]
-"\u1E83" => "w"
-
-# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
-"\u1E85" => "w"
-
-# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
-"\u1E87" => "w"
-
-# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
-"\u1E89" => "w"
-
-# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
-"\u1E98" => "w"
-
-# ⓦ [CIRCLED LATIN SMALL LETTER W]
-"\u24E6" => "w"
-
-# ⱳ [LATIN SMALL LETTER W WITH HOOK]
-"\u2C73" => "w"
-
-# w [FULLWIDTH LATIN SMALL LETTER W]
-"\uFF57" => "w"
-
-# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
-"\u24B2" => "(w)"
-
-# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
-"\u1E8A" => "X"
-
-# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
-"\u1E8C" => "X"
-
-# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
-"\u24CD" => "X"
-
-# X [FULLWIDTH LATIN CAPITAL LETTER X]
-"\uFF38" => "X"
-
-# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
-"\u1D8D" => "x"
-
-# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
-"\u1E8B" => "x"
-
-# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
-"\u1E8D" => "x"
-
-# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
-"\u2093" => "x"
-
-# ⓧ [CIRCLED LATIN SMALL LETTER X]
-"\u24E7" => "x"
-
-# x [FULLWIDTH LATIN SMALL LETTER X]
-"\uFF58" => "x"
-
-# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
-"\u24B3" => "(x)"
-
-# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
-"\u00DD" => "Y"
-
-# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
-"\u0176" => "Y"
-
-# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
-"\u0178" => "Y"
-
-# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
-"\u01B3" => "Y"
-
-# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
-"\u0232" => "Y"
-
-# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
-"\u024E" => "Y"
-
-# ʏ [LATIN LETTER SMALL CAPITAL Y]
-"\u028F" => "Y"
-
-# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
-"\u1E8E" => "Y"
-
-# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
-"\u1EF2" => "Y"
-
-# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
-"\u1EF4" => "Y"
-
-# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
-"\u1EF6" => "Y"
-
-# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
-"\u1EF8" => "Y"
-
-# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
-"\u1EFE" => "Y"
-
-# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
-"\u24CE" => "Y"
-
-# Y [FULLWIDTH LATIN CAPITAL LETTER Y]
-"\uFF39" => "Y"
-
-# ý [LATIN SMALL LETTER Y WITH ACUTE]
-"\u00FD" => "y"
-
-# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
-"\u00FF" => "y"
-
-# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
-"\u0177" => "y"
-
-# ƴ [LATIN SMALL LETTER Y WITH HOOK]
-"\u01B4" => "y"
-
-# ȳ [LATIN SMALL LETTER Y WITH MACRON]
-"\u0233" => "y"
-
-# ɏ [LATIN SMALL LETTER Y WITH STROKE]
-"\u024F" => "y"
-
-# ʎ [LATIN SMALL LETTER TURNED Y]
-"\u028E" => "y"
-
-# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
-"\u1E8F" => "y"
-
-# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
-"\u1E99" => "y"
-
-# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
-"\u1EF3" => "y"
-
-# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
-"\u1EF5" => "y"
-
-# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
-"\u1EF7" => "y"
-
-# ỹ [LATIN SMALL LETTER Y WITH TILDE]
-"\u1EF9" => "y"
-
-# ỿ [LATIN SMALL LETTER Y WITH LOOP]
-"\u1EFF" => "y"
-
-# ⓨ [CIRCLED LATIN SMALL LETTER Y]
-"\u24E8" => "y"
-
-# y [FULLWIDTH LATIN SMALL LETTER Y]
-"\uFF59" => "y"
-
-# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
-"\u24B4" => "(y)"
-
-# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
-"\u0179" => "Z"
-
-# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
-"\u017B" => "Z"
-
-# Ž [LATIN CAPITAL LETTER Z WITH CARON]
-"\u017D" => "Z"
-
-# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
-"\u01B5" => "Z"
-
-# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
-"\u021C" => "Z"
-
-# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
-"\u0224" => "Z"
-
-# ᴢ [LATIN LETTER SMALL CAPITAL Z]
-"\u1D22" => "Z"
-
-# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
-"\u1E90" => "Z"
-
-# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
-"\u1E92" => "Z"
-
-# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
-"\u1E94" => "Z"
-
-# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
-"\u24CF" => "Z"
-
-# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
-"\u2C6B" => "Z"
-
-# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
-"\uA762" => "Z"
-
-# Z [FULLWIDTH LATIN CAPITAL LETTER Z]
-"\uFF3A" => "Z"
-
-# ź [LATIN SMALL LETTER Z WITH ACUTE]
-"\u017A" => "z"
-
-# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
-"\u017C" => "z"
-
-# ž [LATIN SMALL LETTER Z WITH CARON]
-"\u017E" => "z"
-
-# ƶ [LATIN SMALL LETTER Z WITH STROKE]
-"\u01B6" => "z"
-
-# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
-"\u021D" => "z"
-
-# ȥ [LATIN SMALL LETTER Z WITH HOOK]
-"\u0225" => "z"
-
-# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
-"\u0240" => "z"
-
-# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
-"\u0290" => "z"
-
-# ʑ [LATIN SMALL LETTER Z WITH CURL]
-"\u0291" => "z"
-
-# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
-"\u1D76" => "z"
-
-# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
-"\u1D8E" => "z"
-
-# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
-"\u1E91" => "z"
-
-# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
-"\u1E93" => "z"
-
-# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
-"\u1E95" => "z"
-
-# ⓩ [CIRCLED LATIN SMALL LETTER Z]
-"\u24E9" => "z"
-
-# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
-"\u2C6C" => "z"
-
-# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
-"\uA763" => "z"
-
-# z [FULLWIDTH LATIN SMALL LETTER Z]
-"\uFF5A" => "z"
-
-# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
-"\u24B5" => "(z)"
-
-# ⁰ [SUPERSCRIPT ZERO]
-"\u2070" => "0"
-
-# ₀ [SUBSCRIPT ZERO]
-"\u2080" => "0"
-
-# ⓪ [CIRCLED DIGIT ZERO]
-"\u24EA" => "0"
-
-# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
-"\u24FF" => "0"
-
-# 0 [FULLWIDTH DIGIT ZERO]
-"\uFF10" => "0"
-
-# ¹ [SUPERSCRIPT ONE]
-"\u00B9" => "1"
-
-# ₁ [SUBSCRIPT ONE]
-"\u2081" => "1"
-
-# ① [CIRCLED DIGIT ONE]
-"\u2460" => "1"
-
-# ⓵ [DOUBLE CIRCLED DIGIT ONE]
-"\u24F5" => "1"
-
-# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
-"\u2776" => "1"
-
-# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
-"\u2780" => "1"
-
-# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
-"\u278A" => "1"
-
-# 1 [FULLWIDTH DIGIT ONE]
-"\uFF11" => "1"
-
-# ⒈ [DIGIT ONE FULL STOP]
-"\u2488" => "1."
-
-# ⑴ [PARENTHESIZED DIGIT ONE]
-"\u2474" => "(1)"
-
-# ² [SUPERSCRIPT TWO]
-"\u00B2" => "2"
-
-# ₂ [SUBSCRIPT TWO]
-"\u2082" => "2"
-
-# ② [CIRCLED DIGIT TWO]
-"\u2461" => "2"
-
-# ⓶ [DOUBLE CIRCLED DIGIT TWO]
-"\u24F6" => "2"
-
-# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
-"\u2777" => "2"
-
-# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
-"\u2781" => "2"
-
-# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
-"\u278B" => "2"
-
-# 2 [FULLWIDTH DIGIT TWO]
-"\uFF12" => "2"
-
-# ⒉ [DIGIT TWO FULL STOP]
-"\u2489" => "2."
-
-# ⑵ [PARENTHESIZED DIGIT TWO]
-"\u2475" => "(2)"
-
-# ³ [SUPERSCRIPT THREE]
-"\u00B3" => "3"
-
-# ₃ [SUBSCRIPT THREE]
-"\u2083" => "3"
-
-# ③ [CIRCLED DIGIT THREE]
-"\u2462" => "3"
-
-# ⓷ [DOUBLE CIRCLED DIGIT THREE]
-"\u24F7" => "3"
-
-# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
-"\u2778" => "3"
-
-# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
-"\u2782" => "3"
-
-# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
-"\u278C" => "3"
-
-# 3 [FULLWIDTH DIGIT THREE]
-"\uFF13" => "3"
-
-# ⒊ [DIGIT THREE FULL STOP]
-"\u248A" => "3."
-
-# ⑶ [PARENTHESIZED DIGIT THREE]
-"\u2476" => "(3)"
-
-# ⁴ [SUPERSCRIPT FOUR]
-"\u2074" => "4"
-
-# ₄ [SUBSCRIPT FOUR]
-"\u2084" => "4"
-
-# ④ [CIRCLED DIGIT FOUR]
-"\u2463" => "4"
-
-# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
-"\u24F8" => "4"
-
-# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
-"\u2779" => "4"
-
-# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
-"\u2783" => "4"
-
-# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
-"\u278D" => "4"
-
-# 4 [FULLWIDTH DIGIT FOUR]
-"\uFF14" => "4"
-
-# ⒋ [DIGIT FOUR FULL STOP]
-"\u248B" => "4."
-
-# ⑷ [PARENTHESIZED DIGIT FOUR]
-"\u2477" => "(4)"
-
-# ⁵ [SUPERSCRIPT FIVE]
-"\u2075" => "5"
-
-# ₅ [SUBSCRIPT FIVE]
-"\u2085" => "5"
-
-# ⑤ [CIRCLED DIGIT FIVE]
-"\u2464" => "5"
-
-# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
-"\u24F9" => "5"
-
-# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
-"\u277A" => "5"
-
-# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
-"\u2784" => "5"
-
-# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
-"\u278E" => "5"
-
-# 5 [FULLWIDTH DIGIT FIVE]
-"\uFF15" => "5"
-
-# ⒌ [DIGIT FIVE FULL STOP]
-"\u248C" => "5."
-
-# ⑸ [PARENTHESIZED DIGIT FIVE]
-"\u2478" => "(5)"
-
-# ⁶ [SUPERSCRIPT SIX]
-"\u2076" => "6"
-
-# ₆ [SUBSCRIPT SIX]
-"\u2086" => "6"
-
-# ⑥ [CIRCLED DIGIT SIX]
-"\u2465" => "6"
-
-# ⓺ [DOUBLE CIRCLED DIGIT SIX]
-"\u24FA" => "6"
-
-# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
-"\u277B" => "6"
-
-# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
-"\u2785" => "6"
-
-# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
-"\u278F" => "6"
-
-# 6 [FULLWIDTH DIGIT SIX]
-"\uFF16" => "6"
-
-# ⒍ [DIGIT SIX FULL STOP]
-"\u248D" => "6."
-
-# ⑹ [PARENTHESIZED DIGIT SIX]
-"\u2479" => "(6)"
-
-# ⁷ [SUPERSCRIPT SEVEN]
-"\u2077" => "7"
-
-# ₇ [SUBSCRIPT SEVEN]
-"\u2087" => "7"
-
-# ⑦ [CIRCLED DIGIT SEVEN]
-"\u2466" => "7"
-
-# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
-"\u24FB" => "7"
-
-# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
-"\u277C" => "7"
-
-# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2786" => "7"
-
-# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2790" => "7"
-
-# 7 [FULLWIDTH DIGIT SEVEN]
-"\uFF17" => "7"
-
-# ⒎ [DIGIT SEVEN FULL STOP]
-"\u248E" => "7."
-
-# ⑺ [PARENTHESIZED DIGIT SEVEN]
-"\u247A" => "(7)"
-
-# ⁸ [SUPERSCRIPT EIGHT]
-"\u2078" => "8"
-
-# ₈ [SUBSCRIPT EIGHT]
-"\u2088" => "8"
-
-# ⑧ [CIRCLED DIGIT EIGHT]
-"\u2467" => "8"
-
-# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
-"\u24FC" => "8"
-
-# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
-"\u277D" => "8"
-
-# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2787" => "8"
-
-# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2791" => "8"
-
-# 8 [FULLWIDTH DIGIT EIGHT]
-"\uFF18" => "8"
-
-# ⒏ [DIGIT EIGHT FULL STOP]
-"\u248F" => "8."
-
-# ⑻ [PARENTHESIZED DIGIT EIGHT]
-"\u247B" => "(8)"
-
-# ⁹ [SUPERSCRIPT NINE]
-"\u2079" => "9"
-
-# ₉ [SUBSCRIPT NINE]
-"\u2089" => "9"
-
-# ⑨ [CIRCLED DIGIT NINE]
-"\u2468" => "9"
-
-# ⓽ [DOUBLE CIRCLED DIGIT NINE]
-"\u24FD" => "9"
-
-# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
-"\u277E" => "9"
-
-# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
-"\u2788" => "9"
-
-# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
-"\u2792" => "9"
-
-# 9 [FULLWIDTH DIGIT NINE]
-"\uFF19" => "9"
-
-# ⒐ [DIGIT NINE FULL STOP]
-"\u2490" => "9."
-
-# ⑼ [PARENTHESIZED DIGIT NINE]
-"\u247C" => "(9)"
-
-# ⑩ [CIRCLED NUMBER TEN]
-"\u2469" => "10"
-
-# ⓾ [DOUBLE CIRCLED NUMBER TEN]
-"\u24FE" => "10"
-
-# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
-"\u277F" => "10"
-
-# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
-"\u2789" => "10"
-
-# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
-"\u2793" => "10"
-
-# ⒑ [NUMBER TEN FULL STOP]
-"\u2491" => "10."
-
-# ⑽ [PARENTHESIZED NUMBER TEN]
-"\u247D" => "(10)"
-
-# ⑪ [CIRCLED NUMBER ELEVEN]
-"\u246A" => "11"
-
-# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
-"\u24EB" => "11"
-
-# ⒒ [NUMBER ELEVEN FULL STOP]
-"\u2492" => "11."
-
-# ⑾ [PARENTHESIZED NUMBER ELEVEN]
-"\u247E" => "(11)"
-
-# ⑫ [CIRCLED NUMBER TWELVE]
-"\u246B" => "12"
-
-# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
-"\u24EC" => "12"
-
-# ⒓ [NUMBER TWELVE FULL STOP]
-"\u2493" => "12."
-
-# ⑿ [PARENTHESIZED NUMBER TWELVE]
-"\u247F" => "(12)"
-
-# ⑬ [CIRCLED NUMBER THIRTEEN]
-"\u246C" => "13"
-
-# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
-"\u24ED" => "13"
-
-# ⒔ [NUMBER THIRTEEN FULL STOP]
-"\u2494" => "13."
-
-# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
-"\u2480" => "(13)"
-
-# ⑭ [CIRCLED NUMBER FOURTEEN]
-"\u246D" => "14"
-
-# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
-"\u24EE" => "14"
-
-# ⒕ [NUMBER FOURTEEN FULL STOP]
-"\u2495" => "14."
-
-# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
-"\u2481" => "(14)"
-
-# ⑮ [CIRCLED NUMBER FIFTEEN]
-"\u246E" => "15"
-
-# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
-"\u24EF" => "15"
-
-# ⒖ [NUMBER FIFTEEN FULL STOP]
-"\u2496" => "15."
-
-# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
-"\u2482" => "(15)"
-
-# ⑯ [CIRCLED NUMBER SIXTEEN]
-"\u246F" => "16"
-
-# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
-"\u24F0" => "16"
-
-# ⒗ [NUMBER SIXTEEN FULL STOP]
-"\u2497" => "16."
-
-# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
-"\u2483" => "(16)"
-
-# ⑰ [CIRCLED NUMBER SEVENTEEN]
-"\u2470" => "17"
-
-# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
-"\u24F1" => "17"
-
-# ⒘ [NUMBER SEVENTEEN FULL STOP]
-"\u2498" => "17."
-
-# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
-"\u2484" => "(17)"
-
-# ⑱ [CIRCLED NUMBER EIGHTEEN]
-"\u2471" => "18"
-
-# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
-"\u24F2" => "18"
-
-# ⒙ [NUMBER EIGHTEEN FULL STOP]
-"\u2499" => "18."
-
-# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
-"\u2485" => "(18)"
-
-# ⑲ [CIRCLED NUMBER NINETEEN]
-"\u2472" => "19"
-
-# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
-"\u24F3" => "19"
-
-# ⒚ [NUMBER NINETEEN FULL STOP]
-"\u249A" => "19."
-
-# ⒆ [PARENTHESIZED NUMBER NINETEEN]
-"\u2486" => "(19)"
-
-# ⑳ [CIRCLED NUMBER TWENTY]
-"\u2473" => "20"
-
-# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
-"\u24F4" => "20"
-
-# ⒛ [NUMBER TWENTY FULL STOP]
-"\u249B" => "20."
-
-# ⒇ [PARENTHESIZED NUMBER TWENTY]
-"\u2487" => "(20)"
-
-# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00AB" => "\""
-
-# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00BB" => "\""
-
-# “ [LEFT DOUBLE QUOTATION MARK]
-"\u201C" => "\""
-
-# ” [RIGHT DOUBLE QUOTATION MARK]
-"\u201D" => "\""
-
-# „ [DOUBLE LOW-9 QUOTATION MARK]
-"\u201E" => "\""
-
-# ″ [DOUBLE PRIME]
-"\u2033" => "\""
-
-# ‶ [REVERSED DOUBLE PRIME]
-"\u2036" => "\""
-
-# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275D" => "\""
-
-# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
-"\u275E" => "\""
-
-# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276E" => "\""
-
-# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276F" => "\""
-
-# " [FULLWIDTH QUOTATION MARK]
-"\uFF02" => "\""
-
-# ‘ [LEFT SINGLE QUOTATION MARK]
-"\u2018" => "\'"
-
-# ’ [RIGHT SINGLE QUOTATION MARK]
-"\u2019" => "\'"
-
-# ‚ [SINGLE LOW-9 QUOTATION MARK]
-"\u201A" => "\'"
-
-# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
-"\u201B" => "\'"
-
-# ′ [PRIME]
-"\u2032" => "\'"
-
-# ‵ [REVERSED PRIME]
-"\u2035" => "\'"
-
-# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
-"\u2039" => "\'"
-
-# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
-"\u203A" => "\'"
-
-# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275B" => "\'"
-
-# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
-"\u275C" => "\'"
-
-# ' [FULLWIDTH APOSTROPHE]
-"\uFF07" => "\'"
-
-# ‐ [HYPHEN]
-"\u2010" => "-"
-
-# ‑ [NON-BREAKING HYPHEN]
-"\u2011" => "-"
-
-# ‒ [FIGURE DASH]
-"\u2012" => "-"
-
-# – [EN DASH]
-"\u2013" => "-"
-
-# — [EM DASH]
-"\u2014" => "-"
-
-# ⁻ [SUPERSCRIPT MINUS]
-"\u207B" => "-"
-
-# ₋ [SUBSCRIPT MINUS]
-"\u208B" => "-"
-
-# - [FULLWIDTH HYPHEN-MINUS]
-"\uFF0D" => "-"
-
-# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
-"\u2045" => "["
-
-# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2772" => "["
-
-# [ [FULLWIDTH LEFT SQUARE BRACKET]
-"\uFF3B" => "["
-
-# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
-"\u2046" => "]"
-
-# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2773" => "]"
-
-# ] [FULLWIDTH RIGHT SQUARE BRACKET]
-"\uFF3D" => "]"
-
-# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
-"\u207D" => "("
-
-# ₍ [SUBSCRIPT LEFT PARENTHESIS]
-"\u208D" => "("
-
-# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
-"\u2768" => "("
-
-# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
-"\u276A" => "("
-
-# ( [FULLWIDTH LEFT PARENTHESIS]
-"\uFF08" => "("
-
-# ⸨ [LEFT DOUBLE PARENTHESIS]
-"\u2E28" => "(("
-
-# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
-"\u207E" => ")"
-
-# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
-"\u208E" => ")"
-
-# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
-"\u2769" => ")"
-
-# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
-"\u276B" => ")"
-
-# ) [FULLWIDTH RIGHT PARENTHESIS]
-"\uFF09" => ")"
-
-# ⸩ [RIGHT DOUBLE PARENTHESIS]
-"\u2E29" => "))"
-
-# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276C" => "<"
-
-# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2770" => "<"
-
-# < [FULLWIDTH LESS-THAN SIGN]
-"\uFF1C" => "<"
-
-# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276D" => ">"
-
-# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2771" => ">"
-
-# > [FULLWIDTH GREATER-THAN SIGN]
-"\uFF1E" => ">"
-
-# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
-"\u2774" => "{"
-
-# { [FULLWIDTH LEFT CURLY BRACKET]
-"\uFF5B" => "{"
-
-# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
-"\u2775" => "}"
-
-# } [FULLWIDTH RIGHT CURLY BRACKET]
-"\uFF5D" => "}"
-
-# ⁺ [SUPERSCRIPT PLUS SIGN]
-"\u207A" => "+"
-
-# ₊ [SUBSCRIPT PLUS SIGN]
-"\u208A" => "+"
-
-# + [FULLWIDTH PLUS SIGN]
-"\uFF0B" => "+"
-
-# ⁼ [SUPERSCRIPT EQUALS SIGN]
-"\u207C" => "="
-
-# ₌ [SUBSCRIPT EQUALS SIGN]
-"\u208C" => "="
-
-# = [FULLWIDTH EQUALS SIGN]
-"\uFF1D" => "="
-
-# ! [FULLWIDTH EXCLAMATION MARK]
-"\uFF01" => "!"
-
-# ‼ [DOUBLE EXCLAMATION MARK]
-"\u203C" => "!!"
-
-# ⁉ [EXCLAMATION QUESTION MARK]
-"\u2049" => "!?"
-
-# # [FULLWIDTH NUMBER SIGN]
-"\uFF03" => "#"
-
-# $ [FULLWIDTH DOLLAR SIGN]
-"\uFF04" => "$"
-
-# ⁒ [COMMERCIAL MINUS SIGN]
-"\u2052" => "%"
-
-# % [FULLWIDTH PERCENT SIGN]
-"\uFF05" => "%"
-
-# & [FULLWIDTH AMPERSAND]
-"\uFF06" => "&"
-
-# ⁎ [LOW ASTERISK]
-"\u204E" => "*"
-
-# * [FULLWIDTH ASTERISK]
-"\uFF0A" => "*"
-
-# , [FULLWIDTH COMMA]
-"\uFF0C" => ","
-
-# . [FULLWIDTH FULL STOP]
-"\uFF0E" => "."
-
-# ⁄ [FRACTION SLASH]
-"\u2044" => "/"
-
-# / [FULLWIDTH SOLIDUS]
-"\uFF0F" => "/"
-
-# : [FULLWIDTH COLON]
-"\uFF1A" => ":"
-
-# ⁏ [REVERSED SEMICOLON]
-"\u204F" => ";"
-
-# ; [FULLWIDTH SEMICOLON]
-"\uFF1B" => ";"
-
-# ? [FULLWIDTH QUESTION MARK]
-"\uFF1F" => "?"
-
-# ⁇ [DOUBLE QUESTION MARK]
-"\u2047" => "??"
-
-# ⁈ [QUESTION EXCLAMATION MARK]
-"\u2048" => "?!"
-
-# @ [FULLWIDTH COMMERCIAL AT]
-"\uFF20" => "@"
-
-# \ [FULLWIDTH REVERSE SOLIDUS]
-"\uFF3C" => "\\"
-
-# ‸ [CARET]
-"\u2038" => "^"
-
-# ^ [FULLWIDTH CIRCUMFLEX ACCENT]
-"\uFF3E" => "^"
-
-# _ [FULLWIDTH LOW LINE]
-"\uFF3F" => "_"
-
-# ⁓ [SWUNG DASH]
-"\u2053" => "~"
-
-# ~ [FULLWIDTH TILDE]
-"\uFF5E" => "~"
-
-################################################################
-# Below is the Perl script used to generate the above mappings #
-# from ASCIIFoldingFilter.java: #
-################################################################
-#
-# #!/usr/bin/perl
-#
-# use warnings;
-# use strict;
-#
-# my @source_chars = ();
-# my @source_char_descriptions = ();
-# my $target = '';
-#
-# while (<>) {
-# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
-# push @source_chars, $1;
-# push @source_char_descriptions, $2;
-# next;
-# }
-# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
-# $target .= $1;
-# next;
-# }
-# if (/break;/) {
-# $target = "\\\"" if ($target eq '"');
-# for my $source_char_num (0..$#source_chars) {
-# print "# $source_char_descriptions[$source_char_num]\n";
-# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
-# }
-# @source_chars = ();
-# @source_char_descriptions = ();
-# $target = '';
-# }
-# }
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This map converts alphabetic, numeric, and symbolic Unicode characters
+# which are not among the 128 ASCII characters (the "Basic Latin" Unicode
+# block) into their ASCII equivalents, where one exists.
+#
+# Characters from the following Unicode blocks are converted; however, only
+# those characters with reasonable ASCII alternatives are converted:
+#
+# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
+# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
+# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
+# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
+# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
+# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
+# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
+# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
+# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
+# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
+# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
+# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
+# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
+# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
+# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
+# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
+#
+# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
+#
+# The set of character conversions supported by this map is a superset of
+# those supported by the map represented by mapping-ISOLatin1Accent.txt.
+#
+# See the bottom of this file for the Perl script used to generate the contents
+# of this file (without this header) from ASCIIFoldingFilter.java.
+
+
+# Syntax:
+# "source" => "target"
+# "source".length() > 0 (source cannot be empty.)
+# "target".length() >= 0 (target can be empty.)
+
+
+# À [LATIN CAPITAL LETTER A WITH GRAVE]
+"\u00C0" => "A"
+
+# Á [LATIN CAPITAL LETTER A WITH ACUTE]
+"\u00C1" => "A"
+
+# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+"\u00C2" => "A"
+
+# Ã [LATIN CAPITAL LETTER A WITH TILDE]
+"\u00C3" => "A"
+
+# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
+"\u00C4" => "A"
+
+# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
+"\u00C5" => "A"
+
+# Ā [LATIN CAPITAL LETTER A WITH MACRON]
+"\u0100" => "A"
+
+# Ă [LATIN CAPITAL LETTER A WITH BREVE]
+"\u0102" => "A"
+
+# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
+"\u0104" => "A"
+
+# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
+"\u018F" => "A"
+
+# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
+"\u01CD" => "A"
+
+# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+"\u01DE" => "A"
+
+# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+"\u01E0" => "A"
+
+# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+"\u01FA" => "A"
+
+# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+"\u0200" => "A"
+
+# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+"\u0202" => "A"
+
+# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+"\u0226" => "A"
+
+# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
+"\u023A" => "A"
+
+# ᴀ [LATIN LETTER SMALL CAPITAL A]
+"\u1D00" => "A"
+
+# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
+"\u1E00" => "A"
+
+# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
+"\u1EA0" => "A"
+
+# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+"\u1EA2" => "A"
+
+# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+"\u1EA4" => "A"
+
+# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+"\u1EA6" => "A"
+
+# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EA8" => "A"
+
+# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+"\u1EAA" => "A"
+
+# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EAC" => "A"
+
+# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+"\u1EAE" => "A"
+
+# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+"\u1EB0" => "A"
+
+# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+"\u1EB2" => "A"
+
+# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+"\u1EB4" => "A"
+
+# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+"\u1EB6" => "A"
+
+# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
+"\u24B6" => "A"
+
+# A [FULLWIDTH LATIN CAPITAL LETTER A]
+"\uFF21" => "A"
+
+# à [LATIN SMALL LETTER A WITH GRAVE]
+"\u00E0" => "a"
+
+# á [LATIN SMALL LETTER A WITH ACUTE]
+"\u00E1" => "a"
+
+# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+"\u00E2" => "a"
+
+# ã [LATIN SMALL LETTER A WITH TILDE]
+"\u00E3" => "a"
+
+# ä [LATIN SMALL LETTER A WITH DIAERESIS]
+"\u00E4" => "a"
+
+# å [LATIN SMALL LETTER A WITH RING ABOVE]
+"\u00E5" => "a"
+
+# ā [LATIN SMALL LETTER A WITH MACRON]
+"\u0101" => "a"
+
+# ă [LATIN SMALL LETTER A WITH BREVE]
+"\u0103" => "a"
+
+# ą [LATIN SMALL LETTER A WITH OGONEK]
+"\u0105" => "a"
+
+# ǎ [LATIN SMALL LETTER A WITH CARON]
+"\u01CE" => "a"
+
+# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+"\u01DF" => "a"
+
+# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+"\u01E1" => "a"
+
+# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+"\u01FB" => "a"
+
+# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+"\u0201" => "a"
+
+# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
+"\u0203" => "a"
+
+# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
+"\u0227" => "a"
+
+# ɐ [LATIN SMALL LETTER TURNED A]
+"\u0250" => "a"
+
+# ə [LATIN SMALL LETTER SCHWA]
+"\u0259" => "a"
+
+# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
+"\u025A" => "a"
+
+# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+"\u1D8F" => "a"
+
+# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+"\u1D95" => "a"
+
+# ḁ [LATIN SMALL LETTER A WITH RING BELOW]
+"\u1E01" => "a"
+
+# ẚ [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+"\u1E9A" => "a"
+
+# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
+"\u1EA1" => "a"
+
+# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
+"\u1EA3" => "a"
+
+# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+"\u1EA5" => "a"
+
+# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+"\u1EA7" => "a"
+
+# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EA9" => "a"
+
+# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+"\u1EAB" => "a"
+
+# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EAD" => "a"
+
+# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+"\u1EAF" => "a"
+
+# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+"\u1EB1" => "a"
+
+# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+"\u1EB3" => "a"
+
+# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+"\u1EB5" => "a"
+
+# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+"\u1EB7" => "a"
+
+# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
+"\u2090" => "a"
+
+# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+"\u2094" => "a"
+
+# ⓐ [CIRCLED LATIN SMALL LETTER A]
+"\u24D0" => "a"
+
+# ⱥ [LATIN SMALL LETTER A WITH STROKE]
+"\u2C65" => "a"
+
+# Ɐ [LATIN CAPITAL LETTER TURNED A]
+"\u2C6F" => "a"
+
+# a [FULLWIDTH LATIN SMALL LETTER A]
+"\uFF41" => "a"
+
+# Ꜳ [LATIN CAPITAL LETTER AA]
+"\uA732" => "AA"
+
+# Æ [LATIN CAPITAL LETTER AE]
+"\u00C6" => "AE"
+
+# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
+"\u01E2" => "AE"
+
+# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
+"\u01FC" => "AE"
+
+# ᴁ [LATIN LETTER SMALL CAPITAL AE]
+"\u1D01" => "AE"
+
+# Ꜵ [LATIN CAPITAL LETTER AO]
+"\uA734" => "AO"
+
+# Ꜷ [LATIN CAPITAL LETTER AU]
+"\uA736" => "AU"
+
+# Ꜹ [LATIN CAPITAL LETTER AV]
+"\uA738" => "AV"
+
+# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+"\uA73A" => "AV"
+
+# Ꜽ [LATIN CAPITAL LETTER AY]
+"\uA73C" => "AY"
+
+# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
+"\u249C" => "(a)"
+
+# ꜳ [LATIN SMALL LETTER AA]
+"\uA733" => "aa"
+
+# æ [LATIN SMALL LETTER AE]
+"\u00E6" => "ae"
+
+# ǣ [LATIN SMALL LETTER AE WITH MACRON]
+"\u01E3" => "ae"
+
+# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
+"\u01FD" => "ae"
+
+# ᴂ [LATIN SMALL LETTER TURNED AE]
+"\u1D02" => "ae"
+
+# ꜵ [LATIN SMALL LETTER AO]
+"\uA735" => "ao"
+
+# ꜷ [LATIN SMALL LETTER AU]
+"\uA737" => "au"
+
+# ꜹ [LATIN SMALL LETTER AV]
+"\uA739" => "av"
+
+# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+"\uA73B" => "av"
+
+# ꜽ [LATIN SMALL LETTER AY]
+"\uA73D" => "ay"
+
+# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
+"\u0181" => "B"
+
+# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
+"\u0182" => "B"
+
+# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
+"\u0243" => "B"
+
+# ʙ [LATIN LETTER SMALL CAPITAL B]
+"\u0299" => "B"
+
+# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
+"\u1D03" => "B"
+
+# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+"\u1E02" => "B"
+
+# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
+"\u1E04" => "B"
+
+# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
+"\u1E06" => "B"
+
+# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
+"\u24B7" => "B"
+
+# B [FULLWIDTH LATIN CAPITAL LETTER B]
+"\uFF22" => "B"
+
+# ƀ [LATIN SMALL LETTER B WITH STROKE]
+"\u0180" => "b"
+
+# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
+"\u0183" => "b"
+
+# ɓ [LATIN SMALL LETTER B WITH HOOK]
+"\u0253" => "b"
+
+# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+"\u1D6C" => "b"
+
+# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
+"\u1D80" => "b"
+
+# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
+"\u1E03" => "b"
+
+# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
+"\u1E05" => "b"
+
+# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
+"\u1E07" => "b"
+
+# ⓑ [CIRCLED LATIN SMALL LETTER B]
+"\u24D1" => "b"
+
+# b [FULLWIDTH LATIN SMALL LETTER B]
+"\uFF42" => "b"
+
+# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
+"\u249D" => "(b)"
+
+# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
+"\u00C7" => "C"
+
+# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
+"\u0106" => "C"
+
+# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+"\u0108" => "C"
+
+# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
+"\u010A" => "C"
+
+# Č [LATIN CAPITAL LETTER C WITH CARON]
+"\u010C" => "C"
+
+# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
+"\u0187" => "C"
+
+# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
+"\u023B" => "C"
+
+# ʗ [LATIN LETTER STRETCHED C]
+"\u0297" => "C"
+
+# ᴄ [LATIN LETTER SMALL CAPITAL C]
+"\u1D04" => "C"
+
+# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+"\u1E08" => "C"
+
+# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
+"\u24B8" => "C"
+
+# C [FULLWIDTH LATIN CAPITAL LETTER C]
+"\uFF23" => "C"
+
+# ç [LATIN SMALL LETTER C WITH CEDILLA]
+"\u00E7" => "c"
+
+# ć [LATIN SMALL LETTER C WITH ACUTE]
+"\u0107" => "c"
+
+# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+"\u0109" => "c"
+
+# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
+"\u010B" => "c"
+
+# č [LATIN SMALL LETTER C WITH CARON]
+"\u010D" => "c"
+
+# ƈ [LATIN SMALL LETTER C WITH HOOK]
+"\u0188" => "c"
+
+# ȼ [LATIN SMALL LETTER C WITH STROKE]
+"\u023C" => "c"
+
+# ɕ [LATIN SMALL LETTER C WITH CURL]
+"\u0255" => "c"
+
+# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+"\u1E09" => "c"
+
+# ↄ [LATIN SMALL LETTER REVERSED C]
+"\u2184" => "c"
+
+# ⓒ [CIRCLED LATIN SMALL LETTER C]
+"\u24D2" => "c"
+
+# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+"\uA73E" => "c"
+
+# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
+"\uA73F" => "c"
+
+# c [FULLWIDTH LATIN SMALL LETTER C]
+"\uFF43" => "c"
+
+# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
+"\u249E" => "(c)"
+
+# Ð [LATIN CAPITAL LETTER ETH]
+"\u00D0" => "D"
+
+# Ď [LATIN CAPITAL LETTER D WITH CARON]
+"\u010E" => "D"
+
+# Đ [LATIN CAPITAL LETTER D WITH STROKE]
+"\u0110" => "D"
+
+# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
+"\u0189" => "D"
+
+# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
+"\u018A" => "D"
+
+# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
+"\u018B" => "D"
+
+# ᴅ [LATIN LETTER SMALL CAPITAL D]
+"\u1D05" => "D"
+
+# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
+"\u1D06" => "D"
+
+# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
+"\u1E0A" => "D"
+
+# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
+"\u1E0C" => "D"
+
+# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
+"\u1E0E" => "D"
+
+# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
+"\u1E10" => "D"
+
+# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+"\u1E12" => "D"
+
+# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
+"\u24B9" => "D"
+
+# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
+"\uA779" => "D"
+
+# D [FULLWIDTH LATIN CAPITAL LETTER D]
+"\uFF24" => "D"
+
+# ð [LATIN SMALL LETTER ETH]
+"\u00F0" => "d"
+
+# ď [LATIN SMALL LETTER D WITH CARON]
+"\u010F" => "d"
+
+# đ [LATIN SMALL LETTER D WITH STROKE]
+"\u0111" => "d"
+
+# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
+"\u018C" => "d"
+
+# ȡ [LATIN SMALL LETTER D WITH CURL]
+"\u0221" => "d"
+
+# ɖ [LATIN SMALL LETTER D WITH TAIL]
+"\u0256" => "d"
+
+# ɗ [LATIN SMALL LETTER D WITH HOOK]
+"\u0257" => "d"
+
+# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+"\u1D6D" => "d"
+
+# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
+"\u1D81" => "d"
+
+# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+"\u1D91" => "d"
+
+# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
+"\u1E0B" => "d"
+
+# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
+"\u1E0D" => "d"
+
+# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
+"\u1E0F" => "d"
+
+# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
+"\u1E11" => "d"
+
+# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+"\u1E13" => "d"
+
+# ⓓ [CIRCLED LATIN SMALL LETTER D]
+"\u24D3" => "d"
+
+# ꝺ [LATIN SMALL LETTER INSULAR D]
+"\uA77A" => "d"
+
+# d [FULLWIDTH LATIN SMALL LETTER D]
+"\uFF44" => "d"
+
+# DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
+"\u01C4" => "DZ"
+
+# DZ [LATIN CAPITAL LETTER DZ]
+"\u01F1" => "DZ"
+
+# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+"\u01C5" => "Dz"
+
+# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+"\u01F2" => "Dz"
+
+# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
+"\u249F" => "(d)"
+
+# ȸ [LATIN SMALL LETTER DB DIGRAPH]
+"\u0238" => "db"
+
+# dž [LATIN SMALL LETTER DZ WITH CARON]
+"\u01C6" => "dz"
+
+# dz [LATIN SMALL LETTER DZ]
+"\u01F3" => "dz"
+
+# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
+"\u02A3" => "dz"
+
+# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+"\u02A5" => "dz"
+
+# È [LATIN CAPITAL LETTER E WITH GRAVE]
+"\u00C8" => "E"
+
+# É [LATIN CAPITAL LETTER E WITH ACUTE]
+"\u00C9" => "E"
+
+# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+"\u00CA" => "E"
+
+# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
+"\u00CB" => "E"
+
+# Ē [LATIN CAPITAL LETTER E WITH MACRON]
+"\u0112" => "E"
+
+# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
+"\u0114" => "E"
+
+# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+"\u0116" => "E"
+
+# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
+"\u0118" => "E"
+
+# Ě [LATIN CAPITAL LETTER E WITH CARON]
+"\u011A" => "E"
+
+# Ǝ [LATIN CAPITAL LETTER REVERSED E]
+"\u018E" => "E"
+
+# Ɛ [LATIN CAPITAL LETTER OPEN E]
+"\u0190" => "E"
+
+# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+"\u0204" => "E"
+
+# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+"\u0206" => "E"
+
+# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
+"\u0228" => "E"
+
+# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
+"\u0246" => "E"
+
+# ᴇ [LATIN LETTER SMALL CAPITAL E]
+"\u1D07" => "E"
+
+# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+"\u1E14" => "E"
+
+# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+"\u1E16" => "E"
+
+# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+"\u1E18" => "E"
+
+# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+"\u1E1A" => "E"
+
+# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+"\u1E1C" => "E"
+
+# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
+"\u1EB8" => "E"
+
+# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+"\u1EBA" => "E"
+
+# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
+"\u1EBC" => "E"
+
+# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+"\u1EBE" => "E"
+
+# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+"\u1EC0" => "E"
+
+# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EC2" => "E"
+
+# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+"\u1EC4" => "E"
+
+# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EC6" => "E"
+
+# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
+"\u24BA" => "E"
+
+# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
+"\u2C7B" => "E"
+
+# E [FULLWIDTH LATIN CAPITAL LETTER E]
+"\uFF25" => "E"
+
+# è [LATIN SMALL LETTER E WITH GRAVE]
+"\u00E8" => "e"
+
+# é [LATIN SMALL LETTER E WITH ACUTE]
+"\u00E9" => "e"
+
+# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+"\u00EA" => "e"
+
+# ë [LATIN SMALL LETTER E WITH DIAERESIS]
+"\u00EB" => "e"
+
+# ē [LATIN SMALL LETTER E WITH MACRON]
+"\u0113" => "e"
+
+# ĕ [LATIN SMALL LETTER E WITH BREVE]
+"\u0115" => "e"
+
+# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
+"\u0117" => "e"
+
+# ę [LATIN SMALL LETTER E WITH OGONEK]
+"\u0119" => "e"
+
+# ě [LATIN SMALL LETTER E WITH CARON]
+"\u011B" => "e"
+
+# ǝ [LATIN SMALL LETTER TURNED E]
+"\u01DD" => "e"
+
+# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+"\u0205" => "e"
+
+# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
+"\u0207" => "e"
+
+# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
+"\u0229" => "e"
+
+# ɇ [LATIN SMALL LETTER E WITH STROKE]
+"\u0247" => "e"
+
+# ɘ [LATIN SMALL LETTER REVERSED E]
+"\u0258" => "e"
+
+# ɛ [LATIN SMALL LETTER OPEN E]
+"\u025B" => "e"
+
+# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
+"\u025C" => "e"
+
+# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+"\u025D" => "e"
+
+# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+"\u025E" => "e"
+
+# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
+"\u029A" => "e"
+
+# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
+"\u1D08" => "e"
+
+# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+"\u1D92" => "e"
+
+# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+"\u1D93" => "e"
+
+# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+"\u1D94" => "e"
+
+# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+"\u1E15" => "e"
+
+# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+"\u1E17" => "e"
+
+# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+"\u1E19" => "e"
+
+# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
+"\u1E1B" => "e"
+
+# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+"\u1E1D" => "e"
+
+# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
+"\u1EB9" => "e"
+
+# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
+"\u1EBB" => "e"
+
+# ẽ [LATIN SMALL LETTER E WITH TILDE]
+"\u1EBD" => "e"
+
+# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+"\u1EBF" => "e"
+
+# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+"\u1EC1" => "e"
+
+# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1EC3" => "e"
+
+# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+"\u1EC5" => "e"
+
+# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+"\u1EC7" => "e"
+
+# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
+"\u2091" => "e"
+
+# ⓔ [CIRCLED LATIN SMALL LETTER E]
+"\u24D4" => "e"
+
+# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
+"\u2C78" => "e"
+
+# ｅ [FULLWIDTH LATIN SMALL LETTER E]
+"\uFF45" => "e"
+
+# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
+"\u24A0" => "(e)"
+
+# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
+"\u0191" => "F"
+
+# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+"\u1E1E" => "F"
+
+# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
+"\u24BB" => "F"
+
+# ꜰ [LATIN LETTER SMALL CAPITAL F]
+"\uA730" => "F"
+
+# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
+"\uA77B" => "F"
+
+# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
+"\uA7FB" => "F"
+
+# Ｆ [FULLWIDTH LATIN CAPITAL LETTER F]
+"\uFF26" => "F"
+
+# ƒ [LATIN SMALL LETTER F WITH HOOK]
+"\u0192" => "f"
+
+# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+"\u1D6E" => "f"
+
+# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
+"\u1D82" => "f"
+
+# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
+"\u1E1F" => "f"
+
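+# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -- NOTE(review): folded to "f" although the other long-s variants in this file fold to "s"; confirm this is intended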
+"\u1E9B" => "f"
+
+# ⓕ [CIRCLED LATIN SMALL LETTER F]
+"\u24D5" => "f"
+
+# ꝼ [LATIN SMALL LETTER INSULAR F]
+"\uA77C" => "f"
+
+# ｆ [FULLWIDTH LATIN SMALL LETTER F]
+"\uFF46" => "f"
+
+# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
+"\u24A1" => "(f)"
+
+# ﬀ [LATIN SMALL LIGATURE FF]
+"\uFB00" => "ff"
+
+# ﬃ [LATIN SMALL LIGATURE FFI]
+"\uFB03" => "ffi"
+
+# ﬄ [LATIN SMALL LIGATURE FFL]
+"\uFB04" => "ffl"
+
+# ﬁ [LATIN SMALL LIGATURE FI]
+"\uFB01" => "fi"
+
+# ﬂ [LATIN SMALL LIGATURE FL]
+"\uFB02" => "fl"
+
+# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+"\u011C" => "G"
+
+# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
+"\u011E" => "G"
+
+# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+"\u0120" => "G"
+
+# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
+"\u0122" => "G"
+
+# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
+"\u0193" => "G"
+
+# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
+"\u01E4" => "G"
+
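+# ǥ [LATIN SMALL LETTER G WITH STROKE] -- NOTE(review): small letter folded to capital "G"; presumably mirrors the upstream folding table, confirm before changing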
+"\u01E5" => "G"
+
+# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
+"\u01E6" => "G"
+
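+# ǧ [LATIN SMALL LETTER G WITH CARON] -- NOTE(review): small letter folded to capital "G"; presumably mirrors the upstream folding table, confirm before changing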
+"\u01E7" => "G"
+
+# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
+"\u01F4" => "G"
+
+# ɢ [LATIN LETTER SMALL CAPITAL G]
+"\u0262" => "G"
+
+# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+"\u029B" => "G"
+
+# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
+"\u1E20" => "G"
+
+# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
+"\u24BC" => "G"
+
+# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
+"\uA77D" => "G"
+
+# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
+"\uA77E" => "G"
+
+# Ｇ [FULLWIDTH LATIN CAPITAL LETTER G]
+"\uFF27" => "G"
+
+# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+"\u011D" => "g"
+
+# ğ [LATIN SMALL LETTER G WITH BREVE]
+"\u011F" => "g"
+
+# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
+"\u0121" => "g"
+
+# ģ [LATIN SMALL LETTER G WITH CEDILLA]
+"\u0123" => "g"
+
+# ǵ [LATIN SMALL LETTER G WITH ACUTE]
+"\u01F5" => "g"
+
+# ɠ [LATIN SMALL LETTER G WITH HOOK]
+"\u0260" => "g"
+
+# ɡ [LATIN SMALL LETTER SCRIPT G]
+"\u0261" => "g"
+
+# ᵷ [LATIN SMALL LETTER TURNED G]
+"\u1D77" => "g"
+
+# ᵹ [LATIN SMALL LETTER INSULAR G]
+"\u1D79" => "g"
+
+# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
+"\u1D83" => "g"
+
+# ḡ [LATIN SMALL LETTER G WITH MACRON]
+"\u1E21" => "g"
+
+# ⓖ [CIRCLED LATIN SMALL LETTER G]
+"\u24D6" => "g"
+
+# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
+"\uA77F" => "g"
+
+# ｇ [FULLWIDTH LATIN SMALL LETTER G]
+"\uFF47" => "g"
+
+# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
+"\u24A2" => "(g)"
+
+# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+"\u0124" => "H"
+
+# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
+"\u0126" => "H"
+
+# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
+"\u021E" => "H"
+
+# ʜ [LATIN LETTER SMALL CAPITAL H]
+"\u029C" => "H"
+
+# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+"\u1E22" => "H"
+
+# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
+"\u1E24" => "H"
+
+# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
+"\u1E26" => "H"
+
+# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
+"\u1E28" => "H"
+
+# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+"\u1E2A" => "H"
+
+# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
+"\u24BD" => "H"
+
+# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
+"\u2C67" => "H"
+
+# Ⱶ [LATIN CAPITAL LETTER HALF H]
+"\u2C75" => "H"
+
+# Ｈ [FULLWIDTH LATIN CAPITAL LETTER H]
+"\uFF28" => "H"
+
+# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+"\u0125" => "h"
+
+# ħ [LATIN SMALL LETTER H WITH STROKE]
+"\u0127" => "h"
+
+# ȟ [LATIN SMALL LETTER H WITH CARON]
+"\u021F" => "h"
+
+# ɥ [LATIN SMALL LETTER TURNED H]
+"\u0265" => "h"
+
+# ɦ [LATIN SMALL LETTER H WITH HOOK]
+"\u0266" => "h"
+
+# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+"\u02AE" => "h"
+
+# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+"\u02AF" => "h"
+
+# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
+"\u1E23" => "h"
+
+# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
+"\u1E25" => "h"
+
+# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
+"\u1E27" => "h"
+
+# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
+"\u1E29" => "h"
+
+# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
+"\u1E2B" => "h"
+
+# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
+"\u1E96" => "h"
+
+# ⓗ [CIRCLED LATIN SMALL LETTER H]
+"\u24D7" => "h"
+
+# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
+"\u2C68" => "h"
+
+# ⱶ [LATIN SMALL LETTER HALF H]
+"\u2C76" => "h"
+
+# ｈ [FULLWIDTH LATIN SMALL LETTER H]
+"\uFF48" => "h"
+
+# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
+"\u01F6" => "HV"
+
+# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
+"\u24A3" => "(h)"
+
+# ƕ [LATIN SMALL LETTER HV]
+"\u0195" => "hv"
+
+# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
+"\u00CC" => "I"
+
+# Í [LATIN CAPITAL LETTER I WITH ACUTE]
+"\u00CD" => "I"
+
+# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+"\u00CE" => "I"
+
+# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
+"\u00CF" => "I"
+
+# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
+"\u0128" => "I"
+
+# Ī [LATIN CAPITAL LETTER I WITH MACRON]
+"\u012A" => "I"
+
+# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
+"\u012C" => "I"
+
+# Į [LATIN CAPITAL LETTER I WITH OGONEK]
+"\u012E" => "I"
+
+# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+"\u0130" => "I"
+
+# Ɩ [LATIN CAPITAL LETTER IOTA]
+"\u0196" => "I"
+
+# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
+"\u0197" => "I"
+
+# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
+"\u01CF" => "I"
+
+# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+"\u0208" => "I"
+
+# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+"\u020A" => "I"
+
+# ɪ [LATIN LETTER SMALL CAPITAL I]
+"\u026A" => "I"
+
+# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+"\u1D7B" => "I"
+
+# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+"\u1E2C" => "I"
+
+# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+"\u1E2E" => "I"
+
+# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+"\u1EC8" => "I"
+
+# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
+"\u1ECA" => "I"
+
+# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
+"\u24BE" => "I"
+
+# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
+"\uA7FE" => "I"
+
+# Ｉ [FULLWIDTH LATIN CAPITAL LETTER I]
+"\uFF29" => "I"
+
+# ì [LATIN SMALL LETTER I WITH GRAVE]
+"\u00EC" => "i"
+
+# í [LATIN SMALL LETTER I WITH ACUTE]
+"\u00ED" => "i"
+
+# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+"\u00EE" => "i"
+
+# ï [LATIN SMALL LETTER I WITH DIAERESIS]
+"\u00EF" => "i"
+
+# ĩ [LATIN SMALL LETTER I WITH TILDE]
+"\u0129" => "i"
+
+# ī [LATIN SMALL LETTER I WITH MACRON]
+"\u012B" => "i"
+
+# ĭ [LATIN SMALL LETTER I WITH BREVE]
+"\u012D" => "i"
+
+# į [LATIN SMALL LETTER I WITH OGONEK]
+"\u012F" => "i"
+
+# ı [LATIN SMALL LETTER DOTLESS I]
+"\u0131" => "i"
+
+# ǐ [LATIN SMALL LETTER I WITH CARON]
+"\u01D0" => "i"
+
+# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+"\u0209" => "i"
+
+# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
+"\u020B" => "i"
+
+# ɨ [LATIN SMALL LETTER I WITH STROKE]
+"\u0268" => "i"
+
+# ᴉ [LATIN SMALL LETTER TURNED I]
+"\u1D09" => "i"
+
+# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
+"\u1D62" => "i"
+
+# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
+"\u1D7C" => "i"
+
+# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+"\u1D96" => "i"
+
+# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
+"\u1E2D" => "i"
+
+# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+"\u1E2F" => "i"
+
+# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
+"\u1EC9" => "i"
+
+# ị [LATIN SMALL LETTER I WITH DOT BELOW]
+"\u1ECB" => "i"
+
+# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
+"\u2071" => "i"
+
+# ⓘ [CIRCLED LATIN SMALL LETTER I]
+"\u24D8" => "i"
+
+# ｉ [FULLWIDTH LATIN SMALL LETTER I]
+"\uFF49" => "i"
+
+# Ĳ [LATIN CAPITAL LIGATURE IJ]
+"\u0132" => "IJ"
+
+# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
+"\u24A4" => "(i)"
+
+# ĳ [LATIN SMALL LIGATURE IJ]
+"\u0133" => "ij"
+
+# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+"\u0134" => "J"
+
+# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
+"\u0248" => "J"
+
+# ᴊ [LATIN LETTER SMALL CAPITAL J]
+"\u1D0A" => "J"
+
+# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
+"\u24BF" => "J"
+
+# Ｊ [FULLWIDTH LATIN CAPITAL LETTER J]
+"\uFF2A" => "J"
+
+# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+"\u0135" => "j"
+
+# ǰ [LATIN SMALL LETTER J WITH CARON]
+"\u01F0" => "j"
+
+# ȷ [LATIN SMALL LETTER DOTLESS J]
+"\u0237" => "j"
+
+# ɉ [LATIN SMALL LETTER J WITH STROKE]
+"\u0249" => "j"
+
+# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+"\u025F" => "j"
+
+# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+"\u0284" => "j"
+
+# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+"\u029D" => "j"
+
+# ⓙ [CIRCLED LATIN SMALL LETTER J]
+"\u24D9" => "j"
+
+# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
+"\u2C7C" => "j"
+
+# ｊ [FULLWIDTH LATIN SMALL LETTER J]
+"\uFF4A" => "j"
+
+# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
+"\u24A5" => "(j)"
+
+# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
+"\u0136" => "K"
+
+# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
+"\u0198" => "K"
+
+# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
+"\u01E8" => "K"
+
+# ᴋ [LATIN LETTER SMALL CAPITAL K]
+"\u1D0B" => "K"
+
+# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
+"\u1E30" => "K"
+
+# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
+"\u1E32" => "K"
+
+# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
+"\u1E34" => "K"
+
+# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
+"\u24C0" => "K"
+
+# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
+"\u2C69" => "K"
+
+# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
+"\uA740" => "K"
+
+# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+"\uA742" => "K"
+
+# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+"\uA744" => "K"
+
+# Ｋ [FULLWIDTH LATIN CAPITAL LETTER K]
+"\uFF2B" => "K"
+
+# ķ [LATIN SMALL LETTER K WITH CEDILLA]
+"\u0137" => "k"
+
+# ƙ [LATIN SMALL LETTER K WITH HOOK]
+"\u0199" => "k"
+
+# ǩ [LATIN SMALL LETTER K WITH CARON]
+"\u01E9" => "k"
+
+# ʞ [LATIN SMALL LETTER TURNED K]
+"\u029E" => "k"
+
+# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
+"\u1D84" => "k"
+
+# ḱ [LATIN SMALL LETTER K WITH ACUTE]
+"\u1E31" => "k"
+
+# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
+"\u1E33" => "k"
+
+# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
+"\u1E35" => "k"
+
+# ⓚ [CIRCLED LATIN SMALL LETTER K]
+"\u24DA" => "k"
+
+# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
+"\u2C6A" => "k"
+
+# ꝁ [LATIN SMALL LETTER K WITH STROKE]
+"\uA741" => "k"
+
+# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+"\uA743" => "k"
+
+# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+"\uA745" => "k"
+
+# ｋ [FULLWIDTH LATIN SMALL LETTER K]
+"\uFF4B" => "k"
+
+# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
+"\u24A6" => "(k)"
+
+# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
+"\u0139" => "L"
+
+# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
+"\u013B" => "L"
+
+# Ľ [LATIN CAPITAL LETTER L WITH CARON]
+"\u013D" => "L"
+
+# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+"\u013F" => "L"
+
+# Ł [LATIN CAPITAL LETTER L WITH STROKE]
+"\u0141" => "L"
+
+# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
+"\u023D" => "L"
+
+# ʟ [LATIN LETTER SMALL CAPITAL L]
+"\u029F" => "L"
+
+# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+"\u1D0C" => "L"
+
+# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
+"\u1E36" => "L"
+
+# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+"\u1E38" => "L"
+
+# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
+"\u1E3A" => "L"
+
+# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+"\u1E3C" => "L"
+
+# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
+"\u24C1" => "L"
+
+# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+"\u2C60" => "L"
+
+# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+"\u2C62" => "L"
+
+# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
+"\uA746" => "L"
+
+# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+"\uA748" => "L"
+
+# Ꞁ [LATIN CAPITAL LETTER TURNED L]
+"\uA780" => "L"
+
+# Ｌ [FULLWIDTH LATIN CAPITAL LETTER L]
+"\uFF2C" => "L"
+
+# ĺ [LATIN SMALL LETTER L WITH ACUTE]
+"\u013A" => "l"
+
+# ļ [LATIN SMALL LETTER L WITH CEDILLA]
+"\u013C" => "l"
+
+# ľ [LATIN SMALL LETTER L WITH CARON]
+"\u013E" => "l"
+
+# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
+"\u0140" => "l"
+
+# ł [LATIN SMALL LETTER L WITH STROKE]
+"\u0142" => "l"
+
+# ƚ [LATIN SMALL LETTER L WITH BAR]
+"\u019A" => "l"
+
+# ȴ [LATIN SMALL LETTER L WITH CURL]
+"\u0234" => "l"
+
+# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+"\u026B" => "l"
+
+# ɬ [LATIN SMALL LETTER L WITH BELT]
+"\u026C" => "l"
+
+# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+"\u026D" => "l"
+
+# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
+"\u1D85" => "l"
+
+# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
+"\u1E37" => "l"
+
+# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+"\u1E39" => "l"
+
+# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
+"\u1E3B" => "l"
+
+# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+"\u1E3D" => "l"
+
+# ⓛ [CIRCLED LATIN SMALL LETTER L]
+"\u24DB" => "l"
+
+# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
+"\u2C61" => "l"
+
+# ꝇ [LATIN SMALL LETTER BROKEN L]
+"\uA747" => "l"
+
+# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
+"\uA749" => "l"
+
+# ꞁ [LATIN SMALL LETTER TURNED L]
+"\uA781" => "l"
+
+# ｌ [FULLWIDTH LATIN SMALL LETTER L]
+"\uFF4C" => "l"
+
+# Ǉ [LATIN CAPITAL LETTER LJ]
+"\u01C7" => "LJ"
+
+# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+"\u1EFA" => "LL"
+
+# ǈ [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+"\u01C8" => "Lj"
+
+# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
+"\u24A7" => "(l)"
+
+# ǉ [LATIN SMALL LETTER LJ]
+"\u01C9" => "lj"
+
+# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
+"\u1EFB" => "ll"
+
+# ʪ [LATIN SMALL LETTER LS DIGRAPH]
+"\u02AA" => "ls"
+
+# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
+"\u02AB" => "lz"
+
+# Ɯ [LATIN CAPITAL LETTER TURNED M]
+"\u019C" => "M"
+
+# ᴍ [LATIN LETTER SMALL CAPITAL M]
+"\u1D0D" => "M"
+
+# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
+"\u1E3E" => "M"
+
+# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+"\u1E40" => "M"
+
+# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
+"\u1E42" => "M"
+
+# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
+"\u24C2" => "M"
+
+# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
+"\u2C6E" => "M"
+
+# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
+"\uA7FD" => "M"
+
+# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+"\uA7FF" => "M"
+
+# Ｍ [FULLWIDTH LATIN CAPITAL LETTER M]
+"\uFF2D" => "M"
+
+# ɯ [LATIN SMALL LETTER TURNED M]
+"\u026F" => "m"
+
+# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+"\u0270" => "m"
+
+# ɱ [LATIN SMALL LETTER M WITH HOOK]
+"\u0271" => "m"
+
+# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+"\u1D6F" => "m"
+
+# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
+"\u1D86" => "m"
+
+# ḿ [LATIN SMALL LETTER M WITH ACUTE]
+"\u1E3F" => "m"
+
+# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
+"\u1E41" => "m"
+
+# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
+"\u1E43" => "m"
+
+# ⓜ [CIRCLED LATIN SMALL LETTER M]
+"\u24DC" => "m"
+
+# ｍ [FULLWIDTH LATIN SMALL LETTER M]
+"\uFF4D" => "m"
+
+# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
+"\u24A8" => "(m)"
+
+# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
+"\u00D1" => "N"
+
+# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
+"\u0143" => "N"
+
+# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
+"\u0145" => "N"
+
+# Ň [LATIN CAPITAL LETTER N WITH CARON]
+"\u0147" => "N"
+
+# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
+"\u014A" => "N"
+
+# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
+"\u019D" => "N"
+
+# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
+"\u01F8" => "N"
+
+# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
+"\u0220" => "N"
+
+# ɴ [LATIN LETTER SMALL CAPITAL N]
+"\u0274" => "N"
+
+# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
+"\u1D0E" => "N"
+
+# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
+"\u1E44" => "N"
+
+# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
+"\u1E46" => "N"
+
+# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
+"\u1E48" => "N"
+
+# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
+"\u1E4A" => "N"
+
+# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
+"\u24C3" => "N"
+
+# Ｎ [FULLWIDTH LATIN CAPITAL LETTER N]
+"\uFF2E" => "N"
+
+# ñ [LATIN SMALL LETTER N WITH TILDE]
+"\u00F1" => "n"
+
+# ń [LATIN SMALL LETTER N WITH ACUTE]
+"\u0144" => "n"
+
+# ņ [LATIN SMALL LETTER N WITH CEDILLA]
+"\u0146" => "n"
+
+# ň [LATIN SMALL LETTER N WITH CARON]
+"\u0148" => "n"
+
+# ŉ [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
+"\u0149" => "n"
+
+# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
+"\u014B" => "n"
+
+# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
+"\u019E" => "n"
+
+# ǹ [LATIN SMALL LETTER N WITH GRAVE]
+"\u01F9" => "n"
+
+# ȵ [LATIN SMALL LETTER N WITH CURL]
+"\u0235" => "n"
+
+# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
+"\u0272" => "n"
+
+# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
+"\u0273" => "n"
+
+# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
+"\u1D70" => "n"
+
+# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
+"\u1D87" => "n"
+
+# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
+"\u1E45" => "n"
+
+# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
+"\u1E47" => "n"
+
+# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
+"\u1E49" => "n"
+
+# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
+"\u1E4B" => "n"
+
+# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
+"\u207F" => "n"
+
+# ⓝ [CIRCLED LATIN SMALL LETTER N]
+"\u24DD" => "n"
+
+# ｎ [FULLWIDTH LATIN SMALL LETTER N]
+"\uFF4E" => "n"
+
+# Ǌ [LATIN CAPITAL LETTER NJ]
+"\u01CA" => "NJ"
+
+# ǋ [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
+"\u01CB" => "Nj"
+
+# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
+"\u24A9" => "(n)"
+
+# ǌ [LATIN SMALL LETTER NJ]
+"\u01CC" => "nj"
+
+# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
+"\u00D2" => "O"
+
+# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
+"\u00D3" => "O"
+
+# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
+"\u00D4" => "O"
+
+# Õ [LATIN CAPITAL LETTER O WITH TILDE]
+"\u00D5" => "O"
+
+# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
+"\u00D6" => "O"
+
+# Ø [LATIN CAPITAL LETTER O WITH STROKE]
+"\u00D8" => "O"
+
+# Ō [LATIN CAPITAL LETTER O WITH MACRON]
+"\u014C" => "O"
+
+# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
+"\u014E" => "O"
+
+# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
+"\u0150" => "O"
+
+# Ɔ [LATIN CAPITAL LETTER OPEN O]
+"\u0186" => "O"
+
+# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
+"\u019F" => "O"
+
+# Ơ [LATIN CAPITAL LETTER O WITH HORN]
+"\u01A0" => "O"
+
+# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
+"\u01D1" => "O"
+
+# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
+"\u01EA" => "O"
+
+# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
+"\u01EC" => "O"
+
+# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
+"\u01FE" => "O"
+
+# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
+"\u020C" => "O"
+
+# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
+"\u020E" => "O"
+
+# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
+"\u022A" => "O"
+
+# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
+"\u022C" => "O"
+
+# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
+"\u022E" => "O"
+
+# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
+"\u0230" => "O"
+
+# ᴏ [LATIN LETTER SMALL CAPITAL O]
+"\u1D0F" => "O"
+
+# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
+"\u1D10" => "O"
+
+# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
+"\u1E4C" => "O"
+
+# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
+"\u1E4E" => "O"
+
+# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
+"\u1E50" => "O"
+
+# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
+"\u1E52" => "O"
+
+# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
+"\u1ECC" => "O"
+
+# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
+"\u1ECE" => "O"
+
+# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
+"\u1ED0" => "O"
+
+# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
+"\u1ED2" => "O"
+
+# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1ED4" => "O"
+
+# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
+"\u1ED6" => "O"
+
+# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+"\u1ED8" => "O"
+
+# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
+"\u1EDA" => "O"
+
+# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
+"\u1EDC" => "O"
+
+# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
+"\u1EDE" => "O"
+
+# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
+"\u1EE0" => "O"
+
+# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
+"\u1EE2" => "O"
+
+# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
+"\u24C4" => "O"
+
+# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
+"\uA74A" => "O"
+
+# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
+"\uA74C" => "O"
+
+# Ｏ [FULLWIDTH LATIN CAPITAL LETTER O]
+"\uFF2F" => "O"
+
+# ò [LATIN SMALL LETTER O WITH GRAVE]
+"\u00F2" => "o"
+
+# ó [LATIN SMALL LETTER O WITH ACUTE]
+"\u00F3" => "o"
+
+# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
+"\u00F4" => "o"
+
+# õ [LATIN SMALL LETTER O WITH TILDE]
+"\u00F5" => "o"
+
+# ö [LATIN SMALL LETTER O WITH DIAERESIS]
+"\u00F6" => "o"
+
+# ø [LATIN SMALL LETTER O WITH STROKE]
+"\u00F8" => "o"
+
+# ō [LATIN SMALL LETTER O WITH MACRON]
+"\u014D" => "o"
+
+# ŏ [LATIN SMALL LETTER O WITH BREVE]
+"\u014F" => "o"
+
+# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
+"\u0151" => "o"
+
+# ơ [LATIN SMALL LETTER O WITH HORN]
+"\u01A1" => "o"
+
+# ǒ [LATIN SMALL LETTER O WITH CARON]
+"\u01D2" => "o"
+
+# ǫ [LATIN SMALL LETTER O WITH OGONEK]
+"\u01EB" => "o"
+
+# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
+"\u01ED" => "o"
+
+# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
+"\u01FF" => "o"
+
+# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
+"\u020D" => "o"
+
+# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
+"\u020F" => "o"
+
+# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
+"\u022B" => "o"
+
+# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
+"\u022D" => "o"
+
+# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
+"\u022F" => "o"
+
+# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
+"\u0231" => "o"
+
+# ɔ [LATIN SMALL LETTER OPEN O]
+"\u0254" => "o"
+
+# ɵ [LATIN SMALL LETTER BARRED O]
+"\u0275" => "o"
+
+# ᴖ [LATIN SMALL LETTER TOP HALF O]
+"\u1D16" => "o"
+
+# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
+"\u1D17" => "o"
+
+# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
+"\u1D97" => "o"
+
+# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
+"\u1E4D" => "o"
+
+# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
+"\u1E4F" => "o"
+
+# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
+"\u1E51" => "o"
+
+# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
+"\u1E53" => "o"
+
+# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
+"\u1ECD" => "o"
+
+# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
+"\u1ECF" => "o"
+
+# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
+"\u1ED1" => "o"
+
+# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
+"\u1ED3" => "o"
+
+# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+"\u1ED5" => "o"
+
+# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
+"\u1ED7" => "o"
+
+# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+"\u1ED9" => "o"
+
+# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
+"\u1EDB" => "o"
+
+# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
+"\u1EDD" => "o"
+
+# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
+"\u1EDF" => "o"
+
+# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
+"\u1EE1" => "o"
+
+# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
+"\u1EE3" => "o"
+
+# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
+"\u2092" => "o"
+
+# ⓞ [CIRCLED LATIN SMALL LETTER O]
+"\u24DE" => "o"
+
+# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
+"\u2C7A" => "o"
+
+# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
+"\uA74B" => "o"
+
+# ꝍ [LATIN SMALL LETTER O WITH LOOP]
+"\uA74D" => "o"
+
+# ｏ [FULLWIDTH LATIN SMALL LETTER O]
+"\uFF4F" => "o"
+
+# Œ [LATIN CAPITAL LIGATURE OE]
+"\u0152" => "OE"
+
+# ɶ [LATIN LETTER SMALL CAPITAL OE]
+"\u0276" => "OE"
+
+# Ꝏ [LATIN CAPITAL LETTER OO]
+"\uA74E" => "OO"
+
+# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
+"\u0222" => "OU"
+
+# ᴕ [LATIN LETTER SMALL CAPITAL OU]
+"\u1D15" => "OU"
+
+# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
+"\u24AA" => "(o)"
+
+# œ [LATIN SMALL LIGATURE OE]
+"\u0153" => "oe"
+
+# ᴔ [LATIN SMALL LETTER TURNED OE]
+"\u1D14" => "oe"
+
+# ꝏ [LATIN SMALL LETTER OO]
+"\uA74F" => "oo"
+
+# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
+"\u0223" => "ou"
+
+# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
+"\u01A4" => "P"
+
+# ᴘ [LATIN LETTER SMALL CAPITAL P]
+"\u1D18" => "P"
+
+# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
+"\u1E54" => "P"
+
+# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
+"\u1E56" => "P"
+
+# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
+"\u24C5" => "P"
+
+# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
+"\u2C63" => "P"
+
+# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
+"\uA750" => "P"
+
+# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
+"\uA752" => "P"
+
+# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
+"\uA754" => "P"
+
+# Ｐ [FULLWIDTH LATIN CAPITAL LETTER P]
+"\uFF30" => "P"
+
+# ƥ [LATIN SMALL LETTER P WITH HOOK]
+"\u01A5" => "p"
+
+# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
+"\u1D71" => "p"
+
+# ᵽ [LATIN SMALL LETTER P WITH STROKE]
+"\u1D7D" => "p"
+
+# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
+"\u1D88" => "p"
+
+# ṕ [LATIN SMALL LETTER P WITH ACUTE]
+"\u1E55" => "p"
+
+# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
+"\u1E57" => "p"
+
+# ⓟ [CIRCLED LATIN SMALL LETTER P]
+"\u24DF" => "p"
+
+# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
+"\uA751" => "p"
+
+# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
+"\uA753" => "p"
+
+# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
+"\uA755" => "p"
+
+# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
+"\uA7FC" => "p"
+
+# ｐ [FULLWIDTH LATIN SMALL LETTER P]
+"\uFF50" => "p"
+
+# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
+"\u24AB" => "(p)"
+
+# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
+"\u024A" => "Q"
+
+# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
+"\u24C6" => "Q"
+
+# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
+"\uA756" => "Q"
+
+# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
+"\uA758" => "Q"
+
+# Ｑ [FULLWIDTH LATIN CAPITAL LETTER Q]
+"\uFF31" => "Q"
+
+# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
+"\u0138" => "q"
+
+# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
+"\u024B" => "q"
+
+# ʠ [LATIN SMALL LETTER Q WITH HOOK]
+"\u02A0" => "q"
+
+# ⓠ [CIRCLED LATIN SMALL LETTER Q]
+"\u24E0" => "q"
+
+# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
+"\uA757" => "q"
+
+# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
+"\uA759" => "q"
+
+# ｑ [FULLWIDTH LATIN SMALL LETTER Q]
+"\uFF51" => "q"
+
+# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
+"\u24AC" => "(q)"
+
+# ȹ [LATIN SMALL LETTER QP DIGRAPH]
+"\u0239" => "qp"
+
+# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
+"\u0154" => "R"
+
+# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
+"\u0156" => "R"
+
+# Ř [LATIN CAPITAL LETTER R WITH CARON]
+"\u0158" => "R"
+
+# Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
+"\u0210" => "R"
+
+# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
+"\u0212" => "R"
+
+# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
+"\u024C" => "R"
+
+# ʀ [LATIN LETTER SMALL CAPITAL R]
+"\u0280" => "R"
+
+# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
+"\u0281" => "R"
+
+# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
+"\u1D19" => "R"
+
+# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
+"\u1D1A" => "R"
+
+# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
+"\u1E58" => "R"
+
+# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
+"\u1E5A" => "R"
+
+# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
+"\u1E5C" => "R"
+
+# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
+"\u1E5E" => "R"
+
+# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
+"\u24C7" => "R"
+
+# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
+"\u2C64" => "R"
+
+# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
+"\uA75A" => "R"
+
+# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
+"\uA782" => "R"
+
+# Ｒ [FULLWIDTH LATIN CAPITAL LETTER R]
+"\uFF32" => "R"
+
+# ŕ [LATIN SMALL LETTER R WITH ACUTE]
+"\u0155" => "r"
+
+# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
+"\u0157" => "r"
+
+# ř [LATIN SMALL LETTER R WITH CARON]
+"\u0159" => "r"
+
+# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
+"\u0211" => "r"
+
+# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
+"\u0213" => "r"
+
+# ɍ [LATIN SMALL LETTER R WITH STROKE]
+"\u024D" => "r"
+
+# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
+"\u027C" => "r"
+
+# ɽ [LATIN SMALL LETTER R WITH TAIL]
+"\u027D" => "r"
+
+# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
+"\u027E" => "r"
+
+# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
+"\u027F" => "r"
+
+# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
+"\u1D63" => "r"
+
+# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
+"\u1D72" => "r"
+
+# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
+"\u1D73" => "r"
+
+# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
+"\u1D89" => "r"
+
+# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
+"\u1E59" => "r"
+
+# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
+"\u1E5B" => "r"
+
+# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
+"\u1E5D" => "r"
+
+# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
+"\u1E5F" => "r"
+
+# ⓡ [CIRCLED LATIN SMALL LETTER R]
+"\u24E1" => "r"
+
+# ꝛ [LATIN SMALL LETTER R ROTUNDA]
+"\uA75B" => "r"
+
+# ꞃ [LATIN SMALL LETTER INSULAR R]
+"\uA783" => "r"
+
+# ｒ [FULLWIDTH LATIN SMALL LETTER R]
+"\uFF52" => "r"
+
+# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
+"\u24AD" => "(r)"
+
+# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
+"\u015A" => "S"
+
+# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
+"\u015C" => "S"
+
+# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
+"\u015E" => "S"
+
+# Š [LATIN CAPITAL LETTER S WITH CARON]
+"\u0160" => "S"
+
+# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
+"\u0218" => "S"
+
+# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
+"\u1E60" => "S"
+
+# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
+"\u1E62" => "S"
+
+# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
+"\u1E64" => "S"
+
+# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
+"\u1E66" => "S"
+
+# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
+"\u1E68" => "S"
+
+# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
+"\u24C8" => "S"
+
+# ꜱ [LATIN LETTER SMALL CAPITAL S]
+"\uA731" => "S"
+
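+# ꞅ [LATIN SMALL LETTER INSULAR S] -- NOTE(review): small letter folded to capital "S" (and U+A784, the capital form, folds to "s"); case looks swapped, confirm before changing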
+"\uA785" => "S"
+
+# Ｓ [FULLWIDTH LATIN CAPITAL LETTER S]
+"\uFF33" => "S"
+
+# ś [LATIN SMALL LETTER S WITH ACUTE]
+"\u015B" => "s"
+
+# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
+"\u015D" => "s"
+
+# ş [LATIN SMALL LETTER S WITH CEDILLA]
+"\u015F" => "s"
+
+# š [LATIN SMALL LETTER S WITH CARON]
+"\u0161" => "s"
+
+# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
+"\u017F" => "s"
+
+# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
+"\u0219" => "s"
+
+# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
+"\u023F" => "s"
+
+# ʂ [LATIN SMALL LETTER S WITH HOOK]
+"\u0282" => "s"
+
+# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
+"\u1D74" => "s"
+
+# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
+"\u1D8A" => "s"
+
+# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
+"\u1E61" => "s"
+
+# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
+"\u1E63" => "s"
+
+# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
+"\u1E65" => "s"
+
+# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
+"\u1E67" => "s"
+
+# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
+"\u1E69" => "s"
+
+# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
+"\u1E9C" => "s"
+
+# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
+"\u1E9D" => "s"
+
+# ⓢ [CIRCLED LATIN SMALL LETTER S]
+"\u24E2" => "s"
+
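+# Ꞅ [LATIN CAPITAL LETTER INSULAR S] -- NOTE(review): capital letter folded to small "s"; case looks inverted, confirm against ASCIIFoldingFilter.java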
+"\uA784" => "s"
+
+# ｓ [FULLWIDTH LATIN SMALL LETTER S]
+"\uFF53" => "s"
+
+# ẞ [LATIN CAPITAL LETTER SHARP S]
+"\u1E9E" => "SS"
+
+# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
+"\u24AE" => "(s)"
+
+# ß [LATIN SMALL LETTER SHARP S]
+"\u00DF" => "ss"
+
+# ﬆ [LATIN SMALL LIGATURE ST]
+"\uFB06" => "st"
+
+# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
+"\u0162" => "T"
+
+# Ť [LATIN CAPITAL LETTER T WITH CARON]
+"\u0164" => "T"
+
+# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
+"\u0166" => "T"
+
+# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
+"\u01AC" => "T"
+
+# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
+"\u01AE" => "T"
+
+# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
+"\u021A" => "T"
+
+# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
+"\u023E" => "T"
+
+# ᴛ [LATIN LETTER SMALL CAPITAL T]
+"\u1D1B" => "T"
+
+# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
+"\u1E6A" => "T"
+
+# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
+"\u1E6C" => "T"
+
+# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
+"\u1E6E" => "T"
+
+# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
+"\u1E70" => "T"
+
+# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
+"\u24C9" => "T"
+
+# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
+"\uA786" => "T"
+
+# Ｔ [FULLWIDTH LATIN CAPITAL LETTER T]
+"\uFF34" => "T"
+
+# ţ [LATIN SMALL LETTER T WITH CEDILLA]
+"\u0163" => "t"
+
+# ť [LATIN SMALL LETTER T WITH CARON]
+"\u0165" => "t"
+
+# ŧ [LATIN SMALL LETTER T WITH STROKE]
+"\u0167" => "t"
+
+# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
+"\u01AB" => "t"
+
+# ƭ [LATIN SMALL LETTER T WITH HOOK]
+"\u01AD" => "t"
+
+# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
+"\u021B" => "t"
+
+# ȶ [LATIN SMALL LETTER T WITH CURL]
+"\u0236" => "t"
+
+# ʇ [LATIN SMALL LETTER TURNED T]
+"\u0287" => "t"
+
+# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
+"\u0288" => "t"
+
+# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
+"\u1D75" => "t"
+
+# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
+"\u1E6B" => "t"
+
+# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
+"\u1E6D" => "t"
+
+# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
+"\u1E6F" => "t"
+
+# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
+"\u1E71" => "t"
+
+# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
+"\u1E97" => "t"
+
+# ⓣ [CIRCLED LATIN SMALL LETTER T]
+"\u24E3" => "t"
+
+# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
+"\u2C66" => "t"
+
+# ｔ [FULLWIDTH LATIN SMALL LETTER T]
+"\uFF54" => "t"
+
+# Þ [LATIN CAPITAL LETTER THORN]
+"\u00DE" => "TH"
+
+# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
+"\uA766" => "TH"
+
+# Ꜩ [LATIN CAPITAL LETTER TZ]
+"\uA728" => "TZ"
+
+# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
+"\u24AF" => "(t)"
+
+# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
+"\u02A8" => "tc"
+
+# þ [LATIN SMALL LETTER THORN]
+"\u00FE" => "th"
+
+# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
+"\u1D7A" => "th"
+
+# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
+"\uA767" => "th"
+
+# ʦ [LATIN SMALL LETTER TS DIGRAPH]
+"\u02A6" => "ts"
+
+# ꜩ [LATIN SMALL LETTER TZ]
+"\uA729" => "tz"
+
+# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
+"\u00D9" => "U"
+
+# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
+"\u00DA" => "U"
+
+# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
+"\u00DB" => "U"
+
+# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
+"\u00DC" => "U"
+
+# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
+"\u0168" => "U"
+
+# Ū [LATIN CAPITAL LETTER U WITH MACRON]
+"\u016A" => "U"
+
+# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
+"\u016C" => "U"
+
+# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
+"\u016E" => "U"
+
+# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
+"\u0170" => "U"
+
+# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
+"\u0172" => "U"
+
+# Ư [LATIN CAPITAL LETTER U WITH HORN]
+"\u01AF" => "U"
+
+# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
+"\u01D3" => "U"
+
+# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
+"\u01D5" => "U"
+
+# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
+"\u01D7" => "U"
+
+# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
+"\u01D9" => "U"
+
+# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
+"\u01DB" => "U"
+
+# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
+"\u0214" => "U"
+
+# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
+"\u0216" => "U"
+
+# Ʉ [LATIN CAPITAL LETTER U BAR]
+"\u0244" => "U"
+
+# ᴜ [LATIN LETTER SMALL CAPITAL U]
+"\u1D1C" => "U"
+
+# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
+"\u1D7E" => "U"
+
+# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
+"\u1E72" => "U"
+
+# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
+"\u1E74" => "U"
+
+# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
+"\u1E76" => "U"
+
+# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
+"\u1E78" => "U"
+
+# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
+"\u1E7A" => "U"
+
+# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
+"\u1EE4" => "U"
+
+# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
+"\u1EE6" => "U"
+
+# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
+"\u1EE8" => "U"
+
+# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
+"\u1EEA" => "U"
+
+# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
+"\u1EEC" => "U"
+
+# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
+"\u1EEE" => "U"
+
+# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
+"\u1EF0" => "U"
+
+# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
+"\u24CA" => "U"
+
+# Ｕ [FULLWIDTH LATIN CAPITAL LETTER U]
+"\uFF35" => "U"
+
+# ù [LATIN SMALL LETTER U WITH GRAVE]
+"\u00F9" => "u"
+
+# ú [LATIN SMALL LETTER U WITH ACUTE]
+"\u00FA" => "u"
+
+# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
+"\u00FB" => "u"
+
+# ü [LATIN SMALL LETTER U WITH DIAERESIS]
+"\u00FC" => "u"
+
+# ũ [LATIN SMALL LETTER U WITH TILDE]
+"\u0169" => "u"
+
+# ū [LATIN SMALL LETTER U WITH MACRON]
+"\u016B" => "u"
+
+# ŭ [LATIN SMALL LETTER U WITH BREVE]
+"\u016D" => "u"
+
+# ů [LATIN SMALL LETTER U WITH RING ABOVE]
+"\u016F" => "u"
+
+# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
+"\u0171" => "u"
+
+# ų [LATIN SMALL LETTER U WITH OGONEK]
+"\u0173" => "u"
+
+# ư [LATIN SMALL LETTER U WITH HORN]
+"\u01B0" => "u"
+
+# ǔ [LATIN SMALL LETTER U WITH CARON]
+"\u01D4" => "u"
+
+# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
+"\u01D6" => "u"
+
+# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
+"\u01D8" => "u"
+
+# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
+"\u01DA" => "u"
+
+# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
+"\u01DC" => "u"
+
+# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
+"\u0215" => "u"
+
+# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
+"\u0217" => "u"
+
+# ʉ [LATIN SMALL LETTER U BAR]
+"\u0289" => "u"
+
+# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
+"\u1D64" => "u"
+
+# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
+"\u1D99" => "u"
+
+# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
+"\u1E73" => "u"
+
+# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
+"\u1E75" => "u"
+
+# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
+"\u1E77" => "u"
+
+# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
+"\u1E79" => "u"
+
+# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
+"\u1E7B" => "u"
+
+# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
+"\u1EE5" => "u"
+
+# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
+"\u1EE7" => "u"
+
+# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
+"\u1EE9" => "u"
+
+# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
+"\u1EEB" => "u"
+
+# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
+"\u1EED" => "u"
+
+# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
+"\u1EEF" => "u"
+
+# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
+"\u1EF1" => "u"
+
+# ⓤ [CIRCLED LATIN SMALL LETTER U]
+"\u24E4" => "u"
+
+# ｕ [FULLWIDTH LATIN SMALL LETTER U]
+"\uFF55" => "u"
+
+# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
+"\u24B0" => "(u)"
+
+# ᵫ [LATIN SMALL LETTER UE]
+"\u1D6B" => "ue"
+
+# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
+"\u01B2" => "V"
+
+# Ʌ [LATIN CAPITAL LETTER TURNED V]
+"\u0245" => "V"
+
+# ᴠ [LATIN LETTER SMALL CAPITAL V]
+"\u1D20" => "V"
+
+# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
+"\u1E7C" => "V"
+
+# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
+"\u1E7E" => "V"
+
+# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
+"\u1EFC" => "V"
+
+# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
+"\u24CB" => "V"
+
+# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
+"\uA75E" => "V"
+
+# Ꝩ [LATIN CAPITAL LETTER VEND]
+"\uA768" => "V"
+
+# Ｖ [FULLWIDTH LATIN CAPITAL LETTER V]
+"\uFF36" => "V"
+
+# ʋ [LATIN SMALL LETTER V WITH HOOK]
+"\u028B" => "v"
+
+# ʌ [LATIN SMALL LETTER TURNED V]
+"\u028C" => "v"
+
+# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
+"\u1D65" => "v"
+
+# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
+"\u1D8C" => "v"
+
+# ṽ [LATIN SMALL LETTER V WITH TILDE]
+"\u1E7D" => "v"
+
+# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
+"\u1E7F" => "v"
+
+# ⓥ [CIRCLED LATIN SMALL LETTER V]
+"\u24E5" => "v"
+
+# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
+"\u2C71" => "v"
+
+# ⱴ [LATIN SMALL LETTER V WITH CURL]
+"\u2C74" => "v"
+
+# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
+"\uA75F" => "v"
+
+# ｖ [FULLWIDTH LATIN SMALL LETTER V]
+"\uFF56" => "v"
+
+# Ꝡ [LATIN CAPITAL LETTER VY]
+"\uA760" => "VY"
+
+# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
+"\u24B1" => "(v)"
+
+# ꝡ [LATIN SMALL LETTER VY]
+"\uA761" => "vy"
+
+# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
+"\u0174" => "W"
+
+# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
+"\u01F7" => "W"
+
+# ᴡ [LATIN LETTER SMALL CAPITAL W]
+"\u1D21" => "W"
+
+# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
+"\u1E80" => "W"
+
+# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
+"\u1E82" => "W"
+
+# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
+"\u1E84" => "W"
+
+# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
+"\u1E86" => "W"
+
+# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
+"\u1E88" => "W"
+
+# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
+"\u24CC" => "W"
+
+# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
+"\u2C72" => "W"
+
+# Ｗ [FULLWIDTH LATIN CAPITAL LETTER W]
+"\uFF37" => "W"
+
+# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
+"\u0175" => "w"
+
+# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
+"\u01BF" => "w"
+
+# ʍ [LATIN SMALL LETTER TURNED W]
+"\u028D" => "w"
+
+# ẁ [LATIN SMALL LETTER W WITH GRAVE]
+"\u1E81" => "w"
+
+# ẃ [LATIN SMALL LETTER W WITH ACUTE]
+"\u1E83" => "w"
+
+# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
+"\u1E85" => "w"
+
+# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
+"\u1E87" => "w"
+
+# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
+"\u1E89" => "w"
+
+# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
+"\u1E98" => "w"
+
+# ⓦ [CIRCLED LATIN SMALL LETTER W]
+"\u24E6" => "w"
+
+# ⱳ [LATIN SMALL LETTER W WITH HOOK]
+"\u2C73" => "w"
+
+# ｗ [FULLWIDTH LATIN SMALL LETTER W]
+"\uFF57" => "w"
+
+# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
+"\u24B2" => "(w)"
+
+# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
+"\u1E8A" => "X"
+
+# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
+"\u1E8C" => "X"
+
+# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
+"\u24CD" => "X"
+
+# Ｘ [FULLWIDTH LATIN CAPITAL LETTER X]
+"\uFF38" => "X"
+
+# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
+"\u1D8D" => "x"
+
+# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
+"\u1E8B" => "x"
+
+# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
+"\u1E8D" => "x"
+
+# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
+"\u2093" => "x"
+
+# ⓧ [CIRCLED LATIN SMALL LETTER X]
+"\u24E7" => "x"
+
+# ｘ [FULLWIDTH LATIN SMALL LETTER X]
+"\uFF58" => "x"
+
+# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
+"\u24B3" => "(x)"
+
+# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
+"\u00DD" => "Y"
+
+# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
+"\u0176" => "Y"
+
+# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
+"\u0178" => "Y"
+
+# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
+"\u01B3" => "Y"
+
+# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
+"\u0232" => "Y"
+
+# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
+"\u024E" => "Y"
+
+# ʏ [LATIN LETTER SMALL CAPITAL Y]
+"\u028F" => "Y"
+
+# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
+"\u1E8E" => "Y"
+
+# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
+"\u1EF2" => "Y"
+
+# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
+"\u1EF4" => "Y"
+
+# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
+"\u1EF6" => "Y"
+
+# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
+"\u1EF8" => "Y"
+
+# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
+"\u1EFE" => "Y"
+
+# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
+"\u24CE" => "Y"
+
+# Ｙ [FULLWIDTH LATIN CAPITAL LETTER Y]
+"\uFF39" => "Y"
+
+# ý [LATIN SMALL LETTER Y WITH ACUTE]
+"\u00FD" => "y"
+
+# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
+"\u00FF" => "y"
+
+# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
+"\u0177" => "y"
+
+# ƴ [LATIN SMALL LETTER Y WITH HOOK]
+"\u01B4" => "y"
+
+# ȳ [LATIN SMALL LETTER Y WITH MACRON]
+"\u0233" => "y"
+
+# ɏ [LATIN SMALL LETTER Y WITH STROKE]
+"\u024F" => "y"
+
+# ʎ [LATIN SMALL LETTER TURNED Y]
+"\u028E" => "y"
+
+# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
+"\u1E8F" => "y"
+
+# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
+"\u1E99" => "y"
+
+# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
+"\u1EF3" => "y"
+
+# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
+"\u1EF5" => "y"
+
+# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
+"\u1EF7" => "y"
+
+# ỹ [LATIN SMALL LETTER Y WITH TILDE]
+"\u1EF9" => "y"
+
+# ỿ [LATIN SMALL LETTER Y WITH LOOP]
+"\u1EFF" => "y"
+
+# ⓨ [CIRCLED LATIN SMALL LETTER Y]
+"\u24E8" => "y"
+
+# ｙ [FULLWIDTH LATIN SMALL LETTER Y]
+"\uFF59" => "y"
+
+# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
+"\u24B4" => "(y)"
+
+# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
+"\u0179" => "Z"
+
+# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
+"\u017B" => "Z"
+
+# Ž [LATIN CAPITAL LETTER Z WITH CARON]
+"\u017D" => "Z"
+
+# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
+"\u01B5" => "Z"
+
+# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
+"\u021C" => "Z"
+
+# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
+"\u0224" => "Z"
+
+# ᴢ [LATIN LETTER SMALL CAPITAL Z]
+"\u1D22" => "Z"
+
+# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
+"\u1E90" => "Z"
+
+# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
+"\u1E92" => "Z"
+
+# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
+"\u1E94" => "Z"
+
+# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
+"\u24CF" => "Z"
+
+# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
+"\u2C6B" => "Z"
+
+# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
+"\uA762" => "Z"
+
+# Ｚ [FULLWIDTH LATIN CAPITAL LETTER Z]
+"\uFF3A" => "Z"
+
+# ź [LATIN SMALL LETTER Z WITH ACUTE]
+"\u017A" => "z"
+
+# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
+"\u017C" => "z"
+
+# ž [LATIN SMALL LETTER Z WITH CARON]
+"\u017E" => "z"
+
+# ƶ [LATIN SMALL LETTER Z WITH STROKE]
+"\u01B6" => "z"
+
+# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
+"\u021D" => "z"
+
+# ȥ [LATIN SMALL LETTER Z WITH HOOK]
+"\u0225" => "z"
+
+# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
+"\u0240" => "z"
+
+# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
+"\u0290" => "z"
+
+# ʑ [LATIN SMALL LETTER Z WITH CURL]
+"\u0291" => "z"
+
+# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
+"\u1D76" => "z"
+
+# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
+"\u1D8E" => "z"
+
+# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
+"\u1E91" => "z"
+
+# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
+"\u1E93" => "z"
+
+# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
+"\u1E95" => "z"
+
+# ⓩ [CIRCLED LATIN SMALL LETTER Z]
+"\u24E9" => "z"
+
+# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
+"\u2C6C" => "z"
+
+# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
+"\uA763" => "z"
+
+# ｚ [FULLWIDTH LATIN SMALL LETTER Z]
+"\uFF5A" => "z"
+
+# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
+"\u24B5" => "(z)"
+
+# ⁰ [SUPERSCRIPT ZERO]
+"\u2070" => "0"
+
+# ₀ [SUBSCRIPT ZERO]
+"\u2080" => "0"
+
+# ⓪ [CIRCLED DIGIT ZERO]
+"\u24EA" => "0"
+
+# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
+"\u24FF" => "0"
+
+# ０ [FULLWIDTH DIGIT ZERO]
+"\uFF10" => "0"
+
+# ¹ [SUPERSCRIPT ONE]
+"\u00B9" => "1"
+
+# ₁ [SUBSCRIPT ONE]
+"\u2081" => "1"
+
+# ① [CIRCLED DIGIT ONE]
+"\u2460" => "1"
+
+# ⓵ [DOUBLE CIRCLED DIGIT ONE]
+"\u24F5" => "1"
+
+# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
+"\u2776" => "1"
+
+# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
+"\u2780" => "1"
+
+# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
+"\u278A" => "1"
+
+# １ [FULLWIDTH DIGIT ONE]
+"\uFF11" => "1"
+
+# ⒈ [DIGIT ONE FULL STOP]
+"\u2488" => "1."
+
+# ⑴ [PARENTHESIZED DIGIT ONE]
+"\u2474" => "(1)"
+
+# ² [SUPERSCRIPT TWO]
+"\u00B2" => "2"
+
+# ₂ [SUBSCRIPT TWO]
+"\u2082" => "2"
+
+# ② [CIRCLED DIGIT TWO]
+"\u2461" => "2"
+
+# ⓶ [DOUBLE CIRCLED DIGIT TWO]
+"\u24F6" => "2"
+
+# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
+"\u2777" => "2"
+
+# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
+"\u2781" => "2"
+
+# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
+"\u278B" => "2"
+
+# ２ [FULLWIDTH DIGIT TWO]
+"\uFF12" => "2"
+
+# ⒉ [DIGIT TWO FULL STOP]
+"\u2489" => "2."
+
+# ⑵ [PARENTHESIZED DIGIT TWO]
+"\u2475" => "(2)"
+
+# ³ [SUPERSCRIPT THREE]
+"\u00B3" => "3"
+
+# ₃ [SUBSCRIPT THREE]
+"\u2083" => "3"
+
+# ③ [CIRCLED DIGIT THREE]
+"\u2462" => "3"
+
+# ⓷ [DOUBLE CIRCLED DIGIT THREE]
+"\u24F7" => "3"
+
+# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
+"\u2778" => "3"
+
+# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
+"\u2782" => "3"
+
+# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
+"\u278C" => "3"
+
+# ３ [FULLWIDTH DIGIT THREE]
+"\uFF13" => "3"
+
+# ⒊ [DIGIT THREE FULL STOP]
+"\u248A" => "3."
+
+# ⑶ [PARENTHESIZED DIGIT THREE]
+"\u2476" => "(3)"
+
+# ⁴ [SUPERSCRIPT FOUR]
+"\u2074" => "4"
+
+# ₄ [SUBSCRIPT FOUR]
+"\u2084" => "4"
+
+# ④ [CIRCLED DIGIT FOUR]
+"\u2463" => "4"
+
+# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
+"\u24F8" => "4"
+
+# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
+"\u2779" => "4"
+
+# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
+"\u2783" => "4"
+
+# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
+"\u278D" => "4"
+
+# ４ [FULLWIDTH DIGIT FOUR]
+"\uFF14" => "4"
+
+# ⒋ [DIGIT FOUR FULL STOP]
+"\u248B" => "4."
+
+# ⑷ [PARENTHESIZED DIGIT FOUR]
+"\u2477" => "(4)"
+
+# ⁵ [SUPERSCRIPT FIVE]
+"\u2075" => "5"
+
+# ₅ [SUBSCRIPT FIVE]
+"\u2085" => "5"
+
+# ⑤ [CIRCLED DIGIT FIVE]
+"\u2464" => "5"
+
+# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
+"\u24F9" => "5"
+
+# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
+"\u277A" => "5"
+
+# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
+"\u2784" => "5"
+
+# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
+"\u278E" => "5"
+
+# ５ [FULLWIDTH DIGIT FIVE]
+"\uFF15" => "5"
+
+# ⒌ [DIGIT FIVE FULL STOP]
+"\u248C" => "5."
+
+# ⑸ [PARENTHESIZED DIGIT FIVE]
+"\u2478" => "(5)"
+
+# ⁶ [SUPERSCRIPT SIX]
+"\u2076" => "6"
+
+# ₆ [SUBSCRIPT SIX]
+"\u2086" => "6"
+
+# ⑥ [CIRCLED DIGIT SIX]
+"\u2465" => "6"
+
+# ⓺ [DOUBLE CIRCLED DIGIT SIX]
+"\u24FA" => "6"
+
+# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
+"\u277B" => "6"
+
+# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
+"\u2785" => "6"
+
+# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
+"\u278F" => "6"
+
+# ６ [FULLWIDTH DIGIT SIX]
+"\uFF16" => "6"
+
+# ⒍ [DIGIT SIX FULL STOP]
+"\u248D" => "6."
+
+# ⑹ [PARENTHESIZED DIGIT SIX]
+"\u2479" => "(6)"
+
+# ⁷ [SUPERSCRIPT SEVEN]
+"\u2077" => "7"
+
+# ₇ [SUBSCRIPT SEVEN]
+"\u2087" => "7"
+
+# ⑦ [CIRCLED DIGIT SEVEN]
+"\u2466" => "7"
+
+# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
+"\u24FB" => "7"
+
+# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
+"\u277C" => "7"
+
+# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
+"\u2786" => "7"
+
+# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
+"\u2790" => "7"
+
+# ７ [FULLWIDTH DIGIT SEVEN]
+"\uFF17" => "7"
+
+# ⒎ [DIGIT SEVEN FULL STOP]
+"\u248E" => "7."
+
+# ⑺ [PARENTHESIZED DIGIT SEVEN]
+"\u247A" => "(7)"
+
+# ⁸ [SUPERSCRIPT EIGHT]
+"\u2078" => "8"
+
+# ₈ [SUBSCRIPT EIGHT]
+"\u2088" => "8"
+
+# ⑧ [CIRCLED DIGIT EIGHT]
+"\u2467" => "8"
+
+# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
+"\u24FC" => "8"
+
+# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
+"\u277D" => "8"
+
+# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
+"\u2787" => "8"
+
+# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
+"\u2791" => "8"
+
+# ８ [FULLWIDTH DIGIT EIGHT]
+"\uFF18" => "8"
+
+# ⒏ [DIGIT EIGHT FULL STOP]
+"\u248F" => "8."
+
+# ⑻ [PARENTHESIZED DIGIT EIGHT]
+"\u247B" => "(8)"
+
+# ⁹ [SUPERSCRIPT NINE]
+"\u2079" => "9"
+
+# ₉ [SUBSCRIPT NINE]
+"\u2089" => "9"
+
+# ⑨ [CIRCLED DIGIT NINE]
+"\u2468" => "9"
+
+# ⓽ [DOUBLE CIRCLED DIGIT NINE]
+"\u24FD" => "9"
+
+# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
+"\u277E" => "9"
+
+# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
+"\u2788" => "9"
+
+# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
+"\u2792" => "9"
+
+# ９ [FULLWIDTH DIGIT NINE]
+"\uFF19" => "9"
+
+# ⒐ [DIGIT NINE FULL STOP]
+"\u2490" => "9."
+
+# ⑼ [PARENTHESIZED DIGIT NINE]
+"\u247C" => "(9)"
+
+# ⑩ [CIRCLED NUMBER TEN]
+"\u2469" => "10"
+
+# ⓾ [DOUBLE CIRCLED NUMBER TEN]
+"\u24FE" => "10"
+
+# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
+"\u277F" => "10"
+
+# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
+"\u2789" => "10"
+
+# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
+"\u2793" => "10"
+
+# ⒑ [NUMBER TEN FULL STOP]
+"\u2491" => "10."
+
+# ⑽ [PARENTHESIZED NUMBER TEN]
+"\u247D" => "(10)"
+
+# ⑪ [CIRCLED NUMBER ELEVEN]
+"\u246A" => "11"
+
+# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
+"\u24EB" => "11"
+
+# ⒒ [NUMBER ELEVEN FULL STOP]
+"\u2492" => "11."
+
+# ⑾ [PARENTHESIZED NUMBER ELEVEN]
+"\u247E" => "(11)"
+
+# ⑫ [CIRCLED NUMBER TWELVE]
+"\u246B" => "12"
+
+# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
+"\u24EC" => "12"
+
+# ⒓ [NUMBER TWELVE FULL STOP]
+"\u2493" => "12."
+
+# ⑿ [PARENTHESIZED NUMBER TWELVE]
+"\u247F" => "(12)"
+
+# ⑬ [CIRCLED NUMBER THIRTEEN]
+"\u246C" => "13"
+
+# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
+"\u24ED" => "13"
+
+# ⒔ [NUMBER THIRTEEN FULL STOP]
+"\u2494" => "13."
+
+# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
+"\u2480" => "(13)"
+
+# ⑭ [CIRCLED NUMBER FOURTEEN]
+"\u246D" => "14"
+
+# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
+"\u24EE" => "14"
+
+# ⒕ [NUMBER FOURTEEN FULL STOP]
+"\u2495" => "14."
+
+# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
+"\u2481" => "(14)"
+
+# ⑮ [CIRCLED NUMBER FIFTEEN]
+"\u246E" => "15"
+
+# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
+"\u24EF" => "15"
+
+# ⒖ [NUMBER FIFTEEN FULL STOP]
+"\u2496" => "15."
+
+# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
+"\u2482" => "(15)"
+
+# ⑯ [CIRCLED NUMBER SIXTEEN]
+"\u246F" => "16"
+
+# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
+"\u24F0" => "16"
+
+# ⒗ [NUMBER SIXTEEN FULL STOP]
+"\u2497" => "16."
+
+# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
+"\u2483" => "(16)"
+
+# ⑰ [CIRCLED NUMBER SEVENTEEN]
+"\u2470" => "17"
+
+# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
+"\u24F1" => "17"
+
+# ⒘ [NUMBER SEVENTEEN FULL STOP]
+"\u2498" => "17."
+
+# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
+"\u2484" => "(17)"
+
+# ⑱ [CIRCLED NUMBER EIGHTEEN]
+"\u2471" => "18"
+
+# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
+"\u24F2" => "18"
+
+# ⒙ [NUMBER EIGHTEEN FULL STOP]
+"\u2499" => "18."
+
+# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
+"\u2485" => "(18)"
+
+# ⑲ [CIRCLED NUMBER NINETEEN]
+"\u2472" => "19"
+
+# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
+"\u24F3" => "19"
+
+# ⒚ [NUMBER NINETEEN FULL STOP]
+"\u249A" => "19."
+
+# ⒆ [PARENTHESIZED NUMBER NINETEEN]
+"\u2486" => "(19)"
+
+# ⑳ [CIRCLED NUMBER TWENTY]
+"\u2473" => "20"
+
+# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
+"\u24F4" => "20"
+
+# ⒛ [NUMBER TWENTY FULL STOP]
+"\u249B" => "20."
+
+# ⒇ [PARENTHESIZED NUMBER TWENTY]
+"\u2487" => "(20)"
+
+# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
+"\u00AB" => "\""
+
+# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
+"\u00BB" => "\""
+
+# “ [LEFT DOUBLE QUOTATION MARK]
+"\u201C" => "\""
+
+# ” [RIGHT DOUBLE QUOTATION MARK]
+"\u201D" => "\""
+
+# „ [DOUBLE LOW-9 QUOTATION MARK]
+"\u201E" => "\""
+
+# ″ [DOUBLE PRIME]
+"\u2033" => "\""
+
+# ‶ [REVERSED DOUBLE PRIME]
+"\u2036" => "\""
+
+# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
+"\u275D" => "\""
+
+# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
+"\u275E" => "\""
+
+# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+"\u276E" => "\""
+
+# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
+"\u276F" => "\""
+
+# ＂ [FULLWIDTH QUOTATION MARK]
+"\uFF02" => "\""
+
+# ‘ [LEFT SINGLE QUOTATION MARK]
+"\u2018" => "\'"
+
+# ’ [RIGHT SINGLE QUOTATION MARK]
+"\u2019" => "\'"
+
+# ‚ [SINGLE LOW-9 QUOTATION MARK]
+"\u201A" => "\'"
+
+# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
+"\u201B" => "\'"
+
+# ′ [PRIME]
+"\u2032" => "\'"
+
+# ‵ [REVERSED PRIME]
+"\u2035" => "\'"
+
+# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
+"\u2039" => "\'"
+
+# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
+"\u203A" => "\'"
+
+# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
+"\u275B" => "\'"
+
+# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
+"\u275C" => "\'"
+
+# ＇ [FULLWIDTH APOSTROPHE]
+"\uFF07" => "\'"
+
+# ‐ [HYPHEN]
+"\u2010" => "-"
+
+# ‑ [NON-BREAKING HYPHEN]
+"\u2011" => "-"
+
+# ‒ [FIGURE DASH]
+"\u2012" => "-"
+
+# – [EN DASH]
+"\u2013" => "-"
+
+# — [EM DASH]
+"\u2014" => "-"
+
+# ⁻ [SUPERSCRIPT MINUS]
+"\u207B" => "-"
+
+# ₋ [SUBSCRIPT MINUS]
+"\u208B" => "-"
+
+# － [FULLWIDTH HYPHEN-MINUS]
+"\uFF0D" => "-"
+
+# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
+"\u2045" => "["
+
+# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
+"\u2772" => "["
+
+# ［ [FULLWIDTH LEFT SQUARE BRACKET]
+"\uFF3B" => "["
+
+# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
+"\u2046" => "]"
+
+# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
+"\u2773" => "]"
+
+# ］ [FULLWIDTH RIGHT SQUARE BRACKET]
+"\uFF3D" => "]"
+
+# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
+"\u207D" => "("
+
+# ₍ [SUBSCRIPT LEFT PARENTHESIS]
+"\u208D" => "("
+
+# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
+"\u2768" => "("
+
+# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
+"\u276A" => "("
+
+# （ [FULLWIDTH LEFT PARENTHESIS]
+"\uFF08" => "("
+
+# ⸨ [LEFT DOUBLE PARENTHESIS]
+"\u2E28" => "(("
+
+# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
+"\u207E" => ")"
+
+# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
+"\u208E" => ")"
+
+# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
+"\u2769" => ")"
+
+# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
+"\u276B" => ")"
+
+# ） [FULLWIDTH RIGHT PARENTHESIS]
+"\uFF09" => ")"
+
+# ⸩ [RIGHT DOUBLE PARENTHESIS]
+"\u2E29" => "))"
+
+# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
+"\u276C" => "<"
+
+# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
+"\u2770" => "<"
+
+# ＜ [FULLWIDTH LESS-THAN SIGN]
+"\uFF1C" => "<"
+
+# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+"\u276D" => ">"
+
+# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
+"\u2771" => ">"
+
+# ＞ [FULLWIDTH GREATER-THAN SIGN]
+"\uFF1E" => ">"
+
+# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
+"\u2774" => "{"
+
+# ｛ [FULLWIDTH LEFT CURLY BRACKET]
+"\uFF5B" => "{"
+
+# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
+"\u2775" => "}"
+
+# ｝ [FULLWIDTH RIGHT CURLY BRACKET]
+"\uFF5D" => "}"
+
+# ⁺ [SUPERSCRIPT PLUS SIGN]
+"\u207A" => "+"
+
+# ₊ [SUBSCRIPT PLUS SIGN]
+"\u208A" => "+"
+
+# ＋ [FULLWIDTH PLUS SIGN]
+"\uFF0B" => "+"
+
+# ⁼ [SUPERSCRIPT EQUALS SIGN]
+"\u207C" => "="
+
+# ₌ [SUBSCRIPT EQUALS SIGN]
+"\u208C" => "="
+
+# ＝ [FULLWIDTH EQUALS SIGN]
+"\uFF1D" => "="
+
+# ！ [FULLWIDTH EXCLAMATION MARK]
+"\uFF01" => "!"
+
+# ‼ [DOUBLE EXCLAMATION MARK]
+"\u203C" => "!!"
+
+# ⁉ [EXCLAMATION QUESTION MARK]
+"\u2049" => "!?"
+
+# ＃ [FULLWIDTH NUMBER SIGN]
+"\uFF03" => "#"
+
+# ＄ [FULLWIDTH DOLLAR SIGN]
+"\uFF04" => "$"
+
+# ⁒ [COMMERCIAL MINUS SIGN]
+"\u2052" => "%"
+
+# ％ [FULLWIDTH PERCENT SIGN]
+"\uFF05" => "%"
+
+# ＆ [FULLWIDTH AMPERSAND]
+"\uFF06" => "&"
+
+# ⁎ [LOW ASTERISK]
+"\u204E" => "*"
+
+# ＊ [FULLWIDTH ASTERISK]
+"\uFF0A" => "*"
+
+# ， [FULLWIDTH COMMA]
+"\uFF0C" => ","
+
+# ． [FULLWIDTH FULL STOP]
+"\uFF0E" => "."
+
+# ⁄ [FRACTION SLASH]
+"\u2044" => "/"
+
+# ／ [FULLWIDTH SOLIDUS]
+"\uFF0F" => "/"
+
+# ： [FULLWIDTH COLON]
+"\uFF1A" => ":"
+
+# ⁏ [REVERSED SEMICOLON]
+"\u204F" => ";"
+
+# ； [FULLWIDTH SEMICOLON]
+"\uFF1B" => ";"
+
+# ？ [FULLWIDTH QUESTION MARK]
+"\uFF1F" => "?"
+
+# ⁇ [DOUBLE QUESTION MARK]
+"\u2047" => "??"
+
+# ⁈ [QUESTION EXCLAMATION MARK]
+"\u2048" => "?!"
+
+# ＠ [FULLWIDTH COMMERCIAL AT]
+"\uFF20" => "@"
+
+# ＼ [FULLWIDTH REVERSE SOLIDUS]
+"\uFF3C" => "\\"
+
+# ‸ [CARET]
+"\u2038" => "^"
+
+# ＾ [FULLWIDTH CIRCUMFLEX ACCENT]
+"\uFF3E" => "^"
+
+# ＿ [FULLWIDTH LOW LINE]
+"\uFF3F" => "_"
+
+# ⁓ [SWUNG DASH]
+"\u2053" => "~"
+
+# ～ [FULLWIDTH TILDE]
+"\uFF5E" => "~"
+
+################################################################
+# Below is the Perl script used to generate the above mappings #
+# from ASCIIFoldingFilter.java:                                #
+################################################################
+#
+# #!/usr/bin/perl
+#
+# use warnings;
+# use strict;
+#
+# my @source_chars = ();
+# my @source_char_descriptions = ();
+# my $target = '';
+#
+# while (<>) {
+#   if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
+#     push @source_chars, $1;
+#     push @source_char_descriptions, $2;
+#     next;
+#   }
+#   if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
+#     $target .= $1;
+#     next;
+#   }
+#   if (/break;/) {
+#     $target = "\\\"" if ($target eq '"');
+#     for my $source_char_num (0..$#source_chars) {
+#       print "# $source_char_descriptions[$source_char_num]\n";
+#       print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
+#     }
+#     @source_chars = ();
+#     @source_char_descriptions = ();
+#     $target = '';
+#   }
+# }
diff --git a/solr/homeDirectoryTemplate/conf/mapping-ISOLatin1Accent.txt b/solr/homeDirectoryTemplate/conf/mapping-ISOLatin1Accent.txt
index c4410432f..ede774258 100644
--- a/solr/homeDirectoryTemplate/conf/mapping-ISOLatin1Accent.txt
+++ b/solr/homeDirectoryTemplate/conf/mapping-ISOLatin1Accent.txt
@@ -1,246 +1,246 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-# example:
-# "À" => "A"
-# "\u00C0" => "A"
-# "\u00C0" => "\u0041"
-# "ß" => "ss"
-# "\t" => " "
-# "\n" => ""
-
-# À => A
-"\u00C0" => "A"
-
-# Á => A
-"\u00C1" => "A"
-
-# Â => A
-"\u00C2" => "A"
-
-# Ã => A
-"\u00C3" => "A"
-
-# Ä => A
-"\u00C4" => "A"
-
-# Å => A
-"\u00C5" => "A"
-
-# Æ => AE
-"\u00C6" => "AE"
-
-# Ç => C
-"\u00C7" => "C"
-
-# È => E
-"\u00C8" => "E"
-
-# É => E
-"\u00C9" => "E"
-
-# Ê => E
-"\u00CA" => "E"
-
-# Ë => E
-"\u00CB" => "E"
-
-# Ì => I
-"\u00CC" => "I"
-
-# Í => I
-"\u00CD" => "I"
-
-# Î => I
-"\u00CE" => "I"
-
-# Ï => I
-"\u00CF" => "I"
-
-# IJ => IJ
-"\u0132" => "IJ"
-
-# Ð => D
-"\u00D0" => "D"
-
-# Ñ => N
-"\u00D1" => "N"
-
-# Ò => O
-"\u00D2" => "O"
-
-# Ó => O
-"\u00D3" => "O"
-
-# Ô => O
-"\u00D4" => "O"
-
-# Õ => O
-"\u00D5" => "O"
-
-# Ö => O
-"\u00D6" => "O"
-
-# Ø => O
-"\u00D8" => "O"
-
-# Œ => OE
-"\u0152" => "OE"
-
-# Þ
-"\u00DE" => "TH"
-
-# Ù => U
-"\u00D9" => "U"
-
-# Ú => U
-"\u00DA" => "U"
-
-# Û => U
-"\u00DB" => "U"
-
-# Ü => U
-"\u00DC" => "U"
-
-# Ý => Y
-"\u00DD" => "Y"
-
-# Ÿ => Y
-"\u0178" => "Y"
-
-# à => a
-"\u00E0" => "a"
-
-# á => a
-"\u00E1" => "a"
-
-# â => a
-"\u00E2" => "a"
-
-# ã => a
-"\u00E3" => "a"
-
-# ä => a
-"\u00E4" => "a"
-
-# å => a
-"\u00E5" => "a"
-
-# æ => ae
-"\u00E6" => "ae"
-
-# ç => c
-"\u00E7" => "c"
-
-# è => e
-"\u00E8" => "e"
-
-# é => e
-"\u00E9" => "e"
-
-# ê => e
-"\u00EA" => "e"
-
-# ë => e
-"\u00EB" => "e"
-
-# ì => i
-"\u00EC" => "i"
-
-# í => i
-"\u00ED" => "i"
-
-# î => i
-"\u00EE" => "i"
-
-# ï => i
-"\u00EF" => "i"
-
-# ij => ij
-"\u0133" => "ij"
-
-# ð => d
-"\u00F0" => "d"
-
-# ñ => n
-"\u00F1" => "n"
-
-# ò => o
-"\u00F2" => "o"
-
-# ó => o
-"\u00F3" => "o"
-
-# ô => o
-"\u00F4" => "o"
-
-# õ => o
-"\u00F5" => "o"
-
-# ö => o
-"\u00F6" => "o"
-
-# ø => o
-"\u00F8" => "o"
-
-# œ => oe
-"\u0153" => "oe"
-
-# ß => ss
-"\u00DF" => "ss"
-
-# þ => th
-"\u00FE" => "th"
-
-# ù => u
-"\u00F9" => "u"
-
-# ú => u
-"\u00FA" => "u"
-
-# û => u
-"\u00FB" => "u"
-
-# ü => u
-"\u00FC" => "u"
-
-# ý => y
-"\u00FD" => "y"
-
-# ÿ => y
-"\u00FF" => "y"
-
-# ff => ff
-"\uFB00" => "ff"
-
-# fi => fi
-"\uFB01" => "fi"
-
-# fl => fl
-"\uFB02" => "fl"
-
-# ffi => ffi
-"\uFB03" => "ffi"
-
-# ffl => ffl
-"\uFB04" => "ffl"
-
-# ſt => ft
-"\uFB05" => "ft"
-
-# st => st
-"\uFB06" => "st"
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Syntax:
+# "source" => "target"
+# "source".length() > 0 (source cannot be empty.)
+# "target".length() >= 0 (target can be empty.)
+
+# example:
+# "À" => "A"
+# "\u00C0" => "A"
+# "\u00C0" => "\u0041"
+# "ß" => "ss"
+# "\t" => " "
+# "\n" => ""
+
+# À => A
+"\u00C0" => "A"
+
+# Á => A
+"\u00C1" => "A"
+
+# Â => A
+"\u00C2" => "A"
+
+# Ã => A
+"\u00C3" => "A"
+
+# Ä => A
+"\u00C4" => "A"
+
+# Å => A
+"\u00C5" => "A"
+
+# Æ => AE
+"\u00C6" => "AE"
+
+# Ç => C
+"\u00C7" => "C"
+
+# È => E
+"\u00C8" => "E"
+
+# É => E
+"\u00C9" => "E"
+
+# Ê => E
+"\u00CA" => "E"
+
+# Ë => E
+"\u00CB" => "E"
+
+# Ì => I
+"\u00CC" => "I"
+
+# Í => I
+"\u00CD" => "I"
+
+# Î => I
+"\u00CE" => "I"
+
+# Ï => I
+"\u00CF" => "I"
+
+# Ĳ => IJ
+"\u0132" => "IJ"
+
+# Ð => D
+"\u00D0" => "D"
+
+# Ñ => N
+"\u00D1" => "N"
+
+# Ò => O
+"\u00D2" => "O"
+
+# Ó => O
+"\u00D3" => "O"
+
+# Ô => O
+"\u00D4" => "O"
+
+# Õ => O
+"\u00D5" => "O"
+
+# Ö => O
+"\u00D6" => "O"
+
+# Ø => O
+"\u00D8" => "O"
+
+# Œ => OE
+"\u0152" => "OE"
+
+# Þ => TH
+"\u00DE" => "TH"
+
+# Ù => U
+"\u00D9" => "U"
+
+# Ú => U
+"\u00DA" => "U"
+
+# Û => U
+"\u00DB" => "U"
+
+# Ü => U
+"\u00DC" => "U"
+
+# Ý => Y
+"\u00DD" => "Y"
+
+# Ÿ => Y
+"\u0178" => "Y"
+
+# à => a
+"\u00E0" => "a"
+
+# á => a
+"\u00E1" => "a"
+
+# â => a
+"\u00E2" => "a"
+
+# ã => a
+"\u00E3" => "a"
+
+# ä => a
+"\u00E4" => "a"
+
+# å => a
+"\u00E5" => "a"
+
+# æ => ae
+"\u00E6" => "ae"
+
+# ç => c
+"\u00E7" => "c"
+
+# è => e
+"\u00E8" => "e"
+
+# é => e
+"\u00E9" => "e"
+
+# ê => e
+"\u00EA" => "e"
+
+# ë => e
+"\u00EB" => "e"
+
+# ì => i
+"\u00EC" => "i"
+
+# í => i
+"\u00ED" => "i"
+
+# î => i
+"\u00EE" => "i"
+
+# ï => i
+"\u00EF" => "i"
+
+# ĳ => ij
+"\u0133" => "ij"
+
+# ð => d
+"\u00F0" => "d"
+
+# ñ => n
+"\u00F1" => "n"
+
+# ò => o
+"\u00F2" => "o"
+
+# ó => o
+"\u00F3" => "o"
+
+# ô => o
+"\u00F4" => "o"
+
+# õ => o
+"\u00F5" => "o"
+
+# ö => o
+"\u00F6" => "o"
+
+# ø => o
+"\u00F8" => "o"
+
+# œ => oe
+"\u0153" => "oe"
+
+# ß => ss
+"\u00DF" => "ss"
+
+# þ => th
+"\u00FE" => "th"
+
+# ù => u
+"\u00F9" => "u"
+
+# ú => u
+"\u00FA" => "u"
+
+# û => u
+"\u00FB" => "u"
+
+# ü => u
+"\u00FC" => "u"
+
+# ý => y
+"\u00FD" => "y"
+
+# ÿ => y
+"\u00FF" => "y"
+
+# ﬀ => ff
+"\uFB00" => "ff"
+
+# ﬁ => fi
+"\uFB01" => "fi"
+
+# ﬂ => fl
+"\uFB02" => "fl"
+
+# ﬃ => ffi
+"\uFB03" => "ffi"
+
+# ﬄ => ffl
+"\uFB04" => "ffl"
+
+# ﬅ => ft
+"\uFB05" => "ft"
+
+# ﬆ => st
+"\uFB06" => "st"
diff --git a/solr/homeDirectoryTemplate/conf/protwords.txt b/solr/homeDirectoryTemplate/conf/protwords.txt
index 5a32e5032..1dfc0abec 100644
--- a/solr/homeDirectoryTemplate/conf/protwords.txt
+++ b/solr/homeDirectoryTemplate/conf/protwords.txt
@@ -1,21 +1,21 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/homeDirectoryTemplate/conf/schema-old.xml b/solr/homeDirectoryTemplate/conf/schema-old.xml
new file mode 100644
index 000000000..30dcf0c74
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/schema-old.xml
@@ -0,0 +1,283 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DocId
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/schema.xml b/solr/homeDirectoryTemplate/conf/schema.xml
index 30dcf0c74..0008ce381 100644
--- a/solr/homeDirectoryTemplate/conf/schema.xml
+++ b/solr/homeDirectoryTemplate/conf/schema.xml
@@ -1,179 +1,90 @@
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ http://www.apache.org/licenses/LICENSE-2.0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking. To improve performance one could
+ - set stored="false" for all fields possible (esp large fields) when you
+ only need to search on the field but don't need to return the original
+ value.
+ - set indexed="false" if you don't need to search on the field, but only
+ return the field as a result of searching on other indexed fields.
+ - remove all unneeded copyField statements
+ - for best index size and searching performance, set "index" to false
+ for all general text fields, use copyField to copy them to the
+ catchall "text" field, and use that for searching.
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
+ java client.
+ - Remember to run the JVM in server mode, and use a higher logging level
+ that avoids logging every request
+-->
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
+
+
+
+
+
+
+
+
-
+
@@ -199,16 +127,16 @@
-
+
-
+
-
-
+
+
@@ -261,23 +189,1176 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Unless this field is marked with required="false", it will be a required field
+ -->
DocId
-
-
-
+
-
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/solrconfig-old.xml b/solr/homeDirectoryTemplate/conf/solrconfig-old.xml
new file mode 100644
index 000000000..9e292b3a4
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/solrconfig-old.xml
@@ -0,0 +1,1575 @@
+
+
+
+
+
+
+
+
+ ${solr.abortOnConfigurationError:true}
+
+
+ LUCENE_31
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+ 10
+
+ 32
+
+
+
+ 10000
+ 1000
+ 10000
+
+
+
+
+
+
+
+
+ native
+
+
+
+
+
+
+
+
+ false
+ 32
+ 10
+
+
+ false
+
+
+ true
+
+
+
+
+ 1
+
+ 0
+
+
+
+
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -->
+
+ 50000
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ edismax
+
+ ALLTEXT ALLTEXTUNSTEMMED nameText^2.0 nameUnstemmed^2.0 nameStemmed^2.0 nameLowercase
+ explicit
+ 2
+ 10
+ *:*
+ *,score
+ true
+ ALLTEXT
+ 160
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+
+ browse
+ layout
+ Solritas
+
+
+
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ text,features,name,sku,id,manu,cat
+ 3
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+
+ on
+ cat
+ manu_exact
+ ipod
+ GB
+ 1
+ cat,inStock
+ price
+ 0
+ 600
+ 50
+ after
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+
+ on
+ text features name
+ 0
+ name
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+ etag
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ true
+ ignored_
+
+
+ true
+ links
+ ignored_
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ search
+ solrpingquery
+ all
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+ textSpell
+
+
+
+
+
+ default
+ name
+ spellchecker
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ 1
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+ 20
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ etag
+ false
+ solr.processor.Lookup3Signature
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/solrconfig.xml b/solr/homeDirectoryTemplate/conf/solrconfig.xml
index acec3cc17..b5277e82e 100644
--- a/solr/homeDirectoryTemplate/conf/solrconfig.xml
+++ b/solr/homeDirectoryTemplate/conf/solrconfig.xml
@@ -29,27 +29,15 @@
have your own custom plugins.
-->
-
- ${solr.abortOnConfigurationError:true}
-
- LUCENE_31
+ -->
+ 4.7
-
-
-
-
-
-
-
-
-
-
-
+
+
-
+
+
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
- WARNING: See also the section below for parameters
- that overfor Solr's main Lucene index.
+
+
+
-
+
+
+1000
+
+
- false
+
+ false
- 10
-
- 32
-
+
+
+
+ 100
- 10000
- 1000
- 10000
-
+
+ 10
+ 10
+
+ -->
+
+
+
+ 10
+
-
+
- native
-
-
-
-
-
-
-
-
- false
- 32
- 10
+ ${solr.lock.type:native}
+
- true
+ false
+
+
+
+
+ true
+
+
- 1
+ 1
- 0
+ 0
+
+
- false
+ false
+
-
+
+
+
+
+
+
-
+ openSearcher - if false, the commit causes recent index changes
+ to be flushed to stable storage, but does not cause a new
+ searcher to be opened to make those changes visible.
+ If the updateLog is enabled, then it's highly recommended to
+ have some sort of hard autoCommit to limit the log size.
+ -->
+
+
+
+
+
-
+
- -->
-
50000
@@ -448,6 +553,14 @@
initialSize="512"
autowarmCount="0"/>
+
+
+
+
+ multipartUploadLimitInKB="2048000"
+ formdataUploadLimitInKB="2048"
+ addHttpRequestToContext="false"/>
-
-
+
-
+
+ edismaxALLTEXT ALLTEXTUNSTEMMED nameText^2.0 nameUnstemmed^2.0 nameStemmed^2.0 nameLowercase
@@ -720,7 +853,16 @@
160]]>]]>
+
+ 100%
+
+
-
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
@@ -790,39 +964,51 @@
velocity
-
browselayoutSolritas
-
-
+
+
+
+
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%*:*10*,score
-
+
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
text,features,name,sku,id,manu,cat3
-
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
-
+
+
oncatmanu_exact
+ content_type
+ author_sipodGB1cat,inStock
+ afterprice060050
- after
+ popularity
+ 0
+ 10
+ 3manufacturedate_dtNOW/YEAR-10YEARSNOW
@@ -830,60 +1016,91 @@
beforeafter
-
on
- text features name
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title0name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
spellcheck
-
-
-
-
-
+
+
+ etag
+
-
-
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
-
-
-
-
-
-
-
-
+
text
@@ -904,8 +1122,10 @@
linksignored_
+
+
-
+
-
- search
+ solrpingquery
+
+ all
+
+
@@ -1012,34 +1242,44 @@
-
+
+
+
-
+
-
- textSpell
+
+
+ textSpell
+
-
-
+
defaultnamespellchecker
+
+ solr.DirectSolrSpellChecker
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1124,9 +1403,8 @@
freqlowerfilt
- spellcheckerFreq
+ solr.DirectSolrSpellCheckerfreq
- true
-->
@@ -1140,7 +1418,7 @@
-->
-
+
- false
- false
- 1
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5spellcheck
+
+
+
+ mySuggester
+ FuzzyLookupFactory
+ DocumentDictionaryFactory
+ cat
+ price
+ string
+
+
+
+
+
+ true
+ 10
+
+
+ suggest
+
+
-
+
+ texttrue
@@ -1189,55 +1502,60 @@
-
-
-
- default
-
org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
- 20
-
-
- ENGLISH
+ clustering/carrot2
+
+
stcorg.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+ kmeans
+ org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+
+
name
+
id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
+
+ features
+
+ true
+
+
+
+ false
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
clustering
@@ -1293,6 +1612,7 @@
true
+ falseterms
@@ -1318,6 +1638,7 @@
explicit
+ textelevator
@@ -1367,18 +1688,21 @@
-
+
-
+
+
+
+
]]>
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
-
+
+
+
+
+
+
+
*:*
-
-
-
diff --git a/solr/homeDirectoryTemplate/conf/velocity/README.txt b/solr/homeDirectoryTemplate/conf/velocity/README.txt
new file mode 100644
index 000000000..5d560baec
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/README.txt
@@ -0,0 +1,101 @@
+Introduction
+------------
+Solr Search Velocity Templates
+
+A quick demo of using Solr using http://wiki.apache.org/solr/VelocityResponseWriter
+
+You typically access these templates via:
+ http://localhost:8983/solr/collection1/browse
+
+It's called "browse" because you can click around with your mouse
+without needing to type any search terms. And of course it
+also works as a standard search app as well.
+
+Known Limitations
+-----------------
+* The /browse and the VelocityResponseWriter component
+ serve content directly from Solr, which usually requires
+ Solr's HTTP API to be exposed. Advanced users could
+ potentially access other parts of Solr directly.
+* There are some hard coded fields in these templates.
+ Since these templates live under conf, they should be
+ considered part of the overall configuration, and
+ must be coordinated with schema.xml and solrconfig.xml
+
+Velocity Info
+-------------
+Java-based template language.
+
+It's nice in this context because changes to the templates
+are immediately visible in the browser on the next visit.
+
+Links:
+ http://velocity.apache.org
+ http://wiki.apache.org/velocity/
+ http://velocity.apache.org/engine/releases/velocity-1.7/user-guide.html
+
+
+File List
+---------
+
+System and Misc:
+ VM_global_library.vm - Macros used by other templates,
+ exact filename is important for Velocity to see it
+ error.vm - shows errors, if any
+ debug.vm - includes toggle links for "explain" and "all fields"
+ activated by debug link in footer.vm
+ README.txt - this file
+
+Overall Page Composition:
+ browse.vm - Main entry point into templates
+ layout.vm - overall HTML page layout
+ head.vm - elements in the <head> section of the HTML document
+ header.vm - top section of page visible to users
+ footer.vm - bottom section of page visible to users,
+ includes debug and help links
+ main.css - CSS style for overall pages
+ see also jquery.autocomplete.css
+
+Query Form and Options:
+ query_form.vm - renders query form
+ query_group.vm - group by fields
+ e.g.: Manufacturer or Popularity
+ query_spatial.vm - select box for location based Geospatial search
+
+Spelling Suggestions:
+ did_you_mean.vm - hyperlinked spelling suggestions in results
+ suggest.vm - dynamic spelling suggestions
+ as you type in the search form
+ jquery.autocomplete.js - supporting files for dynamic suggestions
+ jquery.autocomplete.css - Most CSS is defined in main.css
+
+
+Search Results, General:
+ (see also browse.vm)
+ tabs.vm - provides navigation to advanced search options
+ pagination_top.vm - paging and statistics at top of results
+ pagination_bottom.vm - paging and statistics at bottom of results
+ results_list.vm
+ hit.vm - called for each matching doc,
+ decides which template to use
+ hit_grouped.vm - display results grouped by field values
+ product_doc.vm - display a Product
+ join_doc.vm - display a joined document
+ richtext_doc.vm - display a complex/misc. document
+ hit_plain.vm - basic display of all fields,
+ edit results_list.vm to enable this
+
+
+Search Results, Facets & Clusters:
+ facets.vm - calls the 4 facet and 1 cluster template
+ facet_fields.vm - display facets based on field values
+ e.g.: fields specified by &facet.field=
+ facet_queries.vm - display facets based on specific facet queries
+ e.g.: facets specified by &facet.query=
+ facet_ranges.vm - display facets based on ranges
+ e.g.: ranges specified by &facet.range=
+ facet_pivot.vm - display pivot based facets
+ e.g.: facets specified by &facet.pivot=
+ cluster.vm - if clustering is available
+ then call cluster_results.vm
+ cluster_results.vm - actual rendering of clusters
diff --git a/solr/homeDirectoryTemplate/conf/velocity/VM_global_library.vm b/solr/homeDirectoryTemplate/conf/velocity/VM_global_library.vm
new file mode 100644
index 000000000..5dda07c4e
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/VM_global_library.vm
@@ -0,0 +1,175 @@
+#**
+ * Global macros used by other templates.
+ * This file must be named VM_global_library.vm
+ * in order for Velocity to find it.
+ *#
+
+#macro(param $key)$request.params.get($key)#end
+
+#macro(url_root)/solr#end
+
+## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/
+#macro(core_name)$request.core.name#end
+#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end
+#macro(url_for_home)#url_for_solr/browse#end
+
+#macro(q)&q=$!{esc.url($params.get('q'))}#end
+
+#macro(fqs $p)#foreach($fq in $p)#if($velocityCount>1)&#{end}fq=$esc.url($fq)#end#end
+
+#macro(debug)#if($request.params.get('debugQuery'))&debugQuery=true#end#end
+
+#macro(boostPrice)#if($request.params.get('bf') == 'price')&bf=price#end#end
+
+#macro(annotate)#if($request.params.get('annotateBrowse'))&annotateBrowse=true#end#end
+
+#macro(annTitle $msg)#if($annotate == true)title="$msg"#end#end
+
+#macro(spatial)#if($request.params.get('sfield'))&sfield=store#end#if($request.params.get('pt'))&pt=$request.params.get('pt')#end#if($request.params.get('d'))&d=$request.params.get('d')#end#end
+
+#macro(qOpts)#set($queryOpts = $request.params.get("queryOpts"))#if($queryOpts && $queryOpts != "")&queryOpts=$queryOpts#end#end
+
+#macro(group)#if($request.params.getBool("group") == true)&group=true#end#if($request.params.get("group.field"))#foreach($grp in $request.params.getParams('group.field'))&group.field=$grp#end#end#end
+
+#macro(sort $p)#if($p)#foreach($s in $p)&sort=$esc.url($s)#end#end#end
+
+#macro(lensNoQ)?#if($request.params.getParams('fq') and $list.size($request.params.getParams('fq')) > 0)&#fqs($request.params.getParams('fq'))#end#sort($request.params.getParams('sort'))#debug#boostPrice#annotate#spatial#qOpts#group#end
+#macro(lens)#lensNoQ#q#end
+
+
+#macro(url_for_lens)#{url_for_home}#lens#end
+
+#macro(url_for_start $start)#url_for_home#lens&start=$start#end
+
+#macro(url_for_filters $p)#url_for_home?#q#boostPrice#spatial#qOpts#if($list.size($p) > 0)&#fqs($p)#end#debug#end
+
+#macro(url_for_nested_facet_query $field)#url_for_home#lens&fq=$esc.url($field)#end
+
+## TODO: convert to use {!raw f=$field}$value (with escaping of course)
+#macro(url_for_facet_filter $field $value)#url_for_home#lens&fq=$esc.url($field):%22$esc.url($value)%22#end
+
+#macro(url_for_facet_date_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
+
+#macro(url_for_facet_range_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
+
+
+#macro(link_to_previous_page $text)
+ #if($page.current_page_number > 1)
+ #set($prev_start = $page.start - $page.results_per_page)
+ $text
+ #end
+#end
+
+#macro(link_to_next_page $text)
+ #if($page.current_page_number < $page.page_count)
+ #set($next_start = $page.start + $page.results_per_page)
+ $text
+ #end
+#end
+
+#macro(link_to_page $page_number $text)
+ #if($page_number == $page.current_page_number)
+ $text
+ #else
+ #if($page_number <= $page.page_count)
+ #set($page_start = $page_number * $page.results_per_page - $page.results_per_page)
+ $text
+ #end
+ #end
+#end
+
+#macro(display_facet_query $field, $display, $fieldName)
+ #if($field.size() > 0)
+ $display
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/cluster.vm b/solr/homeDirectoryTemplate/conf/velocity/cluster.vm
new file mode 100644
index 000000000..bfe2f3b8d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/cluster.vm
@@ -0,0 +1,19 @@
+#**
+ * Check if Clustering is Enabled and then
+ * call cluster_results.vm
+ *#
+
+
+ Clusters
+
+
+## Div tag has placeholder text by default
+
+ Run Solr with java -Dsolr.clustering.enabled=true -jar start.jar to see clustered search results.
+
+
+## Replace the div content *if* Carrot^2 is available
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/cluster_results.vm b/solr/homeDirectoryTemplate/conf/velocity/cluster_results.vm
new file mode 100644
index 000000000..204480d5d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/cluster_results.vm
@@ -0,0 +1,31 @@
+#**
+ * Actual rendering of Clusters
+ *#
+
+## For each cluster
+#foreach ($clusters in $response.response.clusters)
+
+ #set($labels = $clusters.get('labels'))
+ #set($docs = $clusters.get('docs'))
+
+ ## This Cluster's Heading
+
+ #foreach ($label in $labels)
+ ## Keep the following line together to prevent
+ ## a space appearing before each comma
+ $label#if( $foreach.hasNext ),#end
+ #end
+
+
+ ## This Cluster's Documents
+
+ ## For each doc in this cluster
+ #foreach ($cluDoc in $docs)
+
+ #end ## end if > 0
+ #end ## end for each facet field
+#end ## end if response has facet fields
diff --git a/solr/homeDirectoryTemplate/conf/velocity/facet_pivot.vm b/solr/homeDirectoryTemplate/conf/velocity/facet_pivot.vm
new file mode 100644
index 000000000..7aa50da3f
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/facet_pivot.vm
@@ -0,0 +1,12 @@
+#**
+ * Display Pivot-Based Facets
+ * e.g.: facets specified by &facet.pivot=
+ *#
+
+
+ Pivot Facets
+
+
+#set($pivot = $response.response.facet_counts.facet_pivot)
+
+#display_facet_pivot($pivot, "")
diff --git a/solr/homeDirectoryTemplate/conf/velocity/facet_queries.vm b/solr/homeDirectoryTemplate/conf/velocity/facet_queries.vm
new file mode 100644
index 000000000..37489c7e0
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/facet_queries.vm
@@ -0,0 +1,12 @@
+#**
+ * Display facets based on specific facet queries
+ * e.g.: facets specified by &facet.query=
+ *#
+
+#set($field = $response.response.facet_counts.facet_queries)
+
+
+ Query Facets
+
+
+#display_facet_query($field, "", "")
diff --git a/solr/homeDirectoryTemplate/conf/velocity/facet_ranges.vm b/solr/homeDirectoryTemplate/conf/velocity/facet_ranges.vm
new file mode 100644
index 000000000..a61084b22
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/facet_ranges.vm
@@ -0,0 +1,23 @@
+#**
+ * Display facets based on ranges of values, AKA "Buckets"
+ * e.g.: ranges specified by &facet.range=
+ *#
+
+
+ Range Facets
+
+
+#foreach ($field in $response.response.facet_counts.facet_ranges)
+ ## Hide facets without value
+ #if($field.value.counts.size() > 0)
+ #set($name = $field.key)
+ #set($display = $name)
+ #set($f = $field.value.counts)
+ #set($start = $field.value.start)
+ #set($end = $field.value.end)
+ #set($gap = $field.value.gap)
+ #set($before = $field.value.before)
+ #set($after = $field.value.after)
+ #display_facet_range($f, $display, $name, $start, $end, $gap, $before, $after)
+ #end ## end if has any values
+#end ## end for each facet range
diff --git a/solr/homeDirectoryTemplate/conf/velocity/facets.vm b/solr/homeDirectoryTemplate/conf/velocity/facets.vm
new file mode 100644
index 000000000..55d40c9ab
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/facets.vm
@@ -0,0 +1,10 @@
+#**
+ * Overall Facet display block
+ * Invokes the 4 facet and 1 cluster template
+ *#
+
+#parse('facet_fields.vm')
+#parse('facet_queries.vm')
+#parse('facet_ranges.vm')
+#parse('facet_pivot.vm')
+#parse('cluster.vm')
diff --git a/solr/homeDirectoryTemplate/conf/velocity/footer.vm b/solr/homeDirectoryTemplate/conf/velocity/footer.vm
new file mode 100644
index 000000000..0604c34cc
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/footer.vm
@@ -0,0 +1,43 @@
+#**
+ * Render the bottom section of the page visible to users
+ *#
+
+
+
+ Disclaimer:
+ The locations displayed in this demonstration are purely fictional.
+ It is more than likely that no store with the items listed actually
+ exists at that location!
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/head.vm b/solr/homeDirectoryTemplate/conf/velocity/head.vm
new file mode 100644
index 000000000..d1f6ee6eb
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/head.vm
@@ -0,0 +1,35 @@
+#**
+ * Provide elements for the <head> section of the HTML document
+ *#
+
+ ## An example of using an arbitrary request parameter
+ #param('title')
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/header.vm b/solr/homeDirectoryTemplate/conf/velocity/header.vm
new file mode 100644
index 000000000..686604792
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/header.vm
@@ -0,0 +1,7 @@
+#**
+ * Render the top section of the page visible to users
+ *#
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/hit.vm b/solr/homeDirectoryTemplate/conf/velocity/hit.vm
new file mode 100644
index 000000000..a9c11f411
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/hit.vm
@@ -0,0 +1,25 @@
+#**
+ * Called for each matching document but then
+ * calls one of product_doc, join_doc or richtext_doc
+ * depending on which fields the doc has
+ *#
+
+#set($docId = $doc.getFieldValue('id'))
+
+
+
+ ## Has a "name" field ?
+ #if($doc.getFieldValue('name'))
+ #parse("product_doc.vm")
+
+ ## Has a "compName_s" field ?
+ #elseif($doc.getFieldValue('compName_s'))
+ #parse("join_doc.vm")
+
+ ## Fallback to richtext_doc
+ #else
+ #parse("richtext_doc.vm")
+
+ #end
+
+
+
+ #end ## end of foreach group in grouping.value.groups
+
## div tag for entire list of groups
+
+
## end of div class=result-document
diff --git a/solr/homeDirectoryTemplate/conf/velocity/hit_plain.vm b/solr/homeDirectoryTemplate/conf/velocity/hit_plain.vm
new file mode 100644
index 000000000..193439b59
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/hit_plain.vm
@@ -0,0 +1,25 @@
+#**
+ * An extremely plain / debug version of hit.vm
+ *#
+
+
+ ## For each field
+ #foreach( $fieldName in $doc.fieldNames )
+ ## For each value
+ #foreach( $value in $doc.getFieldValues($fieldName) )
+
+ #end ## end for each value
+ #end ## end for each field
+
+
diff --git a/solr/homeDirectoryTemplate/conf/velocity/join_doc.vm b/solr/homeDirectoryTemplate/conf/velocity/join_doc.vm
new file mode 100644
index 000000000..9956012b4
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/join_doc.vm
@@ -0,0 +1,20 @@
+#**
+ * Display documents that are joined to other documents
+ *#
+
+
+
+ #set($loc = $request.params.get('pt'))
+ ## Normalize first trip through to "none" because
+ ## an empty string generates an error message later on
+ #if( ! $loc )
+ #set( $loc = "none" )
+ #end
+
+ #set($dist = $request.params.get('d', "10"))
+
+ ## Cities for The Select List
+ #set( $cities = {
+ "none": "No Filter",
+ "45.17614,-93.87341": "Buffalo, MN",
+ "37.7752,-100.0232": "Dodge City, KS",
+ "35.0752,-97.032": "Oklahoma City, OK",
+ "37.7752,-122.4232": "San Francisco CA"
+ })
+
+
+
+
+ Distance (KM):
+
+
+
+
+
+
+
+
+
+
+
+#end
diff --git a/solr/homeDirectoryTemplate/conf/velocity/results_list.vm b/solr/homeDirectoryTemplate/conf/velocity/results_list.vm
new file mode 100644
index 000000000..f73532b2d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/results_list.vm
@@ -0,0 +1,22 @@
+#**
+ * Render the main Results List
+ *#
+
+## Usually displayed inside <div class="results">
+
+#if($response.response.get('grouped'))
+
+ #foreach($grouping in $response.response.get('grouped'))
+ #parse("hit_grouped.vm")
+ #end
+
+#else
+
+ #foreach($doc in $response.results)
+ #parse("hit.vm")
+ ## Can get an extremely simple view of the doc
+ ## which might be nicer for debugging
+ ##parse("hit_plain.vm")
+ #end
+
+#end
diff --git a/solr/homeDirectoryTemplate/conf/velocity/richtext_doc.vm b/solr/homeDirectoryTemplate/conf/velocity/richtext_doc.vm
new file mode 100644
index 000000000..9e8d6cb71
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/richtext_doc.vm
@@ -0,0 +1,153 @@
+#**
+ * Render a complex document in the results list
+ *#
+
+## Load Mime-Type List and Mapping
+#parse('mime_type_lists.vm')
+## Sets:
+## * supportedMimeTypes, AKA supportedtypes
+## * mimeExtensionsMap, AKA extMap
+
+## Title
+#if($doc.getFieldValue('title'))
+ #set($title = $esc.html($doc.getFirstValue('title')))
+#else
+ #set($title = "["+$doc.getFieldValue('id')+"]")
+#end
+
+## URL
+#if($doc.getFieldValue('url'))
+ #set($url = $doc.getFieldValue('url'))
+#elseif($doc.getFieldValue('resourcename'))
+ #set($url = "file:///$doc.getFieldValue('resourcename')")
+#else
+ #set($url = "$doc.getFieldValue('id')")
+#end
+
+## Sort out Mime-Type
+#set($ct = $list.get($doc.getFirstValue('content_type').split(";"),0))
+#set($filename = $doc.getFieldValue('resourcename'))
+#set($filetype = false)
+#set($filetype = $mimeExtensionsMap.get($ct))
+
+## TODO: falling back to file extension is convenient,
+## except when you don't have an icon for that extension
+## example "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+## document with a .docx extension.
+## It'd be nice to fall back to an "unknown" or the existing "file" type
+## We sort of do this below, but only if the filename has no extension
+## (anything after the last dot).
+
+## Fall back to the file extension only when a filename exists and contains a
+## dot; otherwise lastIndexOf returns -1 and substring(-1) would throw.
+#if(!$filetype && $filename && $filename.lastIndexOf(".") >= 0)
+ #set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1))
+#end
+
+## #if(!$filetype)
+## #set($filetype = "file")
+## #end
+## #if(!$supportedMimeTypes.contains($filetype))
+## #set($filetype = "file")
+## #end
+
+## Row 1: Icon and Title and mlt link
+
+ ## Icon
+ ## Small file type icons from http://www.splitbrain.org/projects/file_icons (public domain)
+
+
+ ## Title, hyperlinked
+
+ $title
+
+ ## Link for MLT / More Like This / Find Similar
+
+ #if($params.getBool('mlt', false) == false)
+
+ More Like This
+ #end
+
+
+
+ ## Else MLT Enabled but no mlt results for this query
+ #elseif($mltOn && $mlt.size() == 0)
+
No Similar Items Found
+ #end
+
## div class=mlt
+
+#parse('debug.vm')
diff --git a/solr/homeDirectoryTemplate/conf/velocity/suggest.vm b/solr/homeDirectoryTemplate/conf/velocity/suggest.vm
new file mode 100644
index 000000000..dae6b830d
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/suggest.vm
@@ -0,0 +1,8 @@
+#**
+ * Provides dynamic spelling suggestions
+ * as you type in the search form
+ *#
+
+#foreach($t in $response.response.terms.name)
+ $t.key
+#end
diff --git a/solr/homeDirectoryTemplate/conf/velocity/tabs.vm b/solr/homeDirectoryTemplate/conf/velocity/tabs.vm
new file mode 100644
index 000000000..da19cbc0b
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/velocity/tabs.vm
@@ -0,0 +1,50 @@
+#**
+ * Provides navigation/access to Advanced search options
+ * Usually displayed near the top of the page
+ *#
+
+##TODO: Make some nice tabs here
+
+#set($queryOpts = $params.get("queryOpts"))
+
+
+
+ Type of Search:
+
+ ##queryOpts=$queryOpts
+
+ ## return to Simple Search
+ ##set( $selected = ($queryOpts && $queryOpts != "") )
+ #set( $selected = ! $queryOpts )
+
+ #if($selected)
+ Simple
+ #else
+
+ Simple
+ #end
+
+
+ ## GEO-Spatial / Location Based
+ #set( $selected = ($queryOpts == "spatial") )
+
+ #if($selected)
+ Spatial
+ #else
+
+ Spatial
+ #end
+
+
+ ## Group By Field
+ #set( $selected = ($queryOpts == "group") )
+
+ #if($selected)
+ Group By
+ #else
+
+ Group By
+ #end
+
+
+
+
+
+
+ 1
+
+
+
+
+
+
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/conf/xslt/updateXml.xsl b/solr/homeDirectoryTemplate/conf/xslt/updateXml.xsl
new file mode 100644
index 000000000..daf1344b0
--- /dev/null
+++ b/solr/homeDirectoryTemplate/conf/xslt/updateXml.xsl
@@ -0,0 +1,70 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/solr.xml b/solr/homeDirectoryTemplate/solr.xml
index 6f0d0fd3c..4d1a84e75 100644
--- a/solr/homeDirectoryTemplate/solr.xml
+++ b/solr/homeDirectoryTemplate/solr.xml
@@ -1,34 +1,34 @@
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/homeDirectoryTemplate/solr_newversion.xml b/solr/homeDirectoryTemplate/solr_newversion.xml
new file mode 100644
index 000000000..94d60b6a2
--- /dev/null
+++ b/solr/homeDirectoryTemplate/solr_newversion.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+ ${host:}
+ ${jetty.port:8983}
+ ${hostContext:solr}
+ ${zkClientTimeout:30000}
+ ${genericCoreNodeNames:true}
+
+
+
+ ${socketTimeout:0}
+ ${connTimeout:0}
+
+
+
diff --git a/solr/solr-4.7.2.war b/solr/solr-4.7.2.war
new file mode 100644
index 000000000..cd8f30876
Binary files /dev/null and b/solr/solr-4.7.2.war differ
diff --git a/solr/solrj-lib/commons-io-2.1.jar b/solr/solrj-lib/commons-io-2.1.jar
new file mode 100644
index 000000000..b5c7d692f
Binary files /dev/null and b/solr/solrj-lib/commons-io-2.1.jar differ
diff --git a/solr/solrj-lib/httpclient-4.3.1.jar b/solr/solrj-lib/httpclient-4.3.1.jar
new file mode 100644
index 000000000..cdd1e3874
Binary files /dev/null and b/solr/solrj-lib/httpclient-4.3.1.jar differ
diff --git a/solr/solrj-lib/httpcore-4.3.jar b/solr/solrj-lib/httpcore-4.3.jar
new file mode 100644
index 000000000..ddfe6dacb
Binary files /dev/null and b/solr/solrj-lib/httpcore-4.3.jar differ
diff --git a/solr/solrj-lib/httpmime-4.3.1.jar b/solr/solrj-lib/httpmime-4.3.1.jar
new file mode 100644
index 000000000..2b26d7a79
Binary files /dev/null and b/solr/solrj-lib/httpmime-4.3.1.jar differ
diff --git a/solr/solrj-lib/jcl-over-slf4j-1.6.6.jar b/solr/solrj-lib/jcl-over-slf4j-1.6.6.jar
new file mode 100644
index 000000000..ab898c041
Binary files /dev/null and b/solr/solrj-lib/jcl-over-slf4j-1.6.6.jar differ
diff --git a/solr/solrj-lib/jul-to-slf4j-1.6.6.jar b/solr/solrj-lib/jul-to-slf4j-1.6.6.jar
new file mode 100644
index 000000000..fa8640f16
Binary files /dev/null and b/solr/solrj-lib/jul-to-slf4j-1.6.6.jar differ
diff --git a/solr/solrj-lib/log4j-1.2.16.jar b/solr/solrj-lib/log4j-1.2.16.jar
new file mode 100644
index 000000000..5429a903e
Binary files /dev/null and b/solr/solrj-lib/log4j-1.2.16.jar differ
diff --git a/solr/solrj-lib/noggit-0.5.jar b/solr/solrj-lib/noggit-0.5.jar
new file mode 100644
index 000000000..163ced7cc
Binary files /dev/null and b/solr/solrj-lib/noggit-0.5.jar differ
diff --git a/solr/solrj-lib/slf4j-api-1.6.6.jar b/solr/solrj-lib/slf4j-api-1.6.6.jar
new file mode 100644
index 000000000..4c03fa6bb
Binary files /dev/null and b/solr/solrj-lib/slf4j-api-1.6.6.jar differ
diff --git a/solr/solrj-lib/slf4j-log4j12-1.6.6.jar b/solr/solrj-lib/slf4j-log4j12-1.6.6.jar
new file mode 100644
index 000000000..e72c2d66e
Binary files /dev/null and b/solr/solrj-lib/slf4j-log4j12-1.6.6.jar differ
diff --git a/solr/solrjClientLibs/wstx-asl-3.2.7.jar b/solr/solrj-lib/wstx-asl-3.2.7.jar
similarity index 100%
rename from solr/solrjClientLibs/wstx-asl-3.2.7.jar
rename to solr/solrj-lib/wstx-asl-3.2.7.jar
diff --git a/solr/solrj-lib/zookeeper-3.4.5.jar b/solr/solrj-lib/zookeeper-3.4.5.jar
new file mode 100644
index 000000000..a7966bbbc
Binary files /dev/null and b/solr/solrj-lib/zookeeper-3.4.5.jar differ
diff --git a/solr/solrjClientLibs/apache-solr-core-3.1.0.jar b/solr/solrjClientLibs/apache-solr-core-3.1.0.jar
deleted file mode 100644
index 00a842e4b..000000000
Binary files a/solr/solrjClientLibs/apache-solr-core-3.1.0.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/apache-solr-solrj-3.1.0.jar b/solr/solrjClientLibs/apache-solr-solrj-3.1.0.jar
deleted file mode 100644
index 058d27350..000000000
Binary files a/solr/solrjClientLibs/apache-solr-solrj-3.1.0.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/commons-codec-1.4.jar b/solr/solrjClientLibs/commons-codec-1.4.jar
deleted file mode 100644
index 458d432da..000000000
Binary files a/solr/solrjClientLibs/commons-codec-1.4.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/commons-httpclient-3.1.jar b/solr/solrjClientLibs/commons-httpclient-3.1.jar
deleted file mode 100644
index 7c59774ae..000000000
Binary files a/solr/solrjClientLibs/commons-httpclient-3.1.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/commons-io-1.4.jar b/solr/solrjClientLibs/commons-io-1.4.jar
deleted file mode 100644
index 133dc6cb3..000000000
Binary files a/solr/solrjClientLibs/commons-io-1.4.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/geronimo-stax-api_1.0_spec-1.0.1.jar b/solr/solrjClientLibs/geronimo-stax-api_1.0_spec-1.0.1.jar
deleted file mode 100644
index ab1ee3ba6..000000000
Binary files a/solr/solrjClientLibs/geronimo-stax-api_1.0_spec-1.0.1.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/jcl-over-slf4j-1.5.5.jar b/solr/solrjClientLibs/jcl-over-slf4j-1.5.5.jar
deleted file mode 100644
index 6b8ddd633..000000000
Binary files a/solr/solrjClientLibs/jcl-over-slf4j-1.5.5.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/slf4j-api-1.5.5.jar b/solr/solrjClientLibs/slf4j-api-1.5.5.jar
deleted file mode 100644
index 4bb4abbb0..000000000
Binary files a/solr/solrjClientLibs/slf4j-api-1.5.5.jar and /dev/null differ
diff --git a/solr/solrjClientLibs/slf4j-log4j12-1.5.6.jar b/solr/solrjClientLibs/slf4j-log4j12-1.5.6.jar
deleted file mode 100644
index 0c40e9c8b..000000000
Binary files a/solr/solrjClientLibs/slf4j-log4j12-1.5.6.jar and /dev/null differ
diff --git a/webapp/build.xml b/webapp/build.xml
index e8bc13669..1adf47338 100644
--- a/webapp/build.xml
+++ b/webapp/build.xml
@@ -420,7 +420,7 @@
-
+
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchInputDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchInputDocument.java
index 34e2dbd10..153eb337a 100644
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchInputDocument.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchInputDocument.java
@@ -22,33 +22,41 @@ public interface SearchInputDocument {
void addField(SearchInputField field);
/**
- * Create a field with this name and values, and put it into the document. If
- * a field with this name already exists in the document, it will be
- * replaced.
+ * Create a field with this name and values, and put it into the document.
+ *
+ * If a field with this name already exists in the document, these values
+ * will be added to the existing values on the field.
*/
void addField(String name, Object... values);
/**
- * Create a field with this name and values, and put it into the document. If
- * a field with this name already exists in the document, it will be
- * replaced.
+ * Create a field with this name and values, and put it into the document.
+ *
+ * If a field with this name already exists in the document, these values
+ * will be added to the existing values on the field.
*/
void addField(String name, Collection