added morphology analyzer
This commit is contained in:
parent
74665136ea
commit
fbd8b1163f
22 changed files with 25768 additions and 570 deletions
BIN
jmorphy-libs/commons-codec-1.10.jar
Normal file
BIN
jmorphy-libs/commons-codec-1.10.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/dawg-7.7.2.jar
Normal file
BIN
jmorphy-libs/dawg-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/guava-18.0.jar
Normal file
BIN
jmorphy-libs/guava-18.0.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/jmorphy2-core-7.7.2.jar
Normal file
BIN
jmorphy-libs/jmorphy2-core-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/jmorphy2-dicts-ru-7.7.2.jar
Normal file
BIN
jmorphy-libs/jmorphy2-dicts-ru-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/jmorphy2-dicts-uk-7.7.2.jar
Normal file
BIN
jmorphy-libs/jmorphy2-dicts-uk-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/jmorphy2-lucene-7.7.2.jar
Normal file
BIN
jmorphy-libs/jmorphy2-lucene-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/jmorphy2-nlp-7.7.2.jar
Normal file
BIN
jmorphy-libs/jmorphy2-nlp-7.7.2.jar
Normal file
Binary file not shown.
BIN
jmorphy-libs/noggit-0.7.jar
Normal file
BIN
jmorphy-libs/noggit-0.7.jar
Normal file
Binary file not shown.
680
vivocore/conf/lang/pymorphy2_dicts/grammemes.json
Normal file
680
vivocore/conf/lang/pymorphy2_dicts/grammemes.json
Normal file
|
@ -0,0 +1,680 @@
|
|||
[
|
||||
[
|
||||
"POST",
|
||||
"",
|
||||
"ЧР",
|
||||
"часть речи"
|
||||
],
|
||||
[
|
||||
"NOUN",
|
||||
"POST",
|
||||
"СУЩ",
|
||||
"имя существительное"
|
||||
],
|
||||
[
|
||||
"ADJF",
|
||||
"POST",
|
||||
"ПРИЛ",
|
||||
"имя прилагательное (полное)"
|
||||
],
|
||||
[
|
||||
"ADJS",
|
||||
"POST",
|
||||
"КР_ПРИЛ",
|
||||
"имя прилагательное (краткое)"
|
||||
],
|
||||
[
|
||||
"COMP",
|
||||
"POST",
|
||||
"КОМП",
|
||||
"компаратив"
|
||||
],
|
||||
[
|
||||
"VERB",
|
||||
"POST",
|
||||
"ГЛ",
|
||||
"глагол (личная форма)"
|
||||
],
|
||||
[
|
||||
"INFN",
|
||||
"POST",
|
||||
"ИНФ",
|
||||
"глагол (инфинитив)"
|
||||
],
|
||||
[
|
||||
"PRTF",
|
||||
"POST",
|
||||
"ПРИЧ",
|
||||
"причастие (полное)"
|
||||
],
|
||||
[
|
||||
"PRTS",
|
||||
"POST",
|
||||
"КР_ПРИЧ",
|
||||
"причастие (краткое)"
|
||||
],
|
||||
[
|
||||
"GRND",
|
||||
"POST",
|
||||
"ДЕЕПР",
|
||||
"деепричастие"
|
||||
],
|
||||
[
|
||||
"NUMR",
|
||||
"POST",
|
||||
"ЧИСЛ",
|
||||
"числительное"
|
||||
],
|
||||
[
|
||||
"ADVB",
|
||||
"POST",
|
||||
"Н",
|
||||
"наречие"
|
||||
],
|
||||
[
|
||||
"NPRO",
|
||||
"POST",
|
||||
"МС",
|
||||
"местоимение-существительное"
|
||||
],
|
||||
[
|
||||
"PRED",
|
||||
"POST",
|
||||
"ПРЕДК",
|
||||
"предикатив"
|
||||
],
|
||||
[
|
||||
"PREP",
|
||||
"POST",
|
||||
"ПР",
|
||||
"предлог"
|
||||
],
|
||||
[
|
||||
"CONJ",
|
||||
"POST",
|
||||
"СОЮЗ",
|
||||
"союз"
|
||||
],
|
||||
[
|
||||
"PRCL",
|
||||
"POST",
|
||||
"ЧАСТ",
|
||||
"частица"
|
||||
],
|
||||
[
|
||||
"INTJ",
|
||||
"POST",
|
||||
"МЕЖД",
|
||||
"междометие"
|
||||
],
|
||||
[
|
||||
"ANim",
|
||||
"",
|
||||
"Од-неод",
|
||||
"одушевлённость / одушевлённость не выражена"
|
||||
],
|
||||
[
|
||||
"anim",
|
||||
"ANim",
|
||||
"од",
|
||||
"одушевлённое"
|
||||
],
|
||||
[
|
||||
"inan",
|
||||
"ANim",
|
||||
"неод",
|
||||
"неодушевлённое"
|
||||
],
|
||||
[
|
||||
"GNdr",
|
||||
"",
|
||||
"хр",
|
||||
"род / род не выражен"
|
||||
],
|
||||
[
|
||||
"masc",
|
||||
"GNdr",
|
||||
"мр",
|
||||
"мужской род"
|
||||
],
|
||||
[
|
||||
"femn",
|
||||
"GNdr",
|
||||
"жр",
|
||||
"женский род"
|
||||
],
|
||||
[
|
||||
"neut",
|
||||
"GNdr",
|
||||
"ср",
|
||||
"средний род"
|
||||
],
|
||||
[
|
||||
"Ms-f",
|
||||
"",
|
||||
"ор",
|
||||
"общий род"
|
||||
],
|
||||
[
|
||||
"NMbr",
|
||||
"",
|
||||
"Число",
|
||||
"число"
|
||||
],
|
||||
[
|
||||
"sing",
|
||||
"NMbr",
|
||||
"ед",
|
||||
"единственное число"
|
||||
],
|
||||
[
|
||||
"plur",
|
||||
"NMbr",
|
||||
"мн",
|
||||
"множественное число"
|
||||
],
|
||||
[
|
||||
"Sgtm",
|
||||
"",
|
||||
"sg",
|
||||
"singularia tantum"
|
||||
],
|
||||
[
|
||||
"Pltm",
|
||||
"",
|
||||
"pl",
|
||||
"pluralia tantum"
|
||||
],
|
||||
[
|
||||
"Fixd",
|
||||
"",
|
||||
"0",
|
||||
"неизменяемое"
|
||||
],
|
||||
[
|
||||
"CAse",
|
||||
"",
|
||||
"Падеж",
|
||||
"категория падежа"
|
||||
],
|
||||
[
|
||||
"nomn",
|
||||
"CAse",
|
||||
"им",
|
||||
"именительный падеж"
|
||||
],
|
||||
[
|
||||
"gent",
|
||||
"CAse",
|
||||
"рд",
|
||||
"родительный падеж"
|
||||
],
|
||||
[
|
||||
"datv",
|
||||
"CAse",
|
||||
"дт",
|
||||
"дательный падеж"
|
||||
],
|
||||
[
|
||||
"accs",
|
||||
"CAse",
|
||||
"вн",
|
||||
"винительный падеж"
|
||||
],
|
||||
[
|
||||
"ablt",
|
||||
"CAse",
|
||||
"тв",
|
||||
"творительный падеж"
|
||||
],
|
||||
[
|
||||
"loct",
|
||||
"CAse",
|
||||
"пр",
|
||||
"предложный падеж"
|
||||
],
|
||||
[
|
||||
"voct",
|
||||
"nomn",
|
||||
"зв",
|
||||
"звательный падеж"
|
||||
],
|
||||
[
|
||||
"gen1",
|
||||
"gent",
|
||||
"рд1",
|
||||
"первый родительный падеж"
|
||||
],
|
||||
[
|
||||
"gen2",
|
||||
"gent",
|
||||
"рд2",
|
||||
"второй родительный (частичный) падеж"
|
||||
],
|
||||
[
|
||||
"acc2",
|
||||
"accs",
|
||||
"вн2",
|
||||
"второй винительный падеж"
|
||||
],
|
||||
[
|
||||
"loc1",
|
||||
"loct",
|
||||
"пр1",
|
||||
"первый предложный падеж"
|
||||
],
|
||||
[
|
||||
"loc2",
|
||||
"loct",
|
||||
"пр2",
|
||||
"второй предложный (местный) падеж"
|
||||
],
|
||||
[
|
||||
"Abbr",
|
||||
"",
|
||||
"аббр",
|
||||
"аббревиатура"
|
||||
],
|
||||
[
|
||||
"Name",
|
||||
"",
|
||||
"имя",
|
||||
"имя"
|
||||
],
|
||||
[
|
||||
"Surn",
|
||||
"",
|
||||
"фам",
|
||||
"фамилия"
|
||||
],
|
||||
[
|
||||
"Patr",
|
||||
"",
|
||||
"отч",
|
||||
"отчество"
|
||||
],
|
||||
[
|
||||
"Geox",
|
||||
"",
|
||||
"гео",
|
||||
"топоним"
|
||||
],
|
||||
[
|
||||
"Orgn",
|
||||
"",
|
||||
"орг",
|
||||
"организация"
|
||||
],
|
||||
[
|
||||
"Trad",
|
||||
"",
|
||||
"tm",
|
||||
"торговая марка"
|
||||
],
|
||||
[
|
||||
"Subx",
|
||||
"",
|
||||
"субст?",
|
||||
"возможна субстантивация"
|
||||
],
|
||||
[
|
||||
"Supr",
|
||||
"",
|
||||
"превосх",
|
||||
"превосходная степень"
|
||||
],
|
||||
[
|
||||
"Qual",
|
||||
"",
|
||||
"кач",
|
||||
"качественное"
|
||||
],
|
||||
[
|
||||
"Apro",
|
||||
"",
|
||||
"мест-п",
|
||||
"местоименное"
|
||||
],
|
||||
[
|
||||
"Anum",
|
||||
"",
|
||||
"числ-п",
|
||||
"порядковое"
|
||||
],
|
||||
[
|
||||
"Poss",
|
||||
"",
|
||||
"притяж",
|
||||
"притяжательное"
|
||||
],
|
||||
[
|
||||
"V-ey",
|
||||
"",
|
||||
"*ею",
|
||||
"форма на -ею"
|
||||
],
|
||||
[
|
||||
"V-oy",
|
||||
"",
|
||||
"*ою",
|
||||
"форма на -ою"
|
||||
],
|
||||
[
|
||||
"Cmp2",
|
||||
"",
|
||||
"сравн2",
|
||||
"сравнительная степень на по-"
|
||||
],
|
||||
[
|
||||
"V-ej",
|
||||
"",
|
||||
"*ей",
|
||||
"форма компаратива на -ей"
|
||||
],
|
||||
[
|
||||
"ASpc",
|
||||
"",
|
||||
"Вид",
|
||||
"категория вида"
|
||||
],
|
||||
[
|
||||
"perf",
|
||||
"ASpc",
|
||||
"сов",
|
||||
"совершенный вид"
|
||||
],
|
||||
[
|
||||
"impf",
|
||||
"ASpc",
|
||||
"несов",
|
||||
"несовершенный вид"
|
||||
],
|
||||
[
|
||||
"TRns",
|
||||
"",
|
||||
"Перех",
|
||||
"категория переходности"
|
||||
],
|
||||
[
|
||||
"tran",
|
||||
"TRns",
|
||||
"перех",
|
||||
"переходный"
|
||||
],
|
||||
[
|
||||
"intr",
|
||||
"TRns",
|
||||
"неперех",
|
||||
"непереходный"
|
||||
],
|
||||
[
|
||||
"Impe",
|
||||
"",
|
||||
"безл",
|
||||
"безличный"
|
||||
],
|
||||
[
|
||||
"Impx",
|
||||
"",
|
||||
"безл?",
|
||||
"возможно безличное употребление"
|
||||
],
|
||||
[
|
||||
"Mult",
|
||||
"",
|
||||
"мног",
|
||||
"многократный"
|
||||
],
|
||||
[
|
||||
"Refl",
|
||||
"",
|
||||
"возвр",
|
||||
"возвратный"
|
||||
],
|
||||
[
|
||||
"PErs",
|
||||
"",
|
||||
"Лицо",
|
||||
"категория лица"
|
||||
],
|
||||
[
|
||||
"1per",
|
||||
"PErs",
|
||||
"1л",
|
||||
"1 лицо"
|
||||
],
|
||||
[
|
||||
"2per",
|
||||
"PErs",
|
||||
"2л",
|
||||
"2 лицо"
|
||||
],
|
||||
[
|
||||
"3per",
|
||||
"PErs",
|
||||
"3л",
|
||||
"3 лицо"
|
||||
],
|
||||
[
|
||||
"TEns",
|
||||
"",
|
||||
"Время",
|
||||
"категория времени"
|
||||
],
|
||||
[
|
||||
"pres",
|
||||
"TEns",
|
||||
"наст",
|
||||
"настоящее время"
|
||||
],
|
||||
[
|
||||
"past",
|
||||
"TEns",
|
||||
"прош",
|
||||
"прошедшее время"
|
||||
],
|
||||
[
|
||||
"futr",
|
||||
"TEns",
|
||||
"буд",
|
||||
"будущее время"
|
||||
],
|
||||
[
|
||||
"MOod",
|
||||
"",
|
||||
"Накл",
|
||||
"категория наклонения"
|
||||
],
|
||||
[
|
||||
"indc",
|
||||
"MOod",
|
||||
"изъяв",
|
||||
"изъявительное наклонение"
|
||||
],
|
||||
[
|
||||
"impr",
|
||||
"MOod",
|
||||
"повел",
|
||||
"повелительное наклонение"
|
||||
],
|
||||
[
|
||||
"INvl",
|
||||
"",
|
||||
"Совм",
|
||||
"категория совместности"
|
||||
],
|
||||
[
|
||||
"incl",
|
||||
"INvl",
|
||||
"вкл",
|
||||
"говорящий включён (идем, идемте) "
|
||||
],
|
||||
[
|
||||
"excl",
|
||||
"INvl",
|
||||
"выкл",
|
||||
"говорящий не включён в действие (иди, идите)"
|
||||
],
|
||||
[
|
||||
"VOic",
|
||||
"",
|
||||
"Залог",
|
||||
"категория залога"
|
||||
],
|
||||
[
|
||||
"actv",
|
||||
"VOic",
|
||||
"действ",
|
||||
"действительный залог"
|
||||
],
|
||||
[
|
||||
"pssv",
|
||||
"VOic",
|
||||
"страд",
|
||||
"страдательный залог"
|
||||
],
|
||||
[
|
||||
"Infr",
|
||||
"",
|
||||
"разг",
|
||||
"разговорное"
|
||||
],
|
||||
[
|
||||
"Slng",
|
||||
"",
|
||||
"жарг",
|
||||
"жаргонное"
|
||||
],
|
||||
[
|
||||
"Arch",
|
||||
"",
|
||||
"арх",
|
||||
"устаревшее"
|
||||
],
|
||||
[
|
||||
"Litr",
|
||||
"",
|
||||
"лит",
|
||||
"литературный вариант"
|
||||
],
|
||||
[
|
||||
"Erro",
|
||||
"",
|
||||
"опеч",
|
||||
"опечатка"
|
||||
],
|
||||
[
|
||||
"Dist",
|
||||
"",
|
||||
"искаж",
|
||||
"искажение"
|
||||
],
|
||||
[
|
||||
"Ques",
|
||||
"",
|
||||
"вопр",
|
||||
"вопросительное"
|
||||
],
|
||||
[
|
||||
"Dmns",
|
||||
"",
|
||||
"указ",
|
||||
"указательное"
|
||||
],
|
||||
[
|
||||
"Prnt",
|
||||
"",
|
||||
"вводн",
|
||||
"вводное слово"
|
||||
],
|
||||
[
|
||||
"V-be",
|
||||
"",
|
||||
"*ье",
|
||||
"форма на -ье"
|
||||
],
|
||||
[
|
||||
"V-en",
|
||||
"",
|
||||
"*енен",
|
||||
"форма на -енен"
|
||||
],
|
||||
[
|
||||
"V-ie",
|
||||
"",
|
||||
"*ие",
|
||||
"отчество через -ие-"
|
||||
],
|
||||
[
|
||||
"V-bi",
|
||||
"",
|
||||
"*ьи",
|
||||
"форма на -ьи"
|
||||
],
|
||||
[
|
||||
"Fimp",
|
||||
"",
|
||||
"*несов",
|
||||
"деепричастие от глагола несовершенного вида"
|
||||
],
|
||||
[
|
||||
"Prdx",
|
||||
"",
|
||||
"предк?",
|
||||
"может выступать в роли предикатива"
|
||||
],
|
||||
[
|
||||
"Coun",
|
||||
"",
|
||||
"счетн",
|
||||
"счётная форма"
|
||||
],
|
||||
[
|
||||
"Coll",
|
||||
"",
|
||||
"собир",
|
||||
"собирательное числительное"
|
||||
],
|
||||
[
|
||||
"V-sh",
|
||||
"",
|
||||
"*ши",
|
||||
"деепричастие на -ши"
|
||||
],
|
||||
[
|
||||
"Af-p",
|
||||
"",
|
||||
"*предл",
|
||||
"форма после предлога"
|
||||
],
|
||||
[
|
||||
"Inmx",
|
||||
"",
|
||||
"не/одуш?",
|
||||
"может использоваться как одуш. / неодуш. "
|
||||
],
|
||||
[
|
||||
"Vpre",
|
||||
"",
|
||||
"в_предл",
|
||||
"Вариант предлога ( со, подо, ...)"
|
||||
],
|
||||
[
|
||||
"Anph",
|
||||
"",
|
||||
"Анаф",
|
||||
"Анафорическое (местоимение)"
|
||||
],
|
||||
[
|
||||
"Init",
|
||||
"",
|
||||
"иниц",
|
||||
"Инициал"
|
||||
],
|
||||
[
|
||||
"Adjx",
|
||||
"",
|
||||
"прил?",
|
||||
"может выступать в роли прилагательного"
|
||||
]
|
||||
]
|
4755
vivocore/conf/lang/pymorphy2_dicts/gramtab-opencorpora-ext.json
Normal file
4755
vivocore/conf/lang/pymorphy2_dicts/gramtab-opencorpora-ext.json
Normal file
File diff suppressed because it is too large
Load diff
4755
vivocore/conf/lang/pymorphy2_dicts/gramtab-opencorpora-int.json
Normal file
4755
vivocore/conf/lang/pymorphy2_dicts/gramtab-opencorpora-int.json
Normal file
File diff suppressed because it is too large
Load diff
102
vivocore/conf/lang/pymorphy2_dicts/meta.json
Normal file
102
vivocore/conf/lang/pymorphy2_dicts/meta.json
Normal file
|
@ -0,0 +1,102 @@
|
|||
[
|
||||
[
|
||||
"language_code",
|
||||
"ru"
|
||||
],
|
||||
[
|
||||
"format_version",
|
||||
"2.4"
|
||||
],
|
||||
[
|
||||
"pymorphy2_version",
|
||||
"0.8"
|
||||
],
|
||||
[
|
||||
"compiled_at",
|
||||
"2015-03-15T21:59:48.477191"
|
||||
],
|
||||
[
|
||||
"source",
|
||||
"opencorpora.org"
|
||||
],
|
||||
[
|
||||
"source_version",
|
||||
"0.92"
|
||||
],
|
||||
[
|
||||
"source_revision",
|
||||
"393658"
|
||||
],
|
||||
[
|
||||
"source_lexemes_count",
|
||||
389835
|
||||
],
|
||||
[
|
||||
"source_links_count",
|
||||
256468
|
||||
],
|
||||
[
|
||||
"gramtab_length",
|
||||
4753
|
||||
],
|
||||
[
|
||||
"gramtab_formats",
|
||||
{
|
||||
"opencorpora-int": "gramtab-opencorpora-int.json",
|
||||
"opencorpora-ext": "gramtab-opencorpora-ext.json"
|
||||
}
|
||||
],
|
||||
[
|
||||
"paradigms_length",
|
||||
3163
|
||||
],
|
||||
[
|
||||
"suffixes_length",
|
||||
15463
|
||||
],
|
||||
[
|
||||
"words_dawg_length",
|
||||
5096128
|
||||
],
|
||||
[
|
||||
"compile_options",
|
||||
{
|
||||
"max_suffix_length": 5,
|
||||
"paradigm_prefixes": [
|
||||
"",
|
||||
"по",
|
||||
"наи"
|
||||
],
|
||||
"min_ending_freq": 2,
|
||||
"min_paradigm_popularity": 3
|
||||
}
|
||||
],
|
||||
[
|
||||
"prediction_suffixes_dawg_lengths",
|
||||
[
|
||||
366134,
|
||||
1929,
|
||||
21
|
||||
]
|
||||
],
|
||||
[
|
||||
"P(t|w)",
|
||||
true
|
||||
],
|
||||
[
|
||||
"P(t|w)_unique_words",
|
||||
21121
|
||||
],
|
||||
[
|
||||
"P(t|w)_outcomes",
|
||||
248127
|
||||
],
|
||||
[
|
||||
"P(t|w)_min_word_freq",
|
||||
1
|
||||
],
|
||||
[
|
||||
"corpus_revision",
|
||||
"3725883"
|
||||
]
|
||||
]
|
BIN
vivocore/conf/lang/pymorphy2_dicts/p_t_given_w.intdawg
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/p_t_given_w.intdawg
Normal file
Binary file not shown.
BIN
vivocore/conf/lang/pymorphy2_dicts/paradigms.array
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/paradigms.array
Normal file
Binary file not shown.
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-0.dawg
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-0.dawg
Normal file
Binary file not shown.
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-1.dawg
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-1.dawg
Normal file
Binary file not shown.
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-2.dawg
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/prediction-suffixes-2.dawg
Normal file
Binary file not shown.
15465
vivocore/conf/lang/pymorphy2_dicts/suffixes.json
Normal file
15465
vivocore/conf/lang/pymorphy2_dicts/suffixes.json
Normal file
File diff suppressed because it is too large
Load diff
BIN
vivocore/conf/lang/pymorphy2_dicts/words.dawg
Normal file
BIN
vivocore/conf/lang/pymorphy2_dicts/words.dawg
Normal file
Binary file not shown.
|
@ -1,569 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Solr managed schema - automatically generated - DO NOT EDIT -->
|
||||
<schema name="example" version="1.5">
|
||||
<uniqueKey>DocId</uniqueKey>
|
||||
<fieldType name="alphaOnlySort" class="solr.TextField" omitNorms="true" sortMissingLast="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.TrimFilterFactory"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replace="all" replacement=""/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="ancestor_path" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="binary" class="solr.BinaryField"/>
|
||||
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true"/>
|
||||
<fieldType name="currency" class="solr.CurrencyField" currencyConfig="currency.xml" defaultCurrency="USD" precisionStep="8"/>
|
||||
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
|
||||
<fieldType name="descendent_path" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
|
||||
<fieldType name="edgengram_stemmed" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords-name.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="25" minGramSize="1"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords-name.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="edgengram_untokenized" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="25" minGramSize="2"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="float" class="solr.TrieFloatField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
|
||||
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
|
||||
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
|
||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.000009" distErrPct="0.025" distanceUnits="degrees"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
|
||||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="payloads" class="solr.TextField" indexed="true" stored="false">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="phonetic" class="solr.TextField" indexed="true" stored="false">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.PhoneticFilterFactory" encoder="Metaphone" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="point" class="solr.PointType" subFieldSuffix="_d" dimension="2"/>
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
|
||||
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
|
||||
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="6"/>
|
||||
<fieldType name="tdouble" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="8"/>
|
||||
<fieldType name="text" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/>
|
||||
<filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
<filter class="solr.ArabicStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/>
|
||||
<filter class="solr.BulgarianStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.CJKWidthFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.CJKBigramFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.SoraniNormalizationFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_ckb.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SoraniStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/>
|
||||
<filter class="solr.CzechStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/>
|
||||
<filter class="solr.GermanNormalizationFilterFactory"/>
|
||||
<filter class="solr.GermanLightStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/>
|
||||
<filter class="solr.GreekStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SpanishLightStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<charFilter class="solr.PersianCharFilterFactory"/>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
<filter class="solr.PersianNormalizationFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/>
|
||||
<filter class="solr.FrenchLightStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<!-- Irish text: standard tokenizer, elision filter (lang/contractions_ga.txt),
     a first stop filter fed from lang/hyphenations_ga.txt that runs before
     lowercasing, the Irish lowercase filter, the regular stop words from
     lang/stopwords_ga.txt, then the Snowball "Irish" stemmer. -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
    <filter class="solr.IrishLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  </analyzer>
</fieldType>
|
||||
<!-- General-purpose text with separate index and query chains. Both use the
     standard tokenizer, case-insensitive stop words from stopwords.txt and a
     final lowercase filter; only the query chain applies synonyms.txt, placed
     between stop-word removal and lowercasing. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Variant of the general text type whose index chain ends with
     ReversedWildcardFilterFactory (withOriginal="true"). The query chain has
     no reversal step and applies synonyms.txt before the stop filter. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory"
            withOriginal="true"
            maxPosAsterisk="3"
            maxPosQuestion="2"
            maxFractionAsterisk="0.33"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Galician text: standard tokenizer, lowercasing, stop words from
     lang/stopwords_gl.txt, then the Galician stemmer. -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt"/>
    <filter class="solr.GalicianStemFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Hindi text: standard tokenizer, lowercasing, Indic then Hindi
     normalization, stop words from lang/stopwords_hi.txt, then the Hindi
     stemmer. -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.IndicNormalizationFilterFactory"/>
    <filter class="solr.HindiNormalizationFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt"/>
    <filter class="solr.HindiStemFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Hungarian text: standard tokenizer, lowercasing, snowball-format stop
     words from lang/stopwords_hu.txt, then the Snowball "Hungarian" stemmer. -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_hu.txt" format="snowball"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
  </analyzer>
</fieldType>
|
||||
<!-- Armenian text: standard tokenizer, lowercasing, stop words from
     lang/stopwords_hy.txt, then the Snowball "Armenian" stemmer. -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  </analyzer>
</fieldType>
|
||||
<!-- Indonesian text: standard tokenizer, lowercasing, stop words from
     lang/stopwords_id.txt, then the Indonesian stemmer with
     stemDerivational enabled. -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt"/>
    <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  </analyzer>
</fieldType>
|
||||
<!-- Italian text: standard tokenizer, elision filter driven by
     lang/contractions_it.txt, lowercasing, snowball-format stop words from
     lang/stopwords_it.txt, then the Italian light stemmer. -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_it.txt" format="snowball"/>
    <filter class="solr.ItalianLightStemFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Japanese text, with autoGeneratePhraseQueries switched off. Chain: the
     Japanese tokenizer in "search" mode, base-form filter, part-of-speech
     stop filter (lang/stoptags_ja.txt), CJK width filter, stop words from
     lang/stopwords_ja.txt, katakana stemmer (minimumLength 4), lowercasing. -->
<fieldType name="text_ja" class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="false">
  <analyzer>
    <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
    <filter class="solr.JapaneseBaseFormFilterFactory"/>
    <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/>
    <filter class="solr.CJKWidthFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt"/>
    <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Latvian text: standard tokenizer, lowercasing, stop words from
     lang/stopwords_lv.txt, then the Latvian stemmer. -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt"/>
    <filter class="solr.LatvianStemFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Dutch text: standard tokenizer, lowercasing, snowball-format stop words
     from lang/stopwords_nl.txt, a stemmer-override dictionary
     (lang/stemdict_nl.txt, case-sensitive) placed ahead of the Snowball
     "Dutch" stemmer. -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_nl.txt" format="snowball"/>
    <filter class="solr.StemmerOverrideFilterFactory" ignoreCase="false" dictionary="lang/stemdict_nl.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  </analyzer>
</fieldType>
|
||||
<!-- Norwegian text: standard tokenizer, lowercasing, snowball-format stop
     words from lang/stopwords_no.txt, then the Snowball "Norwegian" stemmer. -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_no.txt" format="snowball"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
  </analyzer>
</fieldType>
|
||||
<!-- Portuguese text: standard tokenizer, lowercasing, snowball-format stop
     words from lang/stopwords_pt.txt, then the Portuguese light stemmer. -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_pt.txt" format="snowball"/>
    <filter class="solr.PortugueseLightStemFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Romanian text: standard tokenizer, lowercasing, stop words from
     lang/stopwords_ro.txt, then the Snowball "Romanian" stemmer. -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  </analyzer>
</fieldType>
|
||||
<!-- Russian text (stemmer-based): standard tokenizer, lowercasing,
     snowball-format stop words from lang/stopwords_ru.txt, then the Snowball
     "Russian" stemmer. -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_ru.txt" format="snowball"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
  </analyzer>
</fieldType>
|
||||
<!-- Stemmed text type: whitespace tokenizer, stop words from
     stopwords-name.txt, word-delimiter splitting (including on case change,
     with no catenation), ASCII folding, lowercasing, then the Snowball
     "English" stemmer with protwords.txt as its protected list. -->
<fieldType name="text_stemmed" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords-name.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
  </analyzer>
</fieldType>
|
||||
<!-- Swedish text: standard tokenizer, lowercasing, snowball-format stop
     words from lang/stopwords_sv.txt, then the Snowball "Swedish" stemmer. -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"
            words="lang/stopwords_sv.txt" format="snowball"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
  </analyzer>
</fieldType>
|
||||
<!-- Turkish text: standard tokenizer, the Turkish lowercase filter, stop
     words from lang/stopwords_tr.txt matched case-sensitively
     (ignoreCase="false"), then the Snowball "Turkish" stemmer. -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.TurkishLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  </analyzer>
</fieldType>
|
||||
<!-- Unstemmed text type: whitespace tokenizer, stop words from stopwords.txt,
     word-delimiter splitting (no split on case change, no catenation), ASCII
     folding and lowercasing. The chain contains no stemming filter. -->
<fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Whitespace-only text type: tokens are produced by the whitespace
     tokenizer and no filters are applied. -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Unstemmed general text with separate index and query chains. Both use
     the whitespace tokenizer, stop words from stopwords.txt, a word-delimiter
     filter, ASCII folding and lowercasing. Differences: the index chain
     catenates words and numbers (catenateWords/catenateNumbers = 1) while the
     query chain does not, and only the query chain applies synonyms.txt,
     ahead of the stop filter. -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="0"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
|
||||
<!-- Trie-based numeric types, all declared with precisionStep="8" and
     positionIncrementGap="0". Only tint additionally sets omitNorms and
     sortMissingLast. -->
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"
           omitNorms="true" sortMissingLast="true"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
||||
<!-- Static field declarations, kept in their original order. Attributes are
     listed as name, type, indexed, stored, multiValued, followed by any
     remaining per-field options. -->
<field name="ALLTEXT" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
<field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="DocId" type="string" indexed="true" stored="true" multiValued="false" required="true" omitNorms="true"/>
<field name="NAME_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
<field name="PREFERRED_TITLE" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="true" omitNorms="true"/>
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
<field name="THUMBNAIL_URL" type="string" indexed="false" stored="true"/>
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
<field name="_root_" type="string" indexed="true" stored="false"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true"/>
<field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true"/>
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="etag" type="string" indexed="false" stored="true" multiValued="false"/>
<field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>
<field name="includes" type="text_general" indexed="true" stored="true"
       termVectors="true" termPositions="true" termOffsets="true"/>
<field name="indexedTime" type="long" indexed="true" stored="true"/>
<field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
<field name="mostSpecificTypeURIs" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
<field name="name" type="text_general" indexed="true" stored="true"/>
<field name="nameLowercase" type="lowercase" indexed="true" stored="false" multiValued="true" omitNorms="false"/>
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" omitNorms="true"/>
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true" omitNorms="false"/>
<field name="nameStemmed" type="text_stemmed" indexed="true" stored="false" multiValued="true" omitNorms="false"/>
<field name="nameText" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true" omitNorms="false"/>
<field name="popularity" type="int" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true"/>
<field name="siteName" type="string" indexed="true" stored="true"/>
<field name="siteURL" type="string" indexed="true" stored="true"/>
<field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
<field name="store" type="location" indexed="true" stored="true"/>
<field name="timestamp" type="date" indexed="true" stored="true" multiValued="false" default="NOW"/>
<field name="type" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
|
||||
<!-- Dynamic field patterns, kept in their original declaration order.
     Attributes are listed as name, type, indexed, stored, multiValued.
     ignored_* and random_* declare no indexed/stored attributes here. -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="*_string" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="random_*" type="random"/>
<dynamicField name="*_tdate" type="tdate" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_tint" type="tint" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
<dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
|
||||
<!-- Every copy rule reads from nameRaw and feeds one derived name field:
     phonetic, autocomplete (stemmed and untokenized), lowercase, stemmed,
     plain text and unstemmed. -->
<copyField source="nameRaw" dest="NAME_PHONETIC"/>
<copyField source="nameRaw" dest="acNameStemmed"/>
<copyField source="nameRaw" dest="acNameUntokenized"/>
<copyField source="nameRaw" dest="nameLowercase"/>
<copyField source="nameRaw" dest="nameStemmed"/>
<copyField source="nameRaw" dest="nameText"/>
<copyField source="nameRaw" dest="nameUnstemmed"/>
|
||||
</schema>
|
|
@ -145,7 +145,7 @@
|
|||
<field name="indexedTime" type="long" indexed="true" stored="true"/>
|
||||
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="ALLTEXT" type="text_ru_iph" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text_ru_morph" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
@ -1311,6 +1311,16 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Russian (morphology-based): HTML is stripped by a char filter, then
     standard tokenizer, lowercasing, snowball-format stop words from
     lang/stopwords_ru.txt, and finally the jmorphy2 stem filter reading its
     dictionaries from lang/pymorphy2_dicts. -->
<fieldType name="text_ru_morph" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ru.txt"
            format="snowball"/>
    <filter class="company.evo.jmorphy2.lucene.Jmorphy2StemFilterFactory"
            dict="lang/pymorphy2_dicts"/>
  </analyzer>
</fieldType>
|
||||
|
||||
|
||||
<!-- Swedish -->
|
||||
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
|
||||
|
|
Loading…
Add table
Reference in a new issue