/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

/* Words listed here are the stopwords for this language. */
var stopwords = ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"];


/* The non-minified version of the stemmer is also available; it is copied as a separate JS file. */

/**
 * Porter Stemmer
 *
 * Constructor; use as `new Stemmer()`. Each instance exposes a single method,
 * `stemWord(w)`, which returns the stem of the string `w`.
 *
 * All regular expressions are compiled once per instance (none of them carry
 * the `g` or `y` flag, so `.test()` / `.exec()` keep no state between calls).
 */
var Stemmer = function() {

  // Step 2 suffix -> replacement.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3 suffix -> replacement.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Measure / shape tests applied to candidate stems.
  var reMgr0 = new RegExp("^(" + C + ")?" + V + C);                      // [C]VC... is m>0
  var reMeq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");    // [C]VC[V] is m=1
  var reMgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);              // [C]VCVC... is m>1
  var reHasVowel = new RegExp("^(" + C + ")?" + v);                      // vowel in stem
  var reDoubleCons = new RegExp("([^aeiouylsz])\\1$");                   // doubled letter other than a vowel, y, l, s, z
  var reCvc = new RegExp("^" + C + v + "[^aeiouwxy]$");                  // C v c, final c not w, x or y

  // Suffix matchers, one (or two) per step.
  var reStep1aEs = /^(.+?)(ss|i)es$/;
  var reStep1aS = /^(.+?)([^s])s$/;
  var reStep1bEed = /^(.+?)eed$/;
  var reStep1bEdIng = /^(.+?)(ed|ing)$/;
  var reStep1bAddE = /(at|bl|iz)$/;
  var reStep1c = /^(.+?)y$/;
  var reStep2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var reStep3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var reStep4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var reStep4Ion = /^(.+?)(s|t)(ion)$/;
  var reStep5E = /^(.+?)e$/;
  var reStep5Ll = /ll$/;
  var reLastChar = /.$/;

  /**
   * Stem a single word.
   *
   * @param {string} w - the word to stem.
   * @returns {string} the stemmed word; words shorter than three characters
   *   are returned unchanged.
   */
  this.stemWord = function (w) {
    var stem;
    var suffix;
    var firstch;
    var fp;

    if (w.length < 3)
      return w;

    // A leading "y" is upper-cased so the lower-case character classes and
    // suffix patterns below do not match it; it is restored before returning.
    firstch = w.charAt(0);
    if (firstch === "y")
      w = firstch.toUpperCase() + w.slice(1);

    // Step 1a
    if (reStep1aEs.test(w))
      w = w.replace(reStep1aEs, "$1$2");
    else if (reStep1aS.test(w))
      w = w.replace(reStep1aS, "$1$2");

    // Step 1b
    fp = reStep1bEed.exec(w);
    if (fp) {
      if (reMgr0.test(fp[1]))
        w = w.replace(reLastChar, "");
    }
    else {
      fp = reStep1bEdIng.exec(w);
      if (fp) {
        stem = fp[1];
        if (reHasVowel.test(stem)) {
          w = stem;
          if (reStep1bAddE.test(w))
            w = w + "e";
          else if (reDoubleCons.test(w))
            w = w.replace(reLastChar, "");
          else if (reCvc.test(w))
            w = w + "e";
        }
      }
    }

    // Step 1c
    fp = reStep1c.exec(w);
    if (fp) {
      stem = fp[1];
      if (reHasVowel.test(stem))
        w = stem + "i";
    }

    // Step 2
    fp = reStep2.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (reMgr0.test(stem))
        w = stem + step2list[suffix];
    }

    // Step 3
    fp = reStep3.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (reMgr0.test(stem))
        w = stem + step3list[suffix];
    }

    // Step 4
    fp = reStep4.exec(w);
    if (fp) {
      stem = fp[1];
      if (reMgr1.test(stem))
        w = stem;
    }
    else {
      fp = reStep4Ion.exec(w);
      if (fp) {
        // "ion" is only dropped after "s" or "t", which stays in the stem.
        stem = fp[1] + fp[2];
        if (reMgr1.test(stem))
          w = stem;
      }
    }

    // Step 5
    fp = reStep5E.exec(w);
    if (fp) {
      stem = fp[1];
      if (reMgr1.test(stem) || (reMeq1.test(stem) && !reCvc.test(stem)))
        w = stem;
    }
    if (reStep5Ll.test(w) && reMgr1.test(w))
      w = w.replace(reLastChar, "");

    // and turn initial Y back to y
    if (firstch === "y")
      w = firstch.toLowerCase() + w.slice(1);
    return w;
  };
};


/**
 * Lookup table keyed by UTF-16 code unit: `splitChars[code]` is `true` for
 * every code that splitQuery() treats as a separator, and undefined otherwise.
 * Built once from a list of single codes plus a list of inclusive ranges.
 */
var splitChars = (function() {
    var result = {};
    var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648,
         1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702,
         2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971,
         2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345,
         3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761,
         3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823,
         4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125,
         8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695,
         11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587,
         43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141];
    var i, j, start, end;
    for (i = 0; i < singles.length; i++) {
        result[singles[i]] = true;
    }
    // Each pair is [first, last], both inclusive.
    var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709],
         [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161],
         [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568],
         [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807],
         [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047],
         [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383],
         [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450],
         [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547],
         [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673],
         [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820],
         [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946],
         [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023],
         [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173],
         [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332],
         [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481],
         [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718],
         [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791],
         [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095],
         [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205],
         [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687],
         [4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968],
         [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869],
         [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102],
         [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271],
         [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592],
         [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822],
         [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167],
         [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959],
         [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143],
         [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318],
         [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483],
         [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101],
         [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567],
         [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292],
         [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444],
         [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783],
         [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311],
         [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511],
         [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774],
         [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071],
         [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263],
         [43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519],
         [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647],
         [43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967],
         [44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295],
         [57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274],
         [64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007],
         [65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381],
         [65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]];
    for (i = 0; i < ranges.length; i++) {
        start = ranges[i][0];
        end = ranges[i][1];
        for (j = start; j <= end; j++) {
            result[j] = true;
        }
    }
    return result;
})();

/**
 * Split a query string into terms.
 *
 * Walks the string one UTF-16 code unit at a time; any unit flagged in
 * `splitChars` ends the current term, and runs of such units produce no
 * empty terms.
 *
 * @param {string} query - the raw query text.
 * @returns {string[]} the terms in order of appearance (empty array when the
 *   query contains no term characters).
 */
function splitQuery(query) {
    var result = [];
    var start = -1;   // index where the current term began, or -1 when between terms
    for (var i = 0; i < query.length; i++) {
        if (splitChars[query.charCodeAt(i)]) {
            if (start !== -1) {
                result.push(query.slice(start, i));
                start = -1;
            }
        } else if (start === -1) {
            start = i;
        }
    }
    // Flush a term that runs to the end of the query.
    if (start !== -1) {
        result.push(query.slice(start));
    }
    return result;
}