searching.js 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. class SmartSearch {
  2. constructor(options = {}) {
  3. this.config = {
  4. // Configuración de tolerancia
  5. minSearchLength: 2,
  6. maxLevenshteinDistance: 3,
  7. proportionalTolerance: 0.25,
  8. prefixTolerance: 2,
  9. wordTolerance: 1,
  10. // Configuración de scoring
  11. exactNameMatchScore: 100,
  12. exactCategoryMatchScore: 90,
  13. startsWithNameScore: 92,
  14. startsWithCategoryScore: 88,
  15. containsNameScore: 80,
  16. containsCategoryScore: 78,
  17. fuzzyNameScore: 40,
  18. fuzzyCategoryScore: 38,
  19. fuzzyWordScore: 35,
  20. subsequenceScore: 30,
  21. // Optimizaciones
  22. cacheEnabled: true,
  23. maxResults: 50,
  24. enableEarlyExit: true,
  25. ...options
  26. };
  27. this.cache = new Map();
  28. this.levenshteinCache = new Map();
  29. }
  30. /**
  31. * Búsqueda principal - API pública
  32. */
  33. search(items, searchTerm, options = {}) {
  34. const {
  35. sortByRelevance = true,
  36. caseSensitive = false
  37. } = options;
  38. const normalizedTerm = this.normalizeTerm(searchTerm, caseSensitive);
  39. if (normalizedTerm.length < this.config.minSearchLength) {
  40. return items;
  41. }
  42. const results = this.performSearch(items, normalizedTerm, caseSensitive);
  43. // Ordenar por relevancia si está habilitado
  44. const finalResults = sortByRelevance
  45. ? this.sortByRelevance(results)
  46. : results.map(r => r.item);
  47. // Limitar resultados
  48. const limitedResults = finalResults.slice(0, this.config.maxResults);
  49. return limitedResults;
  50. }
  51. /**
  52. * Realiza la búsqueda y scoring
  53. */
  54. performSearch(items, searchTerm, caseSensitive) {
  55. const results = [];
  56. const termLength = searchTerm.length;
  57. for (const item of items) {
  58. const text = this.extractText(item);
  59. const normalizedText = this.normalizeTerm(text, caseSensitive);
  60. const match = this.calculateMatch(searchTerm, normalizedText, termLength);
  61. if (match.score > 0) {
  62. results.push({
  63. item,
  64. score: match.score,
  65. matchType: match.type,
  66. distance: match.distance
  67. });
  68. // Early exit si tenemos suficientes coincidencias exactas
  69. if (this.config.enableEarlyExit &&
  70. match.score === this.config.exactNameMatchScore &&
  71. results.length >= 10) {
  72. break;
  73. }
  74. }
  75. }
  76. return results;
  77. }
  78. /**
  79. * Calcula el match y score para un texto
  80. */
  81. calculateMatch(searchTerm, text, termLength) {
  82. // 1. Coincidencia exacta
  83. const [name, category] = text.split("&cat&");
  84. if (name === searchTerm) {
  85. return { score: this.config.exactNameMatchScore, type: 'exact', distance: 0 };
  86. }
  87. if (category === searchTerm) {
  88. return { score: this.config.exactCategoryMatchScore, type: 'exact', distance: 0 };
  89. }
  90. // 2. Contiene el término completo
  91. if (name.includes(searchTerm)) {
  92. const position = text.indexOf(searchTerm);
  93. // Bonus si empieza con el término
  94. const score = position === 0
  95. ? this.config.startsWithNameScore
  96. : this.config.containsNameScore;
  97. return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
  98. }
  99. if (category.includes(searchTerm)) {
  100. const position = text.indexOf(searchTerm);
  101. // Bonus si empieza con el término
  102. const score = position === 0
  103. ? this.config.startsWithCategoryScore
  104. : this.config.containsCategoryScore;
  105. return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
  106. }
  107. // 3. Para términos cortos:
  108. if (termLength <= 4) {
  109. //verificar prefijo con tolerancia
  110. const prefix = text.substring(0, termLength + 2);
  111. let distance = this.getLevenshteinDistance(searchTerm, prefix);
  112. if (distance <= this.config.prefixTolerance) {
  113. const score = this.config.fuzzyScore + (2 - distance) * 10;
  114. return { score, type: 'prefix_fuzzy', distance };
  115. }
  116. // Verificar si el término es una palabra dentro del texto
  117. const words = name.split(" ");
  118. for (const word of words) {
  119. const distance = this.getLevenshteinDistance(searchTerm, word);
  120. if (distance <= this.config.wordTolerance) {
  121. const score = this.config.fuzzyScore + (2 - distance) * 10;
  122. return { score, type: 'word_fuzzy', distance };
  123. }
  124. }
  125. }
  126. // 4. Distancia de Levenshtein para términos más largos
  127. if (termLength > 4) {
  128. const maxAllowedDistance = Math.min(
  129. this.config.maxLevenshteinDistance,
  130. Math.floor(termLength * this.config.proportionalTolerance)
  131. );
  132. let distance = this.getLevenshteinDistance(searchTerm, name);
  133. if (distance <= maxAllowedDistance) {
  134. const score = this.config.fuzzyNameScore - (distance * 5);
  135. return { score: Math.max(1, score), type: 'fuzzy', distance };
  136. }
  137. const words = name.split(" ");
  138. const distances = []
  139. for (const word of words) {
  140. const distance = this.getLevenshteinDistance(searchTerm, word);
  141. distances.push(distance);
  142. }
  143. const minDistance = Math.min(...distances);
  144. if (minDistance <= this.config.wordTolerance) {
  145. const score = this.config.fuzzyWordScore + (2 - minDistance) * 10;
  146. if (words.includes("summer")){
  147. }
  148. return { score, type: 'word_fuzzy', distance: minDistance };
  149. }}
  150. // 5. Búsqueda por subsequencia (caracteres en orden)
  151. if (this.isSubsequence(searchTerm, name)) {
  152. const coverage = termLength / name.length;
  153. const score = this.config.subsequenceScore + (coverage * 20);
  154. if (score > this.config.subsequenceScore + 20) {
  155. return { score, type: 'subsequence', distance: name.length - termLength };
  156. }
  157. }
  158. return { score: 0, type: 'no_match', distance: Infinity };
  159. }
  160. /**
  161. * Verifica si searchTerm es subsequencia de text
  162. */
  163. isSubsequence(searchTerm, text) {
  164. let searchIndex = 0;
  165. for (const char of text) {
  166. if (searchIndex < searchTerm.length && char === searchTerm[searchIndex]) {
  167. searchIndex++;
  168. }
  169. }
  170. return searchIndex === searchTerm.length;
  171. }
  172. /**
  173. * Distancia de Levenshtein optimizada con cache
  174. */
  175. getLevenshteinDistance(str1, str2) {
  176. if (str1 === str2) return 0;
  177. if (str1.length === 0) return str2.length;
  178. if (str2.length === 0) return str1.length;
  179. // Optimización: intercambiar para que str1 sea la más corta
  180. if (str1.length > str2.length) {
  181. [str1, str2] = [str2, str1];
  182. }
  183. // Algoritmo optimizado de Levenshtein
  184. let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
  185. for (let i = 0; i < str2.length; i++) {
  186. const currentRow = [i + 1];
  187. for (let j = 0; j < str1.length; j++) {
  188. const cost = str1[j] === str2[i] ? 0 : 1;
  189. currentRow[j + 1] = Math.min(
  190. currentRow[j] + 1, // inserción
  191. previousRow[j + 1] + 1, // eliminación
  192. previousRow[j] + cost // sustitución
  193. );
  194. }
  195. previousRow = currentRow;
  196. }
  197. const distance = previousRow[str1.length];
  198. return distance;
  199. }
  200. /**
  201. * Ordena resultados por relevancia
  202. */
  203. sortByRelevance(results) {
  204. return results
  205. .sort((a, b) => {
  206. // Primero por score (descendente)
  207. if (a.score !== b.score) {
  208. return b.score - a.score;
  209. }
  210. // Luego por distancia (ascendente)
  211. if (a.distance !== b.distance) {
  212. return a.distance - b.distance;
  213. }
  214. // Finalmente por longitud del texto (ascendente)
  215. const aText = typeof a.item === 'string' ? a.item : JSON.stringify(a.item);
  216. const bText = typeof b.item === 'string' ? b.item : JSON.stringify(b.item);
  217. return aText.length - bText.length;
  218. })
  219. .map(result => result.item);
  220. }
  221. /**
  222. * Utilidades
  223. */
  224. normalizeTerm(term, caseSensitive = false) {
  225. if (typeof term !== 'string') return '';
  226. let normalized = term.trim();
  227. if (!caseSensitive) {
  228. normalized = normalized.toLowerCase();
  229. }
  230. // Remover caracteres especiales opcionales
  231. // normalized = normalized.replace(/[^\w\s]/g, '');
  232. return normalized;
  233. }
  234. extractText(item) {
  235. return `${item.name}&cat&${item.type}`;
  236. }
  237. /**
  238. * Limpia el cache
  239. */
  240. clearCache() {
  241. this.cache.clear();
  242. this.levenshteinCache.clear();
  243. }
  244. }
  245. // Función de conveniencia para uso rápido
  246. function smartSearch(items, searchTerm, options = {}) {
  247. const searcher = new SmartSearch();
  248. return searcher.search(items, searchTerm, options);
  249. }
  250. export { SmartSearch, smartSearch };