searching.js 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. class SmartSearch {
  2. constructor(options = {}) {
  3. this.config = {
  4. // Configuración de tolerancia
  5. minSearchLength: 2,
  6. maxLevenshteinDistance: 3,
  7. proportionalTolerance: 0.25,
  8. prefixTolerance: 2,
  9. // Configuración de scoring
  10. exactMatchScore: 100,
  11. startsWithScore: 90,
  12. containsScore: 80,
  13. subsequenceScore: 60,
  14. fuzzyScore: 40,
  15. // Optimizaciones
  16. cacheEnabled: true,
  17. maxResults: 50,
  18. enableEarlyExit: true,
  19. ...options
  20. };
  21. this.cache = new Map();
  22. this.levenshteinCache = new Map();
  23. }
  24. /**
  25. * Búsqueda principal - API pública
  26. */
  27. search(items, searchTerm, options = {}) {
  28. const {
  29. key = null,
  30. sortByRelevance = true,
  31. caseSensitive = false
  32. } = options;
  33. const normalizedTerm = this.normalizeTerm(searchTerm, caseSensitive);
  34. if (normalizedTerm.length < this.config.minSearchLength) {
  35. return items;
  36. }
  37. // Verificar cache
  38. const cacheKey = this.getCacheKey(normalizedTerm, key);
  39. if (this.config.cacheEnabled && this.cache.has(cacheKey)) {
  40. return this.cache.get(cacheKey);
  41. }
  42. const results = this.performSearch(items, normalizedTerm, key, caseSensitive);
  43. // Ordenar por relevancia si está habilitado
  44. const finalResults = sortByRelevance
  45. ? this.sortByRelevance(results)
  46. : results.map(r => r.item);
  47. // Limitar resultados
  48. const limitedResults = finalResults.slice(0, this.config.maxResults);
  49. // Guardar en cache
  50. if (this.config.cacheEnabled) {
  51. this.cache.set(cacheKey, limitedResults);
  52. }
  53. return limitedResults;
  54. }
  55. /**
  56. * Realiza la búsqueda y scoring
  57. */
  58. performSearch(items, searchTerm, key, caseSensitive) {
  59. const results = [];
  60. const termLength = searchTerm.length;
  61. for (const item of items) {
  62. const text = this.extractText(item, key);
  63. const normalizedText = this.normalizeTerm(text, caseSensitive);
  64. const match = this.calculateMatch(searchTerm, normalizedText, termLength);
  65. if (match.score > 0) {
  66. results.push({
  67. item,
  68. score: match.score,
  69. matchType: match.type,
  70. distance: match.distance
  71. });
  72. // Early exit si tenemos suficientes coincidencias exactas
  73. if (this.config.enableEarlyExit &&
  74. match.score === this.config.exactMatchScore &&
  75. results.length >= 10) {
  76. break;
  77. }
  78. }
  79. }
  80. return results;
  81. }
  82. /**
  83. * Calcula el match y score para un texto
  84. */
  85. calculateMatch(searchTerm, text, termLength) {
  86. // 1. Coincidencia exacta
  87. if (text === searchTerm) {
  88. return { score: this.config.exactMatchScore, type: 'exact', distance: 0 };
  89. }
  90. // 2. Contiene el término completo
  91. if (text.includes(searchTerm)) {
  92. const position = text.indexOf(searchTerm);
  93. // Bonus si empieza con el término
  94. const score = position === 0
  95. ? this.config.startsWithScore
  96. : this.config.containsScore;
  97. return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
  98. }
  99. // 3. Para términos cortos: verificar prefijo con tolerancia
  100. if (termLength <= 4) {
  101. const prefix = text.substring(0, termLength + 2);
  102. const distance = this.getLevenshteinDistance(searchTerm, prefix);
  103. if (distance <= this.config.prefixTolerance) {
  104. const score = this.config.fuzzyScore + (2 - distance) * 10;
  105. return { score, type: 'prefix_fuzzy', distance };
  106. }
  107. }
  108. // 4. Búsqueda por subsequencia (caracteres en orden)
  109. if (this.isSubsequence(searchTerm, text)) {
  110. const coverage = termLength / text.length;
  111. const score = this.config.subsequenceScore + (coverage * 20);
  112. return { score, type: 'subsequence', distance: text.length - termLength };
  113. }
  114. // 5. Distancia de Levenshtein para términos más largos
  115. if (termLength > 4) {
  116. const maxAllowedDistance = Math.min(
  117. this.config.maxLevenshteinDistance,
  118. Math.floor(termLength * this.config.proportionalTolerance)
  119. );
  120. const distance = this.getLevenshteinDistance(searchTerm, text);
  121. if (distance <= maxAllowedDistance) {
  122. const score = this.config.fuzzyScore - (distance * 5);
  123. return { score: Math.max(1, score), type: 'fuzzy', distance };
  124. }
  125. }
  126. return { score: 0, type: 'no_match', distance: Infinity };
  127. }
  128. /**
  129. * Verifica si searchTerm es subsequencia de text
  130. */
  131. isSubsequence(searchTerm, text) {
  132. let searchIndex = 0;
  133. for (const char of text) {
  134. if (searchIndex < searchTerm.length && char === searchTerm[searchIndex]) {
  135. searchIndex++;
  136. }
  137. }
  138. return searchIndex === searchTerm.length;
  139. }
  140. /**
  141. * Distancia de Levenshtein optimizada con cache
  142. */
  143. getLevenshteinDistance(str1, str2) {
  144. if (str1 === str2) return 0;
  145. if (str1.length === 0) return str2.length;
  146. if (str2.length === 0) return str1.length;
  147. // Optimización: intercambiar para que str1 sea la más corta
  148. if (str1.length > str2.length) {
  149. [str1, str2] = [str2, str1];
  150. }
  151. const cacheKey = `${str1}|${str2}`;
  152. if (this.levenshteinCache.has(cacheKey)) {
  153. return this.levenshteinCache.get(cacheKey);
  154. }
  155. // Algoritmo optimizado de Levenshtein
  156. let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
  157. for (let i = 0; i < str2.length; i++) {
  158. const currentRow = [i + 1];
  159. for (let j = 0; j < str1.length; j++) {
  160. const cost = str1[j] === str2[i] ? 0 : 1;
  161. currentRow[j + 1] = Math.min(
  162. currentRow[j] + 1, // inserción
  163. previousRow[j + 1] + 1, // eliminación
  164. previousRow[j] + cost // sustitución
  165. );
  166. }
  167. previousRow = currentRow;
  168. }
  169. const distance = previousRow[str1.length];
  170. this.levenshteinCache.set(cacheKey, distance);
  171. console.log(`Levenshtein distance between "${str1}" and "${str2}": ${distance}`);
  172. return distance;
  173. }
  174. /**
  175. * Ordena resultados por relevancia
  176. */
  177. sortByRelevance(results) {
  178. return results
  179. .sort((a, b) => {
  180. // Primero por score (descendente)
  181. if (a.score !== b.score) {
  182. return b.score - a.score;
  183. }
  184. // Luego por distancia (ascendente)
  185. if (a.distance !== b.distance) {
  186. return a.distance - b.distance;
  187. }
  188. // Finalmente por longitud del texto (ascendente)
  189. const aText = typeof a.item === 'string' ? a.item : JSON.stringify(a.item);
  190. const bText = typeof b.item === 'string' ? b.item : JSON.stringify(b.item);
  191. return aText.length - bText.length;
  192. })
  193. .map(result => result.item);
  194. }
  195. /**
  196. * Utilidades
  197. */
  198. normalizeTerm(term, caseSensitive = false) {
  199. if (typeof term !== 'string') return '';
  200. let normalized = term.trim();
  201. if (!caseSensitive) {
  202. normalized = normalized.toLowerCase();
  203. }
  204. // Remover caracteres especiales opcionales
  205. // normalized = normalized.replace(/[^\w\s]/g, '');
  206. return normalized;
  207. }
  208. extractText(item, key) {
  209. if (key && typeof item === 'object' && item !== null) {
  210. return String(item[key] || '');
  211. }
  212. return String(item || '');
  213. }
  214. getCacheKey(term, key) {
  215. return `${term}:${key || 'default'}`;
  216. }
  217. /**
  218. * Limpia el cache
  219. */
  220. clearCache() {
  221. this.cache.clear();
  222. this.levenshteinCache.clear();
  223. }
  224. /**
  225. * Obtiene estadísticas del cache
  226. */
  227. getCacheStats() {
  228. return {
  229. searchCacheSize: this.cache.size,
  230. levenshteinCacheSize: this.levenshteinCache.size,
  231. totalMemoryUsage: this.cache.size + this.levenshteinCache.size
  232. };
  233. }
  234. }
  235. // Función de conveniencia para uso rápido
  236. function smartSearch(items, searchTerm, options = {}) {
  237. const searcher = new SmartSearch();
  238. return searcher.search(items, searchTerm, options);
  239. }
  240. export { SmartSearch, smartSearch };