searching.js 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. class SmartSearch {
  2. constructor(options = {}) {
  3. this.config = {
  4. // Configuración de tolerancia
  5. minSearchLength: 2,
  6. maxLevenshteinDistance: 3,
  7. proportionalTolerance: 0.25,
  8. prefixTolerance: 2,
  9. wordTolerance: 1,
  10. // Configuración de scoring
  11. exactNameMatchScore: 100,
  12. exactCategoryMatchScore: 90,
  13. startsWithNameScore: 92,
  14. startsWithCategoryScore: 88,
  15. containsNameScore: 80,
  16. containsCategoryScore: 78,
  17. fuzzyNameScore: 40,
  18. fuzzyCategoryScore: 38,
  19. fuzzyWordScore: 35,
  20. subsequenceScore: 30,
  21. // Optimizaciones
  22. cacheEnabled: true,
  23. maxResults: 50,
  24. enableEarlyExit: true,
  25. ...options
  26. };
  27. this.cache = new Map();
  28. this.levenshteinCache = new Map();
  29. }
  30. /**
  31. * Búsqueda principal - API pública
  32. */
  33. search(items, searchTerm, options = {}) {
  34. const {
  35. sortByRelevance = true,
  36. caseSensitive = false
  37. } = options;
  38. const normalizedTerm = this.normalizeTerm(searchTerm, caseSensitive);
  39. if (normalizedTerm.length < this.config.minSearchLength) {
  40. return items;
  41. }
  42. const results = this.performSearch(items, normalizedTerm, caseSensitive);
  43. console.log(results)
  44. // Ordenar por relevancia si está habilitado
  45. const finalResults = sortByRelevance
  46. ? this.sortByRelevance(results)
  47. : results.map(r => r.item);
  48. // Limitar resultados
  49. const limitedResults = finalResults.slice(0, this.config.maxResults);
  50. return limitedResults;
  51. }
  52. /**
  53. * Realiza la búsqueda y scoring
  54. */
  55. performSearch(items, searchTerm, caseSensitive) {
  56. const results = [];
  57. const termLength = searchTerm.length;
  58. for (const item of items) {
  59. const text = this.extractText(item);
  60. const normalizedText = this.normalizeTerm(text, caseSensitive);
  61. const match = this.calculateMatch(searchTerm, normalizedText, termLength);
  62. if (match.score > 0) {
  63. results.push({
  64. item,
  65. score: match.score,
  66. matchType: match.type,
  67. distance: match.distance
  68. });
  69. // Early exit si tenemos suficientes coincidencias exactas
  70. if (this.config.enableEarlyExit &&
  71. match.score === this.config.exactNameMatchScore &&
  72. results.length >= 10) {
  73. break;
  74. }
  75. }
  76. }
  77. return results;
  78. }
  79. /**
  80. * Calcula el match y score para un texto
  81. */
  82. calculateMatch(searchTerm, text, termLength) {
  83. // 1. Coincidencia exacta
  84. const [name, category] = text.split("&cat&");
  85. if (name === searchTerm) {
  86. return { score: this.config.exactNameMatchScore, type: 'exact', distance: 0 };
  87. }
  88. if (category === searchTerm) {
  89. return { score: this.config.exactCategoryMatchScore, type: 'exact', distance: 0 };
  90. }
  91. // 2. Contiene el término completo
  92. if (name.includes(searchTerm)) {
  93. const position = text.indexOf(searchTerm);
  94. // Bonus si empieza con el término
  95. const score = position === 0
  96. ? this.config.startsWithNameScore
  97. : this.config.containsNameScore;
  98. return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
  99. }
  100. if (category.includes(searchTerm)) {
  101. const position = text.indexOf(searchTerm);
  102. // Bonus si empieza con el término
  103. const score = position === 0
  104. ? this.config.startsWithCategoryScore
  105. : this.config.containsCategoryScore;
  106. return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
  107. }
  108. // 3. Para términos cortos:
  109. if (termLength <= 4) {
  110. //verificar prefijo con tolerancia
  111. const prefix = text.substring(0, termLength + 2);
  112. let distance = this.getLevenshteinDistance(searchTerm, prefix);
  113. if (distance <= this.config.prefixTolerance) {
  114. const score = this.config.fuzzyScore + (2 - distance) * 10;
  115. return { score, type: 'prefix_fuzzy', distance };
  116. }
  117. // Verificar si el término es una palabra dentro del texto
  118. const words = name.split(" ");
  119. for (const word of words) {
  120. const distance = this.getLevenshteinDistance(searchTerm, word);
  121. if (distance <= this.config.wordTolerance) {
  122. const score = this.config.fuzzyScore + (2 - distance) * 10;
  123. return { score, type: 'word_fuzzy', distance };
  124. }
  125. }
  126. }
  127. // 4. Distancia de Levenshtein para términos más largos
  128. if (termLength > 4) {
  129. const maxAllowedDistance = Math.min(
  130. this.config.maxLevenshteinDistance,
  131. Math.floor(termLength * this.config.proportionalTolerance)
  132. );
  133. let distance = this.getLevenshteinDistance(searchTerm, name);
  134. if (distance <= maxAllowedDistance) {
  135. const score = this.config.fuzzyNameScore - (distance * 5);
  136. return { score: Math.max(1, score), type: 'fuzzy', distance };
  137. }
  138. const words = name.split(" ");
  139. const distances = []
  140. for (const word of words) {
  141. const distance = this.getLevenshteinDistance(searchTerm, word);
  142. distances.push(distance);
  143. }
  144. const minDistance = Math.min(...distances);
  145. if (minDistance <= this.config.wordTolerance) {
  146. const score = this.config.fuzzyWordScore + (2 - minDistance) * 10;
  147. if (words.includes("summer")){
  148. console.log(words, distances);
  149. console.log("min distance", minDistance);
  150. console.log("score", score);
  151. }
  152. return { score, type: 'word_fuzzy', distance: minDistance };
  153. }}
  154. // 5. Búsqueda por subsequencia (caracteres en orden)
  155. if (this.isSubsequence(searchTerm, name)) {
  156. const coverage = termLength / name.length;
  157. const score = this.config.subsequenceScore + (coverage * 20);
  158. if (score > this.config.subsequenceScore + 20) {
  159. return { score, type: 'subsequence', distance: name.length - termLength };
  160. }
  161. }
  162. return { score: 0, type: 'no_match', distance: Infinity };
  163. }
  164. /**
  165. * Verifica si searchTerm es subsequencia de text
  166. */
  167. isSubsequence(searchTerm, text) {
  168. let searchIndex = 0;
  169. for (const char of text) {
  170. if (searchIndex < searchTerm.length && char === searchTerm[searchIndex]) {
  171. searchIndex++;
  172. }
  173. }
  174. return searchIndex === searchTerm.length;
  175. }
  176. /**
  177. * Distancia de Levenshtein optimizada con cache
  178. */
  179. getLevenshteinDistance(str1, str2) {
  180. if (str1 === str2) return 0;
  181. if (str1.length === 0) return str2.length;
  182. if (str2.length === 0) return str1.length;
  183. // Optimización: intercambiar para que str1 sea la más corta
  184. if (str1.length > str2.length) {
  185. [str1, str2] = [str2, str1];
  186. }
  187. // Algoritmo optimizado de Levenshtein
  188. let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
  189. for (let i = 0; i < str2.length; i++) {
  190. const currentRow = [i + 1];
  191. for (let j = 0; j < str1.length; j++) {
  192. const cost = str1[j] === str2[i] ? 0 : 1;
  193. currentRow[j + 1] = Math.min(
  194. currentRow[j] + 1, // inserción
  195. previousRow[j + 1] + 1, // eliminación
  196. previousRow[j] + cost // sustitución
  197. );
  198. }
  199. previousRow = currentRow;
  200. }
  201. const distance = previousRow[str1.length];
  202. return distance;
  203. }
  204. /**
  205. * Ordena resultados por relevancia
  206. */
  207. sortByRelevance(results) {
  208. return results
  209. .sort((a, b) => {
  210. // Primero por score (descendente)
  211. if (a.score !== b.score) {
  212. return b.score - a.score;
  213. }
  214. // Luego por distancia (ascendente)
  215. if (a.distance !== b.distance) {
  216. return a.distance - b.distance;
  217. }
  218. // Finalmente por longitud del texto (ascendente)
  219. const aText = typeof a.item === 'string' ? a.item : JSON.stringify(a.item);
  220. const bText = typeof b.item === 'string' ? b.item : JSON.stringify(b.item);
  221. return aText.length - bText.length;
  222. })
  223. .map(result => result.item);
  224. }
  225. /**
  226. * Utilidades
  227. */
  228. normalizeTerm(term, caseSensitive = false) {
  229. if (typeof term !== 'string') return '';
  230. let normalized = term.trim();
  231. if (!caseSensitive) {
  232. normalized = normalized.toLowerCase();
  233. }
  234. // Remover caracteres especiales opcionales
  235. // normalized = normalized.replace(/[^\w\s]/g, '');
  236. return normalized;
  237. }
  238. extractText(item) {
  239. if (!item.name){
  240. console.log("Item sin nombre:", item);
  241. }
  242. return `${item.name}&cat&${item.type}`;
  243. }
  244. /**
  245. * Limpia el cache
  246. */
  247. clearCache() {
  248. this.cache.clear();
  249. this.levenshteinCache.clear();
  250. }
  251. }
  252. // Función de conveniencia para uso rápido
  253. function smartSearch(items, searchTerm, options = {}) {
  254. const searcher = new SmartSearch();
  255. return searcher.search(items, searchTerm, options);
  256. }
  257. export { SmartSearch, smartSearch };