| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315 |
- class SmartSearch {
- constructor(options = {}) {
- this.config = {
- // Configuración de tolerancia
- minSearchLength: 2,
- maxLevenshteinDistance: 3,
- proportionalTolerance: 0.25,
- prefixTolerance: 2,
- wordTolerance: 1,
- // Configuración de scoring
- exactNameMatchScore: 100,
- exactCategoryMatchScore: 90,
- startsWithNameScore: 92,
- startsWithCategoryScore: 88,
- containsNameScore: 80,
- containsCategoryScore: 78,
-
- fuzzyNameScore: 40,
- fuzzyCategoryScore: 38,
- fuzzyWordScore: 35,
- subsequenceScore: 30,
-
- // Optimizaciones
- cacheEnabled: true,
- maxResults: 50,
- enableEarlyExit: true,
-
- ...options
- };
-
- this.cache = new Map();
- this.levenshteinCache = new Map();
- }
- /**
- * Búsqueda principal - API pública
- */
- search(items, searchTerm, options = {}) {
- const {
- sortByRelevance = true,
- caseSensitive = false
- } = options;
-
- const normalizedTerm = this.normalizeTerm(searchTerm, caseSensitive);
-
- if (normalizedTerm.length < this.config.minSearchLength) {
- return items;
- }
- const results = this.performSearch(items, normalizedTerm, caseSensitive);
- // Ordenar por relevancia si está habilitado
- const finalResults = sortByRelevance
- ? this.sortByRelevance(results)
- : results.map(r => r.item);
- // Limitar resultados
- const limitedResults = finalResults.slice(0, this.config.maxResults);
-
- return limitedResults;
- }
- /**
- * Realiza la búsqueda y scoring
- */
- performSearch(items, searchTerm, caseSensitive) {
- const results = [];
- const termLength = searchTerm.length;
- for (const item of items) {
- const text = this.extractText(item);
- const normalizedText = this.normalizeTerm(text, caseSensitive);
-
- const match = this.calculateMatch(searchTerm, normalizedText, termLength);
-
- if (match.score > 0) {
- results.push({
- item,
- score: match.score,
- matchType: match.type,
- distance: match.distance
- });
- // Early exit si tenemos suficientes coincidencias exactas
- if (this.config.enableEarlyExit &&
- match.score === this.config.exactNameMatchScore &&
- results.length >= 10) {
- break;
- }
- }
- }
- return results;
- }
- /**
- * Calcula el match y score para un texto
- */
- calculateMatch(searchTerm, text, termLength) {
- // 1. Coincidencia exacta
- const [name, category] = text.split("&cat&");
- if (name === searchTerm) {
- return { score: this.config.exactNameMatchScore, type: 'exact', distance: 0 };
- }
- if (category === searchTerm) {
- return { score: this.config.exactCategoryMatchScore, type: 'exact', distance: 0 };
- }
- // 2. Contiene el término completo
- if (name.includes(searchTerm)) {
- const position = text.indexOf(searchTerm);
- // Bonus si empieza con el término
- const score = position === 0
- ? this.config.startsWithNameScore
- : this.config.containsNameScore;
- return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
- }
- if (category.includes(searchTerm)) {
- const position = text.indexOf(searchTerm);
- // Bonus si empieza con el término
- const score = position === 0
- ? this.config.startsWithCategoryScore
- : this.config.containsCategoryScore;
- return { score, type: position === 0 ? 'startsWith' : 'contains', distance: 0 };
- }
- // 3. Para términos cortos:
- if (termLength <= 4) {
- //verificar prefijo con tolerancia
- const prefix = text.substring(0, termLength + 2);
- let distance = this.getLevenshteinDistance(searchTerm, prefix);
-
- if (distance <= this.config.prefixTolerance) {
- const score = this.config.fuzzyScore + (2 - distance) * 10;
- return { score, type: 'prefix_fuzzy', distance };
- }
- // Verificar si el término es una palabra dentro del texto
- const words = name.split(" ");
- for (const word of words) {
- const distance = this.getLevenshteinDistance(searchTerm, word);
- if (distance <= this.config.wordTolerance) {
- const score = this.config.fuzzyScore + (2 - distance) * 10;
- return { score, type: 'word_fuzzy', distance };
- }
- }
- }
- // 4. Distancia de Levenshtein para términos más largos
- if (termLength > 4) {
-
- const maxAllowedDistance = Math.min(
- this.config.maxLevenshteinDistance,
- Math.floor(termLength * this.config.proportionalTolerance)
- );
- let distance = this.getLevenshteinDistance(searchTerm, name);
- if (distance <= maxAllowedDistance) {
- const score = this.config.fuzzyNameScore - (distance * 5);
- return { score: Math.max(1, score), type: 'fuzzy', distance };
- }
- const words = name.split(" ");
- const distances = []
- for (const word of words) {
- const distance = this.getLevenshteinDistance(searchTerm, word);
- distances.push(distance);
- }
-
-
- const minDistance = Math.min(...distances);
-
- if (minDistance <= this.config.wordTolerance) {
- const score = this.config.fuzzyWordScore + (2 - minDistance) * 10;
- if (words.includes("summer")){
- }
- return { score, type: 'word_fuzzy', distance: minDistance };
- }}
- // 5. Búsqueda por subsequencia (caracteres en orden)
- if (this.isSubsequence(searchTerm, name)) {
- const coverage = termLength / name.length;
- const score = this.config.subsequenceScore + (coverage * 20);
- if (score > this.config.subsequenceScore + 20) {
- return { score, type: 'subsequence', distance: name.length - termLength };
- }
- }
-
- return { score: 0, type: 'no_match', distance: Infinity };
- }
- /**
- * Verifica si searchTerm es subsequencia de text
- */
- isSubsequence(searchTerm, text) {
- let searchIndex = 0;
-
- for (const char of text) {
- if (searchIndex < searchTerm.length && char === searchTerm[searchIndex]) {
- searchIndex++;
- }
- }
-
- return searchIndex === searchTerm.length;
- }
- /**
- * Distancia de Levenshtein optimizada con cache
- */
- getLevenshteinDistance(str1, str2) {
- if (str1 === str2) return 0;
- if (str1.length === 0) return str2.length;
- if (str2.length === 0) return str1.length;
- // Optimización: intercambiar para que str1 sea la más corta
- if (str1.length > str2.length) {
- [str1, str2] = [str2, str1];
- }
- // Algoritmo optimizado de Levenshtein
- let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
-
- for (let i = 0; i < str2.length; i++) {
- const currentRow = [i + 1];
-
- for (let j = 0; j < str1.length; j++) {
- const cost = str1[j] === str2[i] ? 0 : 1;
- currentRow[j + 1] = Math.min(
- currentRow[j] + 1, // inserción
- previousRow[j + 1] + 1, // eliminación
- previousRow[j] + cost // sustitución
- );
- }
-
- previousRow = currentRow;
- }
- const distance = previousRow[str1.length];
- return distance;
- }
- /**
- * Ordena resultados por relevancia
- */
- sortByRelevance(results) {
- return results
- .sort((a, b) => {
- // Primero por score (descendente)
- if (a.score !== b.score) {
- return b.score - a.score;
- }
-
- // Luego por distancia (ascendente)
- if (a.distance !== b.distance) {
- return a.distance - b.distance;
- }
-
- // Finalmente por longitud del texto (ascendente)
- const aText = typeof a.item === 'string' ? a.item : JSON.stringify(a.item);
- const bText = typeof b.item === 'string' ? b.item : JSON.stringify(b.item);
- return aText.length - bText.length;
- })
- .map(result => result.item);
- }
- /**
- * Utilidades
- */
- normalizeTerm(term, caseSensitive = false) {
- if (typeof term !== 'string') return '';
-
- let normalized = term.trim();
- if (!caseSensitive) {
- normalized = normalized.toLowerCase();
- }
-
- // Remover caracteres especiales opcionales
- // normalized = normalized.replace(/[^\w\s]/g, '');
-
- return normalized;
- }
- extractText(item) {
- return `${item.name}&cat&${item.type}`;
- }
- /**
- * Limpia el cache
- */
- clearCache() {
- this.cache.clear();
- this.levenshteinCache.clear();
- }
- }
- // Función de conveniencia para uso rápido
- function smartSearch(items, searchTerm, options = {}) {
- const searcher = new SmartSearch();
- return searcher.search(items, searchTerm, options);
- }
- export { SmartSearch, smartSearch };
|