|
|
@@ -0,0 +1,295 @@
|
|
|
/**
 * Fuzzy, relevance-ranked search over a collection of strings or objects.
 *
 * Each item is scored against the search term using, in order of precedence:
 * exact match, "starts with", "contains", fuzzy prefix (short terms only),
 * subsequence, and whole-string Levenshtein distance (longer terms only).
 */
class SmartSearch {
  /**
   * @param {object} [options] - Overrides for any of the default settings below.
   */
  constructor(options = {}) {
    this.config = {
      // Tolerance settings
      minSearchLength: 2,
      maxLevenshteinDistance: 3,
      proportionalTolerance: 0.25,
      prefixTolerance: 2,

      // Scoring settings
      exactMatchScore: 100,
      startsWithScore: 90,
      containsScore: 80,
      subsequenceScore: 60,
      fuzzyScore: 40,

      // Optimizations
      cacheEnabled: true,
      maxResults: 50,
      enableEarlyExit: true,
      // Number of exact matches after which the scan stops early.
      earlyExitExactMatches: 10,
      // Upper bound on the number of entries kept in each cache.
      maxCacheEntries: 10000,

      ...options
    };

    this.cache = new Map();
    this.levenshteinCache = new Map();
  }

  /**
   * Main search entry point (public API).
   *
   * @param {Iterable<*>} items - Strings, or objects when `options.key` is given.
   * @param {string} searchTerm - Raw user input; it is trimmed and, unless
   *   `caseSensitive` is set, lower-cased before matching.
   * @param {object} [options]
   * @param {?string} [options.key=null] - Property to read the text from.
   * @param {boolean} [options.sortByRelevance=true] - Sort best match first.
   * @param {boolean} [options.caseSensitive=false] - Keep letter case when matching.
   * @returns {Array<*>} Matching items, at most `config.maxResults` of them.
   *   When the term is shorter than `config.minSearchLength`, `items` is
   *   returned untouched. A null/undefined `items` yields an empty array.
   */
  search(items, searchTerm, options = {}) {
    const {
      key = null,
      sortByRelevance = true,
      caseSensitive = false
    } = options;

    if (items == null) {
      return [];
    }

    const normalizedTerm = this.normalizeTerm(searchTerm, caseSensitive);

    if (normalizedTerm.length < this.config.minSearchLength) {
      return items;
    }

    // The cache entry is only valid for the same collection and the same
    // flags; otherwise a previous answer for other data would be returned.
    // Note: a collection mutated in place without changing its length is
    // not detected; call clearCache() after such edits.
    const cacheKey = this.getCacheKey(normalizedTerm, key, caseSensitive, sortByRelevance);
    if (this.config.cacheEnabled) {
      const cached = this.cache.get(cacheKey);
      if (cached && cached.items === items && cached.length === items.length) {
        // Hand out a copy so callers cannot corrupt the cached array.
        return [...cached.results];
      }
    }

    const results = this.performSearch(items, normalizedTerm, key, caseSensitive);

    // Sort by relevance when requested, otherwise keep the input order.
    const finalResults = sortByRelevance
      ? this.sortByRelevance(results)
      : results.map((result) => result.item);

    const limitedResults = finalResults.slice(0, this.config.maxResults);

    if (this.config.cacheEnabled) {
      this.storeInCache(this.cache, cacheKey, {
        items,
        length: items.length,
        results: limitedResults
      });
      return [...limitedResults];
    }

    return limitedResults;
  }

  /**
   * Scores every item against the (already normalized) search term.
   *
   * @param {Iterable<*>} items
   * @param {string} searchTerm - Normalized term.
   * @param {?string} key - Property to read the text from, if any.
   * @param {boolean} caseSensitive
   * @returns {Array<{item: *, score: number, matchType: string, distance: number, textLength: number}>}
   *   One record per item whose score is greater than zero, in input order.
   */
  performSearch(items, searchTerm, key, caseSensitive) {
    const results = [];
    const termLength = searchTerm.length;
    const { enableEarlyExit, earlyExitExactMatches } = this.config;
    let exactMatches = 0;

    for (const item of items) {
      const text = this.extractText(item, key);
      const normalizedText = this.normalizeTerm(text, caseSensitive);
      const match = this.calculateMatch(searchTerm, normalizedText, termLength);

      if (match.score <= 0) {
        continue;
      }

      results.push({
        item,
        score: match.score,
        matchType: match.type,
        distance: match.distance,
        // Kept so sorting can break ties on the text that was actually searched.
        textLength: normalizedText.length
      });

      // Early exit only once enough *exact* matches were collected; stopping
      // on the first exact match would drop later, better-ranked items.
      if (match.type === 'exact') {
        exactMatches++;
        if (enableEarlyExit && exactMatches >= earlyExitExactMatches) {
          break;
        }
      }
    }

    return results;
  }

  /**
   * Computes the match type and score of one text against the search term.
   *
   * @param {string} searchTerm - Normalized term.
   * @param {string} text - Normalized candidate text.
   * @param {number} termLength - Length of `searchTerm`.
   * @returns {{score: number, type: string, distance: number}} A score of 0
   *   with type 'no_match' means the text does not match.
   */
  calculateMatch(searchTerm, text, termLength) {
    // 1. Exact match
    if (text === searchTerm) {
      return { score: this.config.exactMatchScore, type: 'exact', distance: 0 };
    }

    // 2. Text contains the whole term; a match at position 0 ranks higher.
    const position = text.indexOf(searchTerm);
    if (position !== -1) {
      return position === 0
        ? { score: this.config.startsWithScore, type: 'startsWith', distance: 0 }
        : { score: this.config.containsScore, type: 'contains', distance: 0 };
    }

    // 3. Short terms: compare against the beginning of the text with tolerance.
    if (termLength <= 4) {
      const prefix = text.substring(0, termLength + 2);
      const distance = this.getLevenshteinDistance(searchTerm, prefix);
      // A distance equal to the longer length means every character had to
      // be rewritten, i.e. the strings share nothing; that is not a match.
      const fullRewrite = Math.max(termLength, prefix.length);

      if (distance <= this.config.prefixTolerance && distance < fullRewrite) {
        const score = this.config.fuzzyScore + (2 - distance) * 10;
        return { score, type: 'prefix_fuzzy', distance };
      }
    }

    // 4. Subsequence: all characters of the term appear in order in the text.
    if (this.isSubsequence(searchTerm, text)) {
      const coverage = termLength / text.length;
      const score = this.config.subsequenceScore + (coverage * 20);
      return { score, type: 'subsequence', distance: text.length - termLength };
    }

    // 5. Longer terms: Levenshtein distance against the whole text.
    if (termLength > 4) {
      const maxAllowedDistance = Math.min(
        this.config.maxLevenshteinDistance,
        Math.floor(termLength * this.config.proportionalTolerance)
      );

      const distance = this.getLevenshteinDistance(searchTerm, text);

      if (distance <= maxAllowedDistance) {
        const score = this.config.fuzzyScore - (distance * 5);
        // Never report a non-positive score for an accepted match.
        return { score: Math.max(1, score), type: 'fuzzy', distance };
      }
    }

    return { score: 0, type: 'no_match', distance: Infinity };
  }

  /**
   * Checks whether the characters of `searchTerm` appear, in order, in `text`.
   *
   * @param {string} searchTerm
   * @param {string} text
   * @returns {boolean}
   */
  isSubsequence(searchTerm, text) {
    let searchIndex = 0;

    // Both strings are walked by UTF-16 code unit so that characters outside
    // the Basic Multilingual Plane are compared consistently on both sides.
    for (let i = 0; i < text.length && searchIndex < searchTerm.length; i++) {
      if (text[i] === searchTerm[searchIndex]) {
        searchIndex++;
      }
    }

    return searchIndex === searchTerm.length;
  }

  /**
   * Levenshtein edit distance between two strings, memoized per instance.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} Minimum number of single-character insertions,
   *   deletions and substitutions turning one string into the other.
   */
  getLevenshteinDistance(str1, str2) {
    if (str1 === str2) return 0;
    if (str1.length === 0) return str2.length;
    if (str2.length === 0) return str1.length;

    // The distance is symmetric; keep the shorter string as str1 so the
    // working rows are as small as possible and the cache key is canonical.
    if (str1.length > str2.length) {
      [str1, str2] = [str2, str1];
    }

    // The length prefix makes the key unambiguous even when the strings
    // themselves contain the separator character.
    const cacheKey = `${str1.length}:${str1}|${str2}`;
    if (this.levenshteinCache.has(cacheKey)) {
      return this.levenshteinCache.get(cacheKey);
    }

    // Two-row dynamic programming: only the previous row is needed.
    let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);

    for (let i = 0; i < str2.length; i++) {
      const currentRow = [i + 1];

      for (let j = 0; j < str1.length; j++) {
        const cost = str1[j] === str2[i] ? 0 : 1;
        currentRow[j + 1] = Math.min(
          currentRow[j] + 1, // insertion
          previousRow[j + 1] + 1, // deletion
          previousRow[j] + cost // substitution
        );
      }

      previousRow = currentRow;
    }

    const distance = previousRow[str1.length];
    this.storeInCache(this.levenshteinCache, cacheKey, distance);
    return distance;
  }

  /**
   * Orders scored results by relevance and unwraps them to plain items.
   *
   * Order: higher score first, then smaller distance, then shorter text.
   * The input array is not modified.
   *
   * @param {Array<{item: *, score: number, distance: number, textLength?: number}>} results
   * @returns {Array<*>} The items, best match first.
   */
  sortByRelevance(results) {
    // Prefer the length of the text that was searched; fall back to the
    // item's own representation for records that do not carry it.
    const lengthOf = (result) => {
      if (typeof result.textLength === 'number') {
        return result.textLength;
      }
      const text = typeof result.item === 'string' ? result.item : JSON.stringify(result.item);
      return text.length;
    };

    return [...results]
      .sort((a, b) => {
        if (a.score !== b.score) {
          return b.score - a.score;
        }
        if (a.distance !== b.distance) {
          return a.distance - b.distance;
        }
        return lengthOf(a) - lengthOf(b);
      })
      .map((result) => result.item);
  }

  /**
   * Trims a term and, unless `caseSensitive` is set, lower-cases it.
   *
   * @param {*} term
   * @param {boolean} [caseSensitive=false]
   * @returns {string} The normalized term, or '' when `term` is not a string.
   */
  normalizeTerm(term, caseSensitive = false) {
    if (typeof term !== 'string') return '';

    const trimmed = term.trim();
    return caseSensitive ? trimmed : trimmed.toLowerCase();
  }

  /**
   * Returns the text to search for an item.
   *
   * @param {*} item - A string-like value, or an object when `key` is given.
   * @param {?string} key - Property holding the text, if any.
   * @returns {string} The text; '' when the value is null or undefined.
   */
  extractText(item, key) {
    const value = key && typeof item === 'object' && item !== null
      ? item[key]
      : item;

    // Only null/undefined mean "no text"; values such as 0 are kept.
    return value == null ? '' : String(value);
  }

  /**
   * Builds the search-cache key for a term and its search flags.
   *
   * @param {string} term - Normalized term.
   * @param {?string} key - Property searched, if any.
   * @param {boolean} [caseSensitive=false]
   * @param {boolean} [sortByRelevance=true]
   * @returns {string}
   */
  getCacheKey(term, key, caseSensitive = false, sortByRelevance = true) {
    const flags = `${caseSensitive ? 'cs' : 'ci'}:${sortByRelevance ? 'sorted' : 'unsorted'}`;
    return `${term}:${key || 'default'}:${flags}`;
  }

  /**
   * Stores a value in one of the caches, evicting the oldest entry when the
   * cache already holds `config.maxCacheEntries` entries.
   *
   * @param {Map<string, *>} cache
   * @param {string} cacheKey
   * @param {*} value
   */
  storeInCache(cache, cacheKey, value) {
    const limit = this.config.maxCacheEntries;
    if (limit > 0 && !cache.has(cacheKey) && cache.size >= limit) {
      // Map iterates in insertion order, so the first key is the oldest.
      cache.delete(cache.keys().next().value);
    }
    cache.set(cacheKey, value);
  }

  /**
   * Empties both the search cache and the Levenshtein cache.
   */
  clearCache() {
    this.cache.clear();
    this.levenshteinCache.clear();
  }

  /**
   * Reports cache sizes.
   *
   * @returns {{searchCacheSize: number, levenshteinCacheSize: number, totalMemoryUsage: number}}
   *   All values are entry counts; `totalMemoryUsage` is the sum of both
   *   counts, not a size in bytes.
   */
  getCacheStats() {
    return {
      searchCacheSize: this.cache.size,
      levenshteinCacheSize: this.levenshteinCache.size,
      totalMemoryUsage: this.cache.size + this.levenshteinCache.size
    };
  }
}
|
|
|
+
|
|
|
/**
 * Convenience wrapper for one-off searches.
 *
 * Builds a throwaway SmartSearch and runs a single search on it. `options`
 * is forwarded both to the constructor (so configuration overrides such as
 * `maxResults` take effect) and to `search` (for `key`, `sortByRelevance`
 * and `caseSensitive`).
 *
 * @param {Iterable<*>} items - Strings, or objects when `options.key` is given.
 * @param {string} searchTerm - Raw search input.
 * @param {object} [options] - Search options and/or configuration overrides.
 * @returns {Array<*>} Whatever `SmartSearch#search` returns for these inputs.
 */
function smartSearch(items, searchTerm, options = {}) {
  // The instance is discarded after this call, so caching would only cost work.
  const searcher = new SmartSearch({ ...options, cacheEnabled: false });
  return searcher.search(items, searchTerm, options);
}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+export { SmartSearch, smartSearch };
|