1use std::{collections::HashMap, path::PathBuf};
67
68use regex::Regex;
69use tokio::sync::RwLock;
70
71use crate::{AirError, Result};
72use crate::Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolKind};
74
75pub const MAX_SEARCH_RESULTS_DEFAULT:u32 = 100;
77
78#[derive(Debug, Clone)]
80pub struct SearchQuery {
81 pub query:String,
83 pub mode:SearchMode,
85 pub case_sensitive:bool,
87 pub whole_word:bool,
89 pub regex:Option<Regex>,
91 pub max_results:u32,
93 pub page:u32,
95}
96
97#[derive(Debug, Clone, PartialEq)]
99pub enum SearchMode {
100 Literal,
102 Regex,
104 Fuzzy,
106 Exact,
108}
109
110#[derive(Debug, Clone)]
112pub struct SearchResult {
113 pub path:String,
115 pub file_name:String,
117 pub matches:Vec<SearchMatch>,
119 pub relevance:f64,
121 pub language:Option<String>,
123}
124
125#[derive(Debug, Clone)]
127pub struct SearchMatch {
128 pub line_number:u32,
130 pub line_content:String,
132 pub match_start:usize,
134 pub match_end:usize,
136 pub context_before:Vec<String>,
138 pub context_after:Vec<String>,
140}
141
142#[derive(Debug, Clone)]
144pub struct PaginatedSearchResults {
145 pub results:Vec<SearchResult>,
147 pub total_count:u32,
149 pub page:u32,
151 pub total_pages:u32,
153 pub page_size:u32,
155}
156
157impl IntoIterator for PaginatedSearchResults {
158 type Item = SearchResult;
159 type IntoIter = std::vec::IntoIter<SearchResult>;
160
161 fn into_iter(self) -> Self::IntoIter { self.results.into_iter() }
162}
163
164impl<'a> IntoIterator for &'a PaginatedSearchResults {
165 type Item = &'a SearchResult;
166 type IntoIter = std::slice::Iter<'a, SearchResult>;
167
168 fn into_iter(self) -> Self::IntoIter { self.results.iter() }
169}
170
171pub async fn QueryIndexSearch(
183 index:&FileIndex,
184 query:SearchQuery,
185 path_filter:Option<String>,
186 language_filter:Option<String>,
187) -> Result<PaginatedSearchResults> {
188 log::info!("[QueryIndex] Searching for: '{}' (mode: {:?})", query.query, query.mode);
189
190 let sanitized_query = SanitizeSearchQuery(&query.query)?;
192
193 let case_sensitive = query.case_sensitive;
195 let whole_word = query.whole_word;
196 let max_results = if query.max_results == 0 {
197 MAX_SEARCH_RESULTS_DEFAULT
198 } else {
199 query.max_results.min(1000) };
201
202 let mut all_results = Vec::new();
203
204 match query.mode {
206 SearchMode::Literal => {
207 QueryIndexLiteral(
208 &sanitized_query,
209 case_sensitive,
210 whole_word,
211 path_filter.as_deref(),
212 language_filter.as_deref(),
213 index,
214 &mut all_results,
215 )
216 .await;
217 },
218 SearchMode::Regex => {
219 if let Some(regex) = &query.regex {
220 QueryIndexRegex(
221 regex,
222 path_filter.as_deref(),
223 language_filter.as_deref(),
224 index,
225 &mut all_results,
226 )
227 .await;
228 } else {
229 if let Ok(regex) = Regex::new(&sanitized_query) {
231 QueryIndexRegex(
232 ®ex,
233 path_filter.as_deref(),
234 language_filter.as_deref(),
235 index,
236 &mut all_results,
237 )
238 .await;
239 }
240 }
241 },
242 SearchMode::Fuzzy => {
243 QueryIndexFuzzy(
244 &sanitized_query,
245 case_sensitive,
246 path_filter.as_deref(),
247 language_filter.as_deref(),
248 index,
249 &mut all_results,
250 )
251 .await;
252 },
253 SearchMode::Exact => {
254 QueryIndexExact(
255 &sanitized_query,
256 case_sensitive,
257 path_filter.as_deref(),
258 language_filter.as_deref(),
259 index,
260 &mut all_results,
261 )
262 .await;
263 },
264 }
265
266 all_results.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap());
268
269 let total_count = all_results.len() as u32;
271 let total_pages = if max_results == 0 { 0 } else { total_count.div_ceil(max_results) };
272 let page = query.page.min(total_pages.saturating_sub(1));
273
274 let start = (page * max_results) as usize;
276 let end = ((page + 1) * max_results).min(total_count) as usize;
277 let page_results = all_results[start..end].to_vec();
278
279 log::info!(
280 "[QueryIndex] Search completed: {} total results, page {} of {}",
281 total_count,
282 page + 1,
283 total_pages
284 );
285
286 Ok(PaginatedSearchResults { results:page_results, total_count, page, total_pages, page_size:max_results })
287}
288
289pub fn SanitizeSearchQuery(query:&str) -> Result<String> {
291 let sanitized:String = query.chars().filter(|c| *c != '\0' && !c.is_control()).collect();
293
294 if sanitized.len() > 1000 {
296 return Err(AirError::Validation(
297 "Search query exceeds maximum length of 1000 characters".to_string(),
298 ));
299 }
300
301 Ok(sanitized)
302}
303
304async fn QueryIndexLiteral(
306 query:&str,
307 case_sensitive:bool,
308 whole_word:bool,
309 path_filter:Option<&str>,
310 language_filter:Option<&str>,
311 index:&FileIndex,
312 results:&mut Vec<SearchResult>,
313) {
314 let search_query = if case_sensitive { query.to_string() } else { query.to_lowercase() };
315
316 if let Some(file_paths) = index.content_index.get(&search_query.to_lowercase()) {
318 for file_path in file_paths {
319 if let Some(metadata) = index.files.get(file_path) {
320 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
321 if let Ok(search_result) =
322 FindMatchesInFile(file_path, &search_query, case_sensitive, whole_word, index).await
323 {
324 if !search_result.matches.is_empty() {
325 results.push(search_result);
326 }
327 }
328 }
329 }
330 }
331 }
332
333 for (file_path, metadata) in &index.files {
335 if results.len() >= 1000 {
336 break;
337 }
338
339 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
340
341 let name_to_search = if case_sensitive { file_name.clone() } else { file_name.to_lowercase() };
342
343 if name_to_search.contains(&search_query) {
344 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
345 results.push(SearchResult {
347 path:file_path.to_string_lossy().to_string(),
348 file_name,
349 matches:Vec::new(),
350 relevance:0.3,
351 language:metadata.language.clone(),
352 });
353 }
354 }
355 }
356}
357
358async fn QueryIndexRegex(
360 regex:&Regex,
361 path_filter:Option<&str>,
362 language_filter:Option<&str>,
363 index:&FileIndex,
364 results:&mut Vec<SearchResult>,
365) {
366 for (file_path, metadata) in &index.files {
367 if results.len() >= 1000 {
368 break;
369 }
370
371 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
372 continue;
373 }
374
375 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
376 let matches = FindRegexMatches(&content, regex);
377
378 if !matches.is_empty() {
379 let relevance = CalculateRelevance(&matches, metadata);
380
381 results.push(SearchResult {
382 path:file_path.to_string_lossy().to_string(),
383 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
384 matches,
385 relevance,
386 language:metadata.language.clone(),
387 });
388 }
389 }
390 }
391}
392
393async fn QueryIndexFuzzy(
395 query:&str,
396 case_sensitive:bool,
397 path_filter:Option<&str>,
398 language_filter:Option<&str>,
399 index:&FileIndex,
400 results:&mut Vec<SearchResult>,
401) {
402 let query_lower = query.to_lowercase();
403
404 for (file_path, metadata) in &index.files {
405 if results.len() >= 1000 {
406 break;
407 }
408
409 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
410 continue;
411 }
412
413 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
414 let max_distance = 2; let matches = FindFuzzyMatches(&content, &query_lower, case_sensitive, max_distance);
416
417 if !matches.is_empty() {
418 let relevance = CalculateRelevance(&matches, metadata) * 0.8; results.push(SearchResult {
421 path:file_path.to_string_lossy().to_string(),
422 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
423 matches,
424 relevance,
425 language:metadata.language.clone(),
426 });
427 }
428 }
429 }
430}
431
432async fn QueryIndexExact(
434 query:&str,
435 _case_sensitive:bool,
436 path_filter:Option<&str>,
437 language_filter:Option<&str>,
438 index:&FileIndex,
439 results:&mut Vec<SearchResult>,
440) {
441 for (file_path, metadata) in &index.files {
442 if results.len() >= 1000 {
443 break;
444 }
445
446 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
447 continue;
448 }
449
450 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
451 let matches = FindExactMatches(&content, query);
452
453 if !matches.is_empty() {
454 let relevance = CalculateRelevance(&matches, metadata) * 1.1; results.push(SearchResult {
457 path:file_path.to_string_lossy().to_string(),
458 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
459 matches,
460 relevance,
461 language:metadata.language.clone(),
462 });
463 }
464 }
465 }
466}
467
468async fn FindMatchesInFile(
470 file_path:&PathBuf,
471 query:&str,
472 case_sensitive:bool,
473 whole_word:bool,
474 index:&FileIndex,
475) -> Result<SearchResult> {
476 let content = tokio::fs::read_to_string(file_path)
477 .await
478 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
479
480 let metadata = index
481 .files
482 .get(file_path)
483 .ok_or_else(|| AirError::Internal("File metadata not found in index".to_string()))?;
484
485 let matches = FindMatchesWithContext(&content, query, case_sensitive, whole_word);
486 let relevance = CalculateRelevance(&matches, metadata);
487
488 Ok(SearchResult {
489 path:file_path.to_string_lossy().to_string(),
490 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
491 matches,
492 relevance,
493 language:metadata.language.clone(),
494 })
495}
496
497fn FindMatchesWithContext(content:&str, query:&str, case_sensitive:bool, whole_word:bool) -> Vec<SearchMatch> {
499 let mut matches = Vec::new();
500 let lines:Vec<&str> = content.lines().collect();
501
502 let search_in = |line:&str| -> Option<(usize, usize)> {
503 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
504
505 let query_to_find = if case_sensitive { query.to_string() } else { query.to_lowercase() };
506
507 let start = if whole_word {
508 FindWholeWordMatch(&line_to_search, &query_to_find)
509 } else {
510 line_to_search.find(&query_to_find)
511 };
512
513 start.map(|s| (s, s + query.len()))
514 };
515
516 for (line_idx, line) in lines.iter().enumerate() {
517 let line_number = line_idx as u32 + 1;
518
519 if let Some((match_start, match_end)) = search_in(line) {
520 let context_start = line_idx.saturating_sub(2);
522 let context_end = (line_idx + 3).min(lines.len());
523
524 let context_before = lines[context_start..line_idx].iter().map(|s| s.to_string()).collect();
525
526 let context_after = lines[line_idx + 1..context_end].iter().map(|s| s.to_string()).collect();
527
528 matches.push(SearchMatch {
529 line_number,
530 line_content:line.to_string(),
531 match_start,
532 match_end,
533 context_before,
534 context_after,
535 });
536 }
537 }
538
539 matches
540}
541
542fn FindWholeWordMatch(line:&str, word:&str) -> Option<usize> {
544 let mut start = 0;
545
546 while let Some(pos) = line[start..].find(word) {
547 let actual_pos = start + pos;
548
549 let valid_before = actual_pos == 0
551 || line
552 .chars()
553 .nth(actual_pos - 1)
554 .map_or(true, |c| !c.is_alphanumeric() && c != '_');
555
556 let match_end = actual_pos + word.len();
558 let valid_after =
559 match_end == line.len() || line.chars().nth(match_end).map_or(true, |c| !c.is_alphanumeric() && c != '_');
560
561 if valid_before && valid_after {
562 return Some(actual_pos);
563 }
564
565 start = actual_pos + 1;
566 }
567
568 None
569}
570
571fn FindRegexMatches(content:&str, regex:&Regex) -> Vec<SearchMatch> {
573 let mut matches = Vec::new();
574 let lines:Vec<&str> = content.lines().collect();
575
576 for (line_idx, line) in lines.iter().enumerate() {
577 let line_number = line_idx as u32 + 1;
578
579 for mat in regex.find_iter(line) {
580 matches.push(SearchMatch {
581 line_number,
582 line_content:line.to_string(),
583 match_start:mat.start(),
584 match_end:mat.end(),
585 context_before:Vec::new(),
586 context_after:Vec::new(),
587 });
588 }
589 }
590
591 matches
592}
593
594fn FindFuzzyMatches(content:&str, query:&str, case_sensitive:bool, max_distance:usize) -> Vec<SearchMatch> {
596 let mut matches = Vec::new();
597 let lines:Vec<&str> = content.lines().collect();
598
599 for (line_idx, line) in lines.iter().enumerate() {
600 let line_number = line_idx as u32 + 1;
601 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
602
603 if let Some(pos) = line_to_search.find(query) {
605 let distance = CalculateLevenshteinDistance(&line_to_search[pos..pos.saturating_add(query.len())], query);
607
608 if distance <= max_distance {
609 matches.push(SearchMatch {
610 line_number,
611 line_content:line.to_string(),
612 match_start:pos,
613 match_end:pos + query.len(),
614 context_before:Vec::new(),
615 context_after:Vec::new(),
616 });
617 }
618 }
619 }
620
621 matches
622}
623
624fn FindExactMatches(content:&str, query:&str) -> Vec<SearchMatch> { FindMatchesWithContext(content, query, true, true) }
626
627fn CalculateLevenshteinDistance(s1:&str, s2:&str) -> usize {
629 let s1_chars:Vec<char> = s1.chars().collect();
630 let s2_chars:Vec<char> = s2.chars().collect();
631 let len1 = s1_chars.len();
632 let len2 = s2_chars.len();
633
634 let mut dp = vec![vec![0usize; len2 + 1]; len1 + 1];
636
637 for i in 0..=len1 {
639 dp[i][0] = i;
640 }
641 for j in 0..=len2 {
642 dp[0][j] = j;
643 }
644
645 for i in 1..=len1 {
647 for j in 1..=len2 {
648 if s1_chars[i - 1] == s2_chars[j - 1] {
649 dp[i][j] = dp[i - 1][j - 1];
650 } else {
651 dp[i][j] = 1 + [
652 dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1], ]
656 .into_iter()
657 .min()
658 .unwrap();
659 }
660 }
661 }
662
663 dp[len1][len2]
664}
665
666fn CalculateRelevance(matches:&[SearchMatch], metadata:&FileMetadata) -> f64 {
668 let match_count = matches.len();
669 let line_count = metadata.line_count.unwrap_or(1) as f64;
670
671 let mut relevance = (match_count as f64 / line_count) * 10.0;
673
674 if match_count > 0 {
676 relevance += (match_count as f64).log10() * 0.5;
677 }
678
679 let days_old = (chrono::Utc::now() - metadata.modified).num_days() as f64;
681 relevance += 1.0 / (days_old + 1.0).max(1.0);
682
683 relevance.min(10.0).max(0.0)
684}
685
686pub fn MatchesFilters(
688 file_path:&PathBuf,
689 metadata:&FileMetadata,
690 path_filter:Option<&str>,
691 language_filter:Option<&str>,
692) -> bool {
693 if let Some(search_path) = path_filter {
695 if !file_path.to_string_lossy().contains(search_path) {
696 return false;
697 }
698 }
699
700 if let Some(lang) = language_filter {
702 if metadata.language.as_deref() != Some(lang) {
703 return false;
704 }
705 }
706
707 true
708}