1pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80 AirError,
81 ApplicationState::ApplicationState,
82 Configuration::ConfigurationManager,
83 Indexing::{
84 Process::ExtractSymbols::{ExtractSymbols, GroupSymbolsByKind, SymbolStatistics},
85 Scan::{
86 ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
87 ScanFile::IndexFileInternal,
88 },
89 State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
90 Store::{
91 QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
92 StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
93 UpdateIndex::UpdateFileContent,
94 },
95 },
96 Result,
97};
98use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
100
/// Upper bound on concurrently running indexing work.
///
/// Used as the permit count of the indexing semaphore created in
/// `FileIndexer::new` and as the parallelism argument handed to
/// `ScanDirectoriesParallel`.
const MAX_PARALLEL_INDEXING:usize = 10;
103
/// Summary of one `FileIndexer::IndexDirectory` run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task succeeded and whose data was merged into the index.
	pub files_indexed:u32,
	/// Sum of the `size` recorded in the metadata of every successfully indexed file (bytes).
	pub total_size:u64,
	/// Wall-clock time the run took, in seconds.
	pub duration_seconds:f64,
	/// Total number of symbols returned for the successfully indexed files.
	pub symbols_extracted:u32,
	/// Number of files whose indexing task returned an error or did not complete.
	pub files_with_errors:u32,
}
118
119#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
121pub struct IndexStatistics {
122 pub file_count:u32,
123 pub total_size:u64,
124 pub total_symbols:u32,
125 pub language_counts:HashMap<String, u32>,
126 pub last_updated:chrono::DateTime<chrono::Utc>,
127 pub index_version:String,
128}
129
130pub struct FileIndexer {
140 AppState:Arc<ApplicationState>,
142
143 file_index:Arc<RwLock<FileIndex>>,
145
146 index_directory:PathBuf,
148
149 file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,
151
152 indexing_semaphore:Arc<tokio::sync::Semaphore>,
154
155 corruption_detected:Arc<Mutex<bool>>,
157}
158
159impl FileIndexer {
160 pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
168 let config = &AppState.Configuration.Indexing;
169
170 let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
172
173 EnsureIndexDirectory(&index_directory).await?;
175
176 let file_index = LoadOrCreateIndex(&index_directory).await?;
178
179 let indexer = Self {
180 AppState:AppState.clone(),
181 file_index:Arc::new(RwLock::new(file_index)),
182 index_directory:index_directory.clone(),
183 file_watcher:Arc::new(Mutex::new(None)),
184 indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
185 corruption_detected:Arc::new(Mutex::new(false)),
186 };
187
188 indexer.VerifyIndexIntegrity().await?;
190
191 indexer
193 .AppState
194 .UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
195 .await
196 .map_err(|e| AirError::Internal(e.to_string()))?;
197
198 log::info!("[FileIndexer] Initialized with index directory: {}", index_directory.display());
199
200 Ok(indexer)
201 }
202
203 fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
205 let expanded = ConfigurationManager::ExpandPath(path)?;
206
207 let path_str = expanded.to_string_lossy();
209 if path_str.contains("..") {
210 return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
211 }
212
213 Ok(expanded)
214 }
215
216 async fn VerifyIndexIntegrity(&self) -> Result<()> {
218 let index = self.file_index.read().await;
219
220 ValidateIndexConsistency(&index)?;
222
223 let mut missing_files = 0;
225 for file_path in index.files.keys() {
226 if !file_path.exists() {
227 missing_files += 1;
228 }
229 }
230
231 if missing_files > 0 {
232 log::warn!("[FileIndexer] Found {} missing files in index", missing_files);
233 }
234
235 log::info!("[FileIndexer] Index integrity verified successfully");
236
237 Ok(())
238 }
239
240 pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
242 let start_time = std::time::Instant::now();
243
244 log::info!("[FileIndexer] Starting directory index: {}", path);
245
246 let config = &self.AppState.Configuration.Indexing;
247
248 let (files_to_index, scan_result) =
250 ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
251
252 let index_arc = self.file_index.clone();
254 let semaphore = self.indexing_semaphore.clone();
255 let config_clone = config.clone();
256 let mut index_tasks = Vec::new();
257
258 for file_path in files_to_index {
259 let permit = semaphore.clone().acquire_owned().await.unwrap();
260 let index_ref = index_arc.clone();
261 let config_for_task = config_clone.clone();
262
263 let task = tokio::spawn(async move {
264 let _permit = permit;
265 IndexFileInternal(&file_path, &config_for_task, &index_ref, &[]).await
266 });
267
268 index_tasks.push(task);
269 }
270
271 let mut index = self.file_index.write().await;
273 let mut indexed_paths = std::collections::HashSet::new();
274 let mut files_indexed = 0u32;
275 let mut total_size = 0u64;
276 let mut symbols_extracted = 0u32;
277 let mut files_with_errors = 0u32;
278
279 for task in index_tasks {
280 match task.await {
281 Ok(Ok((metadata, symbols))) => {
282 let file_path = metadata.path.clone();
283
284 index.files.insert(file_path.clone(), metadata.clone());
285 indexed_paths.insert(file_path.clone());
286
287 if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
289 log::warn!("[FileIndexer] Failed to index content for {}: {}", file_path.display(), e);
290 }
291
292 index.file_symbols.insert(file_path.clone(), symbols.clone());
294 symbols_extracted += symbols.len() as u32;
295
296 for symbol in symbols {
298 index
299 .symbol_index
300 .entry(symbol.name.clone())
301 .or_insert_with(Vec::new)
302 .push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
303 }
304
305 files_indexed += 1;
306 total_size += metadata.size;
307 },
308 Ok(Err(_)) => {
309 files_with_errors += 1;
310 },
311 Err(e) => {
312 log::error!("[FileIndexer] Indexing task failed: {}", e);
313 files_with_errors += 1;
314 },
315 }
316 }
317
318 ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
320
321 UpdateIndexMetadata(&mut index)?;
323
324 SaveIndex(&self.index_directory, &index).await?;
326
327 let duration = start_time.elapsed().as_secs_f64();
328
329 log::info!(
330 "[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
331 files_indexed,
332 total_size,
333 symbols_extracted,
334 files_with_errors,
335 duration
336 );
337
338 Ok(IndexResult {
339 files_indexed,
340 total_size,
341 duration_seconds:duration,
342 symbols_extracted,
343 files_with_errors,
344 })
345 }
346
347 pub async fn SearchFiles(
349 &self,
350 query:SearchQuery,
351 path:Option<String>,
352 language:Option<String>,
353 ) -> Result<PaginatedSearchResults> {
354 let index = self.file_index.read().await;
355 QueryIndexSearch(&index, query, path, language).await
356 }
357
358 pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
360 let index = self.file_index.read().await;
361 let query_lower = query.to_lowercase();
362 let mut results = Vec::new();
363
364 for (symbol_name, locations) in &index.symbol_index {
365 if symbol_name.to_lowercase().contains(&query_lower) {
366 for loc in locations.iter().take(max_results as usize) {
367 results.push(loc.symbol.clone());
368 if results.len() >= max_results as usize {
369 break;
370 }
371 }
372 }
373 }
374
375 Ok(results)
376 }
377
378 pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
380 let index = self.file_index.read().await;
381 Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
382 }
383
384 pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
386 let file_path = Self::ValidateAndExpandPath(&path)?;
387 let index = self.file_index.read().await;
388
389 Ok(index.files.get(&file_path).cloned())
390 }
391
392 pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
394 let index = self.file_index.read().await;
395
396 let mut language_counts:HashMap<String, u32> = HashMap::new();
397 let total_size = index.files.values().map(|m| m.size).sum();
398 let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
399
400 for metadata in index.files.values() {
401 if let Some(lang) = &metadata.language {
402 *language_counts.entry(lang.clone()).or_insert(0) += 1;
403 }
404 }
405
406 Ok(IndexStatistics {
407 file_count:index.files.len() as u32,
408 total_size,
409 total_symbols,
410 language_counts,
411 last_updated:index.last_updated,
412 index_version:index.index_version.clone(),
413 })
414 }
415
416 pub async fn recover_from_corruption(&self) -> Result<()> {
418 log::info!("[FileIndexer] Recovering from corrupted index...");
419
420 BackupCorruptedIndex(&self.index_directory).await?;
422
423 let new_index = CreateNewIndex();
425 *self.file_index.write().await = new_index;
426
427 *self.corruption_detected.lock().await = false;
429
430 log::info!("[FileIndexer] Index recovery completed");
431
432 Ok(())
433 }
434}
435
436impl Clone for FileIndexer {
437 fn clone(&self) -> Self {
438 Self {
439 AppState:self.AppState.clone(),
440 file_index:self.file_index.clone(),
441 index_directory:self.index_directory.clone(),
442 file_watcher:self.file_watcher.clone(),
443 indexing_semaphore:self.indexing_semaphore.clone(),
444 corruption_detected:self.corruption_detected.clone(),
445 }
446 }
447}