AirLibrary/Indexing/Store/
StoreEntry.rs

1//! # StoreEntry
2//!
3//! ## File: Indexing/Store/StoreEntry.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides index storage functionality for the File Indexer service,
8//! handling serialization and persistence of the file index to disk.
9//!
10//! ## Primary Responsibility
11//!
12//! Store the file index to disk with atomic writes and corruption recovery
13//! mechanisms.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - Load index from disk with validation
18//! - Backup corrupted indexes automatically
19//! - Atomic writes using temp files
20//! - Index integrity verification
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `serde_json` - JSON serialization/deserialization
26//! - `tokio` - Async file I/O operations
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::super::FileIndex` - Index structure definitions
32//! - `super::super::State::CreateState` - State creation utilities
33//!
34//! ## Dependents
35//!
36//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
37//!
38//! ## VSCode Pattern Reference
39//!
40//! Inspired by VSCode's index storage in
41//! `src/vs/workbench/services/search/common/`
42//!
43//! ## Security Considerations
44//!
45//! - Atomic writes prevent partial index corruption
46//! - Permission checking on index directory
47//! - Path traversal protection
48//!
49//! ## Performance Considerations
50//!
51//! - Temp file pattern for atomic writes
52//! - Lazy loading of in-memory index
53//! - Efficient serialization with serde
54//!
55//! ## Error Handling Strategy
56//!
57//! Storage operations return detailed error messages for failures and
58//! automatically backup corrupted indexes when loading fails.
59//!
60//! ## Thread Safety
61//!
62//! Storage operations use async file I/O and return results that can be
//! safely merged into shared `Arc<RwLock<>>` state.
64
65use std::path::{Path, PathBuf};
66
67use tokio::sync::RwLock;
68
69use crate::{
70	AirError,
71	Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolKind, SymbolLocation},
72	Result,
73};
74
75/// Save index to disk with atomic write
76pub async fn SaveIndex(index_directory:&Path, index:&FileIndex) -> Result<()> {
77	let index_file = index_directory.join("file_index.json");
78	let temp_file = index_directory.join("file_index.json.tmp");
79
80	let content = serde_json::to_string_pretty(index)
81		.map_err(|e| AirError::Serialization(format!("Failed to serialize index: {}", e)))?;
82
83	// Write to temp file first
84	tokio::fs::write(&temp_file, content)
85		.await
86		.map_err(|e| AirError::FileSystem(format!("Failed to write temp index file: {}", e)))?;
87
88	// Atomic rename
89	tokio::fs::rename(&temp_file, &index_file)
90		.await
91		.map_err(|e| AirError::FileSystem(format!("Failed to rename index file: {}", e)))?;
92
93	log::debug!(
94		"[StoreEntry] Index saved to: {} ({} files, {} symbols)",
95		index_file.display(),
96		index.files.len(),
97		index.symbol_index.len()
98	);
99
100	Ok(())
101}
102
103/// Load index from disk with corruption detection
104pub async fn LoadIndex(index_directory:&Path) -> Result<FileIndex> {
105	let index_file = index_directory.join("file_index.json");
106
107	if !index_file.exists() {
108		return Err(AirError::FileSystem(format!(
109			"Index file does not exist: {}",
110			index_file.display()
111		)));
112	}
113
114	let content = tokio::fs::read_to_string(&index_file)
115		.await
116		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
117
118	let index:FileIndex = serde_json::from_str(&content)
119		.map_err(|e| AirError::Serialization(format!("Failed to parse index file: {}", e)))?;
120
121	// Verify index structure
122	if index.index_version.is_empty() || index.index_checksum.is_empty() {
123		return Err(AirError::Serialization("Index missing version or checksum".to_string()));
124	}
125
126	// Verify index checksum
127	use crate::Indexing::State::CreateState::{CalculateIndexChecksum, CreateNewIndex};
128	let expected_checksum = CalculateIndexChecksum(&index)?;
129	if index.index_checksum != expected_checksum {
130		return Err(AirError::Serialization(format!(
131			"Index checksum mismatch: expected {}, got {}",
132			expected_checksum, index.index_checksum
133		)));
134	}
135
136	Ok(index)
137}
138
139/// Load or create index with corruption detection
140pub async fn LoadOrCreateIndex(index_directory:&Path) -> Result<FileIndex> {
141	let index_file = index_directory.join("file_index.json");
142
143	if index_file.exists() {
144		// Try to load existing index
145		match LoadIndex(index_directory).await {
146			Ok(index) => {
147				log::info!("[StoreEntry] Loaded index with {} files", index.files.len());
148				Ok(index)
149			},
150			Err(e) => {
151				log::warn!(
152					"[StoreEntry] Failed to load index (may be corrupted): {}. Creating new index.",
153					e
154				);
155				// Backup corrupted index
156				BackupCorruptedIndex(index_directory).await?;
157				Ok(CreateNewIndex())
158			},
159		}
160	} else {
161		// Create new index
162		Ok(CreateNewIndex())
163	}
164}
165
166/// Create a new empty index
167fn CreateNewIndex() -> FileIndex {
168	use crate::Indexing::State::CreateState::CreateNewIndex as StateCreateNewIndex;
169	StateCreateNewIndex()
170}
171
172/// Ensure index directory exists with proper error handling
173pub async fn EnsureIndexDirectory(index_directory:&Path) -> Result<()> {
174	tokio::fs::create_dir_all(index_directory).await.map_err(|e| {
175		AirError::Configuration(format!("Failed to create index directory {}: {}", index_directory.display(), e))
176	})?;
177	Ok(())
178}
179
180/// Backup corrupted index before creating new one
181pub async fn BackupCorruptedIndex(index_directory:&Path) -> Result<()> {
182	let index_file = index_directory.join("file_index.json");
183	let backup_file = index_directory.join(format!("file_index.corrupted.{}.json", chrono::Utc::now().timestamp()));
184
185	if !index_file.exists() {
186		return Ok(());
187	}
188
189	// Rename corrupted file to backup
190	tokio::fs::rename(&index_file, &backup_file)
191		.await
192		.map_err(|e| AirError::FileSystem(format!("Failed to backup corrupted index: {}", e)))?;
193
194	log::info!("[StoreEntry] Backed up corrupted index to: {}", backup_file.display());
195
196	Ok(())
197}
198
199/// Load index with automatic recovery on corruption
200pub async fn LoadIndexWithRecovery(index_directory:&Path, max_retries:usize) -> Result<FileIndex> {
201	let mut last_error = None;
202
203	for attempt in 0..max_retries {
204		match LoadOrCreateIndex(index_directory).await {
205			Ok(index) => {
206				if attempt > 0 {
207					log::info!("[StoreEntry] Successfully loaded index after {} attempts", attempt + 1);
208				}
209				return Ok(index);
210			},
211			Err(e) => {
212				last_error = Some(e);
213				log::warn!("[StoreEntry] Load attempt {} failed", attempt + 1);
214
215				// Wait before retry
216				if attempt < max_retries - 1 {
217					tokio::time::sleep(tokio::time::Duration::from_millis(100 * (attempt + 1) as u64)).await;
218				}
219			},
220		}
221	}
222
223	Err(last_error.unwrap_or_else(|| AirError::Internal("Failed to load index after retries".to_string())))
224}
225
/// Return the path of the index file (`file_index.json`) inside
/// `index_directory`.
///
/// This only builds the path; it does not touch the file system.
pub fn GetIndexFilePath(index_directory:&Path) -> PathBuf {
	let mut index_file = index_directory.to_path_buf();
	index_file.push("file_index.json");
	index_file
}
228
229/// Check if index file exists and is readable
230pub async fn IndexFileExists(index_directory:&Path) -> Result<bool> {
231	let index_file = index_directory.join("file_index.json");
232
233	if !index_file.exists() {
234		return Ok(false);
235	}
236
237	// Try to read metadata to verify accessibility
238	match tokio::fs::metadata(&index_file).await {
239		Ok(_) => Ok(true),
240		Err(_) => Ok(false),
241	}
242}
243
244/// Get index file size in bytes
245pub async fn GetIndexFileSize(index_directory:&Path) -> Result<u64> {
246	let index_file = index_directory.join("file_index.json");
247
248	let metadata = tokio::fs::metadata(&index_file)
249		.await
250		.map_err(|e| AirError::FileSystem(format!("Failed to get index file metadata: {}", e)))?;
251
252	Ok(metadata.len())
253}
254
255/// Clean up old backup files
256pub async fn CleanupOldBackups(index_directory:&Path, keep_count:usize) -> Result<usize> {
257	let mut entries = tokio::fs::read_dir(index_directory)
258		.await
259		.map_err(|e| AirError::FileSystem(format!("Failed to read index directory: {}", e)))?;
260
261	let mut backups = Vec::new();
262
263	while let Some(entry) = entries
264		.next_entry()
265		.await
266		.map_err(|e| AirError::FileSystem(format!("Failed to read directory entry: {}", e)))?
267	{
268		let file_name = entry.file_name().to_string_lossy().to_string();
269
270		if file_name.starts_with("file_index.corrupted.") && file_name.ends_with(".json") {
271			if let Ok(metadata) = entry.metadata().await {
272				if let Ok(modified) = metadata.modified() {
273					backups.push((entry.path(), modified));
274				}
275			}
276		}
277	}
278
279	// Sort by modified time (oldest first)
280	backups.sort_by_key(|b| b.1);
281
282	let mut removed_count = 0;
283
284	// Remove old backups beyond keep_count
285	for (path, _) in backups.iter().take(backups.len().saturating_sub(keep_count)) {
286		match tokio::fs::remove_file(path).await {
287			Ok(_) => {
288				log::info!("[StoreEntry] Removed old backup: {}", path.display());
289				removed_count += 1;
290			},
291			Err(e) => {
292				log::warn!("[StoreEntry] Failed to remove backup {}: {}", path.display(), e);
293			},
294		}
295	}
296
297	Ok(removed_count)
298}
299
300/// Validate index file format before loading
301pub async fn ValidateIndexFormat(index_directory:&Path) -> Result<()> {
302	let index_file = index_directory.join("file_index.json");
303
304	let content = tokio::fs::read_to_string(&index_file)
305		.await
306		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
307
308	// Try to parse as JSON
309	let _:serde_json::Value = serde_json::from_str(&content)
310		.map_err(|e| AirError::Serialization(format!("Index file is not valid JSON: {}", e)))?;
311
312	Ok(())
313}