cu29_intern_strs/
lib.rs

1use anyhow::{Context, Result};
2use bincode::config::standard;
3use bincode::{Decode, Encode, decode_from_slice, encode_to_vec};
4use fs2::FileExt;
5use std::collections::HashMap;
6use std::fs::{self, OpenOptions};
7use std::io::{Read, Seek, SeekFrom, Write};
8use std::path::{Path, PathBuf};
9
/// Integer type of the indices handed out for interned strings.
type IndexType = u32;
11
/// Prefix put in front of every build-log line: the text `CLog:` wrapped in the ANSI
/// escape sequences for green foreground (`\x1b[32m`) and attribute reset (`\x1b[0m`).
#[cfg(feature = "macro_debug")]
const COLORED_PREFIX_BUILD_LOG: &str = "\x1b[32mCLog:\x1b[0m";
14
/// Writes one formatted line to stderr, prefixed with `COLORED_PREFIX_BUILD_LOG`.
///
/// Accepts the same arguments as `format!`. Only compiled when the `macro_debug`
/// feature is enabled.
#[cfg(feature = "macro_debug")]
macro_rules! build_log {
    ($($fmt_args:tt)*) => {
        eprintln!("{} {}", COLORED_PREFIX_BUILD_LOG, format_args!($($fmt_args)*));
    };
}
21
/// The name of the directory where the log index is stored.
const INDEX_DIR_NAME: &str = "cu29_log_index";
/// The name of the database file, inside the index directory, that holds the interned strings.
const DB_FILE_NAME: &str = "strings.bin";
25
/// Database of interned strings as it is encoded to / decoded from the index file.
#[derive(Encode, Decode, Default)]
struct InternDb {
    /// Index that will be assigned to the next newly interned string.
    next_index: IndexType,
    /// Interned strings, stored at the position given by their index.
    strings: Vec<String>,
    /// Reverse lookup: interned string -> the index it was assigned.
    string_to_index: HashMap<String, IndexType>,
}
32
33impl InternDb {
34    fn new() -> Self {
35        Self {
36            next_index: 1, // keep 0 reserved as before
37            ..Default::default()
38        }
39    }
40}
41
/// Walks `n` levels up from `path` and returns the directory reached.
///
/// `n == 0` yields `path` itself; `None` is returned when the path runs out of
/// parents before `n` steps have been taken.
fn parent_n_times(path: &Path, n: usize) -> Option<PathBuf> {
    // `ancestors()` yields the path itself first, then each successive parent.
    path.ancestors().nth(n).map(Path::to_path_buf)
}
49
50/// Convenience function to returns the default path for the log index directory.
51pub fn default_log_index_dir() -> PathBuf {
52    let outdir = std::env::var("LOG_INDEX_DIR").expect("no LOG_INDEX_DIR system variable set, be sure build.rs sets it, see cu29_log/build.rs for example.");
53    let outdir_path = Path::new(&outdir);
54    parent_n_times(outdir_path, 3).unwrap().join(INDEX_DIR_NAME)
55}
56
57fn database_path(base: &Path) -> PathBuf {
58    match base.extension() {
59        Some(_) => base.to_path_buf(),
60        None => base.join(DB_FILE_NAME),
61    }
62}
63
64/// Reads all interned strings from the index at the specified path.
65/// The index is created at compile time within your project output directory.
66pub fn read_interned_strings(index: &Path) -> Result<Vec<String>> {
67    let db_path = database_path(index);
68    let db =
69        load_db_shared(&db_path).context("Could not open the string index. Check the path.")?;
70    Ok(db.strings)
71}
72
73pub fn intern_string(s: &str) -> Option<IndexType> {
74    let base_dir = default_log_index_dir();
75    let db_path = database_path(&base_dir);
76    if let Some(parent) = db_path.parent() {
77        fs::create_dir_all(parent).ok()?;
78    }
79
80    #[cfg(feature = "macro_debug")]
81    log_db_info_once(&db_path);
82
83    let mut file = OpenOptions::new()
84        .read(true)
85        .write(true)
86        .create(true)
87        .truncate(false)
88        .open(&db_path)
89        .ok()?;
90    file.lock_exclusive().ok()?;
91
92    let mut buf = Vec::new();
93    file.read_to_end(&mut buf).ok()?;
94    file.seek(SeekFrom::Start(0)).ok()?;
95
96    let mut db = if buf.is_empty() {
97        InternDb::new()
98    } else {
99        decode_from_slice(&buf, standard()).ok()?.0
100    };
101
102    if let Some(&idx) = db.string_to_index.get(s) {
103        #[cfg(feature = "macro_debug")]
104        {
105            build_log!("#{:0>3} [r] -> {}.", idx, s);
106        }
107        return Some(idx);
108    }
109
110    let idx = db.next_index;
111    let idx_usize = idx as usize;
112    if db.strings.len() <= idx_usize {
113        db.strings.resize(idx_usize + 1, String::new());
114    }
115    db.strings[idx_usize] = s.to_string();
116    db.string_to_index.insert(s.to_string(), idx);
117    db.next_index = db.next_index.checked_add(1)?;
118
119    let encoded = encode_to_vec(&db, standard()).ok()?;
120    file.set_len(0).ok()?;
121    file.write_all(&encoded).ok()?;
122    file.flush().ok()?;
123    let _ = file.unlock();
124
125    #[cfg(feature = "macro_debug")]
126    {
127        build_log!("#{:0>3} [n] -> {}.", idx, s);
128    }
129
130    Some(idx)
131}
132
/// Prints a banner with the database location and the `[r]`/`[n]` legend to the build
/// log, the first time it is called in the process; later calls do nothing.
#[cfg(feature = "macro_debug")]
fn log_db_info_once(db_path: &Path) {
    static BANNER_SHOWN: std::sync::OnceLock<()> = std::sync::OnceLock::new();

    let print_banner = || {
        let rule =
            "==================================================================================";
        build_log!("{}", rule);
        build_log!("Interned strings are stored in: {:?}", db_path);
        build_log!("   [r] is reused index and [n] is new index.");
        build_log!("{}", rule);
    };
    BANNER_SHOWN.get_or_init(print_banner);
}
148
149fn load_db_shared(path: &Path) -> std::result::Result<InternDb, anyhow::Error> {
150    let mut file = match OpenOptions::new().read(true).open(path) {
151        Ok(f) => f,
152        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(InternDb::new()),
153        Err(e) => return Err(e).context("Failed to open index file"),
154    };
155    file.lock_shared()
156        .context("Failed to lock index for read")?;
157
158    let mut buf = Vec::new();
159    file.read_to_end(&mut buf)
160        .context("Failed to read index file")?;
161    let _ = file.unlock();
162
163    if buf.is_empty() {
164        return Ok(InternDb::new());
165    }
166
167    let (db, _): (InternDb, _) =
168        decode_from_slice(&buf, standard()).context("Failed to decode index")?;
169    Ok(db)
170}
171
172#[allow(dead_code)]
173pub fn record_callsite(filename: &str, line_number: u32) -> Option<IndexType> {
174    intern_string(format!("{filename}:{line_number}").as_str())
175}