//! HTML sanitisation for user-supplied text fields.
//!
//! Uses ammonia to strip dangerous HTML tags and attributes while preserving
//! the visible text content. Sanitisation that actually fires is logged at
//! WARN so operators can spot abuse patterns.
use sha2::Digest;
use std::collections::HashSet;
/// Strip every HTML tag from `value`, which was supplied for the named `field`.
///
/// The ammonia cleaner is configured with an empty tag allow-list and an empty
/// set of "clean content" tags, so no element survives but the text inside
/// elements is kept rather than dropped.
///
/// Returns `(sanitised_value, was_modified)`, where `was_modified` is `true`
/// whenever the cleaner's output differs from `value`. In that case this
/// function has already emitted a WARN log line containing the field name and
/// a short fingerprint of the original input (never the raw input itself).
pub(super) fn sanitize_html(field: &str, value: &str) -> (String, bool) {
    // Allow NO tags at all.
    let mut cleaner = ammonia::Builder::new();
    cleaner.tags(HashSet::new());
    // Also empty out `clean_content_tags` so that the content of such tags is
    // preserved as literal text rather than silently discarded.
    cleaner.clean_content_tags(HashSet::new());

    let sanitised = cleaner.clean(value).to_string();

    // Nothing changed: return without logging.
    if sanitised == value {
        return (sanitised, false);
    }

    // Log a fingerprint rather than the payload so the offending input is
    // identifiable across log lines without being reproduced in the logs.
    crate::slog_warn!(
        "[validation] HTML sanitised in field '{}': fingerprint={}",
        field,
        fingerprint(value)
    );
    (sanitised, true)
}
/// Compute a short fingerprint of `input`.
///
/// The result is the first four bytes of the SHA-256 digest of `input`'s
/// UTF-8 bytes, rendered as eight lowercase, zero-padded hex characters.
fn fingerprint(input: &str) -> String {
    let digest = sha2::Sha256::digest(input.as_bytes());
    // Four bytes at two hex characters each.
    let mut hex = String::with_capacity(8);
    for byte in digest.iter().take(4) {
        hex.push_str(&format!("{byte:02x}"));
    }
    hex
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn script_tags_stripped_content_preserved() {
let (out, modified) = sanitize_html("name", "");
assert!(modified);
assert!(!out.contains("