//! HTML sanitisation for user-supplied text fields. //! //! Uses ammonia to strip dangerous HTML tags and attributes while preserving //! the visible text content. Sanitisation that actually fires is logged at //! WARN so operators can spot abuse patterns. use sha2::Digest; use std::collections::HashSet; /// Sanitise `value` for the named `field`. /// /// Strips all HTML tags (keeping their text content) and removes dangerous /// attributes. Returns `(sanitised_value, was_modified)`. When `was_modified` /// is `true` the caller should log at WARN. pub(super) fn sanitize_html(field: &str, value: &str) -> (String, bool) { // Build an ammonia cleaner that allows NO tags but keeps text content. // clear_content_tags is also set to empty so that // content is preserved as literal text rather than silently discarded. let clean = ammonia::Builder::new() .tags(HashSet::new()) .clean_content_tags(HashSet::new()) .clean(value) .to_string(); let modified = clean != value; if modified { crate::slog_warn!( "[validation] HTML sanitised in field '{}': fingerprint={}", field, fingerprint(value) ); } (clean, modified) } /// Return an 8-hex-char SHA-256 fingerprint of the input string. fn fingerprint(input: &str) -> String { let hash = sha2::Sha256::digest(input.as_bytes()); hash[..4].iter().map(|b| format!("{b:02x}")).collect() } #[cfg(test)] mod tests { use super::*; #[test] fn script_tags_stripped_content_preserved() { let (out, modified) = sanitize_html("name", ""); assert!(modified); assert!(!out.contains("