No slur handles allowed

This commit is contained in:
lewis
2025-12-28 12:50:22 +02:00
parent 8cb82abc82
commit e90308ba9e
13 changed files with 634 additions and 31 deletions

1
Cargo.lock generated
View File

@@ -6305,6 +6305,7 @@ dependencies = [
"p384",
"rand 0.8.5",
"redis",
"regex",
"reqwest",
"serde",
"serde_bytes",

View File

@@ -30,6 +30,7 @@ k256 = { version = "0.13.3", features = ["ecdsa", "pem", "pkcs8"] }
multibase = "0.9.1"
multihash = "0.19.3"
rand = "0.8.5"
regex = "1"
reqwest = { version = "0.12.28", features = ["json"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_bytes = "0.11.14"

View File

@@ -175,7 +175,7 @@ export const api = {
});
const data = await response.json();
if (!response.ok) {
throw new ApiError(data.error, data.message, response.status);
throw new ApiError(response.status, data.error, data.message);
}
return data;
},

View File

@@ -194,7 +194,15 @@ pub async fn create_account(
.into_response();
}
}
input.handle.to_lowercase()
let handle_lower = input.handle.to_lowercase();
if crate::moderation::has_explicit_slur(&handle_lower) {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": "InvalidHandle", "message": "Inappropriate language in handle"})),
)
.into_response();
}
handle_lower
};
let email: Option<String> = input
.email

View File

@@ -582,6 +582,13 @@ pub async fn update_handle(
)
.into_response();
}
if crate::moderation::has_explicit_slur(new_handle) {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": "InvalidHandle", "message": "Inappropriate language in handle"})),
)
.into_response();
}
let hostname = std::env::var("PDS_HOSTNAME").unwrap_or_else(|_| "localhost".to_string());
let suffix = format!(".{}", hostname);
let is_service_domain = crate::handle::is_service_domain_handle(new_handle, &hostname);

View File

@@ -1,4 +1,4 @@
use super::validation::validate_record;
use super::validation::validate_record_with_rkey;
use super::write::has_verified_comms_channel;
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
use crate::delegation::{self, DelegationActionType};
@@ -304,7 +304,8 @@ pub async fn apply_writes(
value,
} => {
if input.validate.unwrap_or(true)
&& let Err(err_response) = validate_record(value, collection)
&& let Err(err_response) =
validate_record_with_rkey(value, collection, rkey.as_deref())
{
return *err_response;
}
@@ -357,7 +358,8 @@ pub async fn apply_writes(
value,
} => {
if input.validate.unwrap_or(true)
&& let Err(err_response) = validate_record(value, collection)
&& let Err(err_response) =
validate_record_with_rkey(value, collection, Some(rkey))
{
return *err_response;
}

View File

@@ -7,8 +7,16 @@ use axum::{
use serde_json::json;
/// Validates `record` for `collection` when the caller has no record key.
///
/// Forwards to `validate_record_with_rkey` with the record key set to `None`
/// and returns its result unchanged.
pub fn validate_record(
    record: &serde_json::Value,
    collection: &str,
) -> Result<(), Box<Response>> {
    let no_rkey: Option<&str> = None;
    validate_record_with_rkey(record, collection, no_rkey)
}
pub fn validate_record_with_rkey(
record: &serde_json::Value,
collection: &str,
rkey: Option<&str>,
) -> Result<(), Box<Response>> {
let validator = RecordValidator::new();
match validator.validate(record, collection) {
match validator.validate_with_rkey(record, collection, rkey) {
Ok(_) => Ok(()),
Err(ValidationError::MissingType) => Err(Box::new((
StatusCode::BAD_REQUEST,
@@ -30,6 +38,10 @@ pub fn validate_record(record: &serde_json::Value, collection: &str) -> Result<(
StatusCode::BAD_REQUEST,
Json(json!({"error": "InvalidRecord", "message": format!("Invalid datetime format at '{}'", path)})),
).into_response())),
Err(ValidationError::BannedContent { path }) => Err(Box::new((
StatusCode::BAD_REQUEST,
Json(json!({"error": "InvalidRecord", "message": format!("Unacceptable slur in record at '{}'", path)})),
).into_response())),
Err(e) => Err(Box::new((
StatusCode::BAD_REQUEST,
Json(json!({"error": "InvalidRecord", "message": e.to_string()})),

View File

@@ -1,4 +1,4 @@
use super::validation::validate_record;
use super::validation::validate_record_with_rkey;
use crate::api::repo::record::utils::{CommitParams, RecordOp, commit_and_log, extract_blob_cids};
use crate::delegation::{self, DelegationActionType};
use crate::repo::tracking::TrackingBlockStore;
@@ -257,7 +257,8 @@ pub async fn create_record(
}
};
if input.validate.unwrap_or(true)
&& let Err(err_response) = validate_record(&input.record, &input.collection)
&& let Err(err_response) =
validate_record_with_rkey(&input.record, &input.collection, input.rkey.as_deref())
{
return *err_response;
}
@@ -480,7 +481,8 @@ pub async fn put_record(
};
let key = format!("{}/{}", collection_nsid, input.rkey);
if input.validate.unwrap_or(true)
&& let Err(err_response) = validate_record(&input.record, &input.collection)
&& let Err(err_response) =
validate_record_with_rkey(&input.record, &input.collection, Some(&input.rkey))
{
return *err_response;
}

View File

@@ -16,6 +16,7 @@ pub enum HandleValidationError {
StartsWithInvalidChar,
EndsWithInvalidChar,
ContainsSpaces,
BannedWord,
}
impl std::fmt::Display for HandleValidationError {
@@ -41,6 +42,7 @@ impl std::fmt::Display for HandleValidationError {
}
Self::EndsWithInvalidChar => write!(f, "Handle cannot end with a hyphen or underscore"),
Self::ContainsSpaces => write!(f, "Handle cannot contain spaces"),
Self::BannedWord => write!(f, "Inappropriate language in handle"),
}
}
}
@@ -82,6 +84,10 @@ pub fn validate_short_handle(handle: &str) -> Result<String, HandleValidationErr
}
}
if crate::moderation::has_explicit_slur(handle) {
return Err(HandleValidationError::BannedWord);
}
Ok(handle.to_lowercase())
}

View File

@@ -10,6 +10,7 @@ pub mod delegation;
pub mod handle;
pub mod image;
pub mod metrics;
pub mod moderation;
pub mod oauth;
pub mod plc;
pub mod rate_limit;

262
src/moderation/mod.rs Normal file
View File

@@ -0,0 +1,262 @@
/*
* CONTENT WARNING
*
* This file contains explicit slurs and hateful language. We're sorry you have to see them.
*
* These words exist here for one reason: to ensure our moderation system correctly blocks them.
* We can't verify the filter catches the n-word without testing against the actual word.
* Euphemisms wouldn't prove the protection works.
*
* If reading this file has caused you distress, please know:
* - you are valued and welcome in this community
* - these words do not reflect the views of this project or its contributors
* - we maintain this code precisely because we believe everyone deserves an experience on the web that is free from this kinda language
*/
use regex::Regex;
use std::sync::OnceLock;
// Built-in patterns; populated once, on first use, by `get_slur_regexes`.
static SLUR_REGEXES: OnceLock<Vec<Regex>> = OnceLock::new();
// Additional banned words; populated once, on first use, by
// `get_extra_banned_words` from the `PDS_BANNED_WORDS` environment variable.
static EXTRA_BANNED_WORDS: OnceLock<Vec<String>> = OnceLock::new();
/// Returns the built-in slur patterns, compiling them on the first call and
/// handing back the same cached vector on every later call.
///
/// Each pattern spells one word (plus some plural/derived endings) using
/// character classes that list accented and look-alike letters and a few
/// digit/symbol substitutions such as `0`, `1`, `3`, `4` and `@`.
/// All patterns but one are wrapped in `\b` word boundaries; the unanchored
/// one therefore also matches inside longer words.
///
/// The `unwrap()` on each `Regex::new` panics only if a pattern literal in
/// this function fails to compile.
fn get_slur_regexes() -> &'static Vec<Regex> {
SLUR_REGEXES.get_or_init(|| {
vec![
Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][hĤĥȞȟḦḧḢḣḨḩḤḥḪḫH̱ẖĦħⱧⱨꞪɦꞕΗНн][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴ][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПп][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
Regex::new(r"\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺ0]{2}[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПп][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
Regex::new(r"\b[fḞḟƑƒᵮᶂ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ@4][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠ]{1,2}([ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺ0e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳiÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴ][tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ]{1,2}([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ])?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
Regex::new(r"\b[kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴyÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ]([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ])?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]*\b").unwrap(),
Regex::new(r"\b[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПп][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴoÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳaÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ]?|n[ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺ0][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠqꝖꝗꝘꝙɋʠ]|[a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ]?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
Regex::new(r"[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПп][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴoÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?").unwrap(),
Regex::new(r"\b[tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚ4]+[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПп]{1,2}([iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIı1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ]|[yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b").unwrap(),
]
})
}
/// Returns the extra banned words configured through `PDS_BANNED_WORDS`.
///
/// The variable is read once, on the first call; an unset or unreadable
/// variable is treated as an empty string. The value is split on commas and
/// each entry is trimmed and lower-cased; entries that end up empty are dropped.
fn get_extra_banned_words() -> &'static Vec<String> {
    EXTRA_BANNED_WORDS.get_or_init(|| {
        let raw = std::env::var("PDS_BANNED_WORDS").unwrap_or_default();
        let mut words = Vec::new();
        for entry in raw.split(',') {
            let word = entry.trim().to_lowercase();
            if !word.is_empty() {
                words.push(word);
            }
        }
        words
    })
}
/// Returns `s` without the run of ASCII digits (`0`-`9`) at its end, if any.
///
/// Digits elsewhere in the string and non-ASCII digits are left alone; a
/// string made only of ASCII digits becomes the empty string.
fn strip_trailing_digits(s: &str) -> &str {
    // Byte offset just past the last character that is not an ASCII digit.
    let keep = s
        .char_indices()
        .rev()
        .find(|&(_, ch)| !ch.is_ascii_digit())
        .map_or(0, |(start, ch)| start + ch.len_utf8());
    &s[..keep]
}
/// Replaces common digit/symbol stand-ins with the letter they imitate.
///
/// Characters without an entry in the substitution table are copied through
/// unchanged, so the result has the same number of characters as `s`.
fn normalize_leetspeak(s: &str) -> String {
    // (stand-in, letter) pairs.
    const SUBSTITUTIONS: [(char, char); 12] = [
        ('4', 'a'),
        ('@', 'a'),
        ('3', 'e'),
        ('1', 'i'),
        ('!', 'i'),
        ('|', 'i'),
        ('0', 'o'),
        ('5', 's'),
        ('$', 's'),
        ('7', 't'),
        ('8', 'b'),
        ('9', 'g'),
    ];
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        let plain = SUBSTITUTIONS
            .iter()
            .find(|(from, _)| *from == ch)
            .map_or(ch, |&(_, to)| to);
        out.push(plain);
    }
    out
}
/// Reports whether `text` is rejected by the moderation filter.
///
/// Uses the built-in patterns together with the extra words returned by
/// `get_extra_banned_words`.
pub fn has_explicit_slur(text: &str) -> bool {
    let configured_words = get_extra_banned_words();
    has_explicit_slur_with_extra_words(text, configured_words)
}
/// Core matcher behind `has_explicit_slur`, with the extra word list passed in
/// explicitly.
///
/// `text` is lower-cased and then examined in four forms: as-is, with every
/// `.`, `-` and `_` removed, and each of those with trailing ASCII digits
/// stripped. It is flagged when any built-in pattern matches any form.
/// Otherwise, when `extra_words` is non-empty, it is flagged when any extra
/// word occurs as a substring of the lower-cased text, of the separator-free
/// text, or of the separator-free text after leetspeak normalization.
///
/// `extra_words` entries are compared verbatim; the caller is expected to pass
/// them lower-cased (as `get_extra_banned_words` does).
fn has_explicit_slur_with_extra_words(text: &str, extra_words: &[String]) -> bool {
    let text_lower = text.to_lowercase();
    // Single pass instead of three chained `replace` calls.
    let normalized: String = text_lower
        .chars()
        .filter(|c| !matches!(c, '.' | '-' | '_'))
        .collect();

    // The digit-stripped forms matter for the patterns because of their `\b`
    // anchors: a digit directly after the word suppresses the word boundary.
    let candidates = [
        text_lower.as_str(),
        normalized.as_str(),
        strip_trailing_digits(&text_lower),
        strip_trailing_digits(&normalized),
    ];
    let pattern_hit = get_slur_regexes()
        .iter()
        .any(|re| candidates.iter().any(|candidate| re.is_match(candidate)));
    if pattern_hit {
        return true;
    }

    if extra_words.is_empty() {
        return false;
    }

    // Extra words are matched with `contains`, so the digit-stripped forms are
    // not needed here: each is a prefix of a string that is searched anyway,
    // and a substring of a prefix is a substring of the whole.
    let leet_normalized = normalize_leetspeak(&normalized);
    extra_words.iter().any(|word| {
        let word = word.as_str();
        text_lower.contains(word) || normalized.contains(word) || leet_normalized.contains(word)
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the default filter flags every sample.
    fn assert_flagged(samples: &[&str]) {
        for sample in samples {
            assert!(
                has_explicit_slur(sample),
                "expected {sample:?} to be flagged"
            );
        }
    }

    /// Asserts that the default filter accepts every sample.
    fn assert_clean(samples: &[&str]) {
        for sample in samples {
            assert!(
                !has_explicit_slur(sample),
                "expected {sample:?} to be accepted"
            );
        }
    }

    // One test per built-in pattern: base form, plural/derived forms, upper case.

    #[test]
    fn test_chink_pattern() {
        assert_flagged(&["chink", "chinks", "CHINK", "Chinks"]);
    }

    #[test]
    fn test_coon_pattern() {
        assert_flagged(&["coon", "coons", "COON"]);
    }

    #[test]
    fn test_fag_pattern() {
        assert_flagged(&["fag", "fags", "faggot", "faggots", "faggotry"]);
    }

    #[test]
    fn test_kike_pattern() {
        assert_flagged(&["kike", "kikes", "KIKE", "kikery"]);
    }

    #[test]
    fn test_nigger_pattern() {
        assert_flagged(&["nigger", "niggers", "NIGGER", "nigga", "niggas"]);
    }

    #[test]
    fn test_tranny_pattern() {
        assert_flagged(&["tranny", "trannies", "TRANNY"]);
    }

    // Evasion attempts the filter must see through.

    /// Separators (`.`, `-`, `_`) between letters.
    #[test]
    fn test_normalization_bypass() {
        assert_flagged(&[
            "n.i.g.g.e.r",
            "n-i-g-g-e-r",
            "n_i_g_g_e_r",
            "f.a.g",
            "f-a-g",
            "c.h.i.n.k",
            "k_i_k_e",
        ]);
    }

    /// Digits appended to the word.
    #[test]
    fn test_trailing_digits_bypass() {
        assert_flagged(&[
            "faggot123",
            "nigger69",
            "chink420",
            "fag1",
            "kike2024",
            "n_i_g_g_e_r123",
        ]);
    }

    /// The word appearing as one word of a longer phrase.
    #[test]
    fn test_embedded_in_sentence() {
        assert_flagged(&["you are a faggot", "stupid nigger", "go away chink"]);
    }

    // Inputs that must not be flagged.

    #[test]
    fn test_safe_words_not_matched() {
        assert_clean(&[
            "hello",
            "world",
            "bluesky",
            "tranquil",
            "programmer",
            "trigger",
            "bigger",
            "digger",
            "figure",
            "configure",
        ]);
    }

    #[test]
    fn test_similar_but_safe_words() {
        assert_clean(&["niggardly", "raccoon"]);
    }

    #[test]
    fn test_empty_and_whitespace() {
        assert_clean(&["", " ", "\t\n"]);
    }

    #[test]
    fn test_case_insensitive() {
        assert_flagged(&["NIGGER", "Nigger", "NiGgEr", "FAGGOT", "Faggot"]);
    }

    /// Digits standing in for letters inside the word.
    #[test]
    fn test_leetspeak_bypass() {
        assert_flagged(&[
            "f4ggot", "f4gg0t", "n1gger", "n1gg3r", "k1ke", "ch1nk", "tr4nny",
        ]);
    }

    #[test]
    fn test_normalize_leetspeak() {
        let cases = [
            ("h3llo", "hello"),
            ("w0rld", "world"),
            ("t3$t", "test"),
            ("b4dw0rd", "badword"),
            ("l33t5p34k", "leetspeak"),
            ("@ss", "ass"),
            ("sh!t", "shit"),
            ("normal", "normal"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_leetspeak(input), expected);
        }
    }

    /// Exercises the extra-word path with an explicit list instead of the
    /// environment-derived one.
    #[test]
    fn test_extra_banned_words() {
        let extra = vec!["badword".to_string(), "offensive".to_string()];
        let flagged = [
            "badword",
            "BADWORD",
            "b.a.d.w.o.r.d",
            "b-a-d-w-o-r-d",
            "b_a_d_w_o_r_d",
            "badword123",
            "b4dw0rd",
            "b4dw0rd789",
            "b.4.d.w.0.r.d",
            "this contains badword here",
            "0ff3n$1v3",
        ];
        for sample in flagged {
            assert!(
                has_explicit_slur_with_extra_words(sample, &extra),
                "expected {sample:?} to be flagged"
            );
        }
        let accepted = ["goodword", "hello world"];
        for sample in accepted {
            assert!(
                !has_explicit_slur_with_extra_words(sample, &extra),
                "expected {sample:?} to be accepted"
            );
        }
    }
}

View File

@@ -17,6 +17,8 @@ pub enum ValidationError {
InvalidRecord(String),
#[error("Unknown record type: {0}")]
UnknownType(String),
#[error("Unacceptable slur in record at {path}")]
BannedContent { path: String },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -52,6 +54,15 @@ impl RecordValidator {
&self,
record: &Value,
collection: &str,
) -> Result<ValidationStatus, ValidationError> {
self.validate_with_rkey(record, collection, None)
}
pub fn validate_with_rkey(
&self,
record: &Value,
collection: &str,
rkey: Option<&str>,
) -> Result<ValidationStatus, ValidationError> {
let obj = record.as_object().ok_or_else(|| {
ValidationError::InvalidRecord("Record must be an object".to_string())
@@ -78,9 +89,10 @@ impl RecordValidator {
"app.bsky.graph.block" => self.validate_block(obj)?,
"app.bsky.graph.list" => self.validate_list(obj)?,
"app.bsky.graph.listitem" => self.validate_list_item(obj)?,
"app.bsky.feed.generator" => self.validate_feed_generator(obj)?,
"app.bsky.feed.generator" => self.validate_feed_generator(obj, rkey)?,
"app.bsky.feed.threadgate" => self.validate_threadgate(obj)?,
"app.bsky.labeler.service" => self.validate_labeler_service(obj)?,
"app.bsky.graph.starterpack" => self.validate_starterpack(obj)?,
_ => {
if self.require_lexicon {
return Err(ValidationError::UnknownType(record_type.to_string()));
@@ -126,13 +138,39 @@ impl RecordValidator {
});
}
for (i, tag) in tags.iter().enumerate() {
if let Some(tag_str) = tag.as_str()
&& tag_str.len() > 640
{
return Err(ValidationError::InvalidField {
path: format!("tags/{}", i),
message: "Tag exceeds maximum length of 640 bytes".to_string(),
});
if let Some(tag_str) = tag.as_str() {
if tag_str.len() > 640 {
return Err(ValidationError::InvalidField {
path: format!("tags/{}", i),
message: "Tag exceeds maximum length of 640 bytes".to_string(),
});
}
if crate::moderation::has_explicit_slur(tag_str) {
return Err(ValidationError::BannedContent {
path: format!("tags/{}", i),
});
}
}
}
}
if let Some(facets) = obj.get("facets").and_then(|v| v.as_array()) {
for (i, facet) in facets.iter().enumerate() {
if let Some(features) = facet.get("features").and_then(|v| v.as_array()) {
for (j, feature) in features.iter().enumerate() {
let is_tag = feature
.get("$type")
.and_then(|v| v.as_str())
.is_some_and(|t| t == "app.bsky.richtext.facet#tag");
if is_tag {
if let Some(tag) = feature.get("tag").and_then(|v| v.as_str()) {
if crate::moderation::has_explicit_slur(tag) {
return Err(ValidationError::BannedContent {
path: format!("facets/{}/features/{}/tag", i, j),
});
}
}
}
}
}
}
}
@@ -154,6 +192,11 @@ impl RecordValidator {
),
});
}
if crate::moderation::has_explicit_slur(display_name) {
return Err(ValidationError::BannedContent {
path: "displayName".to_string(),
});
}
}
if let Some(description) = obj.get("description").and_then(|v| v.as_str()) {
let grapheme_count = description.chars().count();
@@ -166,6 +209,11 @@ impl RecordValidator {
),
});
}
if crate::moderation::has_explicit_slur(description) {
return Err(ValidationError::BannedContent {
path: "description".to_string(),
});
}
}
Ok(())
}
@@ -238,13 +286,18 @@ impl RecordValidator {
if !obj.contains_key("createdAt") {
return Err(ValidationError::MissingField("createdAt".to_string()));
}
if let Some(name) = obj.get("name").and_then(|v| v.as_str())
&& (name.is_empty() || name.len() > 64)
{
return Err(ValidationError::InvalidField {
path: "name".to_string(),
message: "Name must be 1-64 characters".to_string(),
});
if let Some(name) = obj.get("name").and_then(|v| v.as_str()) {
if name.is_empty() || name.len() > 64 {
return Err(ValidationError::InvalidField {
path: "name".to_string(),
message: "Name must be 1-64 characters".to_string(),
});
}
if crate::moderation::has_explicit_slur(name) {
return Err(ValidationError::BannedContent {
path: "name".to_string(),
});
}
}
Ok(())
}
@@ -268,6 +321,7 @@ impl RecordValidator {
fn validate_feed_generator(
&self,
obj: &serde_json::Map<String, Value>,
rkey: Option<&str>,
) -> Result<(), ValidationError> {
if !obj.contains_key("did") {
return Err(ValidationError::MissingField("did".to_string()));
@@ -278,13 +332,64 @@ impl RecordValidator {
if !obj.contains_key("createdAt") {
return Err(ValidationError::MissingField("createdAt".to_string()));
}
if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str())
&& (display_name.is_empty() || display_name.len() > 240)
{
return Err(ValidationError::InvalidField {
path: "displayName".to_string(),
message: "displayName must be 1-240 characters".to_string(),
});
if let Some(rkey) = rkey {
if crate::moderation::has_explicit_slur(rkey) {
return Err(ValidationError::BannedContent {
path: "rkey".to_string(),
});
}
}
if let Some(display_name) = obj.get("displayName").and_then(|v| v.as_str()) {
if display_name.is_empty() || display_name.len() > 240 {
return Err(ValidationError::InvalidField {
path: "displayName".to_string(),
message: "displayName must be 1-240 characters".to_string(),
});
}
if crate::moderation::has_explicit_slur(display_name) {
return Err(ValidationError::BannedContent {
path: "displayName".to_string(),
});
}
}
Ok(())
}
/// Validates the fields of an `app.bsky.graph.starterpack` record.
///
/// Checks, in this order:
/// 1. the `name` and `createdAt` keys exist (`MissingField` otherwise);
/// 2. a string `name` is 1-500 long (`InvalidField`) and passes the slur
///    filter (`BannedContent`);
/// 3. a string `description`, when present, is at most 3000 long
///    (`InvalidField`) and passes the slur filter (`BannedContent`).
///
/// Lengths are taken with `str::len`, i.e. counted in bytes, even though the
/// error messages say "characters". Values that are not strings are not
/// inspected by these checks.
fn validate_starterpack(
    &self,
    obj: &serde_json::Map<String, Value>,
) -> Result<(), ValidationError> {
    for required in ["name", "createdAt"] {
        if !obj.contains_key(required) {
            return Err(ValidationError::MissingField(required.to_string()));
        }
    }

    match obj.get("name").and_then(|v| v.as_str()) {
        Some(name) if name.is_empty() || name.len() > 500 => {
            return Err(ValidationError::InvalidField {
                path: "name".to_string(),
                message: "name must be 1-500 characters".to_string(),
            });
        }
        Some(name) if crate::moderation::has_explicit_slur(name) => {
            return Err(ValidationError::BannedContent {
                path: "name".to_string(),
            });
        }
        _ => {}
    }

    match obj.get("description").and_then(|v| v.as_str()) {
        Some(description) if description.len() > 3000 => {
            return Err(ValidationError::InvalidField {
                path: "description".to_string(),
                message: "description must be at most 3000 characters".to_string(),
            });
        }
        Some(description) if crate::moderation::has_explicit_slur(description) => {
            return Err(ValidationError::BannedContent {
                path: "description".to_string(),
            });
        }
        _ => {}
    }

    Ok(())
}

196
tests/banned_words.rs Normal file
View File

@@ -0,0 +1,196 @@
/*
* CONTENT WARNING
*
* This file contains explicit slurs and hateful language. We're sorry you have to see them.
*
* These words exist here for one reason: to ensure our moderation system correctly blocks them.
* We can't verify the filter catches the n-word without testing against the actual word.
* Euphemisms wouldn't prove the protection works.
*
* If reading this file has caused you distress, please know:
* - you are valued and welcome in this community
* - these words do not reflect the views of this project or its contributors
* - we maintain this code precisely because we believe everyone deserves an experience on the web that is free from this kinda language
*/
mod common;
mod helpers;
use common::*;
use helpers::*;
use reqwest::StatusCode;
use serde_json::json;
/// `createAccount` with a handle made of a slur plus digits must be answered
/// with 400 `InvalidHandle` and an "Inappropriate language" message.
#[tokio::test]
async fn test_handle_with_slur_rejected() {
    let http = client();
    // The same millisecond timestamp is appended to the handle and the e-mail.
    let ts = chrono::Utc::now().timestamp_millis();
    let payload = json!({
        "handle": format!("nigger{}", ts),
        "email": format!("test{}@example.com", ts),
        "password": "TestPassword123!"
    });
    let endpoint = format!(
        "{}/xrpc/com.atproto.server.createAccount",
        base_url().await
    );
    let response = http
        .post(endpoint)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    let reply: serde_json::Value = response.json().await.unwrap();
    assert_eq!(reply["error"], "InvalidHandle");
    let message = reply["message"].as_str().unwrap_or("");
    assert!(message.contains("Inappropriate language"));
}
/// `createAccount` with a hyphen-separated slur plus digits as the handle must
/// be answered with 400 `InvalidHandle`.
///
/// NOTE(review): only the status and error code are asserted, so a rejection
/// by some other handle rule would also satisfy this test — consider checking
/// the message as `test_handle_with_slur_rejected` does.
#[tokio::test]
async fn test_handle_with_normalized_slur_rejected() {
    let http = client();
    let ts = chrono::Utc::now().timestamp_millis();
    let payload = json!({
        "handle": format!("n-i-g-g-e-r{}", ts),
        "email": format!("test{}@example.com", ts),
        "password": "TestPassword123!"
    });
    let endpoint = format!(
        "{}/xrpc/com.atproto.server.createAccount",
        base_url().await
    );
    let response = http
        .post(endpoint)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    let reply: serde_json::Value = response.json().await.unwrap();
    assert_eq!(reply["error"], "InvalidHandle");
}
/// `updateHandle` to a slur, sent by a freshly created user, must be answered
/// with 400 `InvalidHandle`.
///
/// NOTE(review): only the status and error code are asserted, so a rejection
/// by some other handle rule would also satisfy this test — consider also
/// checking the message.
#[tokio::test]
async fn test_handle_update_with_slur_rejected() {
    let http = client();
    let (_, jwt) = setup_new_user("handleupdate").await;
    let payload = json!({
        "handle": "faggots"
    });
    let endpoint = format!(
        "{}/xrpc/com.atproto.identity.updateHandle",
        base_url().await
    );
    let response = http
        .post(endpoint)
        .bearer_auth(&jwt)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    let reply: serde_json::Value = response.json().await.unwrap();
    assert_eq!(reply["error"], "InvalidHandle");
}
/// `putRecord` of a profile whose `displayName` contains a slur must be
/// answered with 400 `InvalidRecord`.
#[tokio::test]
async fn test_profile_displayname_with_slur_rejected() {
    let http = client();
    let (did, jwt) = setup_new_user("profileslur").await;
    let payload = json!({
        "repo": did,
        "collection": "app.bsky.actor.profile",
        "rkey": "self",
        "record": {
            "$type": "app.bsky.actor.profile",
            "displayName": "I am a kike"
        }
    });
    let endpoint = format!("{}/xrpc/com.atproto.repo.putRecord", base_url().await);
    let response = http
        .post(endpoint)
        .bearer_auth(&jwt)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    let reply: serde_json::Value = response.json().await.unwrap();
    assert_eq!(reply["error"], "InvalidRecord");
}
/// `putRecord` of a profile with a clean `displayName` but a slur in its
/// `description` must be answered with 400 `InvalidRecord`.
#[tokio::test]
async fn test_profile_description_with_slur_rejected() {
    let http = client();
    let (did, jwt) = setup_new_user("profiledesc").await;
    let payload = json!({
        "repo": did,
        "collection": "app.bsky.actor.profile",
        "rkey": "self",
        "record": {
            "$type": "app.bsky.actor.profile",
            "displayName": "Normal Name",
            "description": "I hate all chinks"
        }
    });
    let endpoint = format!("{}/xrpc/com.atproto.repo.putRecord", base_url().await);
    let response = http
        .post(endpoint)
        .bearer_auth(&jwt)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    let reply: serde_json::Value = response.json().await.unwrap();
    assert_eq!(reply["error"], "InvalidRecord");
}
/// Control case: `createRecord` of an ordinary post by a freshly created user
/// must be answered with 200.
#[tokio::test]
async fn test_clean_content_allowed() {
    let http = client();
    let (did, jwt) = setup_new_user("cleanpost").await;
    let payload = json!({
        "repo": did,
        "collection": "app.bsky.feed.post",
        "record": {
            "$type": "app.bsky.feed.post",
            "text": "This is a perfectly normal post about coding and technology!",
            "createdAt": chrono::Utc::now().to_rfc3339()
        }
    });
    let endpoint = format!("{}/xrpc/com.atproto.repo.createRecord", base_url().await);
    let response = http
        .post(endpoint)
        .bearer_auth(&jwt)
        .json(&payload)
        .send()
        .await
        .expect("Request failed");
    assert_eq!(response.status(), StatusCode::OK);
}