#include "CensorService.h"
|
|
#include <drogon/drogon.h>
|
|
#include <drogon/HttpClient.h>
|
|
#include <sstream>
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
|
|
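
// For reference, a minimal sketch of the class declaration this file assumes.
// It is inferred from how members are used below; the real CensorService.h
// may differ in details (access levels, constant values, extra members).
//
//   namespace services {
//   class CensorService {
//   public:
//       static CensorService& getInstance();
//       void initialize();
//       void scheduleFetch();
//       void invalidateCache();
//       void fetchCensoredWordsFromBackend();
//       void fetchCensoredWordsAsync();
//       std::string censor(const std::string& text);
//       bool containsCensoredWords(const std::string& text);
//   private:
//       CensorService() = default;
//       std::optional<std::regex> buildCombinedPattern(const std::vector<std::string>& words);
//       static constexpr std::size_t MAX_WORD_COUNT = ...;   // value not shown in this file
//       static constexpr std::size_t MAX_WORD_LENGTH = ...;  // value not shown in this file
//       bool initialized_ = false;
//       std::vector<std::string> censoredWords_;
//       std::optional<std::regex> combinedPattern_;
//       std::shared_mutex mutex_;
//   };
//   } // namespace services
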
namespace services {

CensorService& CensorService::getInstance() {
    static CensorService instance;
    return instance;
}

void CensorService::initialize() {
    initialized_ = true;
    LOG_INFO << "CensorService initialized";
}

void CensorService::scheduleFetch() {
    LOG_INFO << "Scheduling censored words fetch in 2 seconds...";
    drogon::app().getLoop()->runAfter(2.0, [this]() {
        LOG_INFO << "Pre-fetching censored words from backend...";
        fetchCensoredWordsAsync();
    });
    // No periodic refresh: cache invalidation is triggered by the backend
}

void CensorService::invalidateCache() {
    LOG_INFO << "Cache invalidation requested, fetching censored words from backend...";
    fetchCensoredWordsAsync();
}

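// A minimal sketch of how this service might be wired into the application.
// The entry point and the invalidation route below are illustrative
// assumptions, not taken from this repository.
//
//   // main.cpp (sketch)
//   #include <drogon/drogon.h>
//   #include "CensorService.h"
//
//   int main() {
//       drogon::app().loadConfigFile("config.json");
//       auto& censor = services::CensorService::getInstance();
//       censor.initialize();
//       censor.scheduleFetch();  // first fetch runs ~2 s after the loop starts
//
//       // Hypothetical internal endpoint the backend could call after it
//       // updates the word list, so this cache refreshes on demand.
//       drogon::app().registerHandler(
//           "/api/internal/censor-cache/invalidate",
//           [](const drogon::HttpRequestPtr&,
//              std::function<void(const drogon::HttpResponsePtr&)>&& callback) {
//               services::CensorService::getInstance().invalidateCache();
//               callback(drogon::HttpResponse::newHttpResponse());
//           },
//           {drogon::Post});
//
//       drogon::app().run();
//       return 0;
//   }
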
void CensorService::fetchCensoredWordsFromBackend() {
    auto config = drogon::app().getCustomConfig();
    auto backendConfig = config.get("backend_api", Json::Value::null);
    std::string host;
    int port;

    if (backendConfig.isNull() || !backendConfig.isMember("host")) {
        host = "drogon-backend";
        port = 8080;
    } else {
        host = backendConfig.get("host", "drogon-backend").asString();
        port = backendConfig.get("port", 8080).asInt();
    }

    auto client = drogon::HttpClient::newHttpClient("http://" + host + ":" + std::to_string(port));
    auto req = drogon::HttpRequest::newHttpRequest();
    req->setMethod(drogon::Get);
    req->setPath("/api/internal/censored-words");

    // Synchronous (blocking) request with a 5-second timeout
    std::pair<drogon::ReqResult, drogon::HttpResponsePtr> result = client->sendRequest(req, 5.0);

    if (result.first != drogon::ReqResult::Ok) {
        LOG_ERROR << "Failed to fetch censored words from backend: request failed";
        return;
    }

    auto resp = result.second;
    if (resp->getStatusCode() != drogon::k200OK) {
        LOG_ERROR << "Failed to fetch censored words from backend: HTTP " << resp->getStatusCode();
        return;
    }

    try {
        auto json = resp->getJsonObject();
        if (!json || !(*json)["success"].asBool()) {
            LOG_ERROR << "Failed to fetch censored words: invalid response";
            return;
        }

        std::string wordsStr = (*json)["censored_words"].asString();

        // Build new data in temporary variables
        std::vector<std::string> newWords;
        std::optional<std::regex> newPattern;

        if (!wordsStr.empty()) {
            std::stringstream ss(wordsStr);
            std::string word;
            while (std::getline(ss, word, ',') && newWords.size() < MAX_WORD_COUNT) {
                // Trim surrounding whitespace
                size_t start = word.find_first_not_of(" \t\r\n");
                size_t end = word.find_last_not_of(" \t\r\n");
                if (start != std::string::npos && end != std::string::npos) {
                    word = word.substr(start, end - start + 1);
                    // Skip empty words and words exceeding max length (ReDoS prevention)
                    if (!word.empty() && word.length() <= MAX_WORD_LENGTH) {
                        newWords.push_back(word);
                    } else if (word.length() > MAX_WORD_LENGTH) {
                        LOG_WARN << "Skipping censored word exceeding " << MAX_WORD_LENGTH << " chars";
                    }
                }
            }
            newPattern = buildCombinedPattern(newWords);
        }

        // Capture the count before the move; reading censoredWords_.size()
        // outside the lock could race with a concurrent refresh.
        const size_t fetchedCount = newWords.size();

        // Atomic swap under lock
        {
            std::unique_lock<std::shared_mutex> lock(mutex_);
            censoredWords_ = std::move(newWords);
            combinedPattern_ = std::move(newPattern);
        }

        LOG_DEBUG << "Fetched " << fetchedCount << " censored words from backend";
    } catch (const std::exception& e) {
        LOG_ERROR << "Error parsing censored words response: " << e.what();
    }
}

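// The parsing above implies the backend endpoint returns roughly the
// following JSON shape. Field names are taken from the code; the example
// words are placeholders.
//
//   GET /api/internal/censored-words
//   {
//       "success": true,
//       "censored_words": "word1, word2, word3"
//   }
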
void CensorService::fetchCensoredWordsAsync() {
    auto config = drogon::app().getCustomConfig();
    auto backendConfig = config.get("backend_api", Json::Value::null);
    std::string host;
    int port;

    if (backendConfig.isNull() || !backendConfig.isMember("host")) {
        host = "drogon-backend";
        port = 8080;
    } else {
        host = backendConfig.get("host", "drogon-backend").asString();
        port = backendConfig.get("port", 8080).asInt();
    }

    std::string url = "http://" + host + ":" + std::to_string(port);
    auto client = drogon::HttpClient::newHttpClient(url, drogon::app().getLoop());
    auto req = drogon::HttpRequest::newHttpRequest();
    req->setMethod(drogon::Get);
    req->setPath("/api/internal/censored-words");

    // The client is captured by value so it stays alive until the callback runs.
    client->sendRequest(req, [this, client](drogon::ReqResult result, const drogon::HttpResponsePtr& resp) {
        if (result != drogon::ReqResult::Ok) {
            LOG_ERROR << "Async fetch censored words failed";
            return;
        }

        if (resp->getStatusCode() != drogon::k200OK) {
            LOG_ERROR << "Async fetch censored words failed: HTTP " << resp->getStatusCode();
            return;
        }

        try {
            auto json = resp->getJsonObject();
            if (!json || !(*json)["success"].asBool()) {
                LOG_ERROR << "Async fetch censored words: invalid response";
                return;
            }

            std::string wordsStr = (*json)["censored_words"].asString();

            // Build new data in temporary variables
            std::vector<std::string> newWords;
            std::optional<std::regex> newPattern;

            if (!wordsStr.empty()) {
                std::stringstream ss(wordsStr);
                std::string word;
                while (std::getline(ss, word, ',') && newWords.size() < MAX_WORD_COUNT) {
                    // Trim surrounding whitespace
                    size_t start = word.find_first_not_of(" \t\r\n");
                    size_t end = word.find_last_not_of(" \t\r\n");
                    if (start != std::string::npos && end != std::string::npos) {
                        word = word.substr(start, end - start + 1);
                        // Skip empty words and words exceeding max length (ReDoS prevention)
                        if (!word.empty() && word.length() <= MAX_WORD_LENGTH) {
                            newWords.push_back(word);
                        } else if (word.length() > MAX_WORD_LENGTH) {
                            LOG_WARN << "Skipping censored word exceeding " << MAX_WORD_LENGTH << " chars";
                        }
                    }
                }
                newPattern = buildCombinedPattern(newWords);
            }

            // Capture the count before the move so the log does not read
            // censoredWords_ outside the lock.
            const size_t fetchedCount = newWords.size();

            // Atomic swap under lock
            {
                std::unique_lock<std::shared_mutex> lock(mutex_);
                censoredWords_ = std::move(newWords);
                combinedPattern_ = std::move(newPattern);
            }

            LOG_INFO << "Successfully fetched " << fetchedCount << " censored words from backend";
        } catch (const std::exception& e) {
            LOG_ERROR << "Error parsing async censored words response: " << e.what();
        }
    }, 10.0);
}

std::optional<std::regex> CensorService::buildCombinedPattern(const std::vector<std::string>& words) {
    if (words.empty()) {
        return std::nullopt;
    }

    try {
        // Build combined pattern: \b(word1|word2|word3)\b
        std::string pattern = "\\b(";
        bool first = true;

        for (const auto& word : words) {
            if (!first) {
                pattern += "|";
            }
            first = false;

            // Escape special regex characters
            for (char c : word) {
                if (c == '.' || c == '^' || c == '$' || c == '*' || c == '+' ||
                    c == '?' || c == '(' || c == ')' || c == '[' || c == ']' ||
                    c == '{' || c == '}' || c == '|' || c == '\\') {
                    pattern += '\\';
                }
                pattern += c;
            }
        }

        pattern += ")\\b";

        return std::regex(pattern, std::regex_constants::icase);
    } catch (const std::regex_error& e) {
        LOG_ERROR << "Failed to build combined censored pattern: " << e.what();
        return std::nullopt;
    }
}

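// For example, buildCombinedPattern({"darn", "heck"}) yields the
// case-insensitive pattern \b(darn|heck)\b; the real word list comes from
// the backend, so these words are placeholders only.
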
std::string CensorService::censor(const std::string& text) {
    if (text.empty()) {
        return text;
    }

    std::shared_lock<std::shared_mutex> lock(mutex_);

    if (!combinedPattern_) {
        return text;
    }

    std::string result;
    try {
        // Replace censored words with asterisks
        std::sregex_iterator begin(text.begin(), text.end(), *combinedPattern_);
        std::sregex_iterator end;

        size_t lastPos = 0;
        for (std::sregex_iterator it = begin; it != end; ++it) {
            const std::smatch& match = *it;
            // Append text before match
            result += text.substr(lastPos, match.position() - lastPos);
            // Replace match with fixed asterisks
            result += "****";
            lastPos = match.position() + match.length();
        }
        // Append remaining text
        result += text.substr(lastPos);
    } catch (const std::regex_error& e) {
        LOG_ERROR << "Regex replace error: " << e.what();
        return text;
    }

    return result;
}

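// Illustrative behaviour, assuming "darn" is on the fetched list (the actual
// list lives on the backend): censor("That was a DARN good game") returns
// "That was a **** good game". Every match is replaced by exactly four
// asterisks, regardless of the matched word's length.
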
bool CensorService::containsCensoredWords(const std::string& text) {
    if (text.empty()) {
        return false;
    }

    std::shared_lock<std::shared_mutex> lock(mutex_);

    if (!combinedPattern_) {
        return false;
    }

    try {
        return std::regex_search(text, *combinedPattern_);
    } catch (const std::regex_error& e) {
        LOG_ERROR << "Regex search error: " << e.what();
        return false;
    }
}

} // namespace services
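
// A hedged sketch of how request-handling code might call into this service.
// The fields below (comment.title, comment.body) are illustrative
// assumptions, not part of this file.
//
//   auto& censor = services::CensorService::getInstance();
//   if (censor.containsCensoredWords(comment.title)) {
//       // e.g. reject the submission outright, or fall back to masking:
//       comment.title = censor.censor(comment.title);
//   }
//   comment.body = censor.censor(comment.body);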