174 lines
6.3 KiB
Python
174 lines
6.3 KiB
Python
import time
|
|
import os
|
|
import random
|
|
import json
|
|
import re
|
|
|
|
# Optional dependency: the third-party 'regex' package (PyPI) understands
# Unicode property classes such as `\p{L}` and `\p{N}`; the built-in 're' does not.
try:
    import regex
except ImportError:
    # 'regex' is not installed: code must fall back to the built-in 're'.
    USE_REGEX_LIB = False
else:
    USE_REGEX_LIB = True

# Directory that holds the dictionary files.
DICTIONARY_PATH = "dictionary/"
|
|
|
|
def format_uptime(seconds: float) -> tuple[str, int]:
    """
    Convert a duration in seconds into a short human-readable string.

    Only the two largest non-zero units are shown, e.g.:
        "32 minutes"
        "8 days, 4 hours"
        "1 year, 3 months"

    A year is counted as 365 days and a month as 30 days.

    :param seconds: Duration in seconds. Fractions are dropped (truncated
                    toward zero); negative values are treated as 0.
    :return: A tuple (human-readable string, total whole seconds).
             A zero-length duration yields ("0 seconds", 0).
    """
    # Whole, non-negative seconds. Kept separately from the running remainder
    # so the total can be returned unchanged after the breakdown below.
    total_seconds = max(int(seconds), 0)

    # Units from largest to smallest
    units = (
        ("year", 31536000),   # 365 days
        ("month", 2592000),   # 30 days
        ("day", 86400),       # 24 hours
        ("hour", 3600),       # 60 minutes
        ("minute", 60),
        ("second", 1),
    )

    # Peel off each unit in turn; `remaining` is what is left for smaller units.
    remaining = total_seconds
    parts = []
    for unit_name, unit_seconds in units:
        value, remaining = divmod(remaining, unit_seconds)
        if value > 0:
            parts.append(f"{value} {unit_name}{'s' if value > 1 else ''}")  # Auto pluralize

    if not parts:
        return ("0 seconds", 0)

    # Keep only the two most significant non-zero units (e.g., "3 days, 4 hours")
    return (", ".join(parts[:2]), total_seconds)
|
|
|
|
def get_random_reply(dictionary_name: str, category: str, **variables) -> str:
    """
    Fetch a random string from a given dictionary and category.

    The dictionary is the JSON file "<dictionary_name>.json" inside
    DICTIONARY_PATH. Its top level is expected to be an object whose keys are
    categories and whose values are lists of template strings. One template is
    picked at random and its placeholders are filled with str.format().

    Problems with the file or the category are reported as a bracketed
    "[Error: ...]" string rather than an exception.

    :param dictionary_name: The name of the dictionary file (without .json)
    :param category: The category (key) inside the dictionary to fetch a response from
    :param variables: Keyword arguments to replace placeholders in the string
    :return: A formatted string with the variables replaced, or an
             "[Error: ...]" string if the file is missing or unreadable, or
             the category has no usable (string) entries
    :raises KeyError, IndexError, ValueError: propagated from str.format() when
             the chosen template uses a placeholder that was not supplied or
             contains malformed braces
    """
    file_path = os.path.join(DICTIONARY_PATH, f"{dictionary_name}.json")

    # Open directly instead of checking os.path.exists() first: this avoids a
    # check-then-use race and lets every read failure be handled in one place.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        return f"[Error: Missing {dictionary_name}.json]"
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers permission problems, a directory at that path, etc.
        return f"[Error: Failed to load {dictionary_name}.json]"

    # Ensure the category exists and holds a list (top level must be an object)
    entries = data.get(category) if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return f"[Error: No valid entries for {category} in {dictionary_name}.json]"

    # Only strings can be used as templates; an empty result would otherwise
    # make random.choice() raise IndexError.
    templates = [entry for entry in entries if isinstance(entry, str)]
    if not templates:
        return f"[Error: No valid entries for {category} in {dictionary_name}.json]"

    # Select a random reply
    response = random.choice(templates)

    # Replace placeholders with provided variables
    return response.format(**variables)
|
|
|
|
##############################
|
|
# Basic sanitization
|
|
# DO NOT RELY SOLELY ON THIS
|
|
##############################
|
|
def sanitize_user_input(
    user_input: str,
    usage: str = "GENERAL",
    max_length: int = 500
) -> tuple[str, bool, str, str]:
    """
    A whitelisting-based function for sanitizing user input.

    Processing order: truncate to max_length, replace runs of newlines/tabs
    with a single space, remove characters outside the whitelist for the given
    usage, then strip leading/trailing whitespace.

    Returns a tuple of:
        (sanitized_str, sanitization_applied_bool, sanitization_reason, original_str)

    sanitization_applied is True when the input was truncated or when
    characters were removed by the whitelist. Whitespace normalization alone
    (newline/tab replacement, final strip) is not reported.

    :param user_input: The raw string from the user (e.g., from Twitch or Discord).
                       Non-string values are converted with str().
    :param usage: Case-insensitive.
        - 'CALC': Keep digits, math operators, parentheses, etc.
        - 'GENERAL': Keep typical readable characters & punctuation.
        Any other value is treated as 'GENERAL'.
    :param max_length: Truncate the input if it exceeds this length.
    :return: (sanitized_str, bool, reason_string, original_str), where
             reason_string joins all applied reasons with "; " and is empty
             when nothing was reported.

    ======================
    SECURITY RECOMMENDATIONS
    ======================
    1) For database storage (MariaDB, etc.):
       - **Always** use parameterized queries or an ORM with bound parameters.
       - Do not rely solely on string sanitization to prevent SQL injection.

    2) For code execution (e.g., 'eval'):
       - Avoid using eval/exec on user input.
       - If you must, consider a restricted math parser or an audited sandbox.

    3) For HTML sanitization:
       - Bleach is deprecated; research modern alternatives or frameworks that
         safely sanitize HTML output. This function does *not* sanitize HTML tags.
    """
    original_string = str(user_input)
    reasons = []

    # 1. Truncate. Dropping the tail loses user data, so it is reported.
    truncated = original_string[:max_length]
    if truncated != original_string:
        reasons.append(f"Truncated input to {max_length} characters.")

    # Collapse newlines, carriage returns and tabs into single spaces.
    sanitized = re.sub(r"[\r\n\t]+", " ", truncated)

    # 2. Choose how to filter based on usage
    if usage.upper() == "CALC":
        # Allow digits, +, -, *, /, %, parentheses, decimal points, ^ for exponent, spaces
        # Remove everything else
        filtered = re.sub(r"[^0-9+\-*/%().^ \t]", "", sanitized)
        removal_reason = "CALC: Removed non-math characters."
    else:  # GENERAL usage (also the fallback for unrecognized usage values)
        # Remove ASCII control chars (0-31, 127) first
        without_controls = re.sub(r"[\x00-\x1F\x7F]", "", sanitized)

        if USE_REGEX_LIB:
            # Fairly broad whitelist:
            # \p{L}: letters; \p{N}: numbers; \p{P}: punctuation; \p{S}: symbols;
            # \p{Z}: separators (including spaces).
            # NOTE: categories M (combining marks) and C (control/format, e.g. the
            # zero-width joiner) are not whitelisted, so decomposed accents and
            # emoji joiners/variation selectors are removed.
            filtered = regex.sub(r"[^\p{L}\p{N}\p{P}\p{S}\p{Z}]", "", without_controls)
            removal_reason = "GENERAL: Removed disallowed chars via regex."
        else:
            # Fallback: 'regex' is not installed, keep ASCII printable (32-126) only.
            filtered = re.sub(r"[^ -~]", "", without_controls)
            removal_reason = "GENERAL: Removed non-ASCII or control chars (fallback)."

    if filtered != sanitized:
        reasons.append(removal_reason)

    # 3. Final trim
    sanitized = filtered.strip()

    # 4. Prepare output; anything recorded in `reasons` means sanitization was applied.
    reason_string = "; ".join(reasons)
    return (sanitized, bool(reasons), reason_string, original_string)
|
|
|