import time
import os
import random
import json
import re

try:
    # 'regex' on PyPI supports `\p{L}`, `\p{N}`, etc.
    import regex
    USE_REGEX_LIB = True
except ImportError:
    # Fallback to Python's built-in 're' if 'regex' isn't installed
    USE_REGEX_LIB = False

DICTIONARY_PATH = "dictionary/"  # Path to dictionary files

# (unit name, length in seconds), ordered from most to least significant.
_UPTIME_UNITS = (
    ("year", 31536000),   # 365 days
    ("month", 2592000),   # 30 days
    ("day", 86400),       # 24 hours
    ("hour", 3600),       # 60 minutes
    ("minute", 60),
    ("second", 1),
)


def format_uptime(seconds: float) -> tuple[str, int]:
    """
    Convert a duration in seconds into a short human-readable string.

    Only the two most significant non-zero units are shown, e.g.
    "32 minutes", "8 days, 4 hours" or "1 year, 3 months".
    A year is treated as 365 days and a month as 30 days.

    :param seconds: Duration in seconds. Fractions are truncated and
                    negative values are treated as zero.
    :return: A tuple of (human-readable string, total whole seconds).
    """
    # Keep the total in its own variable: the loop below consumes a running
    # remainder, which must not overwrite the value we return.
    total_seconds = max(int(seconds), 0)

    parts = []
    remaining = total_seconds
    for unit_name, unit_seconds in _UPTIME_UNITS:
        value, remaining = divmod(remaining, unit_seconds)
        if value:
            # Auto pluralize
            parts.append(f"{value} {unit_name}{'s' if value != 1 else ''}")

    if not parts:
        return ("0 seconds", 0)

    # Return only the two most significant time units (e.g., "3 days, 4 hours")
    return (", ".join(parts[:2]), total_seconds)


def get_random_reply(dictionary_name: str, category: str, **variables) -> str:
    """
    Fetch a random string from a given dictionary and category.

    Supports variable substitution using keyword arguments. Every failure is
    reported as a bracketed "[Error: ...]" string rather than an exception.

    :param dictionary_name: The name of the dictionary file (without .json)
    :param category: The category (key) inside the dictionary to fetch a response from
    :param variables: Keyword arguments to replace placeholders in the string
    :return: A formatted string with the variables replaced, or an
             "[Error: ...]" string if the file, the category or a placeholder
             value is missing or invalid.
    """
    file_path = os.path.join(DICTIONARY_PATH, f"{dictionary_name}.json")

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except FileNotFoundError:
        return f"[Error: Missing {dictionary_name}.json]"
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return f"[Error: Failed to load {dictionary_name}.json]"

    # The category must map to a list; only its string entries are usable,
    # because the chosen entry is treated as a format template below.
    entries = data.get(category) if isinstance(data, dict) else None
    candidates = (
        [entry for entry in entries if isinstance(entry, str)]
        if isinstance(entries, list)
        else []
    )
    if not candidates:
        return f"[Error: No valid entries for {category} in {dictionary_name}.json]"

    # Select a random reply
    response = random.choice(candidates)

    # Replace placeholders with provided variables
    try:
        return response.format(**variables)
    except (KeyError, IndexError, ValueError):
        # A placeholder had no matching variable, or the template is malformed.
        return f"[Error: Could not format entry for {category} in {dictionary_name}.json]"


##############################
# Basic sanitization
# DO NOT RELY SOLELY ON THIS
##############################

# Patterns are compiled once at import time instead of on every call.
_WHITESPACE_RUN_RE = re.compile(r"[\r\n\t]+")
_CONTROL_CHARS_RE = re.compile(r"[\x00-\x1F\x7F]")
# Digits, + - * / %, parentheses, decimal point, ^ for exponent, space, tab.
_CALC_DISALLOWED_RE = re.compile(r"[^0-9+\-*/%().^ \t]")
# Anything outside printable ASCII (32-126).
_ASCII_DISALLOWED_RE = re.compile(r"[^ -~]")
if USE_REGEX_LIB:
    # \p{L}: letters; \p{M}: combining marks; \p{N}: numbers; \p{P}: punctuation;
    # \p{S}: symbols; \p{Z}: separators (including spaces).
    # \p{M} is required so that scripts which rely on combining marks
    # (e.g. Devanagari, Thai, decomposed accents) are not corrupted.
    _UNICODE_DISALLOWED_RE = regex.compile(r"[^\p{L}\p{M}\p{N}\p{P}\p{S}\p{Z}]")


def sanitize_user_input(
    user_input: str,
    usage: str = "GENERAL",
    max_length: int = 500
) -> tuple[str, bool, str, str]:
    """
    A whitelisting-based function for sanitizing user input.

    Returns a tuple of:
        (sanitized_str, sanitization_applied_bool, sanitization_reason, original_str)

    :param user_input: The raw string from the user (e.g., from Twitch or Discord).
    :param usage:
        - 'CALC': Keep digits, math operators, parentheses, etc.
        - 'GENERAL': Keep typical readable characters & punctuation.
        Matching is case-insensitive; any value other than 'CALC' is
        handled as 'GENERAL'.
    :param max_length: Truncate the input if it exceeds this length.
    :return: (sanitized_str, bool, reason_string, original_str). The bool is
             True when the input was truncated or characters were removed;
             reason_string joins the individual reasons with "; ". Collapsing
             newlines/tabs into a space and trimming surrounding whitespace
             are not reported.

    ======================
    SECURITY RECOMMENDATIONS
    ======================
    1) For database storage (MariaDB, etc.):
       - **Always** use parameterized queries or an ORM with bound parameters.
       - Do not rely solely on string sanitization to prevent SQL injection.

    2) For code execution (e.g., 'eval'):
       - Avoid using eval/exec on user input.
       - If you must, consider a restricted math parser or an audited sandbox.

    3) For HTML sanitization:
       - Bleach is deprecated; research modern alternatives or frameworks
         that safely sanitize HTML output. This function does *not*
         sanitize HTML tags.
    """
    original_string = str(user_input)
    reasons = []

    # 1. Truncate and remove newlines, tabs, etc.
    truncated = original_string[:max_length]
    if len(truncated) < len(original_string):
        # Truncation discards user data, so it must be reported.
        reasons.append(f"Truncated input to {len(truncated)} characters.")
    normalized = _WHITESPACE_RUN_RE.sub(" ", truncated)

    # 2. Choose how to filter based on usage
    if usage.upper() == "CALC":
        filtered = _CALC_DISALLOWED_RE.sub("", normalized)
        removal_reason = "CALC: Removed non-math characters."
    else:
        # GENERAL usage: remove ASCII control chars (0-31, 127) first,
        # then apply the whitelist.
        without_controls = _CONTROL_CHARS_RE.sub("", normalized)
        if USE_REGEX_LIB:
            # Broad Unicode whitelist: keeps emojis, foreign characters,
            # typical punctuation, etc.
            filtered = _UNICODE_DISALLOWED_RE.sub("", without_controls)
            removal_reason = "GENERAL: Removed disallowed chars via regex."
        else:
            # Fallback: if 'regex' is not installed, keep ASCII printable only.
            filtered = _ASCII_DISALLOWED_RE.sub("", without_controls)
            removal_reason = "GENERAL: Removed non-ASCII or control chars (fallback)."

    if filtered != normalized:
        reasons.append(removal_reason)

    # 3. Final trim
    sanitized = filtered.strip()

    # 4. Prepare output
    return (sanitized, bool(reasons), "; ".join(reasons), original_string)