From 7531b8f2947c60c3a0a36dd44c39e3b46c4dd58e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Deuchnord?= Date: Sun, 7 Nov 2021 17:36:58 +0100 Subject: [PATCH] feat: add link moderator --- poetry.lock | 66 +++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + twason/config.py | 10 +++++++ twason/moderator.py | 52 +++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 34e986f..feec426 100644 --- a/poetry.lock +++ b/poetry.lock @@ -84,6 +84,38 @@ files = [ {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, ] +[[package]] +name = "filelock" +version = "3.18.0" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, + {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy 
(>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + [[package]] name = "irc3" version = "1.1.10" @@ -146,7 +178,7 @@ version = "4.3.8" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, @@ -157,6 +189,36 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.14.1)"] +[[package]] +name = "uritools" +version = "5.0.0" +description = "URI parsing, classification and composition" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "uritools-5.0.0-py3-none-any.whl", hash = "sha256:cead3a49ba8fbca3f91857343849d506d8639718f4a2e51b62e87393b493bd6f"}, + {file = "uritools-5.0.0.tar.gz", hash = "sha256:68180cad154062bd5b5d9ffcdd464f8de6934414b25462ae807b00b8df9345de"}, +] + +[[package]] +name = "urlextract" +version = "1.9.0" +description = "Collects and extracts URLs from given text." 
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "urlextract-1.9.0-py3-none-any.whl", hash = "sha256:f88963532488b1c7c405e21bd162ae97871754ea04b60e18d33ee075b19b82fd"}, + {file = "urlextract-1.9.0.tar.gz", hash = "sha256:70508e02ba9df372e25cf0642db367cece273e8712cd0ce78178fc5dd7ea00db"}, +] + +[package.dependencies] +filelock = "*" +idna = "*" +platformdirs = "*" +uritools = "*" + [[package]] name = "venusian" version = "3.1.1" @@ -176,4 +238,4 @@ testing = ["coverage", "pytest", "pytest-cov"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "5d7d7513799a39e01fdb8f7159317204c36855b5948f12b89a71ef7b47bd6a1d" +content-hash = "2f1ca53ab2001cae84774e065f5289b84fe9a77ae87307012ffc6fff0db436d4" diff --git a/pyproject.toml b/pyproject.toml index 68d59c8..7a75966 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ twason = 'twason.__main__:main' [tool.poetry.dependencies] python = "^3.12" irc3 = "^1.1" +urlextract = "^1.9" [tool.poetry.group.dev.dependencies] black = "^25.1" diff --git a/twason/config.py b/twason/config.py index 7167af9..1ed1446 100644 --- a/twason/config.py +++ b/twason/config.py @@ -166,6 +166,16 @@ class Config: moderator_config.get("min-time-between-occurrence", None), ) ) + if mod == 'links': + moderators.append(moderator.LinksModerator( + moderator_config.get( + "message", + "{author}, your message contained forbidden links, it had to be removed for safety." 
class LinksModerator(Moderator):
    """Moderator that reacts to messages containing non-authorized links.

    Every URL found in a message is compared against a list of authorized
    patterns. The patterns use ``fnmatch`` glob syntax (``*``, ``?``,
    ``[seq]``). As soon as one URL matches none of them, the moderator votes
    for its configured decision; otherwise it abstains.
    """

    def __init__(
        self,
        message: str,
        decision: ModerationDecision,
        timeout_duration: Union[None, int],
        authorized_urls: list[str],
    ):
        """Create the moderator.

        :param message: forwarded unchanged to ``Moderator.__init__``.
        :param decision: forwarded to ``Moderator.__init__``; returned by
            :meth:`vote` when a message contains a non-authorized link.
        :param timeout_duration: forwarded unchanged to
            ``Moderator.__init__``.
        :param authorized_urls: ``fnmatch`` patterns describing the links
            that are allowed in messages.
        """
        super().__init__(message, decision, timeout_duration)
        self.authorized_urls = authorized_urls
        # Build the extractor once per moderator instead of once per message:
        # it does not depend on the message being inspected.
        self._url_extractor = URLExtract()

    def get_name(self) -> str:
        """Return the display name of this moderator."""
        return 'Link'

    def vote(self, msg: str, author: str) -> ModerationDecision:
        """Vote on a message depending on the links it contains.

        :param msg: text of the message to inspect.
        :param author: author of the message (not used by this moderator).
        :return: ``ModerationDecision.ABSTAIN`` when the message contains no
            link or only authorized links, the configured decision otherwise.
        """
        links = self._url_extractor.find_urls(msg)

        if not links:
            return ModerationDecision.ABSTAIN

        if not self.are_urls_authorized(links):
            return self.decision

        return ModerationDecision.ABSTAIN

    def are_urls_authorized(self, links: list[str]) -> bool:
        """Tell whether every link matches at least one authorized pattern.

        :param links: URLs extracted from a message.
        :return: ``True`` when all the links are authorized (including when
            ``links`` is empty), ``False`` as soon as one link is not.
        """
        return all(self._is_link_authorized(link) for link in links)

    def _is_link_authorized(self, link: str) -> bool:
        """Tell whether a single link matches one of the authorized patterns."""
        # The link is tried as extracted and with an explicit scheme, so that
        # a pattern written with "http://" or "https://" also matches a link
        # that was posted without any scheme.
        # NOTE(review): fnmatch() normalizes case according to the host OS, so
        # matching may be case-sensitive or not depending on the platform.
        candidates = (link, f"http://{link}", f"https://{link}")
        return any(
            fnmatch(candidate, pattern)
            for pattern in self.authorized_urls
            for candidate in candidates
        )


def dump(what):
    """Print ``what`` and return it unchanged.

    Debugging helper: it is no longer called by ``LinksModerator`` and is
    only kept so that the name stays available at module level.
    """
    print(what)
    return what