初始化 telegram-downloader 并接入群晖 CI/CD

2026-04-22 21:29:03 +08:00
commit cf40343c51
153 changed files with 33376 additions and 0 deletions
@@ -0,0 +1,5 @@
+"""Init namespace"""
+
+# Package metadata exposed at the package root.
+__version__ = "2.2.5"  # release version of this package
+__license__ = "MIT License"  # name of the license
+__copyright__ = "Copyright (C) 2025 tangyoha <https://github.com/tangyoha>"  # copyright notice
@@ -0,0 +1,77 @@
+"""Crypto utils"""
+
+import base64
+
+from Crypto.Cipher import AES
+
+
+class AesBase64(object):
+    """for AES encryption"""
+
+    def __init__(self, key: str, iv: str):
+        self.key = key.encode("utf-8")
+        self.iv = iv.encode("utf-8")
+        self.mode = AES.MODE_CBC
+
+    def encrypt(self, content):
+        """
+        Encrypts the given content using the AES encryption algorithm.
+
+        Parameters:
+            content (str): The content to be encrypted.
+
+        Returns:
+            str: The encrypted content encoded in base64.
+        """
+        cipher = AES.new(self.key, AES.MODE_CBC, self.iv)
+        content_padding = self.pkcs7padding(content)
+        encrypt_bytes = cipher.encrypt(content_padding.encode("utf-8"))
+        return base64.b64encode(encrypt_bytes)
+
+    def decrypt(self, content):
+        """
+        Decrypts the given content using AES encryption
+        with Cipher Block Chaining (CBC) mode.
+
+        Parameters:
+            content (str): The content to be decrypted.
+
+        Returns:
+            str: The decrypted text.
+        """
+        cipher = AES.new(self.key, AES.MODE_CBC, self.iv)
+        content = base64.b64decode(content)
+        text = cipher.decrypt(content).decode("utf-8")
+        return self.pkcs7unpadding(text)
+
+    def pkcs7unpadding(self, text):
+        """
+        Removes the PKCS#7 padding from the given text.
+
+        Parameters:
+            text (str): The text to remove padding from.
+
+        Returns:
+            str: The text without PKCS#7 padding.
+        """
+        length = len(text)
+        unpadding = ord(text[length - 1])
+        return text[0 : length - unpadding]
+
+    def pkcs7padding(self, text):
+        """
+        Adds PKCS7 padding to the given text.
+
+        Args:
+            text (str): The text to be padded.
+
+        Returns:
+            str: The padded text.
+        """
+        bs = 16
+        length = len(text)
+        bytes_length = len(text.encode("utf-8"))
+        padding_size = length if (bytes_length == length) else bytes_length
+        padding = bs - padding_size % bs
+        padding_text = chr(padding) * padding
+        return text + padding_text
@@ -0,0 +1,78 @@
+"""Utility functions to handle downloaded files."""
+import glob
+import os
+import pathlib
+from hashlib import md5
+
+
+def get_next_name(file_path: str) -> str:
+    """
+    Get next available name to download file.
+
+    Parameters
+    ----------
+    file_path: str
+        Absolute path of the file for which next available name to
+        be generated.
+
+    Returns
+    -------
+    str
+        Absolute path of the next available name for the file.
+    """
+    posix_path = pathlib.Path(file_path)
+    counter: int = 1
+    new_file_name: str = os.path.join("{0}", "{1}-copy{2}{3}")
+    while os.path.isfile(
+        new_file_name.format(
+            posix_path.parent,
+            posix_path.stem,
+            counter,
+            "".join(posix_path.suffixes),
+        )
+    ):
+        counter += 1
+    return new_file_name.format(
+        posix_path.parent,
+        posix_path.stem,
+        counter,
+        "".join(posix_path.suffixes),
+    )
+
+
+def manage_duplicate_file(file_path: str):
+    """
+    Check if a file is duplicate.
+
+    Compare the md5 of files with copy name pattern
+    and remove if the md5 hash is same.
+
+    Parameters
+    ----------
+    file_path: str
+        Absolute path of the file for which duplicates needs to
+        be managed.
+
+    Returns
+    -------
+    str
+        Absolute path of the duplicate managed file.
+    """
+    # pylint: disable = R1732
+    posix_path = pathlib.Path(file_path)
+    file_base_name: str = "".join(posix_path.stem.split("-copy")[0])
+    name_pattern: str = f"{posix_path.parent}/{file_base_name}*"
+    # Reason for using `str.translate()`
+    # https://stackoverflow.com/q/22055500/6730439
+    old_files: list = glob.glob(
+        name_pattern.translate({ord("["): "[[]", ord("]"): "[]]"})
+    )
+    if file_path in old_files:
+        old_files.remove(file_path)
+    current_file_md5: str = md5(open(file_path, "rb").read()).hexdigest()
+    for old_file_path in old_files:
+        old_file_md5: str = md5(open(old_file_path, "rb").read()).hexdigest()
+        if current_file_md5 == old_file_md5:
+            os.remove(file_path)
+            return old_file_path
+    return file_path
@@ -0,0 +1,286 @@
+"""util format"""
+
+import math
+import os
+import re
+import unicodedata
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, Union
+from urllib.parse import parse_qs, urlparse
+
+
+@dataclass
+class Link:
+    """Telegram Link: the components parsed out of a link; absent parts stay None."""
+
+    # Chat name (or "me"/"self") as str, or a numeric chat id as int.
+    group_id: Union[str, int, None] = None
+    # Id of the post (message) the link points at.
+    post_id: Optional[int] = None
+    # Value of the link's "comment" query parameter.
+    comment_id: Optional[int] = None
+    # Topic id taken from the link path.
+    topic_id: Optional[int] = None
+
+
+def format_byte(size: float, dot=2):
+    """format byte"""
+
+    # pylint: disable = R0912
+    if 0 <= size < 1:
+        human_size = str(round(size / 0.125, dot)) + "b"
+    elif 1 <= size < 1024:
+        human_size = str(round(size, dot)) + "B"
+    elif math.pow(1024, 1) <= size < math.pow(1024, 2):
+        human_size = str(round(size / math.pow(1024, 1), dot)) + "KB"
+    elif math.pow(1024, 2) <= size < math.pow(1024, 3):
+        human_size = str(round(size / math.pow(1024, 2), dot)) + "MB"
+    elif math.pow(1024, 3) <= size < math.pow(1024, 4):
+        human_size = str(round(size / math.pow(1024, 3), dot)) + "GB"
+    elif math.pow(1024, 4) <= size < math.pow(1024, 5):
+        human_size = str(round(size / math.pow(1024, 4), dot)) + "TB"
+    elif math.pow(1024, 5) <= size < math.pow(1024, 6):
+        human_size = str(round(size / math.pow(1024, 5), dot)) + "PB"
+    elif math.pow(1024, 6) <= size < math.pow(1024, 7):
+        human_size = str(round(size / math.pow(1024, 6), dot)) + "EB"
+    elif math.pow(1024, 7) <= size < math.pow(1024, 8):
+        human_size = str(round(size / math.pow(1024, 7), dot)) + "ZB"
+    elif math.pow(1024, 8) <= size < math.pow(1024, 9):
+        human_size = str(round(size / math.pow(1024, 8), dot)) + "YB"
+    elif math.pow(1024, 9) <= size < math.pow(1024, 10):
+        human_size = str(round(size / math.pow(1024, 9), dot)) + "BB"
+    elif math.pow(1024, 10) <= size < math.pow(1024, 11):
+        human_size = str(round(size / math.pow(1024, 10), dot)) + "NB"
+    elif math.pow(1024, 11) <= size < math.pow(1024, 12):
+        human_size = str(round(size / math.pow(1024, 11), dot)) + "DB"
+    elif math.pow(1024, 12) <= size:
+        human_size = str(round(size / math.pow(1024, 12), dot)) + "CB"
+    else:
+        raise ValueError(
+            f'format_byte() takes number than or equal to 0, " \
+            " but less than 0 given. {size}'
+        )
+    return human_size
+
+
+class SearchDateTimeResult:
+    """Search result for datetime.
+
+    Attributes
+    ----------
+    value: str
+        The date/time that was found, rendered as text.
+    right_str: str
+        Text that follows the found date/time.
+    left_str: str
+        Text that precedes the found date/time.
+    match: bool
+        Whether a date/time was found.
+    """
+
+    def __init__(
+        self,
+        value: str = "",
+        right_str: str = "",
+        left_str: str = "",
+        match: bool = False,
+    ):
+        self.value = value
+        self.right_str = right_str
+        self.left_str = left_str
+        self.match = match
+
+
+def get_date_time(text: str, fmt: str) -> SearchDateTimeResult:
+    """Get first of date time,and split two part
+
+    Parameters
+    ----------
+    text: str
+        ready to search text
+
+    Returns
+    -------
+    SearchDateTimeResult
+
+    """
+    res = SearchDateTimeResult()
+    search_text = re.sub(r"\s+", " ", text)
+    regex_list = [
+        # 2013.8.15 22:46:21
+        r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}[ ]{1,}\d{1,2}:\d{1,2}:\d{1,2}",
+        # "2013.8.15 22:46"
+        r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}[ ]{1,}\d{1,2}:\d{1,2}",
+        # "2014.5.11"
+        r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}",
+        # "2014.5"
+        r"\d{4}[-/\.]{1}\d{1,2}",
+    ]
+
+    format_list = [
+        "%Y-%m-%d %H:%M:%S",
+        "%Y-%m-%d %H:%M",
+        "%Y-%m-%d",
+        "%Y-%m",
+    ]
+
+    for i, value in enumerate(regex_list):
+        search_res = re.search(value, search_text)
+        if search_res:
+            time_str = search_res.group(0)
+            try:
+                res.value = datetime.strptime(
+                    time_str.replace("/", "-").replace(".", "-").strip(), format_list[i]
+                ).strftime(fmt)
+            except Exception:
+                break
+            if search_res.start() != 0:
+                res.left_str = search_text[0 : search_res.start()]
+            if search_res.end() + 1 <= len(search_text):
+                res.right_str = search_text[search_res.end() :]
+            res.match = True
+            return res
+
+    return res
+
+
+def replace_date_time(text: str, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
+    """Replace text all datetime to the right fmt
+
+    Parameters
+    ----------
+    text: str
+        ready to search text
+
+    fmt: str
+        the right datetime format
+
+    Returns
+    -------
+    str
+        The right format datetime str
+
+    """
+
+    if not text:
+        return text
+    res_str = ""
+    res = get_date_time(text, fmt)
+    if not res.match:
+        return text
+    if res.left_str:
+        res_str += replace_date_time(res.left_str)
+    res_str += res.value
+    if res.right_str:
+        res_str += replace_date_time(res.right_str)
+
+    return res_str
+
+
+_BYTE_UNIT = ["B", "KB", "MB", "GB", "TB"]
+
+
+def get_byte_from_str(byte_str: str) -> Optional[int]:
+    """Get byte from str
+
+    Parameters
+    ----------
+    byte_str: str
+        Include byte str
+
+    Returns
+    -------
+    int
+        Byte
+    """
+    search_res = re.match(r"(\d{1,})(B|KB|MB|GB|TB)", byte_str)
+    if search_res:
+        unit_str = search_res.group(2)
+        unit: int = 1
+        for it in _BYTE_UNIT:
+            if it == unit_str:
+                break
+            unit *= 1024
+
+        return int(search_res.group(1)) * unit
+
+    return None
+
+
+def truncate_filename(path: str, limit: int = 230) -> str:
+    """Truncate filename to the max len.
+
+    Parameters
+    ----------
+    path: str
+        File name path
+
+    limit: int
+        limit file name len(utf-8 byte)
+
+    Returns
+    -------
+    str
+        if file name len more than limit then return truncate filename or return filename
+
+    """
+    p, f = os.path.split(os.path.normpath(path))
+    f, e = os.path.splitext(f)
+    f_max = limit - len(e.encode("utf-8"))
+    f = unicodedata.normalize("NFC", f)
+    f_trunc = f.encode()[:f_max].decode("utf-8", errors="ignore")
+    return os.path.join(p, f_trunc + e)
+
+
+def extract_info_from_link(link: str) -> Link:
+    """Extract info from link"""
+    if link in ("me", "self"):
+        return Link(group_id=link)
+
+    try:
+        u = urlparse(link)
+        paths = [p for p in u.path.split("/") if p]
+        query = parse_qs(u.query)
+    except ValueError:
+        return Link()
+
+    result = Link()
+
+    if "comment" in query:
+        result.group_id = paths[0]
+        result.comment_id = int(query["comment"][0])
+    elif len(paths) == 1 and paths[0] != "c":
+        result.group_id = paths[0]
+    elif len(paths) == 2:
+        if paths[0] == "c":
+            result.group_id = int(f"-100{paths[1]}")
+        else:
+            result.group_id = paths[0]
+            result.post_id = int(paths[1])
+    elif len(paths) == 3:
+        if paths[0] == "c":
+            result.group_id = int(f"-100{paths[1]}")
+            result.post_id = int(paths[2])
+        else:
+            result.group_id = paths[0]
+            result.topic_id = int(paths[1])
+            result.post_id = int(paths[2])
+    elif len(paths) == 4 and paths[0] == "c":
+        result.group_id = int(f"-100{paths[1]}")
+        result.topic_id = int(paths[2])
+        result.post_id = int(paths[3])
+
+    return result
+
+
+def validate_title(title: str) -> str:
+    """Fix if title validation fails
+
+    Parameters
+    ----------
+    title: str
+        Chat title
+
+    """
+
+    r_str = r"[/\\:*?\"<>|\n]"  # '/ \ : * ? " < > |'
+    new_title = re.sub(r_str, "_", title)
+    return new_title
+
+
+def create_progress_bar(progress, total_bars=10):
+    """
+    example
+    progress = 50
+    progress_bar = create_progress_bar(progress)
+    print(f'Progress: [{progress_bar}] ({progress}%)')
+    """
+    completed_bars = int(progress * total_bars / 100)
+    remaining_bars = total_bars - completed_bars
+    progress_bar = "█" * completed_bars + "░" * remaining_bars
+    return progress_bar
@@ -0,0 +1,16 @@
+"""Util module to handle logs."""
+import logging
+
+
+class LogFilter(logging.Filter):
+    """
+    Custom Log Filter.
+
+    Ignore logs from specific functions.
+    """
+
+    # pylint: disable = W0221
+    def filter(self, record):
+        if record.funcName in ("invoke"):
+            return False
+        return True
@@ -0,0 +1,23 @@
+"""Utility module to manage meta info."""
+import platform
+
+from rich.console import Console
+
+from . import __copyright__, __license__, __version__
+
+# Application name followed by the package version.
+APP_VERSION = f"Telegram Media Downloader {__version__}"
+# Python implementation and version of the running interpreter.
+DEVICE_MODEL = f"{platform.python_implementation()} {platform.python_version()}"
+# Operating system name and release as reported by `platform`.
+SYSTEM_VERSION = f"{platform.system()} {platform.release()}"
+# Language code; print_meta logs it in upper case.
+LANG_CODE = "en"
+
+
+def print_meta(logger):
+    """Prints meta-data of the downloader script."""
+    console = Console()
+    # pylint: disable = C0301
+    console.log(
+        f"[bold]Telegram Media Downloader v{__version__}[/bold],\n[i]{__copyright__}[/i]"
+    )
+    console.log(f"Licensed under the terms of the {__license__}", end="\n\n")
+    logger.info(f"Device: {DEVICE_MODEL} - {APP_VERSION}")
+    logger.info(f"System: {SYSTEM_VERSION} ({LANG_CODE.upper()})")
@@ -0,0 +1,122 @@
+"""Meta data for download filter"""
+
+
+class ReString:
+    """Wrapper around a string meant for regular-expression matching.
+
+    NOTE(review): presumably lets the filter code tell a regex pattern apart
+    from a plain string value; confirm against the code that consumes it.
+    """
+
+    def __init__(self, re_string: str):
+        # The wrapped pattern text, stored unchanged.
+        self.re_string = re_string
+
+
+class NoneObj:
+    """Stateless placeholder object used to match None.
+
+    NOTE(review): presumably stands in for a missing value during filter
+    evaluation; confirm against the code that consumes it.
+    """
+
+    def __init__(self):
+        # Nothing to initialise: instances carry no state.
+        pass
+
+
+# pylint: disable=R0902
+# pylint: disable=R0913
+class MetaData:
+    """
+    * `message_date` : - Date the message was sent
+    * like: message_date > 2022.03.04 && message_date < 2022.03.08
+    * `message_id` : - Message 's id
+    * `media_file_size` : - File size
+    * `media_width` : - Include photo and video
+    * `media_height` : - Include photo and video
+    * `media_file_name` : - file name
+    * `message_caption` : - message_caption
+    * `message_duration` : - message_duration
+    * `sender_id` : - Sender id, empty for messages sent to channels.
+    * `sender_name` : - Sender name, empty for messages sent to channels.
+    " `reply_to_message_id` : - reply_to_message_id
+    """
+
+    AVAILABLE_MEDIA = (
+        "audio",
+        "document",
+        "photo",
+        "sticker",
+        "animation",
+        "video",
+        "voice",
+        "video_note",
+        "new_chat_photo",
+    )
+
+    def __init__(
+        self,
+        message_date: str = None,
+        message_id: int = None,
+        message_caption: str = None,
+        media_file_size: int = None,
+        media_width: int = None,
+        media_height: int = None,
+        media_file_name: str = None,
+        media_duration: int = None,
+        media_type: str = None,
+        file_extension: str = None,
+        sender_id: int = None,
+        sender_name: str = None,
+        reply_to_message_id: int = None,
+        message_thread_id: int = None,
+    ):
+        self.message_date = message_date
+        self.message_id = message_id
+        self.message_caption = message_caption
+        self.media_file_size = media_file_size
+        self.media_width = media_width
+        self.media_height = media_height
+        self.media_file_name = media_file_name
+        self.media_duration = media_duration
+        self.media_type = media_type
+        self.file_extension = file_extension
+        self.sender_id = sender_id
+        self.sender_name = sender_name
+        self.reply_to_message_id = reply_to_message_id
+        self.message_thread_id = message_thread_id
+
+    def data(self) -> dict:
+        """Meta map"""
+        return {
+            "message_date": self.message_date,
+            "message_id": self.message_id,
+            "message_caption": self.message_caption,
+            "media_file_size": self.media_file_size,
+            "media_width": self.media_width,
+            "media_height": self.media_height,
+            "media_file_name": self.media_file_name,
+            "media_duration": self.media_duration,
+            "id": self.message_id,
+            "caption": self.message_caption,
+            "file_size": self.media_file_size,
+            "file_name": self.media_file_name,
+            "media_type": self.media_type,
+            "file_extension": self.file_extension,
+            "sender_id": self.sender_id,
+            "sender_name": self.sender_name,
+            "reply_to_message_id": self.reply_to_message_id,
+            "message_thread_id": self.message_thread_id,
+            "topic_id": self.message_thread_id,
+        }
+
+    def export(self) -> dict:
+        """Export meta data"""
+        return {
+            "message_date": self.message_date,
+            "message_id": self.message_id,
+            "message_caption": self.message_caption,
+            "media_file_size": self.media_file_size,
+            "media_width": self.media_width,
+            "media_height": self.media_height,
+            "media_file_name": self.media_file_name,
+            "media_duration": self.media_duration,
+            "media_type": self.media_type,
+            "file_extension": self.file_extension,
+            "sender_id": self.sender_id,
+            "sender_name": self.sender_name,
+            "reply_to_message_id": self.reply_to_message_id,
+            "message_thread_id": self.message_thread_id,
+            "topic_id": self.message_thread_id,
+        }
@@ -0,0 +1,35 @@
+"""for package download"""
+
+import platform
+
+# def get_platform() -> str:
+#     """Get platform title
+#     Returns
+#     -------
+#     str
+#         window amd64 return "windows-amd64"
+#     """
+#     sys_platform = platform.system().lower()
+#     platform_str: str = sys_platform
+#     if "macos" in sys_platform:
+#         platform_str = "osx"
+
+#     machine = platform.machine().lower()
+
+#     if "i386" in machine:
+#         platform_str += "-386"
+#     else:
+#         platform_str += "-" + machine
+
+#     return platform_str
+
+
+def get_exe_ext() -> str:
+    """Get exe ext
+    Returns
+    str
+        if in window then return "exe" other return ""
+    """
+    if "windows" in platform.system().lower():
+        return ".exe"
+    return ""
@@ -0,0 +1,80 @@
+"""Utility module to check for new release of telegram-media-downloader"""
+import json
+
+import requests  # type: ignore
+from loguru import logger
+from rich.console import Console
+from rich.markdown import Markdown
+
+from . import __version__
+
+
+# pylint: disable = C0301
+def get_latest_release(proxy_config: dict = None) -> dict:
+    """
+    Get the latest release information.
+
+    :param proxy_config: A dictionary containing proxy configuration settings (default: {}).
+    :type proxy_config: dict
+    :return: A dictionary containing the latest release information.
+    :rtype: dict
+    """
+    headers: dict = {
+        "Content-Type": "application/json",
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
+    }
+
+    if proxy_config:
+        scheme = proxy_config.get("scheme", "")
+        hostname = proxy_config.get("hostname", "")
+        port = proxy_config.get("port", "")
+        username = proxy_config.get("username")
+        password = proxy_config.get("password")
+
+    proxies = {}
+    if proxy_config:
+        proxies = {
+            "http": f"{scheme}://{hostname}:{port}",
+            "https": f"{scheme}://{hostname}:{port}",
+        }
+
+        if username and password:
+            proxies["http"] = f"{scheme}://{username}:{password}@{hostname}:{port}"
+            proxies["https"] = f"{scheme}://{username}:{password}@{hostname}:{port}"
+    try:
+        response = requests.get(
+            url="https://api.github.com/repos/tangyoha/telegram_media_downloader/releases/latest",
+            headers=headers,
+            proxies=proxies,
+            timeout=60,
+        )
+
+    except Exception as e:
+        logger.warning(f"{e}")
+        return {}
+
+    latest_release: dict = json.loads(response.text)
+
+    if f"v{__version__}" != latest_release["tag_name"]:
+        return latest_release
+
+    return {}
+
+
+def check_for_updates(proxy_config: dict = None):
+    """Checks for new releases.
+
+    Using Github API checks for new release and prints information of new release if available.
+    """
+    console = Console()
+    latest_release = get_latest_release(proxy_config)
+    try:
+        if latest_release:
+            update_message: str = (
+                f"## New version of Telegram-Media-Downloader is available - {latest_release['name']}\n"
+                f"You are using an outdated version v{__version__} please pull in the changes using `git pull` or download the latest release.\n\n"
+                f"Find more details about the latest release here - {latest_release['html_url']}"
+            )
+            console.print(Markdown(update_message))
+    except Exception as e:
+        logger.warning(f"{e}")