287 lines
7.7 KiB
Python
287 lines
7.7 KiB
Python
"""util format"""
|
|
|
|
import math
|
|
import os
|
|
import re
|
|
import unicodedata
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from typing import Optional, Union
|
|
from urllib.parse import parse_qs, urlparse
|
|
|
|
|
|
@dataclass
class Link:
    """Pieces of information extracted from a Telegram link.

    Every field defaults to ``None``, meaning that the corresponding part
    was not present in (or could not be read from) the link.
    """

    # Chat the link points at: either a name (str) or a numeric id (int).
    group_id: Optional[Union[str, int]] = None
    # Id of the linked post, when the link addresses a single post.
    post_id: Optional[int] = None
    # Id of the linked comment, when the link carries one.
    comment_id: Optional[int] = None
    # Id of the topic the post belongs to, when the link carries one.
    topic_id: Optional[int] = None
|
|
|
|
|
|
def format_byte(size: float, dot: int = 2) -> str:
    """Render a byte count as a short human-readable string.

    Parameters
    ----------
    size: float
        Number of bytes. Must be greater than or equal to 0.

    dot: int
        Number of decimal places handed to ``round``.

    Returns
    -------
    str
        The scaled value immediately followed by its unit. Values below one
        byte are expressed in bits with the suffix ``b``; everything else
        uses 1024-based units from ``B`` up to ``CB``. ``CB`` is also used
        for any value beyond its own range.

    Raises
    ------
    ValueError
        If ``size`` is negative or NaN.

    """
    units = (
        "B",
        "KB",
        "MB",
        "GB",
        "TB",
        "PB",
        "EB",
        "ZB",
        "YB",
        "BB",
        "NB",
        "DB",
        "CB",
    )

    # ``not size >= 0`` instead of ``size < 0`` so that NaN is rejected too.
    if not size >= 0:
        raise ValueError(
            "format_byte() takes a number greater than or equal to 0, "
            f"but {size} was given."
        )

    if size < 1:
        # Less than one byte: report bits (one bit is 0.125 byte).
        return str(round(size / 0.125, dot)) + "b"

    if size < 1024:
        # No division here, so an integer byte count stays an integer ("512B").
        return str(round(size, dot)) + units[0]

    # Climb the unit table while the next unit still fits; the last unit
    # absorbs every larger value.
    exponent = 1
    last_exponent = len(units) - 1
    while exponent < last_exponent and size >= math.pow(1024, exponent + 1):
        exponent += 1

    return str(round(size / math.pow(1024, exponent), dot)) + units[exponent]
|
|
|
|
|
|
class SearchDateTimeResult:
    """Outcome of searching a text for a date-time.

    Attributes
    ----------
    value: str
        The date-time that was found, already rendered as text.

    right_str: str
        Text located after the date-time.

    left_str: str
        Text located before the date-time.

    match: bool
        Whether a date-time was found at all.

    """

    def __init__(
        self,
        value: str = "",
        right_str: str = "",
        left_str: str = "",
        match: bool = False,
    ):
        self.match = match
        self.value = value
        self.left_str, self.right_str = left_str, right_str
|
|
|
|
|
|
# Date-time shapes, ordered from the most to the least specific. Each pattern
# is paired with the ``strptime`` format that parses its match once the "/"
# and "." separators have been rewritten to "-".
_DATE_TIME_PATTERNS = (
    # 2013.8.15 22:46:21
    (
        re.compile(
            r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}[ ]{1,}\d{1,2}:\d{1,2}:\d{1,2}"
        ),
        "%Y-%m-%d %H:%M:%S",
    ),
    # 2013.8.15 22:46
    (
        re.compile(r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}[ ]{1,}\d{1,2}:\d{1,2}"),
        "%Y-%m-%d %H:%M",
    ),
    # 2014.5.11
    (re.compile(r"\d{4}[-/\.]{1}\d{1,2}[-/\.]{1}\d{1,2}"), "%Y-%m-%d"),
    # 2014.5
    (re.compile(r"\d{4}[-/\.]{1}\d{1,2}"), "%Y-%m"),
)


def get_date_time(text: str, fmt: str) -> SearchDateTimeResult:
    """Find one date-time in ``text`` and split the text around it.

    Runs of whitespace in ``text`` are collapsed to a single space before
    searching. The patterns are tried from the most specific (date with
    hours, minutes and seconds) to the least specific (year and month); the
    first pattern that occurs anywhere in the text decides the result, so
    the reported date-time is not necessarily the leftmost one.

    Parameters
    ----------
    text: str
        ready to search text

    fmt: str
        ``strftime`` format used to render the date-time that was found

    Returns
    -------
    SearchDateTimeResult
        On success ``match`` is True, ``value`` holds the date-time rendered
        with ``fmt`` and ``left_str`` / ``right_str`` hold the
        whitespace-collapsed text before and after it. Otherwise a result
        with ``match`` set to False is returned.

    """
    res = SearchDateTimeResult()
    search_text = re.sub(r"\s+", " ", text)

    for pattern, parse_fmt in _DATE_TIME_PATTERNS:
        found = pattern.search(search_text)
        if not found:
            continue

        candidate = found.group(0).replace("/", "-").replace(".", "-").strip()
        try:
            res.value = datetime.strptime(candidate, parse_fmt).strftime(fmt)
        except ValueError:
            # The text has the shape of a date-time but is not a valid one
            # (e.g. month 13). Give up instead of trying looser patterns,
            # which would only match a fragment of the same text.
            break

        res.left_str = search_text[: found.start()]
        res.right_str = search_text[found.end() :]
        res.match = True
        return res

    return res
|
|
|
|
|
|
def replace_date_time(text: str, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Rewrite every date-time found in ``text`` using ``fmt``.

    Parameters
    ----------
    text: str
        ready to search text

    fmt: str
        the right datetime format

    Returns
    -------
    str
        ``text`` with each recognised date-time rendered with ``fmt``.
        ``text`` is returned unchanged when it is empty or contains no
        recognisable date-time. When a date-time is found, the text around
        it comes from ``get_date_time``, which collapses runs of whitespace
        to a single space.

    """
    if not text:
        return text

    res = get_date_time(text, fmt)
    if not res.match:
        return text

    # The text on either side may hold further date-times. Both sides are
    # strictly shorter than ``text``, so the recursion terminates. ``fmt``
    # is forwarded so that every date-time ends up in the requested format,
    # not only the one found at this level.
    return "".join(
        (
            replace_date_time(res.left_str, fmt),
            res.value,
            replace_date_time(res.right_str, fmt),
        )
    )
|
|
|
|
|
|
# Supported unit suffixes; a suffix at position ``i`` stands for 1024 ** i bytes.
_BYTE_UNIT = ["B", "KB", "MB", "GB", "TB"]


def get_byte_from_str(byte_str: str) -> Optional[int]:
    """Convert a size such as ``"10MB"`` into a number of bytes.

    Parameters
    ----------
    byte_str: str
        Text that starts with an integer immediately followed by one of
        the units B, KB, MB, GB or TB. Anything after the unit is ignored.

    Returns
    -------
    Optional[int]
        The size in bytes, or None when ``byte_str`` does not start with
        such a size.

    """
    parsed = re.match(r"(\d{1,})(B|KB|MB|GB|TB)", byte_str)
    if parsed is None:
        return None

    amount, suffix = parsed.groups()
    # The position of the suffix in the unit list is its power of 1024.
    return int(amount) * 1024 ** _BYTE_UNIT.index(suffix)
|
|
|
|
|
|
def truncate_filename(path: str, limit: int = 230) -> str:
    """Truncate filename to the max len.

    Only the final path component is shortened, and within it only the part
    before the extension; the directory part and the extension are kept.

    Parameters
    ----------
    path: str
        File name path

    limit: int
        limit file name len(utf-8 byte), extension included

    Returns
    -------
    str
        The path, normalised with ``os.path.normpath`` and with the file
        name (without extension) converted to Unicode NFC form. The file
        name is cut so that name plus extension fit in ``limit`` bytes.
        When the extension alone is longer than ``limit`` the name part is
        dropped entirely and the result still exceeds ``limit``.

    """
    directory, filename = os.path.split(os.path.normpath(path))
    stem, extension = os.path.splitext(filename)

    # Bytes left for the stem once the extension is accounted for. Clamped
    # at 0: a negative slice bound would remove bytes from the END of the
    # stem instead of capping its length.
    stem_budget = max(limit - len(extension.encode("utf-8")), 0)

    stem = unicodedata.normalize("NFC", stem)
    # Cut on the encoded bytes; errors="ignore" discards a multi-byte
    # character that the cut split in half.
    shortened = stem.encode("utf-8")[:stem_budget].decode("utf-8", errors="ignore")

    return os.path.join(directory, shortened + extension)
|
|
|
|
|
|
def extract_info_from_link(link: str) -> Link:
    """Extract info from link

    The non-empty segments of the URL path are interpreted as follows:

    - ``<name>``: group only
    - ``<name>/<post>``: group and post
    - ``<name>/<topic>/<post>``: group, topic and post
    - ``c/<id>``: group only
    - ``c/<id>/<post>``: group and post
    - ``c/<id>/<topic>/<post>``: group, topic and post

    For the ``c/...`` forms the group id is the integer obtained by
    prefixing ``<id>`` with ``-100``. When the query string has a
    ``comment`` parameter, only the first path segment (as group) and the
    comment id are extracted.

    Parameters
    ----------
    link: str
        The link, or one of the special values ``"me"`` / ``"self"``, which
        are returned as the group id unchanged.

    Returns
    -------
    Link
        The extracted parts. Parts that are absent stay None, and an empty
        ``Link`` is returned when the link cannot be parsed as a URL, has
        no path segment or has a shape not listed above.

    Raises
    ------
    ValueError
        If a segment expected to be an id, or the ``comment`` value, is not
        an integer.

    """
    if link in ("me", "self"):
        return Link(group_id=link)

    try:
        parsed = urlparse(link)
        paths = [part for part in parsed.path.split("/") if part]
        query = parse_qs(parsed.query)
    except ValueError:
        return Link()

    result = Link()

    if not paths:
        # Nothing identifies a chat. Returning here also keeps the comment
        # branch below from indexing an empty list.
        return result

    if "comment" in query:
        result.group_id = paths[0]
        result.comment_id = int(query["comment"][0])
        return result

    head, ids = paths[0], paths[1:]

    if head == "c":
        # c/<id>[/<topic>]/<post>; a bare "c" carries no information.
        if 1 <= len(ids) <= 3:
            result.group_id = int(f"-100{ids[0]}")
            if len(ids) == 3:
                result.topic_id = int(ids[1])
            if len(ids) >= 2:
                result.post_id = int(ids[-1])
    elif len(ids) <= 2:
        # <name>[[/<topic>]/<post>]
        result.group_id = head
        if len(ids) == 2:
            result.topic_id = int(ids[0])
        if ids:
            result.post_id = int(ids[-1])

    return result
|
|
|
|
|
|
def validate_title(title: str) -> str:
    """Replace the characters of a title that fail validation.

    Parameters
    ----------
    title: str
        Chat title

    Returns
    -------
    str
        The title with each of ``/ \\ : * ? " < > |`` and each newline
        replaced by an underscore.

    """
    # One underscore per offending character; runs are not merged.
    return re.sub(r"[/\\:*?\"<>|\n]", "_", title)
|
|
|
|
|
|
def create_progress_bar(progress, total_bars=10):
    """Build a textual progress bar.

    Parameters
    ----------
    progress:
        Completion percentage. Values outside 0..100 are treated as 0 or
        100 respectively, so the bar never grows past ``total_bars``.

    total_bars: int
        Width of the bar in characters.

    Returns
    -------
    str
        Filled cells followed by empty cells. The number of filled cells is
        rounded towards zero.

    Example
    -------
    progress = 50
    progress_bar = create_progress_bar(progress)
    print(f'Progress: [{progress_bar}] ({progress}%)')

    """
    completed_bars = int(progress * total_bars / 100)
    # Clamp so that out-of-range percentages cannot overflow the bar.
    completed_bars = max(0, min(completed_bars, total_bars))
    remaining_bars = total_bars - completed_bars
    return "█" * completed_bars + "░" * remaining_bars
|