This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
"""Utility functions to handle downloaded files."""
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from hashlib import md5
|
||||
|
||||
|
||||
def get_next_name(file_path: str) -> str:
|
||||
"""
|
||||
Get next available name to download file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
file_path: str
|
||||
Absolute path of the file for which next available name to
|
||||
be generated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Absolute path of the next available name for the file.
|
||||
"""
|
||||
posix_path = pathlib.Path(file_path)
|
||||
counter: int = 1
|
||||
new_file_name: str = os.path.join("{0}", "{1}-copy{2}{3}")
|
||||
while os.path.isfile(
|
||||
new_file_name.format(
|
||||
posix_path.parent,
|
||||
posix_path.stem,
|
||||
counter,
|
||||
"".join(posix_path.suffixes),
|
||||
)
|
||||
):
|
||||
counter += 1
|
||||
return new_file_name.format(
|
||||
posix_path.parent,
|
||||
posix_path.stem,
|
||||
counter,
|
||||
"".join(posix_path.suffixes),
|
||||
)
|
||||
|
||||
|
||||
def manage_duplicate_file(file_path: str):
|
||||
"""
|
||||
Check if a file is duplicate.
|
||||
|
||||
Compare the md5 of files with copy name pattern
|
||||
and remove if the md5 hash is same.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
file_path: str
|
||||
Absolute path of the file for which duplicates needs to
|
||||
be managed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Absolute path of the duplicate managed file.
|
||||
"""
|
||||
# pylint: disable = R1732
|
||||
posix_path = pathlib.Path(file_path)
|
||||
file_base_name: str = "".join(posix_path.stem.split("-copy")[0])
|
||||
name_pattern: str = f"{posix_path.parent}/{file_base_name}*"
|
||||
# Reason for using `str.translate()`
|
||||
# https://stackoverflow.com/q/22055500/6730439
|
||||
old_files: list = glob.glob(
|
||||
name_pattern.translate({ord("["): "[[]", ord("]"): "[]]"})
|
||||
)
|
||||
if file_path in old_files:
|
||||
old_files.remove(file_path)
|
||||
current_file_md5: str = md5(open(file_path, "rb").read()).hexdigest()
|
||||
for old_file_path in old_files:
|
||||
old_file_md5: str = md5(open(old_file_path, "rb").read()).hexdigest()
|
||||
if current_file_md5 == old_file_md5:
|
||||
os.remove(file_path)
|
||||
return old_file_path
|
||||
return file_path
|
||||
Reference in New Issue
Block a user