"""
This module provides utilities for file management.
- get_root_path: Get the absolute path of the directory
- relative_path: Get the relative path of a file relative to a given directory
- super_json_dump: Writes the provided data to a JSON file with a specified indentation depth.
- super_json_load: Load a JSON file from the given path
- super_csv_dump: Writes data to a CSV file with customizable options
- super_csv_load: Load a CSV file from the given path
- super_copy: Copy a file (or a folder) from the source to the destination (always create the directory)
- super_open: Open a file with the given mode, creating the directory if it doesn't exist (only if writing)
- read_file: Read the content of a file and return it as a string
- replace_tilde: Replace the "~" by the user's home directory
- clean_path: Clean the path by replacing backslashes with forward slashes and simplifying the path
.. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/io_module.gif
:alt: stouputils io examples
"""
# Imports
import csv
import os
import re
import shutil
from io import StringIO
from typing import IO, Any
import pyfastcopy # type: ignore # noqa: F401
# Function that takes a relative path and returns the absolute path of the directory
def get_root_path(relative_path: str, go_up: int = 0) -> str:
	""" Get the absolute path of the directory.
	Usually used to get the root path of the project using the __file__ variable.

	Args:
		relative_path (str): The path to get the absolute directory path from
		go_up (int): Number of parent directories to go up (default: 0)
	Returns:
		str: The absolute path of the directory
	Examples:
		.. code-block:: python

			> get_root_path(__file__)
			'C:/Users/Alexandre-PC/AppData/Local/Programs/Python/Python310/lib/site-packages/stouputils'
			> get_root_path(__file__, 3)
			'C:/Users/Alexandre-PC/AppData/Local/Programs/Python/Python310'
	"""
	# Resolve the directory containing the given path, then climb go_up parents
	directory: str = os.path.dirname(os.path.abspath(relative_path))
	climb: str = go_up * "/.."
	return clean_path(directory + climb) or "."
# Function that returns the relative path of a file
def relative_path(file_path: str, relative_to: str = "") -> str:
	""" Get the relative path of a file relative to a given directory.

	Args:
		file_path   (str): The path to get the relative path from
		relative_to (str): The path to get the relative path to (default: current working directory -> os.getcwd())
	Returns:
		str: The relative path of the file (returned unchanged when not under relative_to)
	Examples:
		>>> relative_path("D:/some/random/path/stouputils/io.py", "D:\\\\some")
		'random/path/stouputils/io.py'
		>>> relative_path("D:/some/random/path/stouputils/io.py", "D:\\\\some\\\\")
		'random/path/stouputils/io.py'
	"""
	if not relative_to:
		relative_to = os.getcwd()
	file_path = clean_path(file_path)
	relative_to = clean_path(relative_to)

	# Only treat relative_to as an ancestor when it matches a full path component:
	# a bare startswith() would wrongly match "D:/something" against "D:/some".
	base: str = relative_to.rstrip("/")
	if file_path == base or file_path.startswith(base + "/"):
		return clean_path(os.path.relpath(file_path, relative_to)) or "."
	else:
		return file_path or "."
# JSON dump with indentation for levels
def super_json_dump(
	data: Any,
	file: IO[Any] | str | None = None,
	max_level: int | None = 2,
	indent: str | int = '\t',
	suffix: str = "\n"
) -> str:
	r""" Writes the provided data to a JSON file with a specified indentation depth.
	For instance, setting max_level to 2 will limit the indentation to 2 levels.

	Args:
		data (Any): The data to dump (usually a dict or a list)
		file (IO[Any] | str): The file object or path to dump the data to
		max_level (int | None): The depth of indentation to stop at (-1 for infinite), None will default to 2
		indent (str | int): The indentation character (default: '\t')
		suffix (str): The suffix to add at the end of the string (default: '\n')
	Returns:
		str: The content of the file in every case

	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 0)
	'{"a": [[1,2,3]],"b": 2}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 1)
	'{\n\t"a": [[1,2,3]],\n\t"b": 2\n}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 2)
	'{\n\t"a": [\n\t\t[1,2,3]\n\t],\n\t"b": 2\n}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 3)
	'{\n\t"a": [\n\t\t[\n\t\t\t1,\n\t\t\t2,\n\t\t\t3\n\t\t]\n\t],\n\t"b": 2\n}\n'
	"""
	# Imports
	import orjson

	# Normalize indentation to string, and handle None values for max_level
	if isinstance(indent, int):
		indent = ' ' * indent
	if max_level is None:
		max_level = 2

	# Dump content with 2-space indent and replace it with the desired indent.
	# BUG FIX: the skip condition used to compare against a SINGLE space, but
	# orjson.OPT_INDENT_2 emits TWO spaces — so indent=" " (or indent=1) was never applied.
	content: str = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode("utf-8")
	if indent != "  ":
		content = re.sub(
			pattern=r'^(\s{2})+',  # Match groups of 2 spaces at start of lines
			repl=lambda match: indent * (len(match.group(0)) // 2),  # Convert to desired indent
			string=content,
			flags=re.MULTILINE
		)

	# Limit max depth of indentation: strip indentation deeper than max_level,
	# and pull closing brackets at exactly max_level back onto the previous line
	if max_level > -1:
		escape: str = re.escape(indent)
		pattern: re.Pattern[str] = re.compile(
			r"\n" + escape + "{" + str(max_level + 1) + r",}(.*)"
			r"|\n" + escape + "{" + str(max_level) + r"}([}\]])"
		)
		content = pattern.sub(r"\1\2", content)

	# Final newline and write (to a path via super_open, or to an open file object)
	content += suffix
	if file:
		if isinstance(file, str):
			with super_open(file, "w") as f:
				f.write(content)
		else:
			file.write(content)
	return content
# JSON load from file path
def super_json_load(file_path: str) -> Any:
	""" Load a JSON file from the given path

	Args:
		file_path (str): The path to the JSON file
	Returns:
		Any: The content of the JSON file
	"""
	import orjson
	with super_open(file_path, "r") as handle:
		raw: str = handle.read()
	return orjson.loads(raw)
# CSV dump to file
[docs]
def super_csv_dump(
data: Any,
file: IO[Any] | str | None = None,
delimiter: str = ',',
has_header: bool = True,
index: bool = False,
*args: Any,
**kwargs: Any
) -> str:
""" Writes data to a CSV file with customizable options and returns the CSV content as a string.
Args:
data (list[list[Any]] | list[dict[str, Any]] | pd.DataFrame | pl.DataFrame):
The data to write, either a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame
file (IO[Any] | str): The file object or path to dump the data to
delimiter (str): The delimiter to use (default: ',')
has_header (bool): Whether to include headers (default: True, applies to dict and DataFrame data)
index (bool): Whether to include the index (default: False, only applies to pandas DataFrame)
*args (Any): Additional positional arguments to pass to the underlying CSV writer or DataFrame method
**kwargs (Any): Additional keyword arguments to pass to the underlying CSV writer or DataFrame method
Returns:
str: The CSV content as a string
Examples:
>>> super_csv_dump([["a", "b", "c"], [1, 2, 3], [4, 5, 6]])
'a,b,c\\r\\n1,2,3\\r\\n4,5,6\\r\\n'
>>> super_csv_dump([{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}])
'name,age\\r\\nAlice,30\\r\\nBob,25\\r\\n'
"""
if isinstance(data, str | bytes | dict):
raise ValueError("Data must be a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame")
output = StringIO()
done: bool = False
# Handle Polars DataFrame
try:
import polars as pl # type: ignore
if isinstance(data, pl.DataFrame):
copy_kwargs = kwargs.copy()
copy_kwargs.setdefault("separator", delimiter)
copy_kwargs.setdefault("include_header", has_header)
data.write_csv(output, *args, **copy_kwargs)
done = True
except Exception:
pass
# Handle pandas DataFrame
if not done:
try:
import pandas as pd # type: ignore
if isinstance(data, pd.DataFrame):
copy_kwargs = kwargs.copy()
copy_kwargs.setdefault("index", index)
copy_kwargs.setdefault("sep", delimiter)
copy_kwargs.setdefault("header", has_header)
data.to_csv(output, *args, **copy_kwargs)
except Exception:
pass
if not done:
# Handle list of dicts
if isinstance(data[0], dict):
fieldnames = list(data[0].keys()) # type: ignore
kwargs.setdefault("fieldnames", fieldnames)
kwargs.setdefault("delimiter", delimiter)
dict_writer = csv.DictWriter(output, *args, **kwargs)
if has_header:
dict_writer.writeheader()
dict_writer.writerows(data) # type: ignore
done = True
# Handle list of lists
else:
kwargs.setdefault("delimiter", delimiter)
list_writer = csv.writer(output, *args, **kwargs)
list_writer.writerows(data) # type: ignore
done = True
# If still not done, raise error
if not done:
output.close()
raise ValueError(f"Data must be a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame, got {type(data)} instead")
# Get content and write to file if needed
content: str = output.getvalue()
if file:
if isinstance(file, str):
with super_open(file, "w") as f:
f.write(content)
else:
file.write(content)
output.close()
return content
# CSV load from file path
def super_csv_load(
	file_path: str,
	delimiter: str = ',',
	has_header: bool = True,
	as_dict: bool = False,
	as_dataframe: bool = False,
	use_polars: bool = False,
	*args: Any,
	**kwargs: Any
) -> Any:
	""" Load a CSV file from the given path

	Args:
		file_path (str): The path to the CSV file
		delimiter (str): The delimiter used in the CSV (default: ',')
		has_header (bool): Whether the CSV has a header row (default: True)
		as_dict (bool): Whether to return data as list of dicts (default: False)
		as_dataframe (bool): Whether to return data as a DataFrame (default: False)
		use_polars (bool): Whether to use Polars instead of pandas for DataFrame (default: False, requires polars)
		*args: Additional positional arguments to pass to the underlying CSV reader or DataFrame method
		**kwargs: Additional keyword arguments to pass to the underlying CSV reader or DataFrame method
	Returns:
		list[list[str]] | list[dict[str, str]] | pd.DataFrame | pl.DataFrame: The content of the CSV file
	Examples:
		.. code-block:: python

			> Assuming "test.csv" contains: a,b,c\\n1,2,3\\n4,5,6
			> super_csv_load("test.csv")
			[['1', '2', '3'], ['4', '5', '6']]
			> super_csv_load("test.csv", as_dict=True)
			[{'a': '1', 'b': '2', 'c': '3'}, {'a': '4', 'b': '5', 'c': '6'}]
			> super_csv_load("test.csv", as_dataframe=True)
			a  b  c
			0  1  2  3
			1  4  5  6

		.. code-block:: console

			> super_csv_load("test.csv", as_dataframe=True, use_polars=True)
			shape: (2, 3)
			┌─────┬─────┬─────┐
			│ a   ┆ b   ┆ c   │
			│ --- ┆ --- ┆ --- │
			│ i64 ┆ i64 ┆ i64 │
			╞═════╪═════╪═════╡
			│ 1   ┆ 2   ┆ 3   │
			│ 4   ┆ 5   ┆ 6   │
			└─────┴─────┴─────┘
	"""  # noqa: E101
	# Handle DataFrame loading (missing file yields an empty DataFrame)
	if as_dataframe:
		if use_polars:
			import polars as pl  # type: ignore
			if not os.path.exists(file_path):
				return pl.DataFrame()  # type: ignore
			kwargs.setdefault("separator", delimiter)
			kwargs.setdefault("has_header", has_header)
			return pl.read_csv(file_path, *args, **kwargs)  # type: ignore
		else:
			import pandas as pd  # type: ignore
			if not os.path.exists(file_path):
				return pd.DataFrame()  # type: ignore
			kwargs.setdefault("sep", delimiter)
			kwargs.setdefault("header", 0 if has_header else None)
			return pd.read_csv(file_path, *args, **kwargs)  # type: ignore

	# Handle dict or list loading (missing file yields an empty list)
	if not os.path.exists(file_path):
		return []
	with super_open(file_path, "r") as f:
		kwargs.setdefault("delimiter", delimiter)
		if as_dict:
			# List of dicts keyed by the header row
			return list(csv.DictReader(f, *args, **kwargs))
		# List of lists, skipping the header row when present.
		# BUG FIX: `as_dict or has_header` previously routed plain loads through
		# DictReader, returning dicts instead of the documented list of lists.
		rows: list[list[str]] = list(csv.reader(f, *args, **kwargs))
		if has_header and rows:
			rows = rows[1:]
		return rows
# For easy file copy
def super_copy(src: str, dst: str, create_dir: bool = True, symlink: bool = False) -> str:
	""" Copy a file (or a folder) from the source to the destination

	Args:
		src (str): The source path
		dst (str): The destination path
		create_dir (bool): Whether to create the destination's parent directory if it doesn't exist (default: True)
		symlink (bool): Whether to create a symlink instead of copying (Linux only, default: False)
	Returns:
		str: The destination path
	"""
	# Disable symlink functionality on Windows as it uses shortcuts instead of proper symlinks
	if os.name == "nt":
		symlink = False

	# Create destination parent directory if needed.
	# BUG FIX: guard against an empty dirname (bare filename destination),
	# which would make os.makedirs("") raise FileNotFoundError.
	if create_dir:
		parent: str = os.path.dirname(dst)
		if parent:
			os.makedirs(parent, exist_ok=True)

	# Handle directory copying
	if os.path.isdir(src):
		if symlink:
			# Remove existing destination unless it's the same file as the source
			if os.path.exists(dst) and not os.path.samefile(src, dst):
				if os.path.isdir(dst):
					shutil.rmtree(dst)
				else:
					os.remove(dst)
			os.symlink(src.rstrip('/'), dst.rstrip('/'), target_is_directory=True)
			return dst
		# Regular directory copy
		return shutil.copytree(src, dst, dirs_exist_ok=True)

	# Handle file copying
	if symlink:
		# Remove existing destination unless it's the same file as the source
		if os.path.exists(dst) and not os.path.samefile(src, dst):
			os.remove(dst)
		os.symlink(src, dst, target_is_directory=False)
		return dst
	# Regular file copy
	return shutil.copy(src, dst)
# For easy file management
def super_open(file_path: str, mode: str, encoding: str = "utf-8") -> IO[Any]:
	""" Open a file with the given mode, creating the parent directory if it doesn't exist (only if writing)

	Args:
		file_path (str): The path to the file
		mode (str): The mode to open the file with, ex: "w", "r", "a", "x", "wb", "rb", "ab"
		encoding (str): The encoding to use when opening the file (default: "utf-8")
	Returns:
		IO[Any]: The file object, ready to be used
	"""
	# Make directory for any mode that can create the file
	# (BUG FIX: exclusive-create "x" also creates the file, so it needs the directory too)
	file_path = clean_path(file_path)
	if "/" in file_path and any(flag in mode for flag in "wax"):
		os.makedirs(os.path.dirname(file_path), exist_ok=True)

	# Open file and return
	if "b" in mode:
		return open(file_path, mode)  # Binary mode takes no encoding
	else:
		return open(file_path, mode, encoding=encoding)  # Always use utf-8 encoding to avoid issues
def read_file(file_path: str, encoding: str = "utf-8") -> str:
	""" Read the content of a file and return it as a string

	Args:
		file_path (str): The path to the file
		encoding (str): The encoding to use when opening the file (default: "utf-8")
	Returns:
		str: The content of the file
	"""
	with super_open(file_path, "r", encoding=encoding) as handle:
		content: str = handle.read()
	return content
# Function that replace the "~" by the user's home directory
def replace_tilde(path: str) -> str:
	""" Replace the "~" by the user's home directory

	Args:
		path (str): The path to replace the "~" by the user's home directory
	Returns:
		str: The path with the "~" replaced by the user's home directory
	Examples:
		.. code-block:: python

			> replace_tilde("~/Documents/test.txt")
			'/home/user/Documents/test.txt'
	"""
	# Expand "~" first, then normalize every backslash (including any from the home path)
	home: str = os.path.expanduser("~")
	expanded: str = path.replace("~", home)
	return expanded.replace("\\", "/")
# Utility function to clean the path
def clean_path(file_path: str, trailing_slash: bool = True) -> str:
	""" Clean the path by replacing backslashes with forward slashes and simplifying the path

	Args:
		file_path (str): The path to clean
		trailing_slash (bool): Whether to keep the trailing slash, ex: "test/" -> "test/"
	Returns:
		str: The cleaned path
	Examples:
		>>> clean_path("C:\\\\Users\\\\Stoupy\\\\Documents\\\\test.txt")
		'C:/Users/Stoupy/Documents/test.txt'
		>>> clean_path("Some Folder////")
		'Some Folder/'
		>>> clean_path("test/uwu/1/../../")
		'test/'
		>>> clean_path("some/./folder/../")
		'some/'
		>>> clean_path("folder1/folder2/../../folder3")
		'folder3'
		>>> clean_path("./test/./folder/")
		'test/folder/'
		>>> clean_path("C:/folder1\\\\folder2")
		'C:/folder1/folder2'
	"""
	# Expand "~" and force forward slashes
	path: str = replace_tilde(str(file_path))

	# Remember whether the caller's path ended with a separator (normpath drops it)
	had_trailing: bool = path.endswith(('/', '\\'))

	# Collapse "."/".." segments, then normalize the OS separator back to "/"
	path = os.path.normpath(path).replace(os.sep, '/')

	# Restore the trailing slash lost by normpath
	if had_trailing and not path.endswith('/'):
		path += '/'

	# Optionally strip the trailing slash
	if not trailing_slash and path.endswith('/'):
		path = path[:-1]

	# normpath collapses an empty-ish path to "."; the convention here is ""
	return "" if path == "." else path