"""
This module provides utilities for file management.
- get_root_path: Get the absolute path of the directory
- relative_path: Get the relative path of a file relative to a given directory
- super_json_dump: Writes the provided data to a JSON file with a specified indentation depth.
- super_json_load: Load a JSON file from the given path
- super_csv_dump: Writes data to a CSV file with customizable options
- super_csv_load: Load a CSV file from the given path
- super_copy: Copy a file (or a folder) from the source to the destination (always create the directory)
- super_open: Open a file with the given mode, creating the directory if it doesn't exist (only if writing)
- read_file: Read the content of a file and return it as a string
- replace_tilde: Replace the "~" by the user's home directory
- clean_path: Clean the path by replacing backslashes with forward slashes and simplifying the path
.. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/io_module.gif
:alt: stouputils io examples
"""
# Imports
import csv
import os
import re
import shutil
from io import StringIO
from typing import IO, Any
import pyfastcopy # type: ignore # noqa: F401
# Function that takes a relative path and returns the absolute path of the directory
def get_root_path(relative_path: str, go_up: int = 0) -> str:
	""" Get the absolute path of the directory.
	Usually used to get the root path of the project using the __file__ variable.

	Args:
		relative_path (str): The path to get the absolute directory path from
		go_up (int): Number of parent directories to go up (default: 0)
	Returns:
		str: The absolute path of the directory
	Examples:
		.. code-block:: python

			> get_root_path(__file__)
			'C:/Users/Alexandre-PC/AppData/Local/Programs/Python/Python310/lib/site-packages/stouputils'
			> get_root_path(__file__, 3)
			'C:/Users/Alexandre-PC/AppData/Local/Programs/Python/Python310'
	"""
	# Resolve the directory containing the given path, then climb go_up parents
	directory: str = os.path.dirname(os.path.abspath(relative_path))
	climb: str = go_up * "/.."
	return clean_path(directory + climb) or "."
# Function that returns the relative path of a file
def relative_path(file_path: str, relative_to: str = "") -> str:
	""" Get the relative path of a file relative to a given directory.

	Args:
		file_path   (str): The path to get the relative path from
		relative_to (str): The path to get the relative path to (default: current working directory -> os.getcwd())
	Returns:
		str: The relative path of the file (returned unchanged when not under relative_to)
	Examples:
		>>> relative_path("D:/some/random/path/stouputils/io.py", "D:\\\\some")
		'random/path/stouputils/io.py'
		>>> relative_path("D:/some/random/path/stouputils/io.py", "D:\\\\some\\\\")
		'random/path/stouputils/io.py'
	"""
	if not relative_to:
		relative_to = os.getcwd()
	file_path = clean_path(file_path)
	relative_to = clean_path(relative_to)

	# Only treat relative_to as an ancestor when it matches a full path component:
	# a bare startswith() would wrongly match "D:/something" against "D:/some".
	base: str = relative_to.rstrip("/")
	if file_path == base or file_path.startswith(base + "/"):
		return clean_path(os.path.relpath(file_path, relative_to)) or "."
	else:
		return file_path or "."
# JSON dump with indentation for levels
def super_json_dump(
	data: Any,
	file: IO[Any] | str | None = None,
	max_level: int | None = 2,
	indent: str | int = '\t',
	suffix: str = "\n"
) -> str:
	r""" Writes the provided data to a JSON file with a specified indentation depth.
	For instance, setting max_level to 2 will limit the indentation to 2 levels.

	Args:
		data (Any): The data to dump (usually a dict or a list)
		file (IO[Any] | str): The file object or path to dump the data to
		max_level (int | None): The depth of indentation to stop at (-1 for infinite), None will default to 2
		indent (str | int): The indentation character (default: '\t')
		suffix (str): The suffix to add at the end of the string (default: '\n')
	Returns:
		str: The content of the file in every case

	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 0)
	'{"a": [[1,2,3]],"b": 2}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 1)
	'{\n\t"a": [[1,2,3]],\n\t"b": 2\n}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 2)
	'{\n\t"a": [\n\t\t[1,2,3]\n\t],\n\t"b": 2\n}\n'
	>>> super_json_dump({"a": [[1,2,3]], "b": 2}, max_level = 3)
	'{\n\t"a": [\n\t\t[\n\t\t\t1,\n\t\t\t2,\n\t\t\t3\n\t\t]\n\t],\n\t"b": 2\n}\n'
	"""
	# Imports
	import orjson

	# Normalize indentation to string, and handle None values for max_level
	if isinstance(indent, int):
		indent = ' ' * indent
	if max_level is None:
		max_level = 2

	# Dump content with 2-space indent and replace it with the desired indent.
	# BUG FIX: the skip condition used to compare against a SINGLE space, but
	# orjson.OPT_INDENT_2 emits TWO spaces — so indent=" " (or indent=1) was never applied.
	content: str = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode("utf-8")
	if indent != "  ":
		content = re.sub(
			pattern=r'^(\s{2})+',  # Match groups of 2 spaces at start of lines
			repl=lambda match: indent * (len(match.group(0)) // 2),  # Convert to desired indent
			string=content,
			flags=re.MULTILINE
		)

	# Limit max depth of indentation: strip indentation deeper than max_level,
	# and pull closing brackets at exactly max_level back onto the previous line
	if max_level > -1:
		escape: str = re.escape(indent)
		pattern: re.Pattern[str] = re.compile(
			r"\n" + escape + "{" + str(max_level + 1) + r",}(.*)"
			r"|\n" + escape + "{" + str(max_level) + r"}([}\]])"
		)
		content = pattern.sub(r"\1\2", content)

	# Final newline and write (to a path via super_open, or to an open file object)
	content += suffix
	if file:
		if isinstance(file, str):
			with super_open(file, "w") as f:
				f.write(content)
		else:
			file.write(content)
	return content
# JSON load from file path
def super_json_load(file_path: str) -> Any:
	""" Load a JSON file from the given path

	Args:
		file_path (str): The path to the JSON file
	Returns:
		Any: The content of the JSON file
	"""
	import orjson
	with super_open(file_path, "r") as handle:
		raw: str = handle.read()
	return orjson.loads(raw)
# CSV dump to file
[docs]
def super_csv_dump(
data: Any,
file: IO[Any] | str | None = None,
delimiter: str = ',',
has_header: bool = True,
index: bool = False,
*args: Any,
**kwargs: Any
) -> str:
""" Writes data to a CSV file with customizable options and returns the CSV content as a string.
Args:
data (list[list[Any]] | list[dict[str, Any]] | pd.DataFrame | pl.DataFrame):
The data to write, either a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame
file (IO[Any] | str): The file object or path to dump the data to
delimiter (str): The delimiter to use (default: ',')
has_header (bool): Whether to include headers (default: True, applies to dict and DataFrame data)
index (bool): Whether to include the index (default: False, only applies to pandas DataFrame)
*args (Any): Additional positional arguments to pass to the underlying CSV writer or DataFrame method
**kwargs (Any): Additional keyword arguments to pass to the underlying CSV writer or DataFrame method
Returns:
str: The CSV content as a string
Examples:
>>> super_csv_dump([["a", "b", "c"], [1, 2, 3], [4, 5, 6]])
'a,b,c\\r\\n1,2,3\\r\\n4,5,6\\r\\n'
>>> super_csv_dump([{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}])
'name,age\\r\\nAlice,30\\r\\nBob,25\\r\\n'
"""
if isinstance(data, str | bytes | dict):
raise ValueError("Data must be a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame")
output = StringIO()
done: bool = False
# Handle Polars DataFrame
try:
import polars as pl # type: ignore
if isinstance(data, pl.DataFrame):
copy_kwargs = kwargs.copy()
copy_kwargs.setdefault("separator", delimiter)
copy_kwargs.setdefault("include_header", has_header)
data.write_csv(output, *args, **copy_kwargs)
done = True
except Exception:
pass
# Handle pandas DataFrame
if not done:
try:
import pandas as pd # type: ignore
if isinstance(data, pd.DataFrame):
copy_kwargs = kwargs.copy()
copy_kwargs.setdefault("index", index)
copy_kwargs.setdefault("sep", delimiter)
copy_kwargs.setdefault("header", has_header)
data.to_csv(output, *args, **copy_kwargs)
except Exception:
pass
if not done:
# Handle list of dicts
if isinstance(data[0], dict):
fieldnames = list(data[0].keys()) # type: ignore
kwargs.setdefault("fieldnames", fieldnames)
kwargs.setdefault("delimiter", delimiter)
dict_writer = csv.DictWriter(output, *args, **kwargs)
if has_header:
dict_writer.writeheader()
dict_writer.writerows(data) # type: ignore
done = True
# Handle list of lists
else:
kwargs.setdefault("delimiter", delimiter)
list_writer = csv.writer(output, *args, **kwargs)
list_writer.writerows(data) # type: ignore
done = True
# If still not done, raise error
if not done:
output.close()
raise ValueError(f"Data must be a list of lists, list of dicts, pandas DataFrame, or Polars DataFrame, got {type(data)} instead")
# Get content and write to file if needed
content: str = output.getvalue()
if file:
if isinstance(file, str):
with super_open(file, "w") as f:
f.write(content)
else:
file.write(content)
output.close()
return content
# CSV load from file path
def super_csv_load(
	file_path: str,
	delimiter: str = ',',
	has_header: bool = True,
	as_dict: bool = False,
	as_dataframe: bool = False,
	use_polars: bool = False,
	*args: Any,
	**kwargs: Any
) -> Any:
	""" Load a CSV file from the given path

	Args:
		file_path (str): The path to the CSV file
		delimiter (str): The delimiter used in the CSV (default: ',')
		has_header (bool): Whether the CSV has a header row (default: True)
		as_dict (bool): Whether to return data as list of dicts (default: False)
		as_dataframe (bool): Whether to return data as a DataFrame (default: False)
		use_polars (bool): Whether to use Polars instead of pandas for DataFrame (default: False, requires polars)
		*args: Additional positional arguments to pass to the underlying CSV reader or DataFrame method
		**kwargs: Additional keyword arguments to pass to the underlying CSV reader or DataFrame method
	Returns:
		list[list[str]] | list[dict[str, str]] | pd.DataFrame | pl.DataFrame: The content of the CSV file
	Examples:
		.. code-block:: python

			> Assuming "test.csv" contains: a,b,c\\n1,2,3\\n4,5,6
			> super_csv_load("test.csv")
			[['1', '2', '3'], ['4', '5', '6']]
			> super_csv_load("test.csv", as_dict=True)
			[{'a': '1', 'b': '2', 'c': '3'}, {'a': '4', 'b': '5', 'c': '6'}]
			> super_csv_load("test.csv", as_dataframe=True)
			a  b  c
			0  1  2  3
			1  4  5  6

		.. code-block:: console

			> super_csv_load("test.csv", as_dataframe=True, use_polars=True)
			shape: (2, 3)
			┌─────┬─────┬─────┐
			│ a   ┆ b   ┆ c   │
			│ --- ┆ --- ┆ --- │
			│ i64 ┆ i64 ┆ i64 │
			╞═════╪═════╪═════╡
			│ 1   ┆ 2   ┆ 3   │
			│ 4   ┆ 5   ┆ 6   │
			└─────┴─────┴─────┘
	"""  # noqa: E101
	# Handle DataFrame loading (missing file yields an empty DataFrame)
	if as_dataframe:
		if use_polars:
			import polars as pl  # type: ignore
			if not os.path.exists(file_path):
				return pl.DataFrame()  # type: ignore
			kwargs.setdefault("separator", delimiter)
			kwargs.setdefault("has_header", has_header)
			return pl.read_csv(file_path, *args, **kwargs)  # type: ignore
		else:
			import pandas as pd  # type: ignore
			if not os.path.exists(file_path):
				return pd.DataFrame()  # type: ignore
			kwargs.setdefault("sep", delimiter)
			kwargs.setdefault("header", 0 if has_header else None)
			return pd.read_csv(file_path, *args, **kwargs)  # type: ignore

	# Handle dict or list loading (missing file yields an empty list)
	if not os.path.exists(file_path):
		return []
	with super_open(file_path, "r") as f:
		kwargs.setdefault("delimiter", delimiter)
		if as_dict:
			# List of dicts keyed by the header row
			return list(csv.DictReader(f, *args, **kwargs))
		# List of lists, skipping the header row when present.
		# BUG FIX: `as_dict or has_header` previously routed plain loads through
		# DictReader, returning dicts instead of the documented list of lists.
		rows: list[list[str]] = list(csv.reader(f, *args, **kwargs))
		if has_header and rows:
			rows = rows[1:]
		return rows
# For easy file copy
def super_copy(src: str, dst: str, create_dir: bool = True, symlink: bool = False) -> str:
	""" Copy a file (or a folder) from the source to the destination

	Args:
		src (str): The source path
		dst (str): The destination path
		create_dir (bool): Whether to create the destination's parent directory if it doesn't exist (default: True)
		symlink (bool): Whether to create a symlink instead of copying (Linux only, default: False)
	Returns:
		str: The destination path
	"""
	# Disable symlink functionality on Windows as it uses shortcuts instead of proper symlinks
	if os.name == "nt":
		symlink = False

	# Create destination parent directory if needed.
	# BUG FIX: guard against an empty dirname (bare filename destination),
	# which would make os.makedirs("") raise FileNotFoundError.
	if create_dir:
		parent: str = os.path.dirname(dst)
		if parent:
			os.makedirs(parent, exist_ok=True)

	# Handle directory copying
	if os.path.isdir(src):
		if symlink:
			# Remove existing destination unless it's the same file as the source
			if os.path.exists(dst) and not os.path.samefile(src, dst):
				if os.path.isdir(dst):
					shutil.rmtree(dst)
				else:
					os.remove(dst)
			os.symlink(src.rstrip('/'), dst.rstrip('/'), target_is_directory=True)
			return dst
		# Regular directory copy
		return shutil.copytree(src, dst, dirs_exist_ok=True)

	# Handle file copying
	if symlink:
		# Remove existing destination unless it's the same file as the source
		if os.path.exists(dst) and not os.path.samefile(src, dst):
			os.remove(dst)
		os.symlink(src, dst, target_is_directory=False)
		return dst
	# Regular file copy
	return shutil.copy(src, dst)
# For easy file management
def super_open(file_path: str, mode: str, encoding: str = "utf-8") -> IO[Any]:
	""" Open a file with the given mode, creating the parent directory if it doesn't exist (only if writing)

	Args:
		file_path (str): The path to the file
		mode (str): The mode to open the file with, ex: "w", "r", "a", "x", "wb", "rb", "ab"
		encoding (str): The encoding to use when opening the file (default: "utf-8")
	Returns:
		IO[Any]: The file object, ready to be used
	"""
	# Make directory for any mode that can create the file
	# (BUG FIX: exclusive-create "x" also creates the file, so it needs the directory too)
	file_path = clean_path(file_path)
	if "/" in file_path and any(flag in mode for flag in "wax"):
		os.makedirs(os.path.dirname(file_path), exist_ok=True)

	# Open file and return
	if "b" in mode:
		return open(file_path, mode)  # Binary mode takes no encoding
	else:
		return open(file_path, mode, encoding=encoding)  # Always use utf-8 encoding to avoid issues
def read_file(file_path: str, encoding: str = "utf-8") -> str:
	""" Read the content of a file and return it as a string

	Args:
		file_path (str): The path to the file
		encoding (str): The encoding to use when opening the file (default: "utf-8")
	Returns:
		str: The content of the file
	"""
	with super_open(file_path, "r", encoding=encoding) as handle:
		content: str = handle.read()
	return content
# Function that replace the "~" by the user's home directory
def replace_tilde(path: str) -> str:
	""" Replace the "~" by the user's home directory

	Args:
		path (str): The path to replace the "~" by the user's home directory
	Returns:
		str: The path with the "~" replaced by the user's home directory
	Examples:
		.. code-block:: python

			> replace_tilde("~/Documents/test.txt")
			'/home/user/Documents/test.txt'
	"""
	# Expand "~" first, then normalize every backslash (including any from the home path)
	home: str = os.path.expanduser("~")
	expanded: str = path.replace("~", home)
	return expanded.replace("\\", "/")
# Utility function to clean the path
def clean_path(file_path: str, trailing_slash: bool = True) -> str:
	""" Clean the path by replacing backslashes with forward slashes and simplifying the path

	Args:
		file_path (str): The path to clean
		trailing_slash (bool): Whether to keep the trailing slash, ex: "test/" -> "test/"
	Returns:
		str: The cleaned path
	Examples:
		>>> clean_path("C:\\\\Users\\\\Stoupy\\\\Documents\\\\test.txt")
		'C:/Users/Stoupy/Documents/test.txt'
		>>> clean_path("Some Folder////")
		'Some Folder/'
		>>> clean_path("test/uwu/1/../../")
		'test/'
		>>> clean_path("some/./folder/../")
		'some/'
		>>> clean_path("folder1/folder2/../../folder3")
		'folder3'
		>>> clean_path("./test/./folder/")
		'test/folder/'
		>>> clean_path("C:/folder1\\\\folder2")
		'C:/folder1/folder2'
	"""
	# Expand "~" and force forward slashes
	path: str = replace_tilde(str(file_path))

	# Remember whether the caller's path ended with a separator (normpath drops it)
	had_trailing: bool = path.endswith(('/', '\\'))

	# Collapse "."/".." segments, then normalize the OS separator back to "/"
	path = os.path.normpath(path).replace(os.sep, '/')

	# Restore the trailing slash lost by normpath
	if had_trailing and not path.endswith('/'):
		path += '/'

	# Optionally strip the trailing slash
	if not trailing_slash and path.endswith('/'):
		path = path[:-1]

	# normpath collapses an empty-ish path to "."; the convention here is ""
	return "" if path == "." else path