Source code for argopy.utils.caching
import os
import shutil
import logging
import pickle
import json
import fsspec
import pandas as pd
from packaging import version
from ..options import OPTIONS
from ..errors import FileSystemHasNoCache
log = logging.getLogger("argopy.utils.caching")
[docs]
def clear_cache(fs=None):
"""Delete argopy cache folder content"""
if os.path.exists(OPTIONS["cachedir"]):
# shutil.rmtree(OPTIONS["cachedir"])
for filename in os.listdir(OPTIONS["cachedir"]):
file_path = os.path.join(OPTIONS["cachedir"], filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print("Failed to delete %s. Reason: %s" % (file_path, e))
if fs:
fs.clear_cache()
[docs]
def lscache(cache_path: str = "", prt=True, errors='raise'):
"""Decode and list cache folder content
Parameters
----------
cache_path: str
prt: bool, default=True
Return a printable string or a :class:`pandas.DataFrame`
errors: str, default: ``raise``
Define how to handle errors raised during listing:
- ``raise`` (default): Raise any error encountered
- ``ignore``: Do not stop processing, simply issue a debug message in logging console
- ``silent``: Do not stop processing and do not issue log message
Returns
-------
str or :class:`pandas.DataFrame`
"""
from datetime import datetime
import math
summary = []
cache_path = OPTIONS["cachedir"] if cache_path == "" else cache_path
apath = os.path.abspath(cache_path)
log.debug("Listing cache content at: %s" % cache_path)
def convert_size(size_bytes):
if size_bytes == 0:
return "0B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_bytes / p, 2)
return "%s %s" % (s, size_name[i])
cached_files = []
fn = os.path.join(apath, "cache")
if os.path.exists(fn):
if version.parse(fsspec.__version__) <= version.parse("2023.6.0"):
with open(fn, "rb") as f:
loaded_cached_files = pickle.load(
f
) # nosec B301 because files controlled internally
else:
with open(fn, "r") as f:
loaded_cached_files = json.load(f)
for c in loaded_cached_files.values():
if isinstance(c["blocks"], list):
c["blocks"] = set(c["blocks"])
cached_files.append(loaded_cached_files)
else:
if errors == 'raise':
raise FileSystemHasNoCache("No fsspec cache system at: %s" % apath)
elif errors == 'ignore':
log.debug("No fsspec cache system at: %s" % apath)
else:
return summary
cached_files = cached_files or [{}]
cached_files = cached_files[-1]
N_FILES = len(cached_files)
TOTAL_SIZE = 0
for cfile in cached_files:
path = os.path.join(apath, cached_files[cfile]["fn"])
TOTAL_SIZE += os.path.getsize(path)
summary.append(
"%s %s"
% (
"=" * 20,
"%i files in fsspec cache folder (%s)"
% (N_FILES, convert_size(TOTAL_SIZE)),
)
)
summary.append("lscache %s" % os.path.sep.join([apath, ""]))
summary.append("=" * 20)
listing = {
"fn": [],
"size": [],
"time": [],
"original": [],
"uid": [],
"blocks": [],
}
for cfile in cached_files:
summary.append("- %s" % cached_files[cfile]["fn"])
listing["fn"].append(cached_files[cfile]["fn"])
path = os.path.join(cache_path, cached_files[cfile]["fn"])
summary.append("\t%8s: %s" % ("SIZE", convert_size(os.path.getsize(path))))
listing["size"].append(os.path.getsize(path))
key = "time"
ts = cached_files[cfile][key]
tsf = pd.to_datetime(datetime.fromtimestamp(ts)).strftime("%c")
summary.append("\t%8s: %s (%s)" % (key, tsf, ts))
listing["time"].append(pd.to_datetime(datetime.fromtimestamp(ts)))
if version.parse(fsspec.__version__) > version.parse("0.8.7"):
key = "original"
summary.append("\t%8s: %s" % (key, cached_files[cfile][key]))
listing[key].append(cached_files[cfile][key])
key = "uid"
summary.append("\t%8s: %s" % (key, cached_files[cfile][key]))
listing[key].append(cached_files[cfile][key])
key = "blocks"
summary.append("\t%8s: %s" % (key, cached_files[cfile][key]))
listing[key].append(cached_files[cfile][key])
summary.append("=" * 20)
summary = "\n".join(summary)
if prt:
# Return string to be printed:
return summary
else:
# Return dataframe listing:
# log.debug(summary)
return pd.DataFrame(listing)