import os
import json
import re
import hou

def get_cache_path(node, parm):
    """Return a parameter's evaluated path with its frame number replaced by ``$F``.

    The parameter value is read with ``eval()``. If the result contains a
    ``.<digits>.`` component followed by an extension, the digits are swapped
    for the ``$F`` token, e.g. ``/simulations/fluid_v01/main/fluid.0042.bgeo.sc``
    becomes ``/simulations/fluid_v01/main/fluid.$F.bgeo.sc``. A path without
    such a component is returned unchanged.

    Args:
        node: Object exposing ``parm(name)``, whose result exposes ``eval()``.
        parm: Name of the parameter that holds the cache path.

    Returns:
        The evaluated path, with the frame number replaced by ``$F`` when one
        is present.
    """
    cache_path = node.parm(parm).eval()

    # "<prefix>." + "<frame digits>" + ".<extension>".
    # Single backslashes are required inside the raw string: the doubled form
    # (r'\\.') asks the regex engine for a literal backslash, so it could
    # never match a path such as "fluid.0042.bgeo.sc".
    frame_pattern = r'(.*\.)(\d+)(\..+)$'

    if re.match(frame_pattern, cache_path):
        # Keep the prefix and extension groups, swap the digits for $F
        return re.sub(frame_pattern, r'\1$F\3', cache_path)

    # No frame number found: hand back the evaluated path as-is
    return cache_path

def save_sim_metadata(node, published=True):
    """Record the cache referenced by *node* in ``sim_metadata.json``.

    The cache path is read from the node's ``file`` parameter and is expected
    to look like
    ``<cache_dir>/<sim_name>_v<NN>/<cache_name>[.$F].<extension>``, for
    example ``.../SIM/test_SIM_v01/test_cache.$F.bgeo.sc``. The metadata file
    is written to ``<cache_dir>`` and is keyed by sim name, then version; each
    version holds a ``"caches"`` list with one entry per cache name. An entry
    with the same name is replaced, otherwise the new entry is appended.

    Args:
        node: Node whose ``file`` parameter holds the cache path.
        published: Value stored under the entry's ``"published"`` key.

    Raises:
        ValueError: If the folder containing the cache file is not named
            ``<sim_name>_v<version>``.
    """
    parm = 'file'

    cache_path = get_cache_path(node, parm)
    is_animated = '$F' in cache_path

    # Split the path to get directory and filename
    directory = os.path.dirname(cache_path)
    cache_filename = os.path.basename(cache_path)

    # Extract sim name and version from the folder name. Splitting on the LAST
    # "_v" keeps sim names that themselves contain "_v" (e.g. "my_vdb_v01")
    # intact instead of cutting them at the first occurrence.
    folder_name = os.path.basename(directory)
    sim_name, separator, version_number = folder_name.rpartition('_v')
    if not separator:
        raise ValueError(
            "Cache folder '{0}' is not named '<sim_name>_v<version>'".format(folder_name)
        )
    version = 'v' + version_number

    # Everything before the first dot is the cache name, the rest is the extension
    cache_name, _, raw_extension = cache_filename.partition('.')

    # Drop a leading "$F." frame token from the extension. Single backslashes
    # are required in the raw string; the doubled form never matched.
    extension = re.sub(r'^\$F\.', '', raw_extension)

    # Prepare new cache metadata with published flag
    new_cache = {
        "name": cache_name,
        "extension": extension,
        "full_path": cache_path,
        "is_animated": is_animated,
        "published": published
    }

    # The metadata file sits next to the versioned sim folders
    cache_dir = os.path.dirname(directory)
    metadata_path = os.path.join(cache_dir, "sim_metadata.json")

    # Load existing metadata if it exists
    metadata = {}
    if os.path.exists(metadata_path):
        try:
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)
        except json.JSONDecodeError:
            # Best effort: an unparsable file is replaced rather than aborting
            # the save, but say so because its previous content is lost.
            print("Could not parse {0}; starting from empty metadata".format(metadata_path))
            metadata = {}

    # Create the sim / version / caches levels on demand
    caches = (
        metadata.setdefault(sim_name, {})
        .setdefault(version, {})
        .setdefault("caches", [])
    )

    # Replace the entry with the same cache name, or append when there is none
    for index, cache in enumerate(caches):
        if cache.get("name") == cache_name:
            caches[index] = new_cache
            break
    else:
        caches.append(new_cache)

    # Save updated metadata
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=4)
    print("Metadata saved to: {0}".format(metadata_path))

# Example run: look up the cache node by its scene path and record its cache
# as published.
# NOTE(review): the node path is hard-coded; confirm it exists in the scene.
node = hou.node('/obj/test_SIM/test_cache')
save_sim_metadata(node, published=True)

Setting up the shot path

# Store the shot's root folder in the SHOTPATH variable via hou.putenv
path = 'C:/vfx/projects/test_project_02/shots/shot01'
hou.putenv('SHOTPATH', path)

Getting the path with frame number

If a path has a frame number after the cache name, the function replaces it with $F. If not, it returns the original path. For example, the cache path /simulations/fluid_v01/main/fluid.0042.bgeo.sc is converted to /simulations/fluid_v01/main/fluid.$F.bgeo.sc

import os
import hou

def get_cache_path(node, parm):
    """Return a parameter's evaluated path with its frame number replaced by ``$F``.

    The parameter value is read with ``eval()``. If the result contains a
    ``.<digits>.`` component followed by an extension, the digits are swapped
    for the ``$F`` token; otherwise the evaluated path is returned unchanged.

    Args:
        node: Object exposing ``parm(name)``, whose result exposes ``eval()``.
        parm: Name of the parameter that holds the cache path.

    Returns:
        The evaluated path, with the frame number replaced by ``$F`` when one
        is present.
    """
    cache_path = node.parm(parm).eval()

    # "<prefix>." + "<frame digits>" + ".<extension>".
    # Single backslashes are required inside the raw string: the doubled form
    # (r'\\.') asks the regex engine for a literal backslash, so it could
    # never match a path such as "fluid.0042.bgeo.sc".
    frame_pattern = r'(.*\.)(\d+)(\..+)$'

    if re.match(frame_pattern, cache_path):
        # Keep the prefix and extension groups, swap the digits for $F
        return re.sub(frame_pattern, r'\1$F\3', cache_path)

    # No frame number found: hand back the evaluated path as-is
    return cache_path

# Example usage: print the converted path of the test node's 'file' parameter
node = hou.node('/obj/test/test_cache')
parm = 'file'
print(get_cache_path(node, parm))

If cache_path has $F return True

Use the get_cache_path() function to read the ‘file’ parm; is_animated is True if the returned path contains $F

# Read the 'file' parm of the test node through get_cache_path()
parm = 'file'
node = hou.node('/obj/test/test_cache')
cache_path = get_cache_path(node, parm)

# True when the returned path contains the $F token
is_animated = '$F' in cache_path
print(is_animated)

Splitting filename and the path

os.path.dirname(cache_path) is used to get everything up to the last folder of a path

os.path.basename(cache_path) is used to get the final part of the path

import os
# Example path: <cache root>/<sim_name>_<version>/<cache_name>.$F.<extension>
cache_path = 'C:/vfx/projects/test_project_02/shots/shot01/3d/cache/SIM/test_SIM_v01/test_cache.$F.bgeo.sc'

# Split the path to get directory and filename
directory = os.path.dirname(cache_path) # C:/vfx/projects/test_project_02/shots/shot01/3d/cache/SIM/test_SIM_v01
cache_filename = os.path.basename(cache_path) # test_cache.$F.bgeo.sc

# Extract sim name and version from directory
# NOTE: split('_v')[0] keeps only the text before the FIRST '_v', so a sim
# name that itself contains '_v' would be cut short.
folder_name = os.path.basename(directory) # test_SIM_v01
sim_name = folder_name.split('_v')[0] # test_SIM
# Whatever follows "<sim_name>_" in the folder name is the version
version = folder_name.split(sim_name + '_')[1] # v01

Getting the cache name and file extension

cache_filename.split('.') will return ['test_cache', '$F', 'bgeo', 'sc'] and parts[0] will get the first component of that list

The regex pattern removes the leading $F. from the joined remainder, leaving everything after the $F as the extension (bgeo.sc)

import re
cache_filename = 'test_cache.$F.bgeo.sc'

# Get cache name and extension
parts = cache_filename.split('.')
cache_name = parts[0] # test_cache

# Remove the leading "$F." frame token from the extension using regex.
# Single backslashes are required inside the raw string: r'^\\$F\\.' asks for
# literal backslashes and never matches, which left "$F." in the result.
raw_extension = '.'.join(parts[1:]) # $F.bgeo.sc
extension = re.sub(r'^\$F\.', '', raw_extension) # bgeo.sc

Setting up the dictionary for json

# Prepare new cache metadata with published flag
new_cache = {
    "name": cache_name,
    "extension": extension,
    "full_path": cache_path,
    "is_animated": is_animated,
    "published": published # True or False value set in the function. True by default
}