Files
alpinebits_python/src/postprocessing.py
2025-09-23 09:48:29 +02:00

148 lines
4.7 KiB
Python

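"""
Script to verify whether all classes in alpine_bits_classes.py that share the same base name
(e.g., CustomerType, CustomerType125, CustomerType576) are structurally identical.
This helps to identify duplicate dataclasses generated by generateDS.
After printing the report, the script also removes the numerically suffixed duplicates of every
structurally identical group from alpine_bits_classes.py in place; a backup copy named
alpine_bits_classes.py.bak is saved before the file is rewritten.
Usage:
python src/postprocessing.py
Requirements:
- Only uses the standard library.
"""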
import ast
import re
from collections import defaultdict
from pathlib import Path
def get_class_basenames(classname):
    """Strip a trailing run of digits from a class name.

    Example: ``CustomerType125`` -> ``CustomerType``. Names without a
    trailing digit run are returned unchanged.
    """
    suffix = re.search(r'\d+$', classname)
    if suffix is None:
        return classname
    # Cut the matched digits out while keeping whatever surrounds them.
    return classname[:suffix.start()] + classname[suffix.end():]
def extract_classes(filepath):
    """Read a Python source file and map each top-level class name to its AST node.

    Only classes defined directly at module level are returned; classes
    nested inside functions or other classes are not. If a name is defined
    more than once, the last definition wins.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        module = ast.parse(handle.read())
    return {
        stmt.name: stmt
        for stmt in module.body
        if isinstance(stmt, ast.ClassDef)
    }
def class_struct_signature(class_node):
    """Return a hashable tuple describing the structure of a class.

    The tuple is ``(bases, methods, attrs)``:

    * ``bases``   - base classes in declaration order; plain names are kept
      as strings, anything else (e.g. ``module.Base``) as its ``ast.dump``.
    * ``methods`` - sorted names of ``def`` and ``async def`` members.
    * ``attrs``   - sorted names of class-level attributes, covering both
      plain assignments (``x = 1``) and annotated ones (``x: int = 1`` or
      ``x: int``).

    Annotated assignments must be included because dataclass fields are
    declared that way; ignoring them would make dataclasses with different
    fields compare as identical. Only names are compared: annotations,
    default values, method bodies and nested classes are not part of the
    signature.
    """
    bases = tuple(
        base.id if isinstance(base, ast.Name) else ast.dump(base)
        for base in class_node.bases
    )
    methods = []
    attrs = []
    for item in class_node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            methods.append(item.name)
        elif isinstance(item, ast.Assign):
            attrs.extend(
                target.id for target in item.targets
                if isinstance(target, ast.Name)
            )
        elif isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
            # Dataclass-style field: ``name: type`` or ``name: type = default``.
            attrs.append(item.target.id)
    return (bases, tuple(sorted(methods)), tuple(sorted(attrs)))
def remove_identical_class_suffixes(filepath: Path):
    """
    Remove structurally identical duplicate classes that differ only by a numeric suffix.

    Top-level classes are grouped by base name (trailing digits stripped). For
    every group whose members all share the same structural signature, one
    class is kept and the others are deleted from the file:

    * the unsuffixed class is kept when it exists;
    * otherwise the first class of the group in definition order is kept, so
      a group is never deleted completely.

    Each removed class is cut out by its exact source span (decorators
    included, taken from the AST), together with the blank lines that
    directly follow it. Other top-level code between classes is left alone.

    Before the file is rewritten, a backup is written next to it with
    ``.bak`` appended to the file name. If there is nothing to remove, the
    file and any existing backup are left untouched.

    Note: references to a removed class elsewhere in the file (annotations,
    base-class lists, ...) are NOT rewritten by this function.

    Args:
        filepath: Path of the Python source file to clean; a plain string is
            accepted as well.
    """
    import shutil

    # Callers may pass a plain string; ``with_suffix`` below needs a Path.
    filepath = Path(filepath)

    # Parse classes and group them by base name.
    classes = extract_classes(filepath)
    grouped = defaultdict(list)
    for cname in classes:
        grouped[get_class_basenames(cname)].append(cname)

    # Decide which classes to drop: every member of a structurally identical
    # group except the one we keep.
    to_remove = set()
    for base, classnames in grouped.items():
        if len(classnames) < 2:
            continue
        sigs = [class_struct_signature(classes[c]) for c in classnames]
        if any(s != sigs[0] for s in sigs):
            continue
        # Prefer the unsuffixed class; fall back to the first definition so
        # the group never disappears entirely.
        keeper = base if base in classnames else classnames[0]
        to_remove.update(c for c in classnames if c != keeper)

    if not to_remove:
        # Do not touch the file or overwrite an earlier backup on a no-op run.
        print("Removed 0 duplicate class definitions. File left unchanged.")
        return

    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Collect 0-based indices of all lines belonging to removed classes.
    # ``end_lineno`` requires Python 3.8+.
    doomed = set()
    for cname in to_remove:
        node = classes[cname]
        first = min([node.lineno] + [dec.lineno for dec in node.decorator_list])
        last = node.end_lineno
        doomed.update(range(first - 1, last))
        # Also drop the blank lines right after the class so that repeated
        # removals do not pile up empty lines.
        trailing = last
        while trailing < len(lines) and not lines[trailing].strip():
            doomed.add(trailing)
            trailing += 1

    new_lines = [line for index, line in enumerate(lines) if index not in doomed]

    # Backup original file, then write the cleaned version.
    backup_path = filepath.with_suffix(filepath.suffix + ".bak")
    shutil.copy(filepath, backup_path)
    with open(filepath, "w", encoding="utf-8") as f:
        f.writelines(new_lines)
    print(f"Removed {len(to_remove)} duplicate class definitions. Backup saved as {backup_path}")
# Example usage:
# remove_identical_class_suffixes(Path("src/alpine_bits_classes.py"))
def main():
    """Report same-base-name class groups, then strip the identical duplicates.

    Works on ``alpine_bits_classes.py`` located in the same directory as this
    script: prints the groups whose members are structurally identical,
    prints the groups whose members differ, and finally calls
    ``remove_identical_class_suffixes`` on that file.
    """
    target = Path(__file__).parent / "alpine_bits_classes.py"
    class_nodes = extract_classes(target)

    # Bucket class names by their digit-stripped base name.
    by_base = defaultdict(list)
    for name in class_nodes:
        by_base[get_class_basenames(name)].append(name)

    identical, different = [], []
    for base, members in by_base.items():
        if len(members) <= 1:
            continue  # a single class has nothing to be compared with
        signatures = [class_struct_signature(class_nodes[m]) for m in members]
        reference = signatures[0]
        if all(sig == reference for sig in signatures):
            identical.append((base, members))
        else:
            different.append((base, members))

    print("=== Structurally Identical Groups ===")
    for base, members in identical:
        print(f"{base}: {', '.join(members)}")

    print("\n=== Structurally Different Groups ===")
    for base, members in different:
        print(f"{base}: {', '.join(members)}")

    remove_identical_class_suffixes(target)
# Run the report-and-cleanup routine only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()