47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Format a large XML file for readability."""
|
|
|
|
import xml.dom.minidom
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
def format_xml(input_path, output_path=None):
    """Pretty-print an XML file and write the result to disk.

    The input is read as raw bytes and handed to the parser undecoded, so the
    encoding named in the XML declaration (or implied by a BOM) is honoured
    instead of assuming the file is UTF-8. The formatted output is always
    written as UTF-8. Progress messages are printed to stderr.

    Args:
        input_path: Path (str or path-like) of the XML file to format.
        output_path: Destination path. When ``None``, the output is written
            next to the input as ``<stem>_formatted<suffix>``.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 if ``input_path`` does not exist.

    Parse errors raised by ``xml.dom.minidom`` and I/O errors from reading or
    writing are not caught here and propagate to the caller.
    """
    input_file = Path(input_path)

    if not input_file.exists():
        print(f"Error: File {input_path} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Reading {input_file.name}...", file=sys.stderr)
    # Read bytes, not text: decoding here with a fixed codec would fail on
    # (or silently ignore) a non-UTF-8 encoding declared by the document.
    xml_content = input_file.read_bytes()

    print("Parsing XML...", file=sys.stderr)
    dom = xml.dom.minidom.parseString(xml_content)

    print("Formatting XML...", file=sys.stderr)
    pretty_xml = dom.toprettyxml(indent=" ")

    # toprettyxml turns the whitespace-only text nodes of already-indented
    # input into blank lines; drop every line that is empty after stripping.
    pretty_xml = "\n".join(
        line for line in pretty_xml.split("\n") if line.strip()
    )

    if output_path is None:
        # Path.with_stem requires Python 3.9+.
        output_path = input_file.with_stem(input_file.stem + "_formatted")

    print(f"Writing formatted XML to {output_path}...", file=sys.stderr)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(pretty_xml)

    print(f"Done! Formatted XML saved to {output_path}", file=sys.stderr)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: one required input path, one optional
    # output path. Everything after the script name is a positional argument.
    cli_args = sys.argv[1:]

    if not cli_args:
        print("Usage: python format_xml.py <input_file> [output_file]", file=sys.stderr)
        sys.exit(1)

    source_arg = cli_args[0]
    # Passing None lets format_xml pick its default output location.
    target_arg = cli_args[1] if len(cli_args) > 1 else None

    format_xml(source_arg, target_arg)
|