initial commit
This commit is contained in:
517
extract_symbols.py
Normal file
517
extract_symbols.py
Normal file
@@ -0,0 +1,517 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
KiCad 9 Symbol Metadata Extractor
==================================
Walks the sheet hierarchy reachable from the project's root schematic
(falling back to every .kicad_sch file in the project directory when no
root schematic can be found) and extracts metadata for every placed
symbol (component instance), correctly expanding hierarchical sheet
instances so that each unique reference in the final design becomes its
own record.

KiCad stores multi-instance sheets by embedding an `(instances ...)`
block in each symbol.  That block contains one `(path ...)` entry per
sheet instantiation, each with the authoritative reference for that
copy.  This script reads those paths so a sheet used N times produces
N distinct records per symbol.

Output: extract_symbols.json (written to the project directory)

Usage:
    python3 extract_symbols.py [project_dir]

If project_dir is omitted, the directory containing this script is used.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S-expression parser
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tokenize(text: str) -> list:
|
||||
"""
|
||||
Convert raw KiCad S-expression text into a flat list of tokens.
|
||||
Token forms:
|
||||
('OPEN',) – opening paren
|
||||
('CLOSE',) – closing paren
|
||||
('ATOM', value) – unquoted word / number / bool
|
||||
('STR', value) – double-quoted string (escapes resolved)
|
||||
"""
|
||||
tokens = []
|
||||
i, n = 0, len(text)
|
||||
while i < n:
|
||||
c = text[i]
|
||||
if c in ' \t\r\n':
|
||||
i += 1
|
||||
elif c == '(':
|
||||
tokens.append(('OPEN',))
|
||||
i += 1
|
||||
elif c == ')':
|
||||
tokens.append(('CLOSE',))
|
||||
i += 1
|
||||
elif c == '"':
|
||||
j = i + 1
|
||||
buf = []
|
||||
while j < n:
|
||||
if text[j] == '\\' and j + 1 < n:
|
||||
buf.append(text[j + 1])
|
||||
j += 2
|
||||
elif text[j] == '"':
|
||||
j += 1
|
||||
break
|
||||
else:
|
||||
buf.append(text[j])
|
||||
j += 1
|
||||
tokens.append(('STR', ''.join(buf)))
|
||||
i = j
|
||||
else:
|
||||
j = i
|
||||
while j < n and text[j] not in ' \t\r\n()':
|
||||
j += 1
|
||||
tokens.append(('ATOM', text[i:j]))
|
||||
i = j
|
||||
return tokens
|
||||
|
||||
|
||||
def _parse(tokens: list, pos: int) -> tuple:
|
||||
"""
|
||||
Recursively parse one S-expression value starting at *pos*.
|
||||
Returns (parsed_value, next_pos).
|
||||
A list/node becomes a Python list; atoms and strings become strings.
|
||||
"""
|
||||
tok = tokens[pos]
|
||||
kind = tok[0]
|
||||
if kind == 'OPEN':
|
||||
pos += 1
|
||||
items = []
|
||||
while tokens[pos][0] != 'CLOSE':
|
||||
item, pos = _parse(tokens, pos)
|
||||
items.append(item)
|
||||
return items, pos + 1 # consume CLOSE
|
||||
elif kind in ('ATOM', 'STR'):
|
||||
return tok[1], pos + 1
|
||||
else:
|
||||
raise ValueError(f"Unexpected token at pos {pos}: {tok}")
|
||||
|
||||
|
||||
def parse_sexp(text: str):
    """
    Tokenize *text* and parse the first top-level S-expression in it.

    Returns the parsed root value; the position after the root (and any
    tokens that follow it) is discarded.
    """
    parsed_root = _parse(_tokenize(text), 0)[0]
    return parsed_root
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to navigate parsed S-expressions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def tag(node) -> str:
    """Return the leading string of a non-empty list node, else ''."""
    if not isinstance(node, list) or not node:
        return ''
    head = node[0]
    return head if isinstance(head, str) else ''
|
||||
|
||||
|
||||
def children(node: list) -> list:
    """Return every element after the first of a list node; [] for non-lists."""
    if not isinstance(node, list):
        return []
    return node[1:]
|
||||
|
||||
|
||||
def first_child_with_tag(node: list, name: str):
    """Return the first list child of *node* whose tag equals *name*, or None."""
    matching = (
        kid for kid in children(node)
        if isinstance(kid, list) and tag(kid) == name
    )
    return next(matching, None)
|
||||
|
||||
|
||||
def all_children_with_tag(node: list, name: str) -> list:
    """Return, in order, every list child of *node* whose tag equals *name*."""
    found = []
    for kid in children(node):
        if isinstance(kid, list) and tag(kid) == name:
            found.append(kid)
    return found
|
||||
|
||||
|
||||
def scalar(node, index: int = 1, default=None):
    """
    Return ``node[index]`` when *node* is a list longer than *index*;
    otherwise return *default*.
    """
    if not isinstance(node, list) or len(node) <= index:
        return default
    return node[index]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Instance path extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def extract_instances(sym_node: list) -> list[dict]:
    """
    Flatten a symbol's (instances ...) block into one dict per (path ...)
    entry, across every (project ...) child.  Each dict carries:

        path      – second element of the path node ('' if absent)
        reference – value of the path's (reference ...) child, or None
        unit      – value of the path's (unit ...) child, or None
        project   – second element of the enclosing project node ('' if absent)

    A symbol without an instances block yields an empty list.
    """
    block = first_child_with_tag(sym_node, 'instances')
    if block is None:
        return []

    def _value_of(path_entry, child_tag):
        # Second element of the named child, or None when the child is missing.
        hit = first_child_with_tag(path_entry, child_tag)
        return scalar(hit, 1) if hit else None

    collected = []
    for proj in all_children_with_tag(block, 'project'):
        proj_name = scalar(proj, 1, '')
        collected.extend(
            {
                'path': scalar(entry, 1, ''),
                'reference': _value_of(entry, 'reference'),
                'unit': _value_of(entry, 'unit'),
                'project': proj_name,
            }
            for entry in all_children_with_tag(proj, 'path')
        )
    return collected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Symbol extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def extract_symbol_records(sym_node: list, sheet_file: str) -> list[dict]:
    """
    Extract metadata from a placed-symbol node and return one record per
    hierarchical instance (i.e. one record per path in the instances block).

    For a sheet used only once, this produces a single record.
    For a sheet instantiated N times, this produces N records — each with
    its own reference designator taken from the instances block.

    Every record holds the fields shared by the placement (sheet_file,
    lib_id, at, exclude_from_sim, in_bom, on_board, dnp, uuid, properties,
    plus value / footprint / datasheet / description promoted from the
    properties) and the per-instance fields reference, instance_path,
    instance_unit and instance_project.

    When the symbol has no instances block a single fallback record is
    returned: its reference comes from the 'Reference' property, its
    instance_unit from the symbol's own (unit ...) child, and its
    instance_path / instance_project are None.
    """
    # --- Shared fields (same for all instances of this symbol placement) ---
    shared = {
        'sheet_file': sheet_file,
        'lib_id': None,
        'at': None,
        'exclude_from_sim': None,
        'in_bom': None,
        'on_board': None,
        'dnp': None,
        'uuid': None,
        'properties': {},
    }
    # The symbol's own (unit N) value.  Kept out of `shared` so the record
    # schema is unchanged; it only feeds the no-instances fallback below.
    placed_unit = None

    for child in children(sym_node):
        if not isinstance(child, list):
            continue
        t = tag(child)
        if t in ('lib_id', 'uuid'):
            shared[t] = scalar(child, 1)
        elif t == 'at':
            shared['at'] = {
                'x': scalar(child, 1),
                'y': scalar(child, 2),
                'angle': scalar(child, 3, 0),
            }
        elif t in ('exclude_from_sim', 'in_bom', 'on_board', 'dnp'):
            # Flags are stored as the atoms yes / no.
            shared[t] = scalar(child, 1) == 'yes'
        elif t == 'unit':
            placed_unit = scalar(child, 1)
        elif t == 'property':
            prop_name = scalar(child, 1)
            if prop_name is not None:
                shared['properties'][prop_name] = scalar(child, 2)

    # Promote standard properties for convenient access
    props = shared['properties']
    shared['value'] = props.get('Value')
    shared['footprint'] = props.get('Footprint')
    shared['datasheet'] = props.get('Datasheet')
    shared['description'] = props.get('Description')

    def _make_record(reference, path, unit, project) -> dict:
        # Build one output record with its own copy of the properties dict.
        record = dict(shared)
        record['properties'] = dict(props)
        record['reference'] = reference
        record['instance_path'] = path
        record['instance_unit'] = unit
        record['instance_project'] = project
        return record

    # --- Per-instance fields (one record per path in instances block) ---
    instances = extract_instances(sym_node)
    if not instances:
        # Fallback: no instances block — use top-level Reference property
        # and the symbol's own unit.
        return [_make_record(props.get('Reference'), None, placed_unit, None)]

    return [
        _make_record(inst['reference'], inst['path'], inst['unit'], inst['project'])
        for inst in instances
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hierarchy walker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def find_reachable_sheets(root_sch: Path) -> list[Path]:
    """
    Walk the sheet hierarchy breadth-first starting from *root_sch* and
    return an ordered list of every .kicad_sch file that is actually
    reachable (i.e. referenced directly or transitively as a sub-sheet).

    A file referenced several times (same sheet used N times) is visited
    only once.  Files are identified by their resolved path, so two
    different files that merely share a filename in different directories
    are both visited.  The returned paths are the unresolved ones built
    from each parent sheet's directory plus the 'Sheetfile' property.

    A sheet that cannot be read (OSError) is still listed but contributes
    no sub-sheets; referenced files that do not exist are skipped.
    """
    from collections import deque

    reachable: list[Path] = []
    visited: set[Path] = set()  # resolved paths already dequeued
    queue = deque([root_sch])

    while queue:
        sch = queue.popleft()
        identity = sch.resolve()
        if identity in visited:
            continue
        visited.add(identity)
        reachable.append(sch)

        try:
            text = sch.read_text(encoding='utf-8')
        except OSError:
            continue

        root_node = parse_sexp(text)
        for child in children(root_node):
            if tag(child) != 'sheet':
                continue
            for prop in all_children_with_tag(child, 'property'):
                if scalar(prop, 1) != 'Sheetfile':
                    continue
                child_filename = scalar(prop, 2)
                if not child_filename:
                    continue
                # Sub-sheet filenames are joined onto the parent sheet's directory.
                child_path = sch.parent / child_filename
                if child_path.exists() and child_path.resolve() not in visited:
                    queue.append(child_path)

    return reachable
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-file parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def extract_from_schematic(sch_path: Path) -> list[dict]:
    """
    Read and parse a single .kicad_sch file and collect the records of
    every placed symbol in it.

    Only direct children of the file's root node are examined, and only
    those tagged 'symbol' that carry a (lib_id ...) child; the lib_symbols
    section is therefore never descended into.  Records are tagged with
    the file's bare name as their sheet_file.
    """
    root = parse_sexp(sch_path.read_text(encoding='utf-8'))

    collected = []
    for node in children(root):
        if not isinstance(node, list) or tag(node) != 'symbol':
            continue
        if first_child_with_tag(node, 'lib_id') is None:
            continue
        collected.extend(extract_symbol_records(node, sch_path.name))
    return collected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_root_uuid(project_dir: Path) -> str | None:
|
||||
"""
|
||||
Find the UUID of the root schematic by reading the .kicad_pro file
|
||||
(which names the root sheet) or by scanning for the top-level sheet.
|
||||
Returns the UUID string, or None if it cannot be determined.
|
||||
"""
|
||||
# The .kicad_pro file tells us the root schematic filename
|
||||
pro_files = list(project_dir.glob('*.kicad_pro'))
|
||||
root_sch: Path | None = None
|
||||
|
||||
if pro_files:
|
||||
import json as _json
|
||||
try:
|
||||
pro = _json.loads(pro_files[0].read_text(encoding='utf-8'))
|
||||
root_name = pro.get('sheets', [{}])[0] if pro.get('sheets') else None
|
||||
# Fall back: just find a .kicad_sch with the same stem as the .pro
|
||||
root_sch = project_dir / (pro_files[0].stem + '.kicad_sch')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if root_sch is None or not root_sch.exists():
|
||||
# Guess: the .kicad_sch whose stem matches the .kicad_pro
|
||||
if pro_files:
|
||||
candidate = project_dir / (pro_files[0].stem + '.kicad_sch')
|
||||
if candidate.exists():
|
||||
root_sch = candidate
|
||||
|
||||
if root_sch is None or not root_sch.exists():
|
||||
return None
|
||||
|
||||
# Extract the first (uuid ...) at the root level of the file
|
||||
import re
|
||||
text = root_sch.read_text(encoding='utf-8')
|
||||
m = re.search(r'\(uuid\s+"([^"]+)"', text)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def main(project_dir: Path):
    """
    Extract every placed symbol of the KiCad project in *project_dir* and
    write the result to ``extract_symbols.json`` inside that directory.

    Steps:
      1. Locate the root schematic (same stem as the first .kicad_pro) and
         walk its sheet hierarchy; if there is none, scan every
         .kicad_sch under the directory instead.
      2. Parse each sheet into per-instance symbol records.
      3. When the root UUID is known, drop records whose instance path
         does not belong to that root (stale paths).
      4. Deduplicate by (instance_path, uuid), then drop records whose
         uuid was already seen in a different sheet file (UUID collision).
      5. Print statistics, write the JSON file and print a summary table.

    Exits the process with status 1 when no schematic files are found.
    """
    from collections import defaultdict

    def _rel(path: Path) -> str:
        # Path relative to the project for display/output; a sheet that
        # lives outside the project directory is shown as-is.
        try:
            return str(path.relative_to(project_dir))
        except ValueError:
            return str(path)

    def _txt(value) -> str:
        # Format-safe text: None would raise under a width format spec.
        return '' if value is None else str(value)

    def _is_power(rec: dict) -> bool:
        return (rec.get('lib_id') or '').startswith('power:')

    # Determine root schematic and walk the real hierarchy
    root_uuid = get_root_uuid(project_dir)

    pro_files = list(project_dir.glob('*.kicad_pro'))
    root_sch = project_dir / (pro_files[0].stem + '.kicad_sch') if pro_files else None

    if root_sch and root_sch.exists():
        sch_files = find_reachable_sheets(root_sch)
        print(f"Root sheet: {root_sch.name}")
        print(f"Found {len(sch_files)} reachable schematic file(s) in hierarchy:")
    else:
        # Fallback: glob everything.  The '*.kicad_sch' pattern already
        # excludes '.bak' files, so only autosaves need filtering.
        sch_files = sorted(
            p for p in project_dir.rglob('*.kicad_sch')
            if not p.name.startswith('_autosave')
        )
        print(f"Warning: could not find root schematic; scanning all {len(sch_files)} files.\n")

    if not sch_files:
        print(f"No .kicad_sch files found in {project_dir}", file=sys.stderr)
        sys.exit(1)

    for f in sch_files:
        print(f" {_rel(f)}")

    all_records: list[dict] = []
    for sch_path in sch_files:
        print(f"\nParsing {sch_path.name} ...", end=' ', flush=True)
        records = extract_from_schematic(sch_path)
        print(f"{len(records)} instance record(s)")
        all_records.extend(records)

    # All records come from reachable sheets, so no orphan filtering needed.
    # Still filter by root UUID to catch stale instance paths.
    if root_uuid:
        root_path = f'/{root_uuid}'

        def _is_active(rec: dict) -> bool:
            path = rec.get('instance_path')
            if path is None:
                # Fallback record (no instances block): there is no path
                # that could be stale, so keep it.
                return True
            # Symbols placed directly on the root sheet have the path
            # "/<root_uuid>" with no trailing slash; sub-sheet symbols
            # have "/<root_uuid>/<sheet_uuid>...".
            return path == root_path or path.startswith(root_path + '/')

        active = [r for r in all_records if _is_active(r)]
        stale = len(all_records) - len(active)
        print(f"\nTotal records : {len(all_records)}")
        if stale:
            print(f"Stale paths dropped: {stale}")
    else:
        active = all_records
        print(f"\nTotal records: {len(all_records)}")

    # ---- Stage 1: dedup by (instance_path, uuid) ----
    # Collapses records that were seen from multiple sheet scans into one.
    seen: set = set()
    stage1: list[dict] = []
    for r in active:
        key = (r.get('instance_path'), r.get('uuid'))
        if key not in seen:
            seen.add(key)
            stage1.append(r)

    # ---- Stage 2: dedup by uuid across different sheet files ----
    # If the SAME uuid appears in two *different* .kicad_sch files, that is a
    # UUID collision in the design (copy-paste without UUID regeneration).
    # The same uuid appearing in the same sheet file with different instance
    # paths is *correct* — it is how multi-instance sheets work, so those are
    # left alone.
    uuid_sheets: dict = {}       # uuid -> set of sheet_files seen
    uuid_collisions: dict = {}   # uuid -> list of colliding records
    unique: list[dict] = []
    for r in stage1:
        u = r.get('uuid')
        if u is None:
            # A missing uuid is not an identity; never treat it as a collision.
            unique.append(r)
            continue
        sf = r.get('sheet_file', '')
        sheets_so_far = uuid_sheets.setdefault(u, set())
        if not sheets_so_far or sf in sheets_so_far:
            # First time seeing this uuid, OR it's from the same sheet file
            # (legitimate multi-instance expansion) — keep it.
            sheets_so_far.add(sf)
            unique.append(r)
        else:
            # Same uuid, but from a DIFFERENT sheet file → UUID collision.
            # Recorded for the report and dropped from the output.
            uuid_collisions.setdefault(u, []).append(r)

    if uuid_collisions:
        print(f"\nNote: {len(uuid_collisions)} UUID collision(s) detected "
              f"(same symbol UUID in multiple sheet files — likely copy-paste artifacts).")
        print(" Only the first occurrence is kept in the output.")
        for u, recs in list(uuid_collisions.items())[:10]:
            refs = [r.get('reference') for r in recs]
            files = [r.get('sheet_file') for r in recs]
            print(f" uuid={str(u)[:8]}... refs={refs} sheets={files}")

    print(f"\nUnique instances after dedup: {len(unique)}")

    # Separate power symbols from real parts
    real = [r for r in unique if not _is_power(r)]
    power = [r for r in unique if _is_power(r)]
    print(f" Non-power parts : {len(real)}")
    print(f" Power symbols : {len(power)}")

    # Check for true reference duplicates (same ref, different uuid = multi-unit)
    by_ref: dict[str, list] = defaultdict(list)
    for r in unique:
        by_ref[r.get('reference') or ''].append(r)

    multi_unit = {ref: recs for ref, recs in by_ref.items()
                  if len(recs) > 1 and len({r['uuid'] for r in recs}) > 1}
    if multi_unit:
        # References starting with '#' (e.g. power symbols) are not reported.
        refs = [ref for ref in multi_unit if not ref.startswith('#')]
        if refs:
            print(f"\nMulti-unit components ({len(refs)} references, expected for split-unit symbols):")
            for ref in sorted(refs):
                units = [r['instance_unit'] for r in multi_unit[ref]]
                print(f" {ref}: units {units}")

    output = {
        "project_dir": str(project_dir),
        "root_uuid": root_uuid,
        "schematic_files": [_rel(f) for f in sch_files],
        "total_instances": len(unique),
        "non_power_count": len(real),
        "symbols": unique,
    }

    out_path = project_dir / 'extract_symbols.json'
    out_path.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding='utf-8')
    print(f"\nOutput written to: {out_path}")

    # Print a summary table
    print("\n--- Summary (non-power parts, sorted by reference) ---")
    for r in sorted(real, key=lambda x: _txt(x.get('reference'))):
        ref = _txt(r.get('reference'))
        value = _txt(r.get('value'))
        lib = _txt(r.get('lib_id'))
        mpn = _txt(r['properties'].get('MPN'))
        sheet = _txt(r.get('sheet_file'))
        unit = _txt(r.get('instance_unit'))
        print(f" {ref:<12} u{unit:<2} {value:<30} {lib:<40} MPN={mpn:<25} [{sheet}]")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Project directory: first CLI argument if given, otherwise the
    # directory holding this script; always resolved to an absolute path.
    cli_args = sys.argv[1:]
    project_dir = (Path(cli_args[0]) if cli_args else Path(__file__).parent).resolve()

    if project_dir.is_dir():
        main(project_dir)
    else:
        print(f"Error: {project_dir} is not a directory", file=sys.stderr)
        sys.exit(1)
|
||||
Reference in New Issue
Block a user