Symbols (2 libraries): - symbols/ultra-mini.kicad_sym - symbols/M8Mini.kicad_sym - symbols/Res_0402.kicad_sym (R_temp template + 694 generated symbols) Footprints (33 used in ultra project): - footprints/custom.pretty/ (30 mods) - footprints/M8Mini.pretty/ (3 mods) 3D models (31 STEP/STP files) Scripts: - scripts/extract_symbols.py KiCad 9 symbol metadata extractor - scripts/export_bom.py BOM CSV exporter - scripts/gen_resistors_0402.py 0402 resistor symbol generator from parts list
518 lines
18 KiB
Python
518 lines
18 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
KiCad 9 Symbol Metadata Extractor
|
||
==================================
|
||
Walks every .kicad_sch file in the project directory and extracts
|
||
metadata for every placed symbol (component instance), correctly
|
||
expanding hierarchical sheet instances so that each unique reference
|
||
in the final design becomes its own record.
|
||
|
||
KiCad stores multi-instance sheets by embedding an `(instances ...)`
|
||
block in each symbol. That block contains one `(path ...)` entry per
|
||
sheet instantiation, each with the authoritative reference for that
|
||
copy. This script reads those paths so a sheet used N times produces
|
||
N distinct records per symbol.
|
||
|
||
Output: extract_symbols.json (written to the project directory)
|
||
|
||
Usage:
|
||
python3 extract_symbols.py [project_dir]
|
||
|
||
If project_dir is omitted, the directory containing this script is used.
|
||
"""
|
||
|
||
import json
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# S-expression parser
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _tokenize(text: str) -> list:
    """
    Convert raw KiCad S-expression text into a flat list of tokens.

    Token forms:
      ('OPEN',)        – opening paren
      ('CLOSE',)       – closing paren
      ('ATOM', value)  – unquoted word / number / bool
      ('STR', value)   – double-quoted string (escapes resolved)

    Inside a quoted string a backslash escapes the character after it.
    The control escapes ``\\n``, ``\\r`` and ``\\t`` are translated to the
    characters they denote; any other escaped character (for example
    ``\\"`` or ``\\\\``) is taken literally.  A string that is never closed
    runs to the end of the input.
    """
    # Escapes that stand for a control character rather than for themselves.
    control_escapes = {'n': '\n', 'r': '\r', 't': '\t'}
    whitespace = ' \t\r\n'
    atom_terminators = ' \t\r\n()'

    tokens = []
    i, n = 0, len(text)
    while i < n:
        c = text[i]
        if c in whitespace:
            i += 1
        elif c == '(':
            tokens.append(('OPEN',))
            i += 1
        elif c == ')':
            tokens.append(('CLOSE',))
            i += 1
        elif c == '"':
            j = i + 1
            buf = []
            while j < n:
                ch = text[j]
                if ch == '\\' and j + 1 < n:
                    escaped = text[j + 1]
                    buf.append(control_escapes.get(escaped, escaped))
                    j += 2
                elif ch == '"':
                    j += 1  # consume the closing quote
                    break
                else:
                    # Includes a lone trailing backslash at end of input.
                    buf.append(ch)
                    j += 1
            tokens.append(('STR', ''.join(buf)))
            i = j
        else:
            # Bare atom: runs until whitespace or a paren.
            j = i
            while j < n and text[j] not in atom_terminators:
                j += 1
            tokens.append(('ATOM', text[i:j]))
            i = j
    return tokens
|
||
|
||
|
||
def _parse(tokens: list, pos: int) -> tuple:
    """
    Recursively parse one S-expression value starting at *pos*.

    Returns (parsed_value, next_pos).
    A list/node becomes a Python list; atoms and strings become strings.

    Raises ValueError for a stray closing paren, an unknown token kind, or
    input that ends before the expression is complete (empty token list or
    unbalanced parentheses).
    """
    total = len(tokens)
    if pos >= total:
        raise ValueError(f"Unexpected end of input at pos {pos}")

    tok = tokens[pos]
    kind = tok[0]
    if kind in ('ATOM', 'STR'):
        return tok[1], pos + 1
    if kind != 'OPEN':
        raise ValueError(f"Unexpected token at pos {pos}: {tok}")

    opened_at = pos
    pos += 1
    items = []
    while True:
        if pos >= total:
            # Ran off the end without meeting the matching CLOSE.
            raise ValueError(
                f"Unbalanced parentheses: list opened at pos {opened_at} is never closed"
            )
        if tokens[pos][0] == 'CLOSE':
            return items, pos + 1  # consume CLOSE
        item, pos = _parse(tokens, pos)
        items.append(item)
|
||
|
||
|
||
def parse_sexp(text: str):
    """Tokenize *text* and return its first top-level S-expression.

    The result is the nested-list form produced by ``_parse``; anything
    following the first complete expression is not examined.
    """
    root_node = _parse(_tokenize(text), 0)[0]
    return root_node
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers to navigate parsed S-expressions
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def tag(node) -> str:
    """Return the leading keyword of a parsed node, or '' when it has none.

    Only a non-empty list whose first element is a string has a tag.
    """
    if not isinstance(node, list):
        return ''
    if not node:
        return ''
    head = node[0]
    return head if isinstance(head, str) else ''
|
||
|
||
|
||
def children(node: list) -> list:
    """Return every element after the first; anything that is not a list yields []."""
    if isinstance(node, list):
        return node[1:]
    return []
|
||
|
||
|
||
def first_child_with_tag(node: list, name: str):
    """Return the first list child of *node* whose tag equals *name*, or None."""
    return next(
        (sub for sub in children(node) if isinstance(sub, list) and tag(sub) == name),
        None,
    )
|
||
|
||
|
||
def all_children_with_tag(node: list, name: str) -> list:
    """Collect, in document order, every list child of *node* tagged *name*."""
    matches = []
    for sub in children(node):
        if isinstance(sub, list) and tag(sub) == name:
            matches.append(sub)
    return matches
|
||
|
||
|
||
def scalar(node, index: int = 1, default=None):
    """Return ``node[index]``, or *default* when *node* is not a list or is too short."""
    if not isinstance(node, list) or len(node) <= index:
        return default
    return node[index]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Instance path extraction
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def extract_instances(sym_node: list) -> list[dict]:
    """
    Flatten a symbol's (instances ...) block into one dict per (path ...) entry.

    Each dict carries:
      path       – second element of the (path ...) node ('' if absent)
      reference  – value of its (reference ...) child, or None
      unit       – value of its (unit ...) child, or None
      project    – name of the enclosing (project ...) node ('' if absent)

    A symbol without an instances block yields an empty list.
    """
    block = first_child_with_tag(sym_node, 'instances')
    if block is None:
        return []

    # scalar() already returns None for a missing child node, so the
    # reference/unit lookups need no separate None test.
    return [
        {
            'path': scalar(path_entry, 1, ''),
            'reference': scalar(first_child_with_tag(path_entry, 'reference'), 1),
            'unit': scalar(first_child_with_tag(path_entry, 'unit'), 1),
            'project': scalar(project_entry, 1, ''),
        }
        for project_entry in all_children_with_tag(block, 'project')
        for path_entry in all_children_with_tag(project_entry, 'path')
    ]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Symbol extraction
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def extract_symbol_records(sym_node: list, sheet_file: str) -> list[dict]:
    """
    Extract metadata from a placed-symbol node and return one record per
    hierarchical instance (i.e. one record per path in the instances block).

    For a sheet used only once, this produces a single record.
    For a sheet instantiated N times, this produces N records — each with
    its own reference designator taken from the instances block.

    Every record holds the fields shared by the placement (sheet_file,
    lib_id, at, the yes/no flags, uuid, properties and the promoted
    value/footprint/datasheet/description) plus reference, instance_path,
    instance_unit and instance_project.

    When the symbol has no instances block a single record is returned whose
    reference comes from the Reference property and whose instance_unit comes
    from the symbol's own top-level (unit ...) entry, if present.
    """
    # Flags stored as (tag yes|no); anything other than 'yes' reads as False.
    flag_tags = ('exclude_from_sim', 'in_bom', 'on_board', 'dnp')

    # --- Shared fields (same for all instances of this symbol placement) ---
    shared = {
        'sheet_file': sheet_file,
        'lib_id': None,
        'at': None,
        'exclude_from_sim': None,
        'in_bom': None,
        'on_board': None,
        'dnp': None,
        'uuid': None,
        'properties': {},
    }
    # Top-level (unit N) of the placement.  Kept out of `shared` so the record
    # layout is unchanged; it only feeds the no-instances fallback below.
    symbol_unit = None

    for child in children(sym_node):
        if not isinstance(child, list):
            continue
        t = tag(child)
        if t in ('lib_id', 'uuid'):
            shared[t] = scalar(child, 1)
        elif t == 'at':
            shared['at'] = {
                'x': scalar(child, 1),
                'y': scalar(child, 2),
                'angle': scalar(child, 3, 0),
            }
        elif t in flag_tags:
            shared[t] = scalar(child, 1) == 'yes'
        elif t == 'unit':
            symbol_unit = scalar(child, 1)
        elif t == 'property':
            prop_name = scalar(child, 1)
            if prop_name is not None:
                # A repeated property name overwrites the earlier value.
                shared['properties'][prop_name] = scalar(child, 2)

    # Promote standard properties for convenient access
    props = shared['properties']
    shared['value'] = props.get('Value')
    shared['footprint'] = props.get('Footprint')
    shared['datasheet'] = props.get('Datasheet')
    shared['description'] = props.get('Description')

    # --- Per-instance fields (one record per path in instances block) ---
    instances = extract_instances(sym_node)

    if not instances:
        # Fallback: no instances block — use the symbol's own Reference
        # property and top-level unit.
        record = dict(shared)
        record['reference'] = props.get('Reference')
        record['instance_path'] = None
        record['instance_unit'] = symbol_unit
        record['instance_project'] = None
        return [record]

    records = []
    for inst in instances:
        record = dict(shared)
        record['properties'] = dict(props)  # copy so each record is independent
        record['reference'] = inst['reference']
        record['instance_path'] = inst['path']
        record['instance_unit'] = inst['unit']
        record['instance_project'] = inst['project']
        records.append(record)

    return records
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Hierarchy walker
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def find_reachable_sheets(root_sch: Path) -> list[Path]:
    """
    Walk the sheet hierarchy breadth-first starting from *root_sch* and
    return, in visit order, every .kicad_sch file that is reachable
    (referenced directly or transitively through a sheet's "Sheetfile"
    property).  Sub-sheet filenames are resolved relative to the directory
    of the sheet that references them.

    A file referenced several times is visited once.  Files are identified
    by their resolved path, so two different sheets that merely share a
    filename in different directories are both visited.

    A sheet that cannot be read (OSError) is still listed but contributes no
    sub-sheets.  Referenced files that do not exist are ignored.
    """
    from collections import deque

    reachable: list[Path] = []
    visited: set[Path] = set()
    queue = deque([root_sch])

    while queue:
        sch = queue.popleft()
        identity = sch.resolve()
        if identity in visited:
            continue
        visited.add(identity)
        reachable.append(sch)

        try:
            text = sch.read_text(encoding='utf-8')
        except OSError:
            continue

        root_node = parse_sexp(text)
        for child in children(root_node):
            if tag(child) != 'sheet':
                continue
            for prop in all_children_with_tag(child, 'property'):
                if scalar(prop, 1) != 'Sheetfile':
                    continue
                child_filename = scalar(prop, 2)
                if not child_filename:
                    continue
                child_path = sch.parent / child_filename
                # Skipping already-visited files here only keeps the queue
                # short; the check at the top of the loop is authoritative.
                if child_path.exists() and child_path.resolve() not in visited:
                    queue.append(child_path)

    return reachable
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Per-file parsing
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def extract_from_schematic(sch_path: Path) -> list[dict]:
    """
    Read one .kicad_sch file and return the records of its placed symbols.

    Only top-level (symbol ...) nodes that carry a lib_id are treated as
    placed symbols; the (lib_symbols ...) library section is not descended
    into.  Each record's sheet_file is the bare filename of *sch_path*.
    """
    root = parse_sexp(sch_path.read_text(encoding='utf-8'))
    sheet_name = sch_path.name

    records: list[dict] = []
    for node in children(root):
        # (lib_symbols ...) and every other non-symbol node, as well as
        # non-list children, fail this tag test and are skipped.
        if tag(node) != 'symbol':
            continue
        if first_child_with_tag(node, 'lib_id') is None:
            continue
        records.extend(extract_symbol_records(node, sheet_name))
    return records
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def get_root_uuid(project_dir: Path) -> str | None:
|
||
"""
|
||
Find the UUID of the root schematic by reading the .kicad_pro file
|
||
(which names the root sheet) or by scanning for the top-level sheet.
|
||
Returns the UUID string, or None if it cannot be determined.
|
||
"""
|
||
# The .kicad_pro file tells us the root schematic filename
|
||
pro_files = list(project_dir.glob('*.kicad_pro'))
|
||
root_sch: Path | None = None
|
||
|
||
if pro_files:
|
||
import json as _json
|
||
try:
|
||
pro = _json.loads(pro_files[0].read_text(encoding='utf-8'))
|
||
root_name = pro.get('sheets', [{}])[0] if pro.get('sheets') else None
|
||
# Fall back: just find a .kicad_sch with the same stem as the .pro
|
||
root_sch = project_dir / (pro_files[0].stem + '.kicad_sch')
|
||
except Exception:
|
||
pass
|
||
|
||
if root_sch is None or not root_sch.exists():
|
||
# Guess: the .kicad_sch whose stem matches the .kicad_pro
|
||
if pro_files:
|
||
candidate = project_dir / (pro_files[0].stem + '.kicad_sch')
|
||
if candidate.exists():
|
||
root_sch = candidate
|
||
|
||
if root_sch is None or not root_sch.exists():
|
||
return None
|
||
|
||
# Extract the first (uuid ...) at the root level of the file
|
||
import re
|
||
text = root_sch.read_text(encoding='utf-8')
|
||
m = re.search(r'\(uuid\s+"([^"]+)"', text)
|
||
return m.group(1) if m else None
|
||
|
||
|
||
def main(project_dir: Path):
    """
    Extract every placed symbol of the project in *project_dir* to JSON.

    Steps:
      1. Locate the root schematic (same stem as the first *.kicad_pro) and
         walk its sheet hierarchy; if it cannot be found, fall back to every
         .kicad_sch below *project_dir* except autosave files.
      2. Parse each sheet into per-instance records.
      3. When the root UUID is known, keep only records whose instance path
         is the root sheet itself or lies below it; the rest are counted as
         stale and dropped.
      4. De-duplicate, then report UUID collisions and multi-unit references.
      5. Write extract_symbols.json into *project_dir* and print a summary
         table of the non-power parts.

    Progress is printed to stdout.  Exits with status 1 when no schematic
    files are found.
    """
    from collections import defaultdict

    def text_of(value) -> str:
        # Missing fields are stored as None, which f-string width specs reject.
        return '' if value is None else str(value)

    # Determine root schematic and walk the real hierarchy
    root_uuid = get_root_uuid(project_dir)

    pro_files = list(project_dir.glob('*.kicad_pro'))
    root_sch = project_dir / (pro_files[0].stem + '.kicad_sch') if pro_files else None

    if root_sch is not None and root_sch.exists():
        sch_files = find_reachable_sheets(root_sch)
        print(f"Root sheet: {root_sch.name}")
        print(f"Found {len(sch_files)} reachable schematic file(s) in hierarchy:")
    else:
        # Fallback: glob everything
        sch_files = sorted(
            p for p in project_dir.rglob('*.kicad_sch')
            if not p.name.startswith('_autosave')
        )
        print(f"Warning: could not find root schematic; scanning all {len(sch_files)} files.\n")

    if not sch_files:
        print(f"No .kicad_sch files found in {project_dir}", file=sys.stderr)
        sys.exit(1)

    for f in sch_files:
        print(f" {f.relative_to(project_dir)}")

    all_records: list[dict] = []
    for sch_path in sch_files:
        print(f"\nParsing {sch_path.name} ...", end=' ', flush=True)
        records = extract_from_schematic(sch_path)
        print(f"{len(records)} instance record(s)")
        all_records.extend(records)

    # All records come from reachable sheets, so no orphan filtering needed.
    # Still filter by root UUID to catch stale instance paths.
    if root_uuid:
        # A symbol placed directly on the root sheet has the path
        # "/<root_uuid>"; symbols in sub-sheets have "/<root_uuid>/<...>".
        root_path = f'/{root_uuid}'
        sub_prefix = root_path + '/'
        active = []
        for r in all_records:
            path = r.get('instance_path') or ''
            if path == root_path or path.startswith(sub_prefix):
                active.append(r)
        stale = len(all_records) - len(active)
        print(f"\nTotal records : {len(all_records)}")
        if stale:
            print(f"Stale paths dropped: {stale}")
    else:
        active = all_records
        print(f"\nTotal records: {len(all_records)}")

    # ---- Stage 1: dedup by (instance_path, uuid) ----
    # Collapses records that were seen from multiple sheet scans into one.
    seen: set = set()
    stage1: list[dict] = []
    for r in active:
        key = (r.get('instance_path'), r.get('uuid'))
        if key not in seen:
            seen.add(key)
            stage1.append(r)

    # ---- Stage 2: dedup by uuid across different sheet files ----
    # If the SAME uuid appears in two *different* .kicad_sch files, that is a
    # UUID collision in the design (copy-paste without UUID regeneration).
    # The same uuid appearing in the same sheet file with different instance
    # paths is *correct* — it is how multi-instance sheets work, so those are
    # left alone.
    uuid_sheets: dict = {}       # uuid -> set of sheet_files seen
    uuid_collisions: dict = {}   # uuid -> list of colliding records
    unique: list[dict] = []
    for r in stage1:
        u = r.get('uuid')
        sf = r.get('sheet_file', '')
        sheets_so_far = uuid_sheets.setdefault(u, set())
        if not sheets_so_far or sf in sheets_so_far:
            # First time seeing this uuid, OR it's from the same sheet file
            # (legitimate multi-instance expansion) — keep it.
            sheets_so_far.add(sf)
            unique.append(r)
        else:
            # Same uuid, but from a DIFFERENT sheet file → UUID collision.
            # Recorded for the report and dropped from the output.
            uuid_collisions.setdefault(u, []).append(r)

    if uuid_collisions:
        print(f"\nNote: {len(uuid_collisions)} UUID collision(s) detected "
              f"(same symbol UUID in multiple sheet files — likely copy-paste artifacts).")
        print(" Only the first occurrence is kept in the output.")
        for u, recs in list(uuid_collisions.items())[:10]:
            refs = [r.get('reference') for r in recs]
            files = [r.get('sheet_file') for r in recs]
            # str() guards against a symbol that has no uuid at all (None).
            print(f" uuid={str(u)[:8]}... refs={refs} sheets={files}")

    print(f"\nUnique instances after dedup: {len(unique)}")

    # Separate power symbols from real parts
    real = []
    power = []
    for r in unique:
        if (r.get('lib_id') or '').startswith('power:'):
            power.append(r)
        else:
            real.append(r)
    print(f" Non-power parts : {len(real)}")
    print(f" Power symbols : {len(power)}")

    # Check for true reference duplicates (same ref, different uuid = multi-unit)
    by_ref: dict[str, list] = defaultdict(list)
    for r in unique:
        # 'reference' may be present but None; group those under ''.
        by_ref[r.get('reference') or ''].append(r)

    multi_unit = {ref: recs for ref, recs in by_ref.items()
                  if len(recs) > 1 and len({r['uuid'] for r in recs}) > 1}
    if multi_unit:
        refs = [ref for ref in multi_unit if not ref.startswith('#')]
        if refs:
            print(f"\nMulti-unit components ({len(refs)} references, expected for split-unit symbols):")
            for ref in sorted(refs):
                units = [r['instance_unit'] for r in multi_unit[ref]]
                print(f" {ref}: units {units}")

    output = {
        "project_dir": str(project_dir),
        "root_uuid": root_uuid,
        "schematic_files": [str(f.relative_to(project_dir)) for f in sch_files],
        "total_instances": len(unique),
        "non_power_count": len(real),
        "symbols": unique,
    }

    out_path = project_dir / 'extract_symbols.json'
    out_path.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding='utf-8')
    print(f"\nOutput written to: {out_path}")

    # Print a summary table
    print("\n--- Summary (non-power parts, sorted by reference) ---")
    for r in sorted(real, key=lambda x: x.get('reference') or ''):
        ref = text_of(r.get('reference'))
        value = text_of(r.get('value'))
        lib = text_of(r.get('lib_id'))
        mpn = text_of(r['properties'].get('MPN'))
        sheet = text_of(r.get('sheet_file'))
        unit = text_of(r.get('instance_unit'))
        print(f" {ref:<12} u{unit:<2} {value:<30} {lib:<40} MPN={mpn:<25} [{sheet}]")
|
||
|
||
|
||
if __name__ == '__main__':
    # Project directory: the first command-line argument when given,
    # otherwise the directory that contains this script.
    project_dir = (
        Path(sys.argv[1]) if len(sys.argv) > 1 else Path(__file__).parent
    ).resolve()

    if project_dir.is_dir():
        main(project_dir)
    else:
        print(f"Error: {project_dir} is not a directory", file=sys.stderr)
        sys.exit(1)
|