#!/usr/bin/env python3
"""
Correct RTCM3 message analyzer with proper bit-level parsing.
Also detects non-RTCM data (ASCII, NMEA) in the stream.

RTCM3 frames are validated with their trailing CRC-24Q by default so that a
stray 0xD3 byte inside binary data is not mistaken for a frame start.
"""

import sys
import argparse
from collections import Counter
from pathlib import Path
from typing import Optional

# First byte of every RTCM3 transport frame.
RTCM3_PREAMBLE = 0xD3

# Frame layout: 3 header bytes, `length` payload bytes, 3 CRC bytes.
_HEADER_LEN = 3
_CRC_LEN = 3

# The 12-bit message type plus the following 12-bit field need 3 payload bytes.
_MIN_PAYLOAD_LEN = 3

# CRC-24Q generator polynomial (including the leading x^24 term).
_CRC24Q_POLY = 0x1864CFB

# Control characters tolerated inside an ASCII run: TAB, LF, CR.
_ASCII_WHITESPACE = frozenset((9, 10, 13))

# Longest text excerpt printed for a single item before truncation.
_MAX_TEXT_DISPLAY = 150


def _build_crc24q_table() -> tuple:
    """Precompute the 256-entry lookup table for byte-wise CRC-24Q."""
    table = []
    for byte in range(256):
        crc = byte << 16
        for _ in range(8):
            crc <<= 1
            if crc & 0x1000000:
                crc ^= _CRC24Q_POLY
        table.append(crc & 0xFFFFFF)
    return tuple(table)


_CRC24Q_TABLE = _build_crc24q_table()


def crc24q(data: bytes) -> int:
    """Return the CRC-24Q (initial value 0, no reflection) of *data*.

    For an RTCM3 frame the CRC covers the 3 header bytes and the payload;
    running it over a complete frame, CRC bytes included, yields 0.
    """
    crc = 0
    for byte in data:
        crc = ((crc << 8) & 0xFFFFFF) ^ _CRC24Q_TABLE[(crc >> 16) ^ byte]
    return crc


def _is_printable(byte: int) -> bool:
    """Return True for printable 7-bit ASCII (space through tilde)."""
    return 32 <= byte < 127


def _parse_rtcm_frame(data: bytes, offset: int, verify_crc: bool) -> Optional[dict]:
    """Try to decode one RTCM3 frame whose preamble sits at *offset*.

    Returns the item dict, or None when the frame is truncated, its payload
    is shorter than 3 bytes, or (with *verify_crc*) its CRC does not match.
    """
    if offset + 2 >= len(data):
        return None

    # Header: 0xD3, then 6 reserved bits and a 10-bit payload length.
    reserved = (data[offset + 1] >> 2) & 0x3F
    length = ((data[offset + 1] & 0x03) << 8) | data[offset + 2]
    total_length = _HEADER_LEN + length + _CRC_LEN
    if length < _MIN_PAYLOAD_LEN or offset + total_length > len(data):
        return None

    payload_end = offset + _HEADER_LEN + length
    payload = data[offset + _HEADER_LEN:payload_end]
    crc = data[payload_end:payload_end + _CRC_LEN]
    crc_valid = crc24q(data[offset:payload_end]) == int.from_bytes(crc, 'big')
    if verify_crc and not crc_valid:
        return None

    return {
        'type': 'rtcm',
        'offset': offset,
        # Message type is the first 12 bits of the payload.
        'message_type': (payload[0] << 4) | (payload[1] >> 4),
        # The next 12 bits are typically (not for every message type) the
        # reference station ID.
        'station_id': ((payload[1] & 0x0F) << 8) | payload[2],
        'reserved': reserved,
        'length': length,
        'total_length': total_length,
        'payload': payload,
        'crc': crc,
        'crc_valid': crc_valid,
    }


def parse_rtcm_stream(data: bytes, verify_crc: bool = True) -> list[dict]:
    """Parse RTCM3 stream, identifying both RTCM messages and non-RTCM data.

    Args:
        data: Raw bytes of the captured stream.
        verify_crc: When True (default) a candidate frame is accepted only if
            its CRC-24Q matches, which prevents false synchronisation on 0xD3
            bytes that occur inside other data. When False every structurally
            complete frame is accepted and flagged through ``crc_valid``.

    Returns:
        Items in stream order. RTCM items carry ``type='rtcm'`` plus
        ``offset``, ``message_type``, ``station_id``, ``reserved``,
        ``length``, ``total_length``, ``payload``, ``crc`` and ``crc_valid``.
        ASCII items carry ``type='ascii'``, ``offset``, ``length`` and
        ``text``. Bytes that are neither are skipped silently.
    """
    items = []
    size = len(data)
    i = 0
    while i < size:
        byte = data[i]

        if byte == RTCM3_PREAMBLE:
            frame = _parse_rtcm_frame(data, i, verify_crc)
            if frame is not None:
                items.append(frame)
                i += frame['total_length']
                continue

        # A run starts on a printable byte and may then also contain
        # TAB/CR/LF, so NMEA sentences keep their line endings.
        if _is_printable(byte):
            start = i
            while i < size and (_is_printable(data[i]) or data[i] in _ASCII_WHITESPACE):
                i += 1
            items.append({
                'type': 'ascii',
                'offset': start,
                'length': i - start,
                'text': data[start:i].decode('ascii', errors='ignore'),
            })
            continue

        # Unknown/binary data
        i += 1

    return items


# Multiple Signal Message families: base + 1..7 gives MSM1..MSM7.
_MSM_CONSTELLATIONS = {
    1070: "GPS",
    1080: "GLONASS",
    1090: "Galileo",
    1100: "SBAS",
    1110: "QZSS",
    1120: "BeiDou",
}

_MESSAGE_DESCRIPTIONS = {
    1001: "GPS L1 RTK Observables",
    1002: "GPS L1 RTK Observables (Extended)",
    1003: "GPS L1/L2 RTK Observables",
    1004: "GPS L1/L2 RTK Observables (Extended)",
    1005: "Stationary RTK Reference Station ARP",
    1006: "Stationary RTK Reference Station ARP + Antenna Height",
    1007: "Antenna Descriptor",
    1008: "Antenna Descriptor & Serial Number",
    1009: "GLONASS L1 RTK Observables",
    1010: "GLONASS L1 RTK Observables (Extended)",
    1011: "GLONASS L1/L2 RTK Observables",
    1012: "GLONASS L1/L2 RTK Observables (Extended)",
    1013: "System Parameters",
    1019: "GPS Ephemerides",
    1020: "GLONASS Ephemerides",
    1029: "Unicode Text String",
    1033: "Receiver and Antenna Descriptors",
    1042: "BeiDou Ephemerides",
    1044: "QZSS Ephemerides",
    1045: "Galileo F/NAV Ephemerides",
    1046: "Galileo I/NAV Ephemerides",
    1230: "GLONASS Code-Phase Biases",
}
_MESSAGE_DESCRIPTIONS.update({
    base + level: f"{constellation} MSM{level}"
    for base, constellation in _MSM_CONSTELLATIONS.items()
    for level in range(1, 8)
})


def get_message_description(msg_type: int) -> str:
    """Get human-readable description for RTCM message type.

    Unlisted types yield ``"Unknown/Proprietary Type <n>"``.
    """
    return _MESSAGE_DESCRIPTIONS.get(msg_type, f"Unknown/Proprietary Type {msg_type}")


def hex_dump(data: bytes, offset: int = 0, max_bytes: int = 64) -> str:
    """Create a hex dump of data.

    Args:
        data: Bytes to dump; only the first *max_bytes* are shown.
        offset: Address printed for the first byte.
        max_bytes: Upper bound on the number of bytes dumped.

    Returns:
        One line per 16 bytes (address, hex column, ASCII column) joined by
        newlines; an empty string for empty input.
    """
    lines = []
    data = data[:max_bytes]
    for i in range(0, len(data), 16):
        chunk = data[i:i + 16]
        hex_part = " ".join(f"{b:02X}" for b in chunk)
        ascii_part = "".join(chr(b) if _is_printable(b) else "." for b in chunk)
        lines.append(f"{offset + i:08X} {hex_part:<48} {ascii_part}")
    return "\n".join(lines)


def _escape_and_truncate(text: str) -> str:
    """Make CR/LF/TAB visible and cap the text at the display limit."""
    escaped = text.replace('\r', '\\r').replace('\n', '\\n').replace('\t', '\\t')
    if len(escaped) > _MAX_TEXT_DISPLAY:
        escaped = escaped[:_MAX_TEXT_DISPLAY] + "..."
    return escaped


def _is_displayed(item: dict, filter_type: Optional[int], show_ascii: bool) -> bool:
    """Decide whether *item* will be printed under the current options."""
    if item['type'] == 'rtcm':
        return filter_type is None or item.get('message_type') == filter_type
    # A message-type filter hides every non-RTCM item.
    return filter_type is None and show_ascii


def _print_rtcm_item(index: int, item: dict, show_hex: bool) -> None:
    """Print the detail lines for one RTCM message."""
    msg_type = item['message_type']
    payload = item['payload']

    print(f"[{index}] RTCM Message {msg_type:4d} - {get_message_description(msg_type)}")
    print(f" Offset: 0x{item['offset']:08X} ({item['offset']})")
    print(f" Station ID: {item['station_id']}")
    print(f" Length: {item['length']} bytes payload, {item['total_length']} bytes total")
    # Only reachable when frames were parsed without CRC enforcement.
    if not item.get('crc_valid', True):
        print(" CRC: INVALID")

    if show_hex:
        print(" Payload (first 64 bytes):")
        for line in hex_dump(payload, item['offset'] + 3, max_bytes=64).split('\n'):
            print(f" {line}")

    # Descriptor and text messages carry readable strings worth surfacing.
    printable_count = sum(1 for b in payload if _is_printable(b))
    printable_percent = (printable_count / len(payload) * 100) if payload else 0
    if printable_percent > 30:
        print(f" Contains {printable_percent:.0f}% ASCII text:")
        # errors='ignore' drops non-ASCII bytes, so this cannot raise.
        print(f" {_escape_and_truncate(payload.decode('ascii', errors='ignore'))}")
    print()


def _print_ascii_item(index: int, item: dict) -> None:
    """Print the detail lines for one ASCII block."""
    print(f"[{index}] ASCII DATA")
    print(f" Offset: 0x{item['offset']:08X} ({item['offset']})")
    print(f" Length: {item['length']} bytes")
    print(f" Text: {_escape_and_truncate(item['text'])}")
    print()


def analyze_file(filename: str, show_hex: bool = False, filter_type: Optional[int] = None,
                 max_items: int = 50, show_ascii: bool = True,
                 verify_crc: bool = True) -> None:
    """Analyze binary file with correct RTCM parsing.

    Prints a summary, the per-type message distribution and up to
    *max_items* individual items to stdout.

    Args:
        filename: Path of the captured stream.
        show_hex: Also print a hex dump of each message payload.
        filter_type: If given, list only RTCM messages of this type.
        max_items: Maximum number of *displayed* items; items hidden by
            *filter_type* or *show_ascii* do not count against it.
        show_ascii: List ASCII blocks as well as RTCM messages.
        verify_crc: Forwarded to :func:`parse_rtcm_stream`.
    """
    path = Path(filename)
    if not path.exists():
        print(f"ERROR: File not found: {filename}")
        return

    print(f"Analyzing: {filename}")
    print(f"File size: {path.stat().st_size:,} bytes")
    print(f"{'=' * 80}\n")

    # Read and parse
    items = parse_rtcm_stream(path.read_bytes(), verify_crc=verify_crc)

    # Statistics
    rtcm_items = [item for item in items if item['type'] == 'rtcm']
    ascii_count = sum(1 for item in items if item['type'] == 'ascii')

    print("STREAM SUMMARY:")
    print(f"{'─' * 80}")
    print(f"Total RTCM messages: {len(rtcm_items)}")
    print(f"Total ASCII blocks: {ascii_count}")
    print()

    # Message type distribution
    if rtcm_items:
        type_counts = Counter(item['message_type'] for item in rtcm_items)
        station_ids = {item['station_id'] for item in rtcm_items}

        print("RTCM MESSAGE TYPES:")
        print(f"{'─' * 80}")
        for msg_type in sorted(type_counts):
            desc = get_message_description(msg_type)
            print(f" Type {msg_type:4d}: {type_counts[msg_type]:5d} messages - {desc}")
        print()
        print(f"Station IDs found: {sorted(station_ids)}")
        print()

    # Show individual items
    print(f"STREAM CONTENT (first {max_items} items):")
    print(f"{'=' * 80}\n")

    # Select first, then truncate, so hidden items neither use up the display
    # budget nor inflate the "more items" count. Indices stay stream-relative.
    displayed = [
        (index, item)
        for index, item in enumerate(items, start=1)
        if _is_displayed(item, filter_type, show_ascii)
    ]
    limit = max(max_items, 0)  # a negative slice bound would drop from the end

    for index, item in displayed[:limit]:
        if item['type'] == 'rtcm':
            _print_rtcm_item(index, item, show_hex)
        else:
            _print_ascii_item(index, item)

    remaining = len(displayed) - limit
    if remaining > 0:
        print(f"\n... and {remaining} more items (use --max-items to show more)")


def main():
    """Command-line entry point: parse arguments and run the analysis."""
    parser = argparse.ArgumentParser(
        description="Analyze RTCM3 binary file with correct message type parsing",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic analysis
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin

  # Show hex dumps
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --hex

  # Only show specific message type
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --type 1005

  # Show more items
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --max-items 100
        """
    )
    parser.add_argument('filename', help='Binary file to analyze')
    parser.add_argument('--hex', action='store_true',
                        help='Show hex dumps of message payloads')
    parser.add_argument('--type', type=int,
                        help='Filter to show only specific message type')
    parser.add_argument('--max-items', type=int, default=50,
                        help='Maximum number of items to display')
    parser.add_argument('--no-ascii', action='store_true',
                        help='Hide ASCII blocks in output')
    parser.add_argument('--no-crc-check', action='store_true',
                        help='Accept RTCM frames even when their CRC-24Q does not match')
    args = parser.parse_args()

    analyze_file(
        args.filename,
        show_hex=args.hex,
        filter_type=args.type,
        max_items=args.max_items,
        show_ascii=not args.no_ascii,
        verify_crc=not args.no_crc_check,
    )


if __name__ == "__main__":
    main()