# maglink-console/analyze_rtcm_correct.py

#!/usr/bin/env python3
"""
Correct RTCM3 message analyzer with proper bit-level parsing.
Also detects non-RTCM data (ASCII, NMEA) in the stream.
"""
import sys
import argparse
from pathlib import Path


# Generator polynomial of the CRC-24Q checksum that terminates every RTCM3 frame.
_CRC24Q_POLY = 0x1864CFB


def _build_crc24q_table() -> tuple[int, ...]:
    """Precompute the 256-entry lookup table for byte-at-a-time CRC-24Q."""
    table = []
    for value in range(256):
        crc = value << 16
        for _ in range(8):
            crc <<= 1
            if crc & 0x1000000:
                crc ^= _CRC24Q_POLY
        table.append(crc & 0xFFFFFF)
    return tuple(table)


_CRC24Q_TABLE = _build_crc24q_table()


def _crc24q(frame: bytes) -> int:
    """Return the CRC-24Q (zero initial value, no final XOR) of *frame*."""
    crc = 0
    for byte in frame:
        crc = ((crc << 8) & 0xFFFFFF) ^ _CRC24Q_TABLE[(crc >> 16) ^ byte]
    return crc


def parse_rtcm_stream(data: bytes, *, validate_crc: bool = True) -> list[dict]:
    """Parse an RTCM3 stream, identifying both RTCM messages and non-RTCM data.

    The stream is scanned byte by byte.  A candidate frame starts at a 0xD3
    preamble followed by two bytes holding 6 reserved bits and a 10-bit payload
    length; the payload is followed by a 3-byte CRC-24Q computed over the
    header and payload.  Because 0xD3 also occurs inside payloads and in
    unrelated binary data, a candidate is only accepted when its CRC matches;
    otherwise the scan resumes one byte further on, so a false preamble cannot
    swallow the start of a real message.

    Runs of printable ASCII (which may continue through TAB/CR/LF) are
    reported as ``'ascii'`` items.  Any other byte is skipped silently.

    Args:
        data: Raw bytes of the captured stream.
        validate_crc: When True (default), frames whose CRC does not match are
            rejected.  When False, every frame with a plausible length is
            reported and its ``'crc_valid'`` entry tells whether the checksum
            matched.

    Returns:
        A list of dicts in stream order.  RTCM items carry the keys ``type``
        (``'rtcm'``), ``offset``, ``message_type``, ``station_id``,
        ``reserved``, ``length``, ``total_length``, ``payload``, ``crc`` and
        ``crc_valid``.  ASCII items carry ``type`` (``'ascii'``), ``offset``,
        ``length`` and ``text``.
    """
    items = []
    size = len(data)
    ascii_whitespace = (9, 10, 13)  # TAB, LF, CR may continue an ASCII block
    i = 0

    while i < size:
        # Candidate RTCM3 frame: preamble plus the two length bytes must exist.
        if data[i] == 0xD3 and i + 2 < size:
            reserved = (data[i + 1] >> 2) & 0x3F
            length = ((data[i + 1] & 0x03) << 8) | data[i + 2]
            payload_end = i + 3 + length
            frame_end = payload_end + 3  # header + payload + CRC

            # At least 3 payload bytes are needed to read the 12-bit message
            # type and the 12-bit field that follows it.
            if frame_end <= size and length >= 3:
                crc = data[payload_end:frame_end]
                crc_valid = _crc24q(data[i:payload_end]) == int.from_bytes(crc, 'big')

                if crc_valid or not validate_crc:
                    payload = data[i + 3:payload_end]

                    # Message type: first 12 bits of the payload.
                    msg_type = (payload[0] << 4) | (payload[1] >> 4)
                    # Station ID: typically the next 12 bits.
                    station_id = ((payload[1] & 0x0F) << 8) | payload[2]

                    items.append({
                        'type': 'rtcm',
                        'offset': i,
                        'message_type': msg_type,
                        'station_id': station_id,
                        'reserved': reserved,
                        'length': length,
                        'total_length': frame_end - i,
                        'payload': payload,
                        'crc': crc,
                        'crc_valid': crc_valid,
                    })

                    i = frame_end
                    continue

        # ASCII data (NMEA sentences, text, ...): starts at a printable byte.
        if 32 <= data[i] < 127:
            start = i
            while i < size and (32 <= data[i] < 127 or data[i] in ascii_whitespace):
                i += 1

            items.append({
                'type': 'ascii',
                'offset': start,
                'length': i - start,
                'text': data[start:i].decode('ascii', errors='ignore'),
            })
            continue

        # Unknown/binary byte (including a rejected preamble): resync.
        i += 1

    return items


def _build_message_descriptions() -> dict[int, str]:
    """Build the message-type -> description table used by the analyzer."""
    table = {
        1001: "GPS L1 RTK Observables",
        1002: "GPS L1 RTK Observables (Extended)",
        1003: "GPS L1/L2 RTK Observables",
        1004: "GPS L1/L2 RTK Observables (Extended)",
        1005: "Stationary RTK Reference Station ARP",
        1006: "Stationary RTK Reference Station ARP + Antenna Height",
        1007: "Antenna Descriptor",
        1008: "Antenna Descriptor & Serial Number",
        1009: "GLONASS L1 RTK Observables",
        1010: "GLONASS L1 RTK Observables (Extended)",
        1011: "GLONASS L1/L2 RTK Observables",
        1012: "GLONASS L1/L2 RTK Observables (Extended)",
        1013: "System Parameters",
        1019: "GPS Ephemerides",
        1020: "GLONASS Ephemerides",
        1029: "Unicode Text String",
        1032: "Physical Reference Station Position",
        1033: "Receiver and Antenna Descriptors",
        1041: "NavIC Ephemerides",
        1042: "BeiDou Ephemerides",
        1044: "QZSS Ephemerides",
        1045: "Galileo F/NAV Ephemerides",
        1046: "Galileo I/NAV Ephemerides",
        1230: "GLONASS Code-Phase Biases",
    }

    # Multiple Signal Messages: each constellation owns a block of seven
    # consecutive types, MSM1..MSM7, starting at base + 1.
    msm_families = (
        (1070, "GPS"),
        (1080, "GLONASS"),
        (1090, "Galileo"),
        (1100, "SBAS"),
        (1110, "QZSS"),
        (1120, "BeiDou"),
        (1130, "NavIC"),
    )
    for base, constellation in msm_families:
        for level in range(1, 8):
            table[base + level] = f"{constellation} MSM{level}"

    return table


# Built once at import time instead of on every lookup.
_MESSAGE_DESCRIPTIONS = _build_message_descriptions()


def get_message_description(msg_type: int) -> str:
    """Get human-readable description for RTCM message type.

    Args:
        msg_type: The 12-bit RTCM3 message number.

    Returns:
        The description for a known message type, otherwise
        ``"Unknown/Proprietary Type <msg_type>"``.
    """
    return _MESSAGE_DESCRIPTIONS.get(msg_type, f"Unknown/Proprietary Type {msg_type}")


def hex_dump(data: bytes, offset: int = 0, max_bytes: int = 64) -> str:
    """Render the leading bytes of *data* as a classic hex dump.

    Only the first ``max_bytes`` bytes are shown, 16 per row.  Each row holds
    the address (``offset`` plus the row's position, 8 hex digits), the bytes
    as two-digit uppercase hex padded to a fixed-width column, and the same
    bytes as text with non-printable values replaced by ``.``.  Rows are
    joined with newlines; empty input yields an empty string.
    """
    window = data[:max_bytes]

    def render_row(position: int) -> str:
        row = window[position:position + 16]
        address = f"{offset + position:08X}"
        hex_column = " ".join(format(value, "02X") for value in row).ljust(48)
        text_column = "".join(
            chr(value) if 32 <= value < 127 else "." for value in row
        )
        return address + "  " + hex_column + "  " + text_column

    return "\n".join(render_row(position) for position in range(0, len(window), 16))


def _escape_preview(text: str, limit: int = 150) -> str:
    """Escape CR/LF/TAB so *text* prints on one line, truncated to *limit* chars."""
    escaped = text.replace('\r', '\\r').replace('\n', '\\n').replace('\t', '\\t')
    if len(escaped) > limit:
        escaped = escaped[:limit] + "..."
    return escaped


def analyze_file(filename: str, show_hex: bool = False, filter_type: int = None,
                 max_items: int = 50, show_ascii: bool = True):
    """Analyze binary file with correct RTCM parsing.

    Prints a summary (item counts, per-type message counts, station IDs)
    followed by up to ``max_items`` individual stream items.

    Args:
        filename: Path of the capture file to read.
        show_hex: Also print a hex dump of the first 64 payload bytes of each
            displayed RTCM message.
        filter_type: When not None, list only RTCM messages of this type
            (ASCII blocks are then omitted as well).
        max_items: Maximum number of items to list.  Only items that are
            actually displayed count towards this limit.
        show_ascii: When False, ASCII blocks are left out of the listing.

    Returns:
        None.  All results are printed to standard output; a missing file or
        a directory path is reported with an ``ERROR:`` line.
    """
    path = Path(filename)
    if not path.exists():
        print(f"ERROR: File not found: {filename}")
        return
    if path.is_dir():
        # read_bytes() would raise on a directory; report it like other errors.
        print(f"ERROR: Not a file: {filename}")
        return

    print(f"Analyzing: {filename}")
    data = path.read_bytes()
    print(f"File size: {len(data):,} bytes")
    print(f"{'=' * 80}\n")

    items = parse_rtcm_stream(data)

    # Statistics
    rtcm_items = [item for item in items if item['type'] == 'rtcm']
    ascii_count = sum(1 for item in items if item['type'] == 'ascii')

    print("STREAM SUMMARY:")
    print(f"{'─' * 80}")
    print(f"Total RTCM messages: {len(rtcm_items)}")
    print(f"Total ASCII blocks:  {ascii_count}")
    print()

    # Message type distribution
    if rtcm_items:
        type_counts = {}
        station_ids = set()
        for item in rtcm_items:
            msg_type = item['message_type']
            type_counts[msg_type] = type_counts.get(msg_type, 0) + 1
            station_ids.add(item['station_id'])

        print("RTCM MESSAGE TYPES:")
        print(f"{'─' * 80}")
        for msg_type in sorted(type_counts):
            desc = get_message_description(msg_type)
            print(f"  Type {msg_type:4d}: {type_counts[msg_type]:5d} messages - {desc}")

        print()
        print(f"Station IDs found: {sorted(station_ids)}")
        print()

    def is_displayed(item: dict) -> bool:
        """Tell whether *item* passes the type filter and the ASCII switch."""
        if filter_type is not None:
            return item['type'] == 'rtcm' and item.get('message_type') == filter_type
        if item['type'] == 'ascii':
            return show_ascii
        return item['type'] == 'rtcm'

    # Select the displayable items first so that hidden items neither use up
    # the max_items budget nor inflate the "more items" count.  Each item keeps
    # its 1-based position in the full stream as its label.
    visible = [
        (number, item)
        for number, item in enumerate(items, start=1)
        if is_displayed(item)
    ]
    limit = max(max_items, 0)

    print(f"STREAM CONTENT (first {max_items} items):")
    print(f"{'=' * 80}\n")

    for number, item in visible[:limit]:
        if item['type'] == 'rtcm':
            msg_type = item['message_type']
            desc = get_message_description(msg_type)

            print(f"[{number}] RTCM Message {msg_type:4d} - {desc}")
            print(f"    Offset:     0x{item['offset']:08X} ({item['offset']})")
            print(f"    Station ID: {item['station_id']}")
            print(f"    Length:     {item['length']} bytes payload, {item['total_length']} bytes total")

            payload = item['payload']

            if show_hex:
                print("    Payload (first 64 bytes):")
                # +3 skips the frame header so addresses point at payload bytes.
                hex_lines = hex_dump(payload, item['offset'] + 3, max_bytes=64)
                for line in hex_lines.split('\n'):
                    print(f"      {line}")

            # Payloads that are mostly printable usually carry embedded text.
            printable_count = sum(1 for b in payload if 32 <= b < 127)
            printable_percent = (printable_count / len(payload) * 100) if payload else 0

            if printable_percent > 30:
                print(f"    Contains {printable_percent:.0f}% ASCII text:")
                # errors='ignore' drops non-ASCII bytes, so decoding cannot fail.
                text = payload.decode('ascii', errors='ignore')
                print(f"      {_escape_preview(text)}")

            print()

        else:
            print(f"[{number}] ASCII DATA")
            print(f"    Offset: 0x{item['offset']:08X} ({item['offset']})")
            print(f"    Length: {item['length']} bytes")
            print(f"    Text:   {_escape_preview(item['text'])}")
            print()

    remaining = len(visible) - limit
    if remaining > 0:
        print(f"\n... and {remaining} more items (use --max-items to show more)")


def main():
    """Command-line entry point: parse the arguments and run the analysis."""
    usage_examples = """
Examples:
  # Basic analysis
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin

  # Show hex dumps
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --hex

  # Only show specific message type
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --type 1005

  # Show more items
  python analyze_rtcm_correct.py ntrip_raw_20250605_120000.bin --max-items 100
        """

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Analyze RTCM3 binary file with correct message type parsing",
        epilog=usage_examples,
    )

    # One (name, add_argument keywords) pair per option, in help-output order.
    option_specs = (
        ('filename', {'help': 'Binary file to analyze'}),
        ('--hex', {'action': 'store_true', 'help': 'Show hex dumps of message payloads'}),
        ('--type', {'type': int, 'help': 'Filter to show only specific message type'}),
        ('--max-items', {'type': int, 'default': 50, 'help': 'Maximum number of items to display'}),
        ('--no-ascii', {'action': 'store_true', 'help': 'Hide ASCII blocks in output'}),
    )
    for option_name, settings in option_specs:
        cli.add_argument(option_name, **settings)

    options = cli.parse_args()

    hide_ascii = options.no_ascii
    analyze_file(
        options.filename,
        show_hex=options.hex,
        filter_type=options.type,
        max_items=options.max_items,
        show_ascii=not hide_ascii,
    )


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()