Initial commit
This commit is contained in:
420
parse_rtcm_messages.py
Normal file
420
parse_rtcm_messages.py
Normal file
@@ -0,0 +1,420 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Scan a binary stream for RTCM v3 messages.

RTCM v3 frames are:
    0xD3 | 6 reserved bits + 10-bit payload length | payload | CRC-24Q

This script does not depend on any local project modules. It searches byte-by-byte
for valid frames, verifies the CRC, prints what it finds, and can optionally write
each full RTCM frame to disk.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import BinaryIO, Iterable
|
||||
|
||||
|
||||
# First byte of every RTCM v3 frame.
PREAMBLE = 0xD3

# The length field is 10 bits wide, so a payload holds at most 1023 bytes.
MAX_RTCM_PAYLOAD_LENGTH = (1 << 10) - 1

# CRC-24Q generator polynomial, written with its x^24 term (bit 24) included.
CRC24Q_POLY = 0x1864CFB

# Selects the low 24 bits of the CRC register.
CRC24Q_MASK = (1 << 24) - 1
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RtcmMessage:
    """One CRC-verified RTCM v3 frame found in the scanned stream."""

    index: int  # 1-based position among the valid frames found
    offset: int  # offset of the preamble byte within the scanned stream
    message_type: int | None  # 12-bit message number; None if the payload is under 2 bytes
    payload_length: int  # value of the 10-bit length field
    frame_length: int  # header (3) + payload + CRC (3)
    crc: int  # CRC value stored in the frame's last three bytes
    frame: bytes  # the complete frame, preamble through CRC

    @property
    def payload(self) -> bytes:
        """Return the frame body without the 3-byte header and 3-byte CRC."""
        return self.frame[3:-3]

    @property
    def payload_hex_preview(self) -> str:
        """Return the first 24 payload bytes as spaced hex, with " ..." if truncated."""
        body = self.payload
        head = body[:24].hex(" ")
        return head if len(body) <= 24 else f"{head} ..."
|
||||
|
||||
|
||||
@dataclass
class ScanStats:
    """Mutable counters collected while scanning a stream for RTCM frames."""

    # Total size of the scanned input.
    bytes_read: int = 0
    # Bytes stepped over one at a time because no valid frame started there.
    bytes_skipped: int = 0
    # Preamble bytes whose following header was rejected.
    invalid_headers: int = 0
    # Frame candidates whose computed CRC did not match the stored CRC.
    crc_failures: int = 0
    # Offset of a preamble whose frame would extend past the end of the
    # input; None when the scan saw no such candidate.
    incomplete_tail_offset: int | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DechunkResult:
    """Outcome of decoding an HTTP chunked body."""

    # Concatenated chunk payloads.
    data: bytes
    # Number of data chunks that were decoded.
    chunks: int
    # Offset in the input at which decoding started.
    start_offset: int
    # Offset in the input just past the last byte the decoder consumed.
    # NOTE: despite the name this is an absolute end offset, not a byte count.
    consumed_bytes: int
|
||||
|
||||
|
||||
class ChunkedDecodeError(ValueError):
    """Raised when input cannot be decoded as an HTTP chunked body."""
|
||||
|
||||
|
||||
def crc24q(data: bytes) -> int:
    """Compute the CRC-24Q checksum used by RTCM v3 over ``data``.

    The register starts at zero and is updated bit by bit, most significant
    bit first; the result is the final 24-bit register value.
    """
    register = 0
    for octet in data:
        # Fold the next byte into the top 8 bits of the 24-bit register.
        register ^= octet << 16
        for _bit in range(8):
            # The register never exceeds 24 bits here, so testing bit 23
            # before the shift is the same as testing bit 24 after it.
            overflow = register & 0x800000
            register <<= 1
            if overflow:
                register ^= CRC24Q_POLY
    return register & CRC24Q_MASK
|
||||
|
||||
|
||||
def find_chunked_body_start(data: bytes) -> int:
    """Return the offset at which an HTTP chunked body probably starts.

    The bytes before the first blank line (``CRLF CRLF``) are treated as a
    header block. If one of its header lines is ``Transfer-Encoding`` and
    that line's value mentions ``chunked`` (both compared case-insensitively),
    the offset just past the blank line is returned.

    Args:
        data: The raw captured bytes, possibly starting with HTTP headers.

    Returns:
        The body start offset, or 0 when no blank line is present or the
        header block does not declare chunked transfer encoding.
    """
    header_end = data.find(b"\r\n\r\n")
    if header_end == -1:
        return 0

    for raw_line in data[:header_end].splitlines():
        field_name, colon, field_value = raw_line.partition(b":")
        if not colon:
            # Status line or something that is not a "name: value" header.
            continue
        # Only the Transfer-Encoding value counts; "chunked" appearing in an
        # unrelated header must not be mistaken for the transfer coding.
        if field_name.strip().lower() == b"transfer-encoding" and b"chunked" in field_value.lower():
            return header_end + 4
    return 0
|
||||
|
||||
|
||||
def parse_chunk_size(line: bytes) -> int:
    """Parse the hexadecimal size from one HTTP chunk-size line.

    Args:
        line: The chunk-size line without its trailing CRLF. Anything from
            the first ``;`` onward (chunk extensions) is ignored, as is
            whitespace around the size.

    Returns:
        The chunk size as a non-negative integer.

    Raises:
        ChunkedDecodeError: If the size field is empty or contains anything
            other than hexadecimal digits.
    """
    size_text = line.split(b";", 1)[0].strip()
    if not size_text:
        raise ChunkedDecodeError("empty chunk size")
    # int(..., 16) on its own also accepts a sign, a "0x" prefix and "_"
    # separators. A negative size would let a decoder step backwards through
    # its input, so only plain hex digits are allowed through.
    if size_text.translate(None, b"0123456789abcdefABCDEF"):
        raise ChunkedDecodeError(f"invalid chunk size: {line!r}")
    return int(size_text, 16)
|
||||
|
||||
|
||||
def dechunk_http_body(data: bytes, start_offset: int = 0) -> DechunkResult:
    """Decode an HTTP chunked body from ``data[start_offset:]``.

    Decoding stops at the zero-size terminating chunk, or when the input
    ends exactly after a complete chunk (a capture cut off between chunks).

    Args:
        data: Input bytes containing the chunked body.
        start_offset: Offset of the first chunk-size line.

    Returns:
        A DechunkResult holding the concatenated chunk payloads, the number
        of data chunks, ``start_offset``, and the offset just past the last
        byte consumed.

    Raises:
        ChunkedDecodeError: If a chunk-size line is missing, malformed or
            negative, a chunk runs past the end of the input, or a chunk is
            not followed by CRLF.
    """
    pos = start_offset
    decoded = bytearray()
    chunks = 0

    while True:
        line_end = data.find(b"\r\n", pos)
        if line_end == -1:
            raise ChunkedDecodeError("missing chunk-size CRLF")

        size = parse_chunk_size(data[pos:line_end])
        if size < 0:
            # A negative size would move ``pos`` backwards and could make
            # this loop revisit the same size line forever.
            raise ChunkedDecodeError(f"negative chunk size: {size}")
        pos = line_end + 2

        if size == 0:
            if data.startswith(b"\r\n", pos):
                # Empty trailer section: the body ends with this CRLF. Do not
                # search further, or a blank line in whatever follows the
                # body would be mistaken for the end of the trailers.
                consumed = pos + 2
            else:
                trailer_end = data.find(b"\r\n\r\n", pos)
                if trailer_end == -1:
                    # Truncated trailers: consume through the first CRLF if
                    # there is one, otherwise everything that is left.
                    final_end = data.find(b"\r\n", pos)
                    consumed = len(data) if final_end == -1 else final_end + 2
                else:
                    consumed = trailer_end + 4
            return DechunkResult(bytes(decoded), chunks, start_offset, consumed)

        chunk_end = pos + size
        if chunk_end + 2 > len(data):
            raise ChunkedDecodeError("chunk extends beyond input")
        if data[chunk_end : chunk_end + 2] != b"\r\n":
            raise ChunkedDecodeError("missing CRLF after chunk data")

        decoded.extend(data[pos:chunk_end])
        chunks += 1
        pos = chunk_end + 2

        if pos == len(data):
            # Input ended cleanly after a chunk without a terminating chunk.
            return DechunkResult(bytes(decoded), chunks, start_offset, pos)
|
||||
|
||||
|
||||
def prepare_input_stream(data: bytes, mode: str) -> tuple[bytes, DechunkResult | None]:
    """Choose the byte stream to scan for RTCM frames.

    Args:
        data: The raw input bytes.
        mode: ``"raw"`` returns ``data`` untouched; ``"chunked"`` requires
            HTTP dechunking to succeed; any other value (``"auto"``) tries
            dechunking and keeps whichever stream yields more valid frames.

    Returns:
        ``(stream, dechunk_result)``, where ``dechunk_result`` is None when
        the raw input is used.

    Raises:
        ChunkedDecodeError: Only in ``"chunked"`` mode, when decoding fails.
    """
    if mode == "raw":
        return data, None

    forced = mode == "chunked"
    try:
        result = dechunk_http_body(data, find_chunked_body_start(data))
    except ChunkedDecodeError:
        if forced:
            raise
        # Auto mode: input that does not decode as chunked is scanned as-is.
        return data, None

    if forced:
        return result.data, result

    # Auto mode: prefer the dechunked stream only if it yields more frames.
    raw_count = len(scan_rtcm_frames(data)[0])
    dechunked_count = len(scan_rtcm_frames(result.data)[0])
    if dechunked_count > raw_count:
        return result.data, result
    return data, None
|
||||
|
||||
|
||||
def rtcm_message_type(payload: bytes) -> int | None:
    """Return the 12-bit message number held in the first 12 payload bits.

    Returns None when the payload has fewer than two bytes.
    """
    if len(payload) >= 2:
        # Read the first two bytes big-endian and drop the low 4 bits.
        return int.from_bytes(payload[:2], "big") >> 4
    return None
|
||||
|
||||
|
||||
def scan_rtcm_frames(data: bytes) -> tuple[list[RtcmMessage], ScanStats]:
    """Find valid RTCM v3 frames in ``data``.

    The input is walked byte by byte. A frame is accepted when a preamble
    byte is followed by six zero reserved bits, the declared frame fits in
    the input, and the stored CRC matches the computed CRC-24Q. After an
    accepted frame the scan resumes right behind it; otherwise it advances
    by one byte.

    A preamble whose declared frame would run past the end of the input is
    remembered as the incomplete tail, but scanning continues: if a valid
    frame is found after it, the candidate was only a stray preamble byte
    and is discarded. If nothing valid follows, the statistics are reported
    as they stood when the tail candidate was reached.

    Args:
        data: The byte stream to scan.

    Returns:
        ``(messages, stats)``: the accepted frames in stream order and the
        counters gathered during the scan.
    """
    total = len(data)
    messages: list[RtcmMessage] = []
    skipped = invalid = crc_bad = 0
    tail_offset: int | None = None
    # Counter values at the moment the pending tail candidate was seen; they
    # are restored if no valid frame shows up after that candidate.
    tail_snapshot: tuple[int, int, int] | None = None
    pos = 0

    while pos < total:
        if data[pos] != PREAMBLE:
            skipped += 1
            pos += 1
            continue

        if pos + 3 > total:
            # Not even a full header is left, so no frame can start here or
            # later. Keep an earlier tail candidate if there is one.
            if tail_offset is None:
                tail_offset = pos
            break

        second = data[pos + 1]
        if second & 0xFC:
            # Reserved bits must be zero.
            invalid += 1
            skipped += 1
            pos += 1
            continue

        # The length is a 10-bit field, so it cannot exceed the 1023-byte
        # payload limit and needs no separate range check.
        payload_length = ((second & 0x03) << 8) | data[pos + 2]
        frame_length = 3 + payload_length + 3
        end = pos + frame_length
        if end > total:
            # Either a genuinely truncated frame or a stray preamble byte.
            # Remember the first such candidate and keep looking, because a
            # valid frame may still follow a stray byte.
            if tail_offset is None:
                tail_offset = pos
                tail_snapshot = (skipped, invalid, crc_bad)
            skipped += 1
            pos += 1
            continue

        frame = data[pos:end]
        expected_crc = int.from_bytes(frame[-3:], "big")
        if crc24q(frame[:-3]) != expected_crc:
            crc_bad += 1
            skipped += 1
            pos += 1
            continue

        # A verified frame after a pending candidate proves the candidate
        # was not a truncated frame; keep the counters gathered since then.
        tail_offset = None
        tail_snapshot = None

        messages.append(
            RtcmMessage(
                index=len(messages) + 1,
                offset=pos,
                message_type=rtcm_message_type(frame[3:-3]),
                payload_length=payload_length,
                frame_length=frame_length,
                crc=expected_crc,
                frame=frame,
            )
        )
        pos = end

    if tail_snapshot is not None:
        # Nothing valid followed the tail candidate: report the counters as
        # they were when the candidate was reached.
        skipped, invalid, crc_bad = tail_snapshot

    stats = ScanStats(
        bytes_read=total,
        bytes_skipped=skipped,
        invalid_headers=invalid,
        crc_failures=crc_bad,
        incomplete_tail_offset=tail_offset,
    )
    return messages, stats
|
||||
|
||||
|
||||
def write_frames(messages: Iterable[RtcmMessage], out_dir: Path) -> None:
    """Write every message's full frame to its own ``.bin`` file in ``out_dir``.

    The directory (and any missing parents) is created first. File names
    carry the zero-padded message index, the message type (or "unknown")
    and the frame's offset.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    for item in messages:
        if item.message_type is None:
            type_label = "unknown"
        else:
            type_label = str(item.message_type)
        file_name = f"rtcm_{item.index:05d}_type_{type_label}_offset_{item.offset}.bin"
        (out_dir / file_name).write_bytes(item.frame)
|
||||
|
||||
|
||||
def write_csv(messages: Iterable[RtcmMessage], path: Path) -> None:
    """Write a CSV index with a header row and one row per message.

    The CRC is written as six upper-case hex digits and the payload as its
    short hex preview.
    """
    columns = [
        "index",
        "offset",
        "message_type",
        "payload_length",
        "frame_length",
        "crc_hex",
        "payload_hex_preview",
    ]
    with path.open("w", newline="", encoding="utf-8") as fp:
        writer = csv.DictWriter(fp, fieldnames=columns)
        writer.writeheader()
        for msg in messages:
            row = {
                "index": msg.index,
                "offset": msg.offset,
                "message_type": msg.message_type,
                "payload_length": msg.payload_length,
                "frame_length": msg.frame_length,
                "crc_hex": f"{msg.crc:06X}",
                "payload_hex_preview": msg.payload_hex_preview,
            }
            writer.writerow(row)
|
||||
|
||||
|
||||
def write_jsonl(messages: Iterable[RtcmMessage], path: Path) -> None:
    """Write one compact JSON object per message, one per line.

    Each record holds the message metadata plus the complete payload and
    frame as hex strings.
    """
    with path.open("w", encoding="utf-8") as fp:
        for msg in messages:
            record = {
                "index": msg.index,
                "offset": msg.offset,
                "message_type": msg.message_type,
                "payload_length": msg.payload_length,
                "frame_length": msg.frame_length,
                "crc_hex": f"{msg.crc:06X}",
                "payload_hex": msg.payload.hex(),
                "frame_hex": msg.frame.hex(),
            }
            encoded = json.dumps(record, separators=(",", ":"))
            fp.write(encoded + "\n")
|
||||
|
||||
|
||||
def print_messages(
    messages: list[RtcmMessage],
    stats: ScanStats,
    show_hex: bool,
    debug_1005: bool,
) -> None:
    """Print one line per message followed by a summary of the scan.

    Args:
        messages: The frames to list.
        stats: Counters from the scan that produced ``messages``.
        show_hex: Append the short payload hex preview to each line.
        debug_1005: Also print the full frame as hex for type-1005 messages.
    """
    for msg in messages:
        type_label = str(msg.message_type) if msg.message_type is not None else "unknown"
        fields = [
            f"#{msg.index:05d}",
            f"offset={msg.offset:<10}",
            f"type={type_label:<5}",
            f"payload={msg.payload_length:<4}",
            f"frame={msg.frame_length:<4}",
            f"crc=0x{msg.crc:06X}",
        ]
        if show_hex:
            fields.append(f"payload={msg.payload_hex_preview}")
        print(" ".join(fields))
        if debug_1005 and msg.message_type == 1005:
            print(f" debug1005 frame_hex={msg.frame.hex(' ')}")

    summary = [
        "",
        f"Valid RTCM messages: {len(messages)}",
        f"Bytes read: {stats.bytes_read}",
        f"Bytes skipped while searching: {stats.bytes_skipped}",
        f"Invalid RTCM-like headers: {stats.invalid_headers}",
        f"CRC failures: {stats.crc_failures}",
    ]
    if stats.incomplete_tail_offset is not None:
        summary.append(f"Incomplete trailing candidate at offset: {stats.incomplete_tail_offset}")
    for text in summary:
        print(text)
|
||||
|
||||
|
||||
def read_input(path: Path | None, stdin: BinaryIO) -> bytes:
    """Return the whole input: the file at ``path``, or ``stdin`` if ``path`` is None."""
    return stdin.read() if path is None else path.read_bytes()
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Argument strings to parse. When None (the default), argparse
            reads them from ``sys.argv[1:]``, so existing callers are
            unaffected; passing a list allows programmatic use.

    Returns:
        The parsed namespace with ``input``, ``out_dir``, ``csv``, ``jsonl``,
        ``mode``, ``write_stream``, ``hex`` and ``debug_1005`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Parse RTCM v3 messages from a .bin file or stdin."
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        help="Binary file to scan. If omitted, reads from stdin.",
    )
    parser.add_argument(
        "--out-dir",
        type=Path,
        help="Directory where each valid full RTCM frame will be written as a .bin file.",
    )
    parser.add_argument(
        "--csv",
        type=Path,
        help="Write a CSV index of parsed messages.",
    )
    parser.add_argument(
        "--jsonl",
        type=Path,
        help="Write JSON Lines with message metadata and hex payload/frame content.",
    )
    parser.add_argument(
        "--mode",
        choices=["auto", "raw", "chunked"],
        default="auto",
        help=(
            "How to read the input: auto detects HTTP chunked transfer encoding, "
            "raw scans bytes exactly as stored, chunked forces HTTP dechunking."
        ),
    )
    parser.add_argument(
        "--write-stream",
        type=Path,
        help="Write the reconstructed byte stream that is scanned for RTCM frames.",
    )
    parser.add_argument(
        "--hex",
        action="store_true",
        help="Show a short payload hex preview in console output.",
    )
    parser.add_argument(
        "--debug-1005",
        action="store_true",
        help="Print the full RTCM 1005 frame bytes as hex in the console output.",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the command-line tool.

    Reads the input, optionally dechunks it, scans it for RTCM frames,
    prints the results and writes any requested output files.

    Returns:
        0 if at least one valid frame was found, 1 if none was found, and
        2 if the input could not be read or could not be decoded as chunked.
    """
    args = parse_args()

    try:
        data = read_input(args.input, sys.stdin.buffer)
    except OSError as exc:
        # A missing or unreadable input is a user error, not a crash.
        print(f"Could not read input: {exc}", file=sys.stderr)
        return 2

    try:
        stream, dechunked = prepare_input_stream(data, args.mode)
    except ChunkedDecodeError as exc:
        print(f"Could not decode chunked input: {exc}", file=sys.stderr)
        return 2

    if dechunked is not None:
        print(
            f"Decoded HTTP chunked transfer stream: "
            f"{dechunked.chunks} chunks, {len(data)} input bytes -> {len(stream)} data bytes"
        )
        print()

    if args.write_stream:
        args.write_stream.write_bytes(stream)
        print(f"Wrote scanned byte stream to {args.write_stream}")
        print()

    messages, stats = scan_rtcm_frames(stream)

    print_messages(messages, stats, args.hex, args.debug_1005)

    if args.out_dir:
        write_frames(messages, args.out_dir)
        print(f"Wrote {len(messages)} frame file(s) to {args.out_dir}")
    if args.csv:
        write_csv(messages, args.csv)
        print(f"Wrote CSV index to {args.csv}")
    if args.jsonl:
        write_jsonl(messages, args.jsonl)
        print(f"Wrote JSONL details to {args.jsonl}")

    return 0 if messages else 1
|
||||
|
||||
|
||||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user