Initial commit
This commit is contained in:
288
parse_chunked_rtcm.py
Normal file
288
parse_chunked_rtcm.py
Normal file
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Parse NTRIP stream with HTTP chunked transfer encoding.
|
||||
Handles the chunk headers and extracts clean RTCM messages.
|
||||
"""
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_chunked_stream(data: bytes) -> tuple[bytes, list[dict]]:
    """
    Parse HTTP chunked transfer encoded stream.

    Scans ``data`` for chunk headers (a hexadecimal size, optionally
    followed by a ``;extension``, terminated by CRLF), copies every chunk
    body into the output and records one log entry per chunk. Bytes that
    do not start a valid chunk header are skipped one at a time, so
    leading garbage in front of the first chunk is tolerated.

    Parsing stops at the terminating zero-size chunk, at the first chunk
    whose body (plus trailing CRLF) is not fully present, or when no
    further CRLF can be found.

    Returns: (clean_rtcm_data, chunk_log)

    Each ``chunk_log`` entry is a dict with ``offset``, ``size_declared``
    and ``size_actual``. Data chunks additionally carry ``chunk_data`` and
    ``has_trailing_crlf``; the terminating chunk carries ``is_end``.
    """
    hex_digits = b'0123456789abcdefABCDEF'

    chunks = []
    rtcm_data = bytearray()
    i = 0

    while i < len(data):
        # Look for chunk size line (hex number followed by \r\n)
        line_end = data.find(b'\r\n', i)
        if line_end == -1:
            # No more complete chunks
            break

        # Chunk size may have optional extension after semicolon
        size_field = data[i:line_end].split(b';', 1)[0].strip()

        # Accept bare hex digits only. int(..., 16) on its own would also
        # accept a sign, a "0x" prefix or underscores; a negative size
        # moves the cursor backwards and makes this loop spin forever.
        if not size_field or size_field.strip(hex_digits):
            # Not a valid chunk size, skip byte
            i += 1
            continue

        chunk_size = int(size_field, 16)

        # Move past the chunk size line
        chunk_data_start = line_end + 2  # skip \r\n

        if chunk_size == 0:
            # End of chunks
            chunks.append({
                'offset': i,
                'size_declared': 0,
                'size_actual': 0,
                'is_end': True,
            })
            break

        # Extract chunk data
        chunk_data_end = chunk_data_start + chunk_size

        if chunk_data_end + 2 > len(data):
            # Incomplete chunk
            break

        chunk_data = data[chunk_data_start:chunk_data_end]

        # The trailing \r\n is only recorded, not enforced: a chunk with a
        # damaged terminator is still extracted and flagged in the log.
        trailing = data[chunk_data_end:chunk_data_end + 2]

        chunks.append({
            'offset': i,
            'size_declared': chunk_size,
            'size_actual': len(chunk_data),
            'chunk_data': chunk_data,
            'has_trailing_crlf': trailing == b'\r\n',
        })

        # Append to clean RTCM data
        rtcm_data.extend(chunk_data)

        # Move to next chunk
        i = chunk_data_end + 2  # skip trailing \r\n

    return bytes(rtcm_data), chunks
|
||||
|
||||
|
||||
def parse_rtcm_messages(data: bytes) -> list[dict]:
    """Extract RTCM3 frames from de-chunked data.

    A frame starts with the byte 0xD3, followed by a 10-bit payload
    length (low two bits of the second byte plus the third byte), the
    payload itself and three trailing bytes. The trailing bytes (the
    CRC field of an RTCM3 frame) are counted in ``total_length`` but are
    not verified here.

    Frames shorter than three payload bytes, or frames that would run
    past the end of ``data``, are not reported; the scan resumes one
    byte further on.

    Returns a list of dicts with the keys ``offset``, ``type``,
    ``station_id``, ``length``, ``total_length`` and ``payload``.
    """
    preamble = 0xD3
    header_len = 3
    trailer_len = 3

    total = len(data)
    found = []
    pos = 0

    while pos < total:
        # Need the preamble plus both length bytes to size the frame.
        if data[pos] != preamble or pos + 2 >= total:
            pos += 1
            continue

        body_len = ((data[pos + 1] & 0x03) << 8) | data[pos + 2]
        frame_len = header_len + body_len + trailer_len

        # At least 3 payload bytes are required to read type and station.
        if body_len < 3 or pos + frame_len > total:
            pos += 1
            continue

        body_start = pos + header_len
        body = data[body_start:body_start + body_len]

        # First 12 payload bits: message type; next 12 bits: station id.
        found.append({
            'offset': pos,
            'type': (body[0] << 4) | (body[1] >> 4),
            'station_id': ((body[1] & 0x0F) << 8) | body[2],
            'length': body_len,
            'total_length': frame_len,
            'payload': body,
        })
        pos += frame_len

    return found
|
||||
|
||||
|
||||
def get_message_description(msg_type: int) -> str:
    """Return a readable name for an RTCM message type number.

    Message types that are not in the built-in table fall back to the
    generic label ``"Type <number>"``.
    """
    names = {
        1005: "Stationary RTK Reference Station ARP",
        1006: "Stationary RTK Reference Station ARP + Antenna Height",
        1007: "Antenna Descriptor",
        1008: "Antenna Descriptor & Serial Number",
        1019: "GPS Ephemerides",
        1020: "GLONASS Ephemerides",
        1033: "Receiver and Antenna Descriptors",
        1230: "GLONASS Code-Phase Biases",
    }

    # The MSM entries are regular: a per-constellation base number plus
    # the MSM level (4, 5 or 7).
    msm_families = (
        (1070, "GPS"),
        (1080, "GLONASS"),
        (1090, "Galileo"),
        (1120, "BeiDou"),
    )
    for base, constellation in msm_families:
        for level in (4, 5, 7):
            names[base + level] = f"{constellation} MSM{level}"

    if msg_type in names:
        return names[msg_type]
    return f"Type {msg_type}"
|
||||
|
||||
|
||||
def analyze_file(filename: str, save_clean: bool = False, show_chunks: bool = True,
                 show_messages: bool = True, max_messages: int = 50):
    """Analyze chunked NTRIP file.

    Reads ``filename``, strips the HTTP chunked transfer encoding with
    ``parse_chunked_stream`` and prints a report to stdout.

    Args:
        filename: Path of the raw capture to analyze.
        save_clean: When true, write the de-chunked bytes to
            ``<input stem>_clean.bin`` (relative to the current working
            directory).
        show_chunks: When true, print details for the first 20 chunks
            and the chunk overhead of the whole file.
        show_messages: When true, decode the de-chunked data with
            ``parse_rtcm_messages`` and print a per-type summary plus
            individual messages.
        max_messages: Maximum number of individual messages to print;
            negative values are treated as 0.

    Returns:
        None. A missing file is reported on stdout and the function
        returns early.
    """
    max_chunks_shown = 20
    # A negative slice bound would silently drop messages from the end.
    max_messages = max(max_messages, 0)

    path = Path(filename)
    if not path.exists():
        print(f"ERROR: File not found: {filename}")
        return

    print(f"Analyzing: {filename}")
    print(f"File size: {path.stat().st_size:,} bytes")
    print(f"{'=' * 80}\n")

    # Read file
    data = path.read_bytes()

    # Parse chunks
    print("Parsing HTTP chunked transfer encoding...")
    rtcm_data, chunks = parse_chunked_stream(data)

    print(f"Found {len(chunks)} chunks")
    print(f"Clean RTCM data: {len(rtcm_data):,} bytes")
    print()

    # Show chunk details
    if show_chunks and chunks:
        print("CHUNK DETAILS:")
        print(f"{'─' * 80}")

        for i, chunk in enumerate(chunks[:max_chunks_shown]):
            if chunk.get('is_end'):
                print(f"Chunk {i+1}: END (0-byte chunk)")
                break

            size = chunk['size_declared']
            offset = chunk['offset']
            trailing = "✓" if chunk.get('has_trailing_crlf') else "✗"

            print(f"Chunk {i+1:3d}: Offset 0x{offset:08X}, Size {size:5d} bytes, Trailing CRLF {trailing}")

        if len(chunks) > max_chunks_shown:
            print(f"... and {len(chunks) - max_chunks_shown} more chunks")

        # Overhead per data chunk: hex size digits + \r\n + trailing \r\n.
        # Summed over ALL data chunks, not only the ones listed above, so
        # the reported total stays correct for files with many chunks.
        total_chunk_overhead = sum(
            len(f"{chunk['size_declared']:x}") + 4
            for chunk in chunks
            if not chunk.get('is_end')
        )

        print(f"\nTotal chunk overhead: {total_chunk_overhead:,} bytes")
        # chunks is non-empty here, so data cannot be empty either.
        print(f"Efficiency: {len(rtcm_data) / len(data) * 100:.1f}% (data vs. total)")
        print()

    # Save clean RTCM data
    if save_clean:
        clean_filename = path.stem + "_clean.bin"
        Path(clean_filename).write_bytes(rtcm_data)
        print(f"✓ Saved clean RTCM data to: {clean_filename}\n")

    # Parse RTCM messages
    if show_messages:
        print("RTCM MESSAGES:")
        print(f"{'─' * 80}")

        messages = parse_rtcm_messages(rtcm_data)
        print(f"Found {len(messages)} RTCM messages\n")

        # Message type summary
        type_counts = {}
        station_ids = set()

        for msg in messages:
            type_counts[msg['type']] = type_counts.get(msg['type'], 0) + 1
            station_ids.add(msg['station_id'])

        print("Message type summary:")
        for msg_type in sorted(type_counts):
            desc = get_message_description(msg_type)
            count = type_counts[msg_type]
            print(f" Type {msg_type:4d}: {count:5d} messages - {desc}")

        print(f"\nStation IDs: {sorted(station_ids)}")
        print()

        # Show individual messages
        print(f"Individual messages (first {max_messages}):")
        print(f"{'─' * 80}\n")

        for i, msg in enumerate(messages[:max_messages]):
            desc = get_message_description(msg['type'])
            print(f"Message {i+1}: Type {msg['type']:4d} - {desc}")
            print(f" Offset: 0x{msg['offset']:08X}, Station: {msg['station_id']}, Length: {msg['length']} bytes")

            # Check for ASCII content: show the payload as text when more
            # than 30% of its bytes are printable ASCII.
            payload = msg['payload']
            printable = sum(1 for b in payload if 32 <= b < 127)
            if payload and printable / len(payload) > 0.3:
                text = payload.decode('ascii', errors='ignore')
                text_clean = text.replace('\r', '\\r').replace('\n', '\\n')
                if len(text_clean) > 100:
                    text_clean = text_clean[:100] + "..."
                print(f" ASCII: {text_clean}")

            print()

        if len(messages) > max_messages:
            print(f"... and {len(messages) - max_messages} more messages")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the arguments and run the analysis."""
    usage_examples = """
Examples:
# Basic analysis
python parse_chunked_rtcm.py ntrip_raw_20250605_120000.bin

# Save clean RTCM data (without chunk encoding)
python parse_chunked_rtcm.py ntrip_raw_20250605_120000.bin --save-clean

# Show more messages
python parse_chunked_rtcm.py ntrip_raw_20250605_120000.bin --max-messages 100

# Skip chunk details, just show messages
python parse_chunked_rtcm.py ntrip_raw_20250605_120000.bin --no-chunks
"""

    cli = argparse.ArgumentParser(
        description="Parse NTRIP stream with HTTP chunked transfer encoding",
        # Keep the line breaks of the examples exactly as written above.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    cli.add_argument('filename', help='Binary file to analyze')
    cli.add_argument('--save-clean', action='store_true',
                     help='Save clean RTCM data without chunk encoding')
    cli.add_argument('--no-chunks', action='store_true', help='Skip chunk details')
    cli.add_argument('--no-messages', action='store_true', help='Skip message details')
    cli.add_argument('--max-messages', type=int, default=50,
                     help='Maximum messages to show')

    opts = cli.parse_args()

    # The CLI exposes "skip" switches; analyze_file expects "show" flags.
    analyze_file(
        opts.filename,
        save_clean=opts.save_clean,
        show_chunks=not opts.no_chunks,
        show_messages=not opts.no_messages,
        max_messages=opts.max_messages,
    )


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user