use std::{
io::{BufReader, Read, Seek},
path::Path,
};
use chrono::{DateTime, Utc};
use color_eyre::eyre::{self as anyhow, WrapErr};
use h264_reader::{
nal::{
sei::{HeaderType, SeiMessage, SeiReader},
Nal, RefNal, UnitType,
},
rbsp::BitReaderError,
Context as H264ParsingContext,
};
use serde::{Deserialize, Serialize};
use ci2_remote_control::{H264Metadata, H264_METADATA_UUID, H264_METADATA_VERSION};
use crate::{
ntp_timestamp::NtpTimestamp, EncodedH264, FrameData, FrameDataSource, H264EncodingVariant,
ImageData, MyAsStr, Result, Timestamp, TimestampSource,
};
/// A source of H264 video frames backed by a seekable NAL-unit store.
///
/// Generic over `H` so the same machinery serves both raw Annex B files
/// (`H264AnnexBSource`) and H264 tracks extracted from MP4 containers.
pub struct H264Source<H: SeekableH264Source> {
    seekable_h264_source: H,
    // Location of every NAL unit in the source, in stream order.
    nal_locations: Vec<H::NalLocation>,
    // Presentation timestamps from the MP4 container, if any.
    // NOTE(review): indexed by NAL index in `RawH264Iter::next` — assumes
    // one entry per NAL location; confirm against the MP4 caller.
    mp4_pts: Option<Vec<std::time::Duration>>,
    // One entry per video frame (slice NAL), carrying the timing SEI data
    // that preceded that slice.
    frame_to_nalu_time_info: Vec<NaluTimeInfo>,
    // Metadata parsed from the H264_METADATA_UUID SEI message, if present.
    pub h264_metadata: Option<H264Metadata>,
    // MISP precision timestamp of the first frame, shifted into the
    // recording's timezone (from metadata) or UTC.
    frame0_precision_time: Option<chrono::DateTime<chrono::FixedOffset>>,
    // NTP receive time of the first frame from a FrameInfo SEI message.
    frame0_frameinfo_recv_ntp: Option<NtpTimestamp>,
    // Frame dimensions in pixels, taken from the SPS.
    width: u32,
    height: u32,
    // When true, `iter()` decodes frames to RGB with OpenH264; otherwise it
    // yields the encoded NAL units.
    do_decode_h264: bool,
    // Resolved timestamp source; never `Some(BestGuess)` (resolved at
    // construction), `None` when no timing data exists.
    timestamp_source: Option<crate::TimestampSource>,
    has_timestamps: bool,
}
/// Per-frame association between a slice NAL unit and the timing SEI data
/// that preceded it in the stream.
pub struct NaluTimeInfo {
    // Index (into `H264Source::nal_locations`) of the slice NAL unit that
    // completes this frame.
    nalu_index: usize,
    // MISP "MISPmicrosectime" SEI timestamp preceding the slice, if present.
    precise_timestamp: Option<DateTime<Utc>>,
    // NTP receive time from a "strawlab.org/89H" FrameInfo SEI, if present.
    frameinfo_recv_ntp: Option<NtpTimestamp>,
}
impl<H: SeekableH264Source> FrameDataSource for H264Source<H> {
    fn width(&self) -> u32 {
        self.width
    }
    fn height(&self) -> u32 {
        self.height
    }
    /// Camera name stored in the embedded H264 metadata SEI message, if any.
    fn camera_name(&self) -> Option<&str> {
        self.h264_metadata
            .as_ref()
            .and_then(|x| x.camera_name.as_deref())
    }
    /// Gamma value stored in the embedded H264 metadata SEI message, if any.
    fn gamma(&self) -> Option<f32> {
        self.h264_metadata.as_ref().and_then(|x| x.gamma)
    }
    /// Wall-clock time of the first frame according to the resolved
    /// timestamp source, or `None` when the source carries no absolute time.
    fn frame0_time(&self) -> Option<chrono::DateTime<chrono::FixedOffset>> {
        match &self.timestamp_source {
            // The constructor resolves `BestGuess` into a concrete source,
            // so it can never be stored here.
            Some(TimestampSource::BestGuess) => unreachable!(),
            Some(TimestampSource::MispMicrosectime) => self.frame0_precision_time,
            Some(TimestampSource::FrameInfoRecvTime) => {
                // Guaranteed `Some` when this source was selected (the
                // constructor bails otherwise).
                Some(self.frame0_frameinfo_recv_ntp.unwrap().into())
            }
            // MP4 PTS are relative durations; no absolute start time.
            Some(TimestampSource::Mp4Pts) | None => None,
        }
    }
    /// Skipping is not supported for H264 sources; only `n_frames == 0` is
    /// accepted.
    fn skip_n_frames(&mut self, n_frames: usize) -> Result<()> {
        if n_frames > 0 {
            anyhow::bail!("Skipping frames with H264 file is not supported.");
        }
        Ok(())
    }
    fn estimate_luminance_range(&mut self) -> Result<(u16, u16)> {
        anyhow::bail!("h264 luminance scanning not implemented");
    }
    /// Iterate over frames, decoded to RGB when `do_decode_h264` is set.
    ///
    /// A failure to initialize the OpenH264 decoder is reported as a single
    /// `Err` item from the returned iterator instead of panicking (the
    /// previous behavior was `unwrap()`).
    fn iter<'a>(&'a mut self) -> Box<dyn Iterator<Item = Result<FrameData>> + 'a> {
        let openh264_decoder_state = if self.do_decode_h264 {
            match openh264::decoder::Decoder::new() {
                Ok(decoder) => Some(decoder),
                // Surface the failure through the iterator contract rather
                // than panicking inside library code.
                Err(e) => return Box::new(std::iter::once(Err(e.into()))),
            }
        } else {
            None
        };
        Box::new(RawH264Iter {
            parent: self,
            frame_idx: 0,
            next_nal_idx: 0,
            openh264_decoder_state,
        })
    }
    fn timestamp_source(&self) -> &str {
        self.timestamp_source.as_str()
    }
    fn has_timestamps(&self) -> bool {
        self.has_timestamps
    }
}
/// SPS and PPS NAL units taken from an MP4 track's decoder configuration,
/// used to seed the H264 parsing context before scanning the stream.
pub(crate) struct FromMp4Track {
    pub(crate) sequence_parameter_set: Vec<u8>,
    pub(crate) picture_parameter_set: Vec<u8>,
}
/// Convenience trait alias for readers that support both `Seek` and `Read`.
pub trait SeekRead: Seek + Read {}
// Blanket implementation: anything seekable and readable is a `SeekRead`.
impl<T> SeekRead for T where T: Seek + Read {}
/// Abstraction over storage that can locate and fetch individual H264 NAL
/// units (e.g. an Annex B file or an MP4 track).
pub trait SeekableH264Source {
    /// Opaque handle describing where a single NAL unit lives.
    type NalLocation;
    /// All NAL unit locations in the source, in stream order.
    fn nal_boundaries(&mut self) -> &[Self::NalLocation];
    /// Read the raw bytes of one NAL unit.
    fn read_nal(&mut self, location: &Self::NalLocation) -> Result<Vec<u8>>;
    /// Read several NAL units in order, stopping at the first error.
    fn read_nals(&mut self, locations: &[Self::NalLocation]) -> Result<Vec<Vec<u8>>> {
        locations
            .iter()
            .map(|location| self.read_nal(location))
            .collect()
    }
}
/// Byte span of a single NAL unit within an Annex B stream, as reported by
/// the Annex B splitter.
#[derive(Debug, PartialEq, Clone)]
pub struct AnnexBLocation {
    // Byte offset of the NAL unit from the start of the stream.
    pub(crate) start: u64,
    // Length of the NAL unit, in bytes.
    pub(crate) sz: usize,
}
/// A `SeekableH264Source` reading raw Annex B data from a seekable reader.
pub struct H264AnnexBSource {
    inner: Box<dyn SeekRead + Send>,
    // NAL unit locations found by scanning the stream once at construction.
    my_nal_boundaries: Vec<AnnexBLocation>,
}
impl H264AnnexBSource {
    /// Open an Annex B source from a file, buffering reads.
    pub fn from_file(fd: std::fs::File) -> Result<Self> {
        Self::from_readseek(Box::new(BufReader::new(fd)))
    }
    /// Open an Annex B source from any seekable reader.
    ///
    /// Scans the whole stream once to locate NAL unit boundaries, then
    /// rewinds so subsequent reads start from the beginning.
    pub fn from_readseek(mut inner: Box<(dyn SeekRead + Send)>) -> Result<Self> {
        inner.rewind()?;
        let my_nal_boundaries = crate::h264_annexb_splitter::find_nals(&mut inner)?;
        inner.rewind()?;
        Ok(Self {
            inner,
            my_nal_boundaries,
        })
    }
}
impl SeekableH264Source for H264AnnexBSource {
    type NalLocation = AnnexBLocation;

    /// NAL boundaries were computed once at construction time.
    fn nal_boundaries(&mut self) -> &[Self::NalLocation] {
        self.my_nal_boundaries.as_slice()
    }

    /// Seek to the recorded offset and read exactly `sz` bytes.
    fn read_nal(&mut self, location: &Self::NalLocation) -> Result<Vec<u8>> {
        use std::io::SeekFrom;
        self.inner.seek(SeekFrom::Start(location.start))?;
        let mut nal_bytes = vec![0u8; location.sz];
        self.inner.read_exact(&mut nal_bytes)?;
        Ok(nal_bytes)
    }
}
impl<H> H264Source<H>
where
    H: SeekableH264Source,
    <H as SeekableH264Source>::NalLocation: Clone,
{
    /// Scan every NAL unit in `seekable_h264_source` and build an `H264Source`.
    ///
    /// The single pass over the stream:
    /// * seeds the parser with SPS/PPS from `data_from_mp4_track` (if given),
    /// * collects in-stream SPS/PPS to learn the frame dimensions,
    /// * reads SEI messages for embedded metadata and per-frame timestamps,
    /// * records, for each slice NAL, which timing data preceded it.
    ///
    /// `timestamp_source` selects which timing data to use; `BestGuess` is
    /// resolved here to a concrete source (or `None` when no timing data
    /// exists). Fails when a specifically requested source is unavailable or
    /// when no SPS with pixel dimensions was found.
    pub(crate) fn from_seekable_h264_source_with_timestamp_source(
        mut seekable_h264_source: H,
        do_decode_h264: bool,
        mp4_pts: Option<Vec<std::time::Duration>>,
        data_from_mp4_track: Option<FromMp4Track>,
        timestamp_source: crate::TimestampSource,
    ) -> Result<Self> {
        let nal_locations: Vec<H::NalLocation> = seekable_h264_source.nal_boundaries().to_vec();
        // Timezone carried by the metadata SEI message, if any.
        let mut tz_offset = None;
        let mut h264_metadata = None;
        // Scratch buffer reused by the SEI reader across NAL units.
        let mut scratch = Vec::new();
        let mut parsing_ctx = H264ParsingContext::default();
        let mut frame0_precision_time = None;
        let mut frame0_frameinfo_recv_ntp = None;
        let mut frame_to_nalu_time_info = Vec::new();
        // Timing data from SEI messages seen since the last slice NAL; it is
        // attached to the next slice and then cleared.
        let mut precise_timestamp = None;
        let mut frameinfo_recv_ntp = None;
        let mut next_frame_num = 0;
        if let Some(dfc) = data_from_mp4_track {
            tracing::trace!("Using SPS and PPS data from mp4 track.");
            // Seed the parsing context with the out-of-band SPS...
            {
                let sps_nal = RefNal::new(&dfc.sequence_parameter_set, &[], true);
                if sps_nal.header().unwrap().nal_unit_type() != UnitType::SeqParameterSet {
                    anyhow::bail!("expected SPS NAL");
                }
                let isps =
                    h264_reader::nal::sps::SeqParameterSet::from_bits(sps_nal.rbsp_bits()).unwrap();
                parsing_ctx.put_seq_param_set(isps);
            }
            // ...and PPS (which needs the SPS already in the context).
            {
                let pps_nal = RefNal::new(&dfc.picture_parameter_set, &[], true);
                if pps_nal.header().unwrap().nal_unit_type() != UnitType::PicParameterSet {
                    anyhow::bail!("expected PPS NAL");
                }
                let ipps = h264_reader::nal::pps::PicParameterSet::from_bits(
                    &parsing_ctx,
                    pps_nal.rbsp_bits(),
                )
                .unwrap();
                parsing_ctx.put_pic_param_set(ipps);
            }
        }
        tracing::trace!("iterating through all NAL units");
        for (nalu_index, nal_location) in nal_locations.iter().enumerate() {
            let nal_unit = seekable_h264_source.read_nal(nal_location)?;
            let nal = RefNal::new(nal_unit.as_slice(), &[], true);
            let nal_unit_type = nal.header().unwrap().nal_unit_type();
            tracing::trace!("NALU index {nalu_index}, {nal_unit_type:?}");
            match nal_unit_type {
                UnitType::SEI => {
                    // One SEI NAL unit may carry several SEI messages.
                    let mut sei_reader = SeiReader::from_rbsp_bytes(nal.rbsp_bytes(), &mut scratch);
                    loop {
                        match sei_reader.next() {
                            Ok(Some(sei_message)) => {
                                tracing::trace!("SEI payload type: {:?}", sei_message.payload_type);
                                match &sei_message.payload_type {
                                    HeaderType::UserDataUnregistered => {
                                        let udu = UserDataUnregistered::read(&sei_message)?;
                                        tracing::trace!(
                                            "SEI UserDataUnregistered uuid: {:?}",
                                            udu.uuid
                                        );
                                        // Dispatch on the message's 16-byte UUID.
                                        match udu.uuid {
                                            &H264_METADATA_UUID => {
                                                // JSON-encoded recording metadata.
                                                let md: H264Metadata =
                                                    serde_json::from_slice(udu.payload)?;
                                                if md.version != H264_METADATA_VERSION {
                                                    anyhow::bail!(
                                                        "unexpected version in h264 metadata"
                                                    );
                                                }
                                                // Exactly one metadata message is allowed.
                                                if h264_metadata.is_some() {
                                                    anyhow::bail!(
                                                        "multiple SEI messages, but expected exactly one"
                                                    );
                                                }
                                                // Remember the recording's timezone so frame
                                                // times can be reported in local time.
                                                tz_offset = Some(*md.creation_time.offset());
                                                h264_metadata = Some(md);
                                            }
                                            b"MISPmicrosectime" => {
                                                // MISP precision timestamp for the next frame.
                                                let precision_time =
                                                    parse_precision_time(udu.payload)
                                                        .with_context(|| {
                                                            "Parsing precision time stamp"
                                                        })?;
                                                precise_timestamp = Some(precision_time);
                                                if next_frame_num == 0 {
                                                    frame0_precision_time = Some(precision_time);
                                                }
                                            }
                                            b"strawlab.org/89H" => {
                                                // JSON-encoded FrameInfo with NTP receive time.
                                                let fi: FrameInfo =
                                                    serde_json::from_slice(udu.payload)?;
                                                frameinfo_recv_ntp = Some(NtpTimestamp(fi.recv));
                                                if next_frame_num == 0 {
                                                    frame0_frameinfo_recv_ntp =
                                                        frameinfo_recv_ntp.clone();
                                                }
                                            }
                                            _uuid => {
                                                // Unknown UUID: ignore this message.
                                            }
                                        }
                                    }
                                    _ => {
                                        // Other SEI payload types carry no timing
                                        // data we use; skip them.
                                    }
                                }
                            }
                            Ok(None) => {
                                // No more SEI messages in this NAL unit.
                                break;
                            }
                            Err(BitReaderError::ReaderErrorFor(what, io_err)) => {
                                // Tolerate malformed SEI payloads rather than
                                // aborting the whole scan.
                                tracing::error!(
                                    "Ignoring error when reading SEI NAL unit {what}: {io_err:?}"
                                );
                            }
                            Err(e) => {
                                anyhow::bail!(
                                    "unexpected error reading NAL unit {nalu_index} SEI: {e:?}"
                                );
                            }
                        }
                    }
                }
                UnitType::SeqParameterSet => {
                    let isps =
                        h264_reader::nal::sps::SeqParameterSet::from_bits(nal.rbsp_bits()).unwrap();
                    parsing_ctx.put_seq_param_set(isps);
                }
                UnitType::PicParameterSet => {
                    match h264_reader::nal::pps::PicParameterSet::from_bits(
                        &parsing_ctx,
                        nal.rbsp_bits(),
                    ) {
                        Ok(ipps) => {
                            parsing_ctx.put_pic_param_set(ipps);
                        }
                        Err(h264_reader::nal::pps::PpsError::BadPicParamSetId(
                            h264_reader::nal::pps::ParamSetIdError::IdTooLarge(_id),
                        )) => {
                            // PPS with out-of-range id is deliberately ignored.
                            // NOTE(review): presumably tolerated because some
                            // encoders emit such PPS NALs — confirm.
                        }
                        Err(e) => {
                            anyhow::bail!("reading PPS: {e:?}");
                        }
                    }
                }
                UnitType::SliceLayerWithoutPartitioningIdr
                | UnitType::SliceLayerWithoutPartitioningNonIdr => {
                    // A slice NAL completes a frame: attach the pending timing
                    // data and reset it for the next frame.
                    frame_to_nalu_time_info.push(NaluTimeInfo {
                        nalu_index,
                        precise_timestamp,
                        frameinfo_recv_ntp,
                    });
                    precise_timestamp = None;
                    frameinfo_recv_ntp = None;
                    next_frame_num += 1;
                }
                _nal_unit_type => {}
            }
        }
        // Frame dimensions come from the last SPS that reports pixel
        // dimensions.
        let mut widthheight = None;
        for sps in parsing_ctx.sps() {
            if let Ok(wh) = sps.pixel_dimensions() {
                widthheight = Some(wh);
            }
        }
        let (width, height) =
            widthheight.ok_or_else(|| anyhow::anyhow!("expected SPS not found"))?;
        // Default to UTC (offset 0) when the metadata SEI carried no timezone.
        let timezone = tz_offset.unwrap_or_else(|| chrono::FixedOffset::east_opt(0).unwrap());
        let frame0_precision_time = frame0_precision_time
            .as_ref()
            .map(|dt| dt.with_timezone(&timezone));
        // Resolve the requested timestamp source against what is actually
        // available, failing loudly when a specific request cannot be met.
        let (timestamp_source, has_timestamps) = match timestamp_source {
            crate::TimestampSource::BestGuess => {
                // Preference order: MISP precision time, FrameInfo NTP
                // receive time, MP4 PTS, then no timestamps at all.
                if frame0_precision_time.is_some() {
                    (Some(crate::TimestampSource::MispMicrosectime), true)
                } else if frame0_frameinfo_recv_ntp.is_some() {
                    (Some(crate::TimestampSource::FrameInfoRecvTime), true)
                } else if mp4_pts.is_some() {
                    (Some(crate::TimestampSource::Mp4Pts), true)
                } else {
                    (None, false)
                }
            }
            crate::TimestampSource::FrameInfoRecvTime => {
                if frame0_frameinfo_recv_ntp.is_none() {
                    anyhow::bail!(
                        "Requested timestamp source {timestamp_source:?}, but FrameInfo not present."
                    );
                }
                (Some(timestamp_source), true)
            }
            crate::TimestampSource::MispMicrosectime => {
                if frame0_precision_time.is_none() {
                    anyhow::bail!(
                        "Requested timestamp source {timestamp_source:?}, but timestamp not present."
                    );
                }
                (Some(timestamp_source), true)
            }
            crate::TimestampSource::Mp4Pts => {
                if mp4_pts.is_none() {
                    anyhow::bail!(
                        "Requested timestamp source {timestamp_source:?}, but MP4 PTS not present."
                    );
                }
                (Some(timestamp_source), true)
            }
        };
        Ok(Self {
            seekable_h264_source,
            nal_locations,
            mp4_pts,
            frame_to_nalu_time_info,
            h264_metadata,
            frame0_precision_time,
            frame0_frameinfo_recv_ntp,
            width,
            height,
            do_decode_h264,
            timestamp_source,
            has_timestamps,
        })
    }
}
/// Iterator over frames, produced by `H264Source::iter`.
struct RawH264Iter<'parent, H: SeekableH264Source> {
    parent: &'parent mut H264Source<H>,
    // Index of the next frame to yield (count of `next()` calls so far).
    frame_idx: usize,
    // Index of the first NAL unit not yet consumed by a previous frame.
    next_nal_idx: usize,
    // `Some` when frames should be decoded to RGB; `None` to pass through
    // the encoded NAL units.
    openh264_decoder_state: Option<openh264::decoder::Decoder>,
}
impl<'parent, H: SeekableH264Source> Iterator for RawH264Iter<'parent, H> {
    type Item = Result<FrameData>;
    /// Yield the next frame: all NAL units from the end of the previous
    /// frame up to and including this frame's slice NAL, optionally decoded
    /// to RGB via OpenH264.
    fn next(&mut self) -> Option<Self::Item> {
        let frame_number = self.frame_idx;
        let res = self.parent.frame_to_nalu_time_info.get(self.frame_idx);
        // Incremented even when `res` is `None` (iterator exhausted);
        // `size_hint` below guards against the resulting overshoot.
        self.frame_idx += 1;
        res.map(|nti| {
            let nal_locations = &self.parent.nal_locations[self.next_nal_idx..=(nti.nalu_index)];
            let mp4_pts = self.parent.mp4_pts.as_ref().map(|x| x[self.next_nal_idx]);
            // Progress through the stream, used when no timestamps exist.
            let fraction_done = self.next_nal_idx as f32 / self.parent.nal_locations.len() as f32;
            self.next_nal_idx = nti.nalu_index + 1;
            let frame_timestamp = match self.parent.timestamp_source {
                // `BestGuess` is resolved to a concrete source at construction.
                Some(TimestampSource::BestGuess) => unreachable!(),
                Some(TimestampSource::MispMicrosectime) => {
                    // Elapsed time since the first frame's precision timestamp.
                    let f0 = self.parent.frame0_precision_time.as_ref().unwrap();
                    Timestamp::Duration(
                        nti.precise_timestamp
                            .unwrap()
                            .signed_duration_since(*f0)
                            .to_std()
                            .unwrap(),
                    )
                }
                Some(TimestampSource::FrameInfoRecvTime) => {
                    // Elapsed time since the first frame's NTP receive time.
                    let t0 = self.parent.frame0_frameinfo_recv_ntp.as_ref().unwrap();
                    let t0: chrono::DateTime<chrono::Utc> = (*t0).into();
                    let this_frame: chrono::DateTime<chrono::Utc> =
                        nti.frameinfo_recv_ntp.unwrap().into();
                    Timestamp::Duration(this_frame.signed_duration_since(t0).to_std().unwrap())
                }
                Some(TimestampSource::Mp4Pts) => Timestamp::Duration(mp4_pts.unwrap()),
                None => Timestamp::Fraction(fraction_done),
            };
            let nal_units = self.parent.seekable_h264_source.read_nals(nal_locations)?;
            if let Some(decoder) = &mut self.openh264_decoder_state {
                // OpenH264 wants Annex B framing, so re-add start codes.
                let annex_b = copy_nalus_to_annex_b(nal_units.as_slice());
                let decode_result = decoder.decode(&annex_b[..]);
                match decode_result {
                    Ok(Some(decoded_yuv)) => {
                        // Convert the decoded YUV image to packed RGB8.
                        let dim = decoded_yuv.dimension_rgb();
                        let stride = dim.0 * 3;
                        let mut image_data = vec![0u8; stride * dim.1];
                        decoded_yuv.write_rgb8(&mut image_data);
                        // Prefer the frame's own precision timestamp; fall
                        // back to metadata creation time + MP4 PTS, then to
                        // the UNIX epoch as a last resort.
                        let host_timestamp = match nti.precise_timestamp {
                            Some(ts) => ts,
                            None => {
                                if let (Some(mp4_pts), Some(md)) =
                                    (mp4_pts, &self.parent.h264_metadata)
                                {
                                    md.creation_time.with_timezone(&chrono::Utc)
                                        + chrono::Duration::from_std(mp4_pts).unwrap()
                                } else {
                                    chrono::TimeZone::timestamp_opt(&chrono::Utc, 0, 0).unwrap()
                                }
                            }
                        };
                        let extra = Box::new(basic_frame::BasicExtra {
                            host_timestamp,
                            host_framenumber: frame_number,
                        });
                        let dynamic_frame =
                            basic_frame::DynamicFrame::RGB8(basic_frame::BasicFrame::<
                                machine_vision_formats::pixel_format::RGB8,
                            > {
                                width: dim.0.try_into().unwrap(),
                                height: dim.1.try_into().unwrap(),
                                stride: u32::try_from(stride).unwrap(),
                                image_data,
                                pixel_format: std::marker::PhantomData,
                                extra,
                            });
                        let buf_len = nal_units.iter().map(|x| x.len()).sum();
                        let idx = frame_number;
                        let image = ImageData::Decoded(dynamic_frame);
                        Ok(FrameData {
                            timestamp: frame_timestamp,
                            image,
                            buf_len,
                            idx,
                        })
                    }
                    Ok(None) => Err(anyhow::anyhow!(
                        "decoder unexpectedly did not return image data"
                    )),
                    Err(decode_err) => Err(decode_err.into()),
                }
            } else {
                // Pass-through mode: hand the caller the encoded NAL units.
                let buf_len = nal_units.iter().map(|x| x.len()).sum();
                let idx = frame_number;
                let buf = EncodedH264 {
                    data: H264EncodingVariant::RawEbsp(nal_units.to_vec()),
                    has_precision_timestamp: self.parent.frame0_precision_time.is_some(),
                };
                let image = ImageData::EncodedH264(buf);
                Ok(FrameData {
                    timestamp: frame_timestamp,
                    image,
                    buf_len,
                    idx,
                })
            }
        })
    }
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `next()` increments `frame_idx` even on calls past the end, so a
        // plain subtraction could underflow (panic in debug builds, wrap in
        // release). Saturate instead.
        let remaining = self
            .parent
            .frame_to_nalu_time_info
            .len()
            .saturating_sub(self.frame_idx);
        (remaining, Some(remaining))
    }
}
pub(crate) fn from_annexb_path_with_timestamp_source<P: AsRef<Path>>(
path: P,
do_decode_h264: bool,
timestamp_source: crate::TimestampSource,
) -> Result<H264Source<H264AnnexBSource>> {
let rdr = std::fs::File::open(path.as_ref())
.with_context(|| format!("Opening {}", path.as_ref().display()))?;
let seekable_h264_source = H264AnnexBSource::from_file(rdr)?;
from_annexb_reader_with_timestamp_source(seekable_h264_source, do_decode_h264, timestamp_source)
.with_context(|| format!("Reading H264 file {}", path.as_ref().display()))
}
/// Build an `H264Source` from an already-opened Annex B source.
fn from_annexb_reader_with_timestamp_source(
    annex_b_source: H264AnnexBSource,
    do_decode_h264: bool,
    timestamp_source: crate::TimestampSource,
) -> Result<H264Source<H264AnnexBSource>> {
    // Raw Annex B input carries no container timing and no out-of-band
    // SPS/PPS, so neither MP4 PTS nor track data is supplied.
    let mp4_pts = None;
    let data_from_mp4_track = None;
    H264Source::from_seekable_h264_source_with_timestamp_source(
        annex_b_source,
        do_decode_h264,
        mp4_pts,
        data_from_mp4_track,
        timestamp_source,
    )
}
/// A parsed SEI "user data unregistered" message: a 16-byte UUID followed
/// by an opaque payload, both borrowed from the SEI message buffer.
pub(crate) struct UserDataUnregistered<'a> {
    pub uuid: &'a [u8; 16],
    pub payload: &'a [u8],
}
impl<'a> UserDataUnregistered<'a> {
    /// Split a `UserDataUnregistered` SEI message into its 16-byte UUID and
    /// the remaining payload bytes.
    ///
    /// Fails when the message has a different payload type or is shorter
    /// than the 16-byte UUID.
    pub fn read(msg: &SeiMessage<'a>) -> Result<UserDataUnregistered<'a>> {
        if msg.payload_type != HeaderType::UserDataUnregistered {
            anyhow::bail!(
                "expected UserDataUnregistered message, found {:?}",
                msg.payload_type
            );
        }
        if msg.payload.len() < 16 {
            anyhow::bail!("SEI payload too short to contain UserDataUnregistered message");
        }
        let (uuid_bytes, payload) = msg.payload.split_at(16);
        // Length was checked above, so the 16-byte conversion cannot fail.
        let uuid = uuid_bytes.try_into().unwrap();
        Ok(UserDataUnregistered { uuid, payload })
    }
}
/// Decode a MISP "MISPmicrosectime" SEI payload into a UTC time.
///
/// The 12-byte payload carries a 64-bit big-endian microsecond count split
/// into four 2-byte groups at offsets 1-2, 4-5, 7-8 and 10-11, each group
/// followed by a 0xFF start-code-emulation-prevention byte (offsets 3, 6
/// and 9). The leading byte is not validated here.
pub(crate) fn parse_precision_time(payload: &[u8]) -> Result<chrono::DateTime<chrono::Utc>> {
    if payload.len() != 12 {
        anyhow::bail!("unexpected payload length");
    }
    // Verify the three interleaved escape bytes.
    if [3, 6, 9].iter().any(|&idx| payload[idx] != 0xFF) {
        anyhow::bail!("unexpected start code emulation prevention byte");
    }
    // Reassemble the eight timestamp bytes, skipping the escape bytes.
    let mut stamp_bytes = [0u8; 8];
    stamp_bytes[0..2].copy_from_slice(&payload[1..3]);
    stamp_bytes[2..4].copy_from_slice(&payload[4..6]);
    stamp_bytes[4..6].copy_from_slice(&payload[7..9]);
    stamp_bytes[6..8].copy_from_slice(&payload[10..12]);
    let micros = i64::from_be_bytes(stamp_bytes);
    // Interpret as microseconds since the UNIX epoch, in UTC.
    let epoch_start = chrono::NaiveDate::from_ymd_opt(1970, 1, 1)
        .unwrap()
        .and_hms_micro_opt(0, 0, 0, 0)
        .unwrap()
        .and_local_timezone(chrono::Utc)
        .unwrap();
    Ok(epoch_start + chrono::Duration::microseconds(micros))
}
/// Concatenate NAL units into a single Annex B byte stream, prefixing each
/// NAL unit with the four-byte start code `00 00 00 01`.
fn copy_nalus_to_annex_b(nalus: &[Vec<u8>]) -> Vec<u8> {
    // Each NAL unit contributes its own bytes plus a 4-byte start code.
    let total_len: usize = nalus.iter().map(|nalu| nalu.len() + 4).sum();
    let mut annex_b = Vec::with_capacity(total_len);
    for nalu in nalus {
        annex_b.extend_from_slice(&[0x00, 0x00, 0x00, 0x01]);
        annex_b.extend_from_slice(nalu);
    }
    annex_b
}
/// JSON payload of the "strawlab.org/89H" SEI message.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FrameInfo {
    // Receive time as a raw NTP timestamp value (wrapped in `NtpTimestamp`).
    recv: u64,
    // RTP timestamp. NOTE(review): deserialized but not read in this module.
    rtp: u32,
}
#[cfg(test)]
mod test {
    use super::*;
    // Decode two tiny Annex B test files end-to-end and check the reported
    // dimensions and frame count. The files carry no timing SEI or MP4 PTS,
    // so `BestGuess` resolves to no timestamp source.
    #[test]
    fn parse_h264() -> color_eyre::Result<()> {
        {
            // 15x14 mono8 test stream.
            let file_buf = include_bytes!("test-data/test_less-avc_mono8_15x14.h264");
            let cursor = std::io::Cursor::new(file_buf);
            let seekable_h264_source = H264AnnexBSource::from_readseek(Box::new(cursor))?;
            let do_decode_h264 = true;
            let mut h264_src = from_annexb_reader_with_timestamp_source(
                seekable_h264_source,
                do_decode_h264,
                TimestampSource::BestGuess,
            )?;
            assert_eq!(h264_src.width(), 15);
            assert_eq!(h264_src.height(), 14);
            let frames: Vec<_> = h264_src.iter().collect();
            assert_eq!(frames.len(), 1);
        }
        {
            // 16x16 rgb8 test stream.
            let file_buf = include_bytes!("test-data/test_less-avc_rgb8_16x16.h264");
            let cursor = std::io::Cursor::new(file_buf);
            let seekable_h264_source = H264AnnexBSource::from_readseek(Box::new(cursor))?;
            let do_decode_h264 = true;
            let mut h264_src = from_annexb_reader_with_timestamp_source(
                seekable_h264_source,
                do_decode_h264,
                TimestampSource::BestGuess,
            )?;
            assert_eq!(h264_src.width(), 16);
            assert_eq!(h264_src.height(), 16);
            let frames: Vec<_> = h264_src.iter().collect();
            assert_eq!(frames.len(), 1);
        }
        Ok(())
    }
}