use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::ops::{Add, Index, Range};
#[inline(always)]
#[allow(clippy::neg_cmp_op_on_partial_ord)]
pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
!(range.start < range.end)
}
pub struct UniqueItem<'a, Idx: ?Sized> {
lookup: &'a Idx,
index: usize,
}
impl<'a, Idx: ?Sized> UniqueItem<'a, Idx>
where
Idx: Index<usize>,
{
#[inline(always)]
pub fn value(&self) -> &Idx::Output {
&self.lookup[self.index]
}
#[inline(always)]
pub fn original_index(&self) -> usize {
self.index
}
}
impl<'a, Idx: Index<usize> + 'a> Debug for UniqueItem<'a, Idx>
where
Idx::Output: Debug,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("UniqueItem")
.field("value", &self.value())
.field("original_index", &self.original_index())
.finish()
}
}
impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
where
A: Index<usize> + 'b + ?Sized,
B: Index<usize> + 'b + ?Sized,
B::Output: PartialEq<A::Output>,
{
#[inline(always)]
fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
self.value() == other.value()
}
}
pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<Idx>>
where
Idx: Index<usize> + ?Sized,
Idx::Output: Hash + Eq,
{
let mut by_item = HashMap::new();
for index in range {
match by_item.entry(&lookup[index]) {
Entry::Vacant(entry) => {
entry.insert(Some(index));
}
Entry::Occupied(mut entry) => {
let entry = entry.get_mut();
if entry.is_some() {
*entry = None
}
}
}
}
let mut rv = by_item
.into_iter()
.filter_map(|(_, x)| x)
.map(|index| UniqueItem { lookup, index })
.collect::<Vec<_>>();
rv.sort_by_key(|a| a.original_index());
rv
}
pub fn common_prefix_len<Old, New>(
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> usize
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
New::Output: PartialEq<Old::Output>,
{
if is_empty_range(&old_range) || is_empty_range(&new_range) {
return 0;
}
new_range
.zip(old_range)
.take_while(
#[inline(always)]
|x| new[x.0] == old[x.1],
)
.count()
}
pub fn common_suffix_len<Old, New>(
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> usize
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
New::Output: PartialEq<Old::Output>,
{
if is_empty_range(&old_range) || is_empty_range(&new_range) {
return 0;
}
new_range
.rev()
.zip(old_range.rev())
.take_while(
#[inline(always)]
|x| new[x.0] == old[x.1],
)
.count()
}
struct OffsetLookup<Int> {
offset: usize,
vec: Vec<Int>,
}
impl<Int> Index<usize> for OffsetLookup<Int> {
type Output = Int;
#[inline(always)]
fn index(&self, index: usize) -> &Self::Output {
&self.vec[index - self.offset]
}
}
pub struct IdentifyDistinct<Int> {
old: OffsetLookup<Int>,
new: OffsetLookup<Int>,
}
impl<Int> IdentifyDistinct<Int>
where
Int: Add<Output = Int> + From<u8> + Default + Copy,
{
pub fn new<Old, New>(
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> Self
where
Old: Index<usize> + ?Sized,
Old::Output: Eq + Hash,
New: Index<usize> + ?Sized,
New::Output: Eq + Hash + PartialEq<Old::Output>,
{
enum Key<'old, 'new, Old: ?Sized, New: ?Sized> {
Old(&'old Old),
New(&'new New),
}
impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New>
where
Old: Hash + ?Sized,
New: Hash + ?Sized,
{
fn hash<H: Hasher>(&self, state: &mut H) {
match *self {
Key::Old(val) => val.hash(state),
Key::New(val) => val.hash(state),
}
}
}
impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New>
where
Old: Eq + ?Sized,
New: Eq + PartialEq<Old> + ?Sized,
{
#[inline(always)]
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Key::Old(a), Key::Old(b)) => a == b,
(Key::New(a), Key::New(b)) => a == b,
(Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a,
}
}
}
impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New>
where
Old: Eq + ?Sized,
New: Eq + PartialEq<Old> + ?Sized,
{
}
let mut map = HashMap::new();
let mut old_seq = Vec::new();
let mut new_seq = Vec::new();
let mut next_id = Int::default();
let step = Int::from(1);
let old_start = old_range.start;
let new_start = new_range.start;
for idx in old_range {
let item = Key::Old(&old[idx]);
let id = match map.entry(item) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
let id = next_id;
next_id = next_id + step;
*v.insert(id)
}
};
old_seq.push(id);
}
for idx in new_range {
let item = Key::New(&new[idx]);
let id = match map.entry(item) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
let id = next_id;
next_id = next_id + step;
*v.insert(id)
}
};
new_seq.push(id);
}
IdentifyDistinct {
old: OffsetLookup {
offset: old_start,
vec: old_seq,
},
new: OffsetLookup {
offset: new_start,
vec: new_seq,
},
}
}
pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
&self.old
}
pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
&self.new
}
pub fn old_range(&self) -> Range<usize> {
self.old.offset..self.old.offset + self.old.vec.len()
}
pub fn new_range(&self) -> Range<usize> {
self.new.offset..self.new.offset + self.new.vec.len()
}
}
#[test]
fn test_unique() {
let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
.into_iter()
.map(|x| (*x.value(), x.original_index()))
.collect::<Vec<_>>();
assert_eq!(u, vec![('a', 0), ('c', 2)]);
}
#[test]
fn test_int_hasher() {
let ih = IdentifyDistinct::<u8>::new(
&["", "foo", "bar", "baz"][..],
1..4,
&["", "foo", "blah", "baz"][..],
1..4,
);
assert_eq!(ih.old_lookup()[1], 0);
assert_eq!(ih.old_lookup()[2], 1);
assert_eq!(ih.old_lookup()[3], 2);
assert_eq!(ih.new_lookup()[1], 0);
assert_eq!(ih.new_lookup()[2], 3);
assert_eq!(ih.new_lookup()[3], 2);
assert_eq!(ih.old_range(), 1..4);
assert_eq!(ih.new_range(), 1..4);
}
#[test]
fn test_common_prefix_len() {
assert_eq!(
common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
7
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
4
);
}
#[test]
fn test_common_suffix_len() {
assert_eq!(
common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
4
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
2
);
}