1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//! Varints
//!
//! This implementation borrows heavily from the `vint64` crate.
//!
//! Below is an example of how prefix bits signal the length of the integer value
//! which follows:
//!
//! | Prefix     | Precision | Total Bytes |
//! |------------|-----------|-------------|
//! | `xxxxxxx1` | 7 bits    | 1 byte      |
//! | `xxxxxx10` | 14 bits   | 2 bytes     |
//! | `xxxxx100` | 21 bits   | 3 bytes     |
//! | `xxxx1000` | 28 bits   | 4 bytes     |
//! | `xxx10000` | 35 bits   | 5 bytes     |
//! | `xx100000` | 42 bits   | 6 bytes     |
//! | `x1000000` | 49 bits   | 7 bytes     |
//! | `10000000` | 56 bits   | 8 bytes     |
//! | `00000000` | 64 bits   | 9 bytes     |
//!
//! ## Note
//!
//! Although this scheme supports up to 64 bits, this implementation only ever
//! encodes and decodes values up to `usize::MAX` of the current platform. In
//! practice, this is not an issue, as you cannot send data larger than the
//! address space of your platform anyway.
//!
//! ## Important warning
//!
//! This implementation is NOT suitable for data that is passed between multiple
//! platforms, particularly those of different pointer sizes. If you are interested
//! in portably serializing/deserializing data, consider using the `vint64` crate.
//! This implementation assumes that values larger than the platform's
//! `usize::MAX` will never be encoded/decoded, which is not true when sending
//! between 32-bit and 64-bit platforms.
//!
//! For bbqueue, the sender doing the encoding (the `Producer`) and the receiver
//! doing the decoding (the `Consumer`) will always reside within the same application
//! running on the same machine, meaning we CAN make these non-portable
//! assumptions for the sake of performance/simplicity.
//!
//! Because `vusize` is an internal implementation detail of `BBQueue`, this does **NOT**
//! affect portability when sending data from one machine to another. Here's a diagram
//! explaining that:
//!
//! ```text
//!               interrupt sending bytes out
//!                   over the serial port
//!                           |
//!  application creating     |
//!      data to send         |
//!         |                 |
//!         v                 v
//! [         embedded system          ]    [      PC system      ]
//! [ [bbq producer] => [bbq consumer] ] => [                     ]
//! [                                  ]    [                     ]
//!                  ^                   ^
//!                  |                   |
//!          `vusize` lives here         |
//!                                      |
//!                             bytes sent over a serial
//!                               port, in order. Frame
//!                             information is not sent over
//!                                the wire.
//! ```

/// Width of a `usize` on the current target, in bytes.
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
/// Length in bytes of the longest encoding on this target: one header byte
/// followed by every byte of the `usize`.
const USIZE_SIZE_PLUS_ONE: usize = USIZE_SIZE + 1;

/// Header byte used for a maximum-length encoding, where the whole `usize` is
/// stored verbatim after the header.
///
/// The header is a single set bit at index `USIZE_SIZE`, truncated to one byte:
///
/// | Pointer width | Header        |
/// |---------------|---------------|
/// | 64-bit        | `0b0000_0000` |
/// | 32-bit        | `0b0001_0000` |
/// | 16-bit        | `0b0000_0100` |
/// | 8-bit         | `0b0000_0010` |
const fn max_size_header() -> u8 {
    let marker_bit = 1usize << USIZE_SIZE;
    // On 64-bit targets the marker is bit 8, which does not fit in a byte;
    // masking keeps only the low byte, giving an all-zero header.
    (marker_bit & 0xFF) as u8
}

/// Number of bytes needed to encode `value` on a 64-bit target.
///
/// Each encoded byte carries 7 bits of payload for the 1- to 8-byte forms.
/// Anything wider than 56 bits uses the 9-byte form (a header byte followed by
/// the raw 8-byte value).
#[cfg(target_pointer_width = "64")]
pub fn encoded_len(value: usize) -> usize {
    // Position of the highest set bit, counted from 1 (0 when `value == 0`).
    // `leading_zeros` is at most 64 here, so the subtraction cannot underflow.
    let significant_bits = 64 - value.leading_zeros();

    match significant_bits {
        0..=7 => 1,
        8..=14 => 2,
        15..=21 => 3,
        22..=28 => 4,
        29..=35 => 5,
        36..=42 => 6,
        43..=49 => 7,
        50..=56 => 8,
        // 57..=64 significant bits: does not fit in any compact form.
        _ => 9,
    }
}

/// Number of bytes needed to encode `value` on a 32-bit target.
///
/// Each encoded byte carries 7 bits of payload for the 1- to 4-byte forms.
/// Anything wider than 28 bits uses the 5-byte form (a header byte followed by
/// the raw 4-byte value).
#[cfg(target_pointer_width = "32")]
pub fn encoded_len(value: usize) -> usize {
    // Position of the highest set bit, counted from 1 (0 when `value == 0`).
    // `leading_zeros` is at most 32 here, so the subtraction cannot underflow.
    let significant_bits = 32 - value.leading_zeros();

    match significant_bits {
        0..=7 => 1,
        8..=14 => 2,
        15..=21 => 3,
        22..=28 => 4,
        // 29..=32 significant bits: does not fit in any compact form.
        _ => 5,
    }
}

/// Number of bytes needed to encode `value` on a 16-bit target.
///
/// Values of up to 7 bits take one byte and values of up to 14 bits take two.
/// Anything wider uses the 3-byte form (a header byte followed by the raw
/// 2-byte value).
#[cfg(target_pointer_width = "16")]
pub fn encoded_len(value: usize) -> usize {
    // Position of the highest set bit, counted from 1 (0 when `value == 0`).
    // `leading_zeros` is at most 16 here, so the subtraction cannot underflow.
    let significant_bits = 16 - value.leading_zeros();

    match significant_bits {
        0..=7 => 1,
        8..=14 => 2,
        // 15..=16 significant bits: does not fit in any compact form.
        _ => 3,
    }
}

/// Number of bytes needed to encode `value` on an 8-bit target.
///
/// Rust probably has no targets with 8-bit pointers, but for completeness:
/// `0..=127` fits in a single byte, while `128..=255` needs the 2-byte form.
#[cfg(target_pointer_width = "8")]
pub fn encoded_len(value: usize) -> usize {
    // Only the top bit decides whether the 7-bit single-byte form is enough.
    match value & 0x80 {
        0 => 1,
        _ => 2,
    }
}

/// Write `value` into the first `length` bytes of `slice` as a varint.
///
/// For `length` up to `size_of::<usize>()`, the value is shifted left past a
/// unary length marker (a `1` bit preceded by `length - 1` zero bits) and stored
/// little-endian. For the maximum length (`size_of::<usize>() + 1`), the first
/// byte is a fixed header and the value follows verbatim in little-endian order.
///
/// Bytes of `slice` beyond the encoded bytes are left untouched.
///
/// ## Preconditions
///
/// These are checked with `debug_assert!`, so they only fire in builds with
/// debug assertions enabled:
///
/// * `length` must be `>= encoded_len(value)`; otherwise the upper bits of the
///   value do not fit and are dropped
/// * `length` must be `<= slice.len()`
/// * `length` must be `<= size_of::<usize>() + 1`
///
/// ## Panics
///
/// Regardless of build profile, panics if `slice.len() < length`.
pub fn encode_usize_to_slice(value: usize, length: usize, slice: &mut [u8]) {
    debug_assert!(
        encoded_len(value) <= length,
        "Tried to encode to smaller than necessary length!",
    );
    debug_assert!(length <= slice.len(), "Not enough space to encode!",);
    debug_assert!(
        length <= USIZE_SIZE_PLUS_ONE,
        "Tried to encode larger than platform supports!",
    );

    let target = &mut slice[..length];

    if length < USIZE_SIZE_PLUS_ONE {
        // Compact form: the lowest set bit marks the length, the value sits
        // directly above it.
        let packed = ((value << 1) | 1) << (length - 1);
        let packed_bytes = packed.to_le_bytes();
        target.copy_from_slice(&packed_bytes[..length]);
        return;
    }

    // Full-width form: there is no room in a `usize` for both the marker and
    // the value, so emit a dedicated header byte and then the raw value bytes.
    let value_bytes = value.to_le_bytes();
    let (header, payload) = target.split_at_mut(1);
    header[0] = max_size_header();
    payload[..value_bytes.len()].copy_from_slice(&value_bytes);
}

/// Total length in bytes of an encoded value, derived from its first (header)
/// byte.
///
/// The length is one more than the number of trailing zero bits in the header.
/// An all-zero header therefore yields 9.
pub fn decoded_len(byte: u8) -> usize {
    let zeros_before_marker = byte.trailing_zeros();
    zeros_before_marker as usize + 1
}

/// Decode a varint-encoded `usize` from the start of `input`.
///
/// The encoded length is taken from the first byte (see `decoded_len`). Any
/// bytes in `input` after the encoded value are ignored.
///
/// ## Panics
///
/// Panics if `input` is empty or shorter than the length announced by its
/// first byte. With debug assertions enabled, it also panics if that length
/// exceeds `size_of::<usize>() + 1`.
pub fn decode_usize(input: &[u8]) -> usize {
    let length = decoded_len(input[0]);

    debug_assert!(input.len() >= length, "Not enough data to decode!",);
    debug_assert!(
        length <= USIZE_SIZE_PLUS_ONE,
        "Tried to decode data too large for this platform!",
    );

    let encoded = &input[..length];
    let mut raw = [0u8; USIZE_SIZE];

    if length < USIZE_SIZE_PLUS_ONE {
        // Compact form: load the bytes little-endian, then shift out the
        // `length`-bit marker that sits below the value.
        raw[..length].copy_from_slice(encoded);
        return usize::from_le_bytes(raw) >> length;
    }

    // Full-width form: the header byte carries no value bits, and the raw
    // little-endian `usize` follows it.
    raw.copy_from_slice(&encoded[1..]);
    usize::from_le_bytes(raw)
}