bytestring/
lib.rs

1//! A UTF-8 encoded read-only string using `Bytes` as storage.
2//!
3//! See docs for [`ByteString`].
4
5#![no_std]
6
7extern crate alloc;
8
9use alloc::{boxed::Box, string::String, vec::Vec};
10use core::{borrow::Borrow, fmt, hash, ops, str};
11
12use bytes::Bytes;
13
14/// An immutable UTF-8 encoded string using [`Bytes`] as the storage.
15#[derive(Clone, Default, Eq, PartialOrd, Ord)]
16pub struct ByteString(Bytes);
17
18impl ByteString {
19    /// Creates a new empty `ByteString`.
20    pub const fn new() -> Self {
21        ByteString(Bytes::new())
22    }
23
24    /// Get a reference to the underlying `Bytes` object.
25    pub fn as_bytes(&self) -> &Bytes {
26        &self.0
27    }
28
29    /// Unwraps this `ByteString` into the underlying `Bytes` object.
30    pub fn into_bytes(self) -> Bytes {
31        self.0
32    }
33
34    /// Creates a new `ByteString` from a `&'static str`.
35    pub const fn from_static(src: &'static str) -> ByteString {
36        Self(Bytes::from_static(src.as_bytes()))
37    }
38
39    /// Creates a new `ByteString` from a Bytes.
40    ///
41    /// # Safety
42    /// This function is unsafe because it does not check the bytes passed to it are valid UTF-8.
43    /// If this constraint is violated, it may cause memory unsafety issues with future users of
44    /// the `ByteString`, as we assume that `ByteString`s are valid UTF-8. However, the most likely
45    /// issue is that the data gets corrupted.
46    pub const unsafe fn from_bytes_unchecked(src: Bytes) -> ByteString {
47        Self(src)
48    }
49
50    /// Divides one bytestring into two at an index, returning both parts.
51    ///
52    /// # Panics
53    ///
54    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past the end of the last
55    /// code point of the bytestring.
56    pub fn split_at(&self, mid: usize) -> (ByteString, ByteString) {
57        let this: &str = self.as_ref();
58        let _valid_midpoint_check = this.split_at(mid);
59
60        let mut bytes = self.0.clone();
61        let first = bytes.split_to(mid);
62        let last = bytes;
63
64        unsafe {
65            (
66                ByteString::from_bytes_unchecked(first),
67                ByteString::from_bytes_unchecked(last),
68            )
69        }
70    }
71
72    /// Returns a new `ByteString` that is equivalent to the given `subset`.
73    ///
74    /// When processing a `ByteString` buffer with other tools, one often gets a `&str` which is in
75    /// fact a slice of the original `ByteString`; i.e., a subset of it. This function turns that
76    /// `&str` into another `ByteString`, as if one had sliced the `ByteString` with the offsets
77    /// that correspond to `subset`.
78    ///
79    /// Corresponds to [`Bytes::slice_ref`].
80    ///
81    /// This operation is `O(1)`.
82    ///
83    /// # Panics
84    ///
85    /// Panics if `subset` is not a sub-slice of this byte string.
86    ///
87    /// Note that strings which are only subsets from an equality perspective do not uphold this
88    /// requirement; see examples.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// # use bytestring::ByteString;
94    /// let string = ByteString::from_static(" foo ");
95    /// let subset = string.trim();
96    /// let substring = string.slice_ref(subset);
97    /// assert_eq!(substring, "foo");
98    /// ```
99    ///
100    /// ```should_panic
101    /// # use bytestring::ByteString;
102    /// // panics because the given slice is not derived from the original byte string, despite
103    /// // being a logical subset of the string
104    /// ByteString::from_static("foo bar").slice_ref("foo");
105    /// ```
106    pub fn slice_ref(&self, subset: &str) -> Self {
107        Self(self.0.slice_ref(subset.as_bytes()))
108    }
109}
110
111impl PartialEq<str> for ByteString {
112    fn eq(&self, other: &str) -> bool {
113        &self[..] == other
114    }
115}
116
117impl<T: AsRef<str>> PartialEq<T> for ByteString {
118    fn eq(&self, other: &T) -> bool {
119        &self[..] == other.as_ref()
120    }
121}
122
123impl AsRef<ByteString> for ByteString {
124    fn as_ref(&self) -> &ByteString {
125        self
126    }
127}
128
129impl AsRef<[u8]> for ByteString {
130    fn as_ref(&self) -> &[u8] {
131        self.0.as_ref()
132    }
133}
134
135impl AsRef<str> for ByteString {
136    fn as_ref(&self) -> &str {
137        self
138    }
139}
140
141impl hash::Hash for ByteString {
142    fn hash<H: hash::Hasher>(&self, state: &mut H) {
143        (**self).hash(state);
144    }
145}
146
147impl ops::Deref for ByteString {
148    type Target = str;
149
150    #[inline]
151    fn deref(&self) -> &str {
152        let bytes = self.0.as_ref();
153        // SAFETY: UTF-8 validity is guaranteed during construction.
154        unsafe { str::from_utf8_unchecked(bytes) }
155    }
156}
157
158impl Borrow<str> for ByteString {
159    fn borrow(&self) -> &str {
160        self
161    }
162}
163
164impl From<String> for ByteString {
165    #[inline]
166    fn from(value: String) -> Self {
167        Self(Bytes::from(value))
168    }
169}
170
171impl From<&str> for ByteString {
172    #[inline]
173    fn from(value: &str) -> Self {
174        Self(Bytes::copy_from_slice(value.as_ref()))
175    }
176}
177
178impl From<Box<str>> for ByteString {
179    #[inline]
180    fn from(value: Box<str>) -> Self {
181        Self(Bytes::from(value.into_boxed_bytes()))
182    }
183}
184
185impl From<ByteString> for String {
186    #[inline]
187    fn from(value: ByteString) -> Self {
188        // SAFETY: UTF-8 validity is guaranteed during construction.
189        unsafe { String::from_utf8_unchecked(value.0.into()) }
190    }
191}
192
193impl TryFrom<&[u8]> for ByteString {
194    type Error = str::Utf8Error;
195
196    #[inline]
197    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
198        if value.is_ascii() {
199            return Ok(ByteString(Bytes::copy_from_slice(value)));
200        }
201
202        let _ = str::from_utf8(value)?;
203        Ok(ByteString(Bytes::copy_from_slice(value)))
204    }
205}
206
207impl TryFrom<Vec<u8>> for ByteString {
208    type Error = str::Utf8Error;
209
210    #[inline]
211    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
212        if value.is_ascii() {
213            return Ok(ByteString(Bytes::from(value)));
214        }
215
216        let buf = String::from_utf8(value).map_err(|err| err.utf8_error())?;
217        Ok(ByteString(Bytes::from(buf)))
218    }
219}
220
221impl TryFrom<Bytes> for ByteString {
222    type Error = str::Utf8Error;
223
224    #[inline]
225    fn try_from(value: Bytes) -> Result<Self, Self::Error> {
226        if value.is_ascii() {
227            return Ok(ByteString(value));
228        }
229
230        let _ = str::from_utf8(value.as_ref())?;
231        Ok(ByteString(value))
232    }
233}
234
235impl TryFrom<bytes::BytesMut> for ByteString {
236    type Error = str::Utf8Error;
237
238    #[inline]
239    fn try_from(value: bytes::BytesMut) -> Result<Self, Self::Error> {
240        if value.is_ascii() {
241            return Ok(ByteString(value.freeze()));
242        }
243
244        let _ = str::from_utf8(&value)?;
245        Ok(ByteString(value.freeze()))
246    }
247}
248
249macro_rules! array_impls {
250    ($($len:expr)+) => {
251        $(
252            impl TryFrom<[u8; $len]> for ByteString {
253                type Error = str::Utf8Error;
254
255                #[inline]
256                fn try_from(value: [u8; $len]) -> Result<Self, Self::Error> {
257                    ByteString::try_from(&value[..])
258                }
259            }
260
261            impl TryFrom<&[u8; $len]> for ByteString {
262                type Error = str::Utf8Error;
263
264                #[inline]
265                fn try_from(value: &[u8; $len]) -> Result<Self, Self::Error> {
266                    ByteString::try_from(&value[..])
267                }
268            }
269        )+
270    }
271}
272
273array_impls!(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32);
274
275impl fmt::Debug for ByteString {
276    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
277        (**self).fmt(fmt)
278    }
279}
280
281impl fmt::Display for ByteString {
282    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
283        (**self).fmt(fmt)
284    }
285}
286
#[cfg(feature = "serde")]
mod serde {
    use alloc::string::String;

    use serde_core::{
        de::{Deserialize, Deserializer},
        ser::{Serialize, Serializer},
    };

    use super::ByteString;

    /// Serializes a `ByteString` as a plain string.
    impl Serialize for ByteString {
        #[inline]
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let text: &str = self;
            serializer.serialize_str(text)
        }
    }

    /// Deserializes a `ByteString` by first deserializing an owned `String` and then
    /// converting it.
    impl<'de> Deserialize<'de> for ByteString {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            let owned = String::deserialize(deserializer)?;
            Ok(ByteString::from(owned))
        }
    }

    #[cfg(test)]
    mod serde_impl_tests {
        use serde_core::de::DeserializeOwned;
        use static_assertions::assert_impl_all;

        use super::*;

        // Compile-time check that both serde traits are implemented.
        assert_impl_all!(ByteString: Serialize, DeserializeOwned);
    }
}
328
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, format, vec};
    use core::{
        hash::{Hash, Hasher},
        panic::{RefUnwindSafe, UnwindSafe},
    };

    use ahash::AHasher;
    use static_assertions::assert_impl_all;

    use super::*;

    // Compile-time checks that the expected auto traits and standard traits are implemented.
    assert_impl_all!(ByteString: Send, Sync, Unpin, Sized);
    assert_impl_all!(ByteString: Clone, Default, Eq, PartialOrd, Ord);
    assert_impl_all!(ByteString: fmt::Debug, fmt::Display);
    assert_impl_all!(ByteString: UnwindSafe, RefUnwindSafe);

    // Equality against `&str`, `str` and `String`.
    #[test]
    fn eq() {
        let s: ByteString = ByteString::from_static("test");
        assert_eq!(s, "test");
        assert_eq!(s, *"test");
        assert_eq!(s, "test".to_owned());
    }

    #[test]
    fn new() {
        let _: ByteString = ByteString::new();
    }

    #[test]
    fn as_bytes() {
        let buf = ByteString::new();
        assert!(buf.as_bytes().is_empty());

        let buf = ByteString::from("hello");
        assert_eq!(buf.as_bytes(), "hello");
    }

    #[test]
    fn from_bytes_unchecked() {
        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::new()) };
        assert!(buf.is_empty());

        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::from("hello")) };
        assert_eq!(buf, "hello");
    }

    #[test]
    fn as_ref() {
        let buf = ByteString::new();

        let _: &ByteString = buf.as_ref();
        let _: &[u8] = buf.as_ref();
    }

    #[test]
    fn borrow() {
        let buf = ByteString::new();

        let _: &str = buf.borrow();
    }

    // A `ByteString` must hash identically to the `str` with the same contents.
    #[test]
    fn hash() {
        let mut hasher1 = AHasher::default();
        "str".hash(&mut hasher1);

        let mut hasher2 = AHasher::default();
        let s = ByteString::from_static("str");
        s.hash(&mut hasher2);
        assert_eq!(hasher1.finish(), hasher2.finish());
    }

    #[test]
    fn from_string() {
        let s: ByteString = "hello".to_owned().into();
        assert_eq!(&s, "hello");
        let t: &str = s.as_ref();
        assert_eq!(t, "hello");
    }

    #[test]
    fn from_str() {
        let _: ByteString = "str".into();
        let _: ByteString = "str".to_owned().into_boxed_str().into();
    }

    #[test]
    fn to_string() {
        let buf = ByteString::from("foo");
        assert_eq!(String::from(buf), "foo");
    }

    // `from_static` must be usable in a `static` initializer (i.e. be a `const fn`).
    #[test]
    fn from_static_str() {
        static _S: ByteString = ByteString::from_static("hello");
        let _ = ByteString::from_static("str");
    }

    #[test]
    fn try_from_slice() {
        let heart = "\u{1f496}";
        let _ = ByteString::try_from(b"nice bytes").unwrap();
        assert_eq!(ByteString::try_from(heart.as_bytes()).unwrap(), heart);
        ByteString::try_from(&[0, 159, 146, 150][..]).unwrap_err();
    }

    #[test]
    fn try_from_array() {
        assert_eq!(
            ByteString::try_from([b'h', b'i']).unwrap(),
            ByteString::from_static("hi")
        );
    }

    #[test]
    fn try_from_vec() {
        let _ = ByteString::try_from(vec![b'f', b'o', b'o']).unwrap();
        ByteString::try_from(vec![0, 159, 146, 150]).unwrap_err();
    }

    #[test]
    fn try_from_bytes() {
        let heart = "\u{1f496}";
        let _ = ByteString::try_from(Bytes::from_static(b"nice bytes")).unwrap();
        assert_eq!(
            ByteString::try_from(Bytes::from_static(heart.as_bytes())).unwrap(),
            heart
        );
    }

    #[test]
    fn try_from_bytes_mut() {
        let heart = "\u{1f496}";
        let _ = ByteString::try_from(bytes::BytesMut::from(&b"nice bytes"[..])).unwrap();
        assert_eq!(
            ByteString::try_from(bytes::BytesMut::from(heart.as_bytes())).unwrap(),
            heart
        );
    }

    #[test]
    fn display() {
        let buf = ByteString::from("bar");
        assert_eq!(format!("{buf}"), "bar");
    }

    #[test]
    fn debug() {
        let buf = ByteString::from("baz");
        assert_eq!(format!("{buf:?}"), r#""baz""#);
    }

    // Parsing a JSON string yields a `ByteString` with the same contents.
    #[cfg(feature = "serde")]
    #[test]
    fn deserialize() {
        let s: ByteString = serde_json::from_str(r#""nice bytes""#).unwrap();
        assert_eq!(s, "nice bytes");
    }

    // A `ByteString` is written out as a plain JSON string.
    #[cfg(feature = "serde")]
    #[test]
    fn serialize() {
        let s = serde_json::to_string(&ByteString::from_static("nice bytes")).unwrap();
        assert_eq!(s, r#""nice bytes""#);
    }

    #[test]
    fn slice_ref() {
        let string = ByteString::from_static(" foo ");
        let subset = string.trim();
        // subset is derived from original byte string
        let substring = string.slice_ref(subset);
        assert_eq!(substring, "foo");
    }

    #[test]
    #[should_panic]
    fn slice_ref_catches_not_a_subset() {
        // panics because the given slice is not derived from the original byte string, despite
        // being a logical subset of the string
        ByteString::from_static("foo bar").slice_ref("foo");
    }

    #[test]
    fn split_at() {
        let buf = ByteString::from_static("foo bar");

        let (first, last) = buf.split_at(0);
        assert_eq!(ByteString::from_static(""), first);
        assert_eq!(ByteString::from_static("foo bar"), last);

        let (first, last) = buf.split_at(4);
        assert_eq!(ByteString::from_static("foo "), first);
        assert_eq!(ByteString::from_static("bar"), last);

        let (first, last) = buf.split_at(7);
        assert_eq!(ByteString::from_static("foo bar"), first);
        assert_eq!(ByteString::from_static(""), last);
    }

    #[test]
    #[should_panic = "byte index 1 is not a char boundary;"]
    fn split_at_invalid_code_point() {
        // U+00B5 (MICRO SIGN) occupies two bytes in UTF-8, so offset 1 falls inside it.
        ByteString::from_static("\u{b5}").split_at(1);
    }

    #[test]
    #[should_panic = "byte index 9 is out of bounds"]
    fn split_at_outside_string() {
        ByteString::from_static("foo").split_at(9);
    }
}
543}