/*
 * Copyright (c) 2015 Hugo Duncan
 * Copyright (c) 2015 Sean McArthur
 * Copyright (c) 2015 Simon Bernier St-Pierre
 * Copyright (c) 2015 Florian Hartwig
 * Copyright (c) 2015 Pyfisch
 * Copyright (c) 2023 Emma Tebibyte
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the “Software”), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#![no_std]
#![cfg_attr(test, deny(missing_docs))]
#![cfg_attr(test, deny(warnings))]

//! The crate provides an enum representing all charset names used in Media Types
//! and HTTP header values. The list can be found at [the IANA Character Sets
//! registry](http://www.iana.org/assignments/character-sets/character-sets.xhtml).
//!
//! Charset names can be parsed from string, formatted to string and compared.
//! Unregistered charsets are represented using an `Unregistered` variant.
extern crate alloc;

use alloc::{
    borrow::ToOwned,
    string::{String, ToString},
};
use core::{
    cmp::Ordering,
    fmt::{self, Display},
    str::FromStr,
};

pub use self::Charset::*;

/// A Mime charset.
///
/// Registered charsets are displayed using the canonical spelling stored in
/// the crate's name table; unregistered charsets are displayed exactly as they
/// were given. Parsing and equality are ASCII case-insensitive.
///
/// Ordering follows declaration order, with every `Unregistered` value sorting
/// after all registered charsets. Unregistered names are ordered
/// case-insensitively so that ordering always agrees with equality.
///
/// See <http://www.iana.org/assignments/character-sets/character-sets.xhtml>
#[derive(Clone, Debug, Eq)]
pub enum Charset {
    /// US ASCII
    UsAscii,
    /// ISO-8859-1
    Iso88591,
    /// ISO-8859-2
    Iso88592,
    /// ISO-8859-3
    Iso88593,
    /// ISO-8859-4
    Iso88594,
    /// ISO-8859-5
    Iso88595,
    /// ISO-8859-6
    Iso88596,
    /// ISO-8859-7
    Iso88597,
    /// ISO-8859-8
    Iso88598,
    /// ISO-8859-9
    Iso88599,
    /// ISO-8859-10
    Iso885910,
    /// Shift_JIS
    ShiftJis,
    /// EUC-JP
    EucJp,
    /// ISO-2022-KR
    Iso2022Kr,
    /// EUC-KR
    EucKr,
    /// ISO-2022-JP
    Iso2022Jp,
    /// ISO-2022-JP-2
    Iso2022Jp2,
    /// ISO-8859-6-E
    Iso88596E,
    /// ISO-8859-6-I
    Iso88596I,
    /// ISO-8859-8-E
    Iso88598E,
    /// ISO-8859-8-I
    Iso88598I,
    /// GB2312
    Gb2312,
    /// Big5
    Big5,
    /// KOI8-R
    Koi8R,
    /// UTF-8
    Utf8,
    /// An arbitrary charset specified as a string
    Unregistered(String),
}

/// Canonical name of every registered charset.
///
/// The entries are listed in the same order as the variants of [`Charset`];
/// `Charset::rank` relies on that to reproduce declaration order. This is a
/// `static` rather than a `const` so the table is not re-materialised as a
/// temporary at each use.
static MAPPING: [(Charset, &str); 25] = [
    (UsAscii, "US-ASCII"),
    (Iso88591, "ISO-8859-1"),
    (Iso88592, "ISO-8859-2"),
    (Iso88593, "ISO-8859-3"),
    (Iso88594, "ISO-8859-4"),
    (Iso88595, "ISO-8859-5"),
    (Iso88596, "ISO-8859-6"),
    (Iso88597, "ISO-8859-7"),
    (Iso88598, "ISO-8859-8"),
    (Iso88599, "ISO-8859-9"),
    (Iso885910, "ISO-8859-10"),
    (ShiftJis, "Shift-JIS"),
    (EucJp, "EUC-JP"),
    (Iso2022Kr, "ISO-2022-KR"),
    (EucKr, "EUC-KR"),
    (Iso2022Jp, "ISO-2022-JP"),
    (Iso2022Jp2, "ISO-2022-JP-2"),
    (Iso88596E, "ISO-8859-6-E"),
    (Iso88596I, "ISO-8859-6-I"),
    (Iso88598E, "ISO-8859-8-E"),
    (Iso88598I, "ISO-8859-8-I"),
    (Gb2312, "GB2312"),
    (Big5, "Big5"),
    (Koi8R, "KOI8-R"),
    (Utf8, "UTF-8"),
];

/// Additional spellings accepted when parsing but never produced by `Display`.
///
/// `Shift_JIS` is the spelling used by the IANA registry; the hyphenated form
/// in `MAPPING` is kept as the displayed name for backward compatibility.
static ALIASES: [(Charset, &str); 1] = [(ShiftJis, "Shift_JIS")];

impl Charset {
    /// Returns the textual name of the charset.
    ///
    /// Registered charsets yield their entry from `MAPPING`; an `Unregistered`
    /// charset yields the string it wraps, unchanged.
    fn name(&self) -> &str {
        match self {
            Unregistered(custom) => custom.as_str(),
            registered => MAPPING
                .iter()
                .find(|(variant, _)| variant == registered)
                .map(|(_, label)| *label)
                .expect("every registered charset has an entry in MAPPING"),
        }
    }

    /// Returns the position of the charset in declaration order.
    ///
    /// Every `Unregistered` value gets the same rank, one past the last
    /// registered charset.
    fn rank(&self) -> usize {
        MAPPING
            .iter()
            .position(|(variant, _)| variant == self)
            .unwrap_or(MAPPING.len())
    }
}

impl Display for Charset {
    /// Writes the charset name (see `Charset::name`) to the formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.name())
    }
}

impl FromStr for Charset {
    type Err = Charset;

    /// Parses a charset name, ignoring ASCII case.
    ///
    /// Returns `Ok` with the registered variant when `s` matches a canonical
    /// name or a known alias. Otherwise returns `Err` carrying
    /// `Unregistered(s)`, so callers can still use the unrecognised name.
    fn from_str(s: &str) -> Result<Charset, Charset> {
        MAPPING
            .iter()
            .chain(ALIASES.iter())
            .find(|(_, label)| label.eq_ignore_ascii_case(s))
            .map(|(variant, _)| variant.clone())
            .ok_or_else(|| Unregistered(s.to_owned()))
    }
}

impl PartialEq for Charset {
    /// Two charsets are equal when they are the same registered variant, or
    /// when both are `Unregistered` and their names match ignoring ASCII case.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Unregistered(a), Unregistered(b)) => a.eq_ignore_ascii_case(b),
            // Covers registered/registered and the mixed case, where the
            // discriminants necessarily differ.
            _ => core::mem::discriminant(self) == core::mem::discriminant(other),
        }
    }
}

impl Ord for Charset {
    /// Orders charsets by declaration order; unregistered names are compared
    /// byte-wise after ASCII lower-casing so the result agrees with `eq`.
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Unregistered(a), Unregistered(b)) => a
                .bytes()
                .map(|byte| byte.to_ascii_lowercase())
                .cmp(b.bytes().map(|byte| byte.to_ascii_lowercase())),
            _ => self.rank().cmp(&other.rank()),
        }
    }
}

impl PartialOrd for Charset {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse() {
        assert_eq!(UsAscii, "us-ascii".parse().unwrap());
        assert_eq!(UsAscii, "US-Ascii".parse().unwrap());
        assert_eq!(UsAscii, "US-ASCII".parse().unwrap());
        assert_eq!(ShiftJis, "Shift-JIS".parse().unwrap());
        assert_eq!(ShiftJis, "Shift_JIS".parse().unwrap());
        assert_eq!(Big5, "big5".parse().unwrap());
        assert_eq!(Utf8, "utf-8".parse().unwrap());
        assert_eq!(
            Unregistered("ABCD".to_owned()),
            "abcd".parse::<Charset>().err().unwrap()
        );
    }

    #[test]
    fn test_display() {
        assert_eq!("US-ASCII", UsAscii.to_string());
        assert_eq!("Big5", Big5.to_string());
        assert_eq!("UTF-8", Utf8.to_string());
        assert_eq!("ABCD", Unregistered("ABCD".to_owned()).to_string());
    }

    #[test]
    fn test_cmp() {
        assert!(Iso88593 == Iso88593);
        assert!(UsAscii != Iso88593);
        assert_eq!(
            Unregistered("foobar".to_owned()),
            Unregistered("FOOBAR".to_owned())
        );
    }

    #[test]
    fn test_ord_matches_eq() {
        let lower = Unregistered("foobar".to_owned());
        let upper = Unregistered("FOOBAR".to_owned());
        assert_eq!(Ordering::Equal, lower.cmp(&upper));
        assert!(UsAscii < Iso88591);
        assert!(Utf8 < Unregistered("a".to_owned()));
    }
}