233 lines
6.6 KiB
Rust
233 lines
6.6 KiB
Rust
/*
|
|
* Copyright (c) 2015 Hugo Duncan <hugo@hugoduncan.org>
|
|
* Copyright (c) 2015 Sean McArthur <sean@seanmonstar.com>
|
|
* Copyright (c) 2015 Simon Bernier St-Pierre <https://github.com/sbstp/>
|
|
* Copyright (c) 2015 Florian Hartwig <florian.j.hartwig@gmail.com>
|
|
* Copyright (c) 2015 Pyfisch <pyfisch@gmail.com>
|
|
* Copyright (c) 2023 Emma Tebibyte <emma@tebibyte.media>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the “Software”), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#![no_std]
|
|
#![cfg_attr(test, deny(missing_docs))]
|
|
#![cfg_attr(test, deny(warnings))]
|
|
|
|
//! The crate provides an enum representing all charset names used in Media Types
|
|
//! and HTTP header values. The list can be found at [the IANA Character Sets
|
|
//! registry](http://www.iana.org/assignments/character-sets/character-sets.xhtml).
|
|
//!
|
|
//! Charset names can be parsed from string, formatted to string and compared.
|
|
//! Unregistered charsets are represented using an `Unregistered` variant.
|
|
|
|
extern crate alloc;
|
|
|
|
use alloc::{
|
|
borrow::ToOwned,
|
|
string::{ String, ToString },
|
|
};
|
|
|
|
use core::{
|
|
fmt::{self, Display},
|
|
str::FromStr,
|
|
};
|
|
|
|
pub use self::Charset::*;
|
|
|
|
/// A Mime charset.
|
|
///
|
|
/// The string representation is normalised to upper case.
|
|
///
|
|
/// See http://www.iana.org/assignments/character-sets/character-sets.xhtml
|
|
#[derive(Clone, Debug, Eq, Ord, PartialOrd)]
|
|
pub enum Charset {
|
|
/// US ASCII
|
|
UsAscii,
|
|
/// ISO-8859-1
|
|
Iso88591,
|
|
/// ISO-8859-2
|
|
Iso88592,
|
|
/// ISO-8859-3
|
|
Iso88593,
|
|
/// ISO-8859-4
|
|
Iso88594,
|
|
/// ISO-8859-5
|
|
Iso88595,
|
|
/// ISO-8859-6
|
|
Iso88596,
|
|
/// ISO-8859-7
|
|
Iso88597,
|
|
/// ISO-8859-8
|
|
Iso88598,
|
|
/// ISO-8859-9
|
|
Iso88599,
|
|
/// ISO-8859-10
|
|
Iso885910,
|
|
/// Shift_JIS
|
|
ShiftJis,
|
|
/// EUC-JP
|
|
EucJp,
|
|
/// ISO-2022-KR
|
|
Iso2022Kr,
|
|
/// EUC-KR
|
|
EucKr,
|
|
/// ISO-2022-JP
|
|
Iso2022Jp,
|
|
/// ISO-2022-JP-2
|
|
Iso2022Jp2,
|
|
/// ISO-8859-6-E
|
|
Iso88596E,
|
|
/// ISO-8859-6-I
|
|
Iso88596I,
|
|
/// ISO-8859-8-E
|
|
Iso88598E,
|
|
/// ISO-8859-8-I
|
|
Iso88598I,
|
|
/// GB2312
|
|
Gb2312,
|
|
/// Big5
|
|
Big5,
|
|
/// KOI8-R
|
|
Koi8R,
|
|
/// UTF-8
|
|
Utf8,
|
|
/// An arbitrary charset specified as a string
|
|
Unregistered(String),
|
|
}
|
|
|
|
/// Table pairing each registered [`Charset`] variant with its textual name.
///
/// The name is what the variant is formatted as, and what input is compared
/// against (ignoring ASCII case) when parsing. `Unregistered` has no entry
/// because it carries its own name.
///
/// Every registered variant must appear here exactly once: the name lookup in
/// this file unwraps the result of searching this table.
const MAPPING: [(Charset, &str); 25] = [
    (UsAscii, "US-ASCII"),
    (Iso88591, "ISO-8859-1"),
    (Iso88592, "ISO-8859-2"),
    (Iso88593, "ISO-8859-3"),
    (Iso88594, "ISO-8859-4"),
    (Iso88595, "ISO-8859-5"),
    (Iso88596, "ISO-8859-6"),
    (Iso88597, "ISO-8859-7"),
    (Iso88598, "ISO-8859-8"),
    (Iso88599, "ISO-8859-9"),
    (Iso885910, "ISO-8859-10"),
    (ShiftJis, "Shift-JIS"),
    (EucJp, "EUC-JP"),
    (Iso2022Kr, "ISO-2022-KR"),
    (EucKr, "EUC-KR"),
    (Iso2022Jp, "ISO-2022-JP"),
    (Iso2022Jp2, "ISO-2022-JP-2"),
    (Iso88596E, "ISO-8859-6-E"),
    (Iso88596I, "ISO-8859-6-I"),
    (Iso88598E, "ISO-8859-8-E"),
    (Iso88598I, "ISO-8859-8-I"),
    (Gb2312, "GB2312"),
    // Was "5": that made `Big5` display as "5" and left the real name
    // "Big5" unparseable as a registered charset.
    (Big5, "Big5"),
    (Koi8R, "KOI8-R"),
    (Utf8, "utf-8"),
];
|
|
|
|
impl Charset {
|
|
fn name(&self) -> &str {
|
|
if let &Unregistered(ref s) = self {
|
|
return &s[..];
|
|
}
|
|
MAPPING.iter()
|
|
.find(|&&(ref variant, _)| self == variant)
|
|
.map(|&(_, name)| name)
|
|
.unwrap()
|
|
}
|
|
}
|
|
|
|
impl Display for Charset {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
f.write_str(self.name())
|
|
}
|
|
}
|
|
|
|
impl FromStr for Charset {
|
|
type Err = Charset;
|
|
fn from_str(s: &str) -> Result<Charset, Charset> {
|
|
match MAPPING.iter()
|
|
.find(|&&(_, ref name)| name.eq_ignore_ascii_case(s))
|
|
.map(|&(ref variant, _)| variant.to_owned()) {
|
|
Some(variant) => Ok(variant),
|
|
None => Err(Unregistered(s.to_owned())),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PartialEq for Charset {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
match (self, other) {
|
|
(&UsAscii, &UsAscii) |
|
|
(&Iso88591, &Iso88591) |
|
|
(&Iso88592, &Iso88592) |
|
|
(&Iso88593, &Iso88593) |
|
|
(&Iso88594, &Iso88594) |
|
|
(&Iso88595, &Iso88595) |
|
|
(&Iso88596, &Iso88596) |
|
|
(&Iso88597, &Iso88597) |
|
|
(&Iso88598, &Iso88598) |
|
|
(&Iso88599, &Iso88599) |
|
|
(&Iso885910, &Iso885910) |
|
|
(&ShiftJis, &ShiftJis) |
|
|
(&EucJp, &EucJp) |
|
|
(&Iso2022Kr, &Iso2022Kr) |
|
|
(&EucKr, &EucKr) |
|
|
(&Iso2022Jp, &Iso2022Jp) |
|
|
(&Iso2022Jp2, &Iso2022Jp2) |
|
|
(&Iso88596E, &Iso88596E) |
|
|
(&Iso88596I, &Iso88596I) |
|
|
(&Iso88598E, &Iso88598E) |
|
|
(&Iso88598I, &Iso88598I) |
|
|
(&Gb2312, &Gb2312) |
|
|
(&Big5, &Big5) |
|
|
(&Koi8R, &Koi8R) |
|
|
(&Utf8, &Utf8) => true,
|
|
(&Unregistered(ref s), &Unregistered(ref t)) => s.eq_ignore_ascii_case(t),
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Registered names parse regardless of ASCII case; an unknown name comes
    /// back inside the error as `Unregistered`.
    #[test]
    fn test_parse() {
        for spelling in &["us-ascii", "US-Ascii", "US-ASCII"] {
            assert_eq!(UsAscii, spelling.parse::<Charset>().unwrap());
        }
        assert_eq!(ShiftJis, "Shift-JIS".parse::<Charset>().unwrap());

        let rejected = "abcd".parse::<Charset>().unwrap_err();
        assert_eq!(Unregistered("ABCD".to_owned()), rejected);
    }

    /// Formatting yields the table name for a registered charset and the
    /// stored string for an unregistered one.
    #[test]
    fn test_display() {
        let ascii = UsAscii.to_string();
        assert_eq!("US-ASCII", ascii);

        let custom = Unregistered("ABCD".to_owned()).to_string();
        assert_eq!("ABCD", custom);
    }

    /// Equality distinguishes variants and ignores ASCII case for
    /// unregistered names.
    #[test]
    fn test_cmp() {
        assert_eq!(Iso88593, Iso88593);
        assert_ne!(UsAscii, Iso88593);

        let lower = Unregistered("foobar".to_owned());
        let upper = Unregistered("FOOBAR".to_owned());
        assert_eq!(lower, upper);
    }
}
|