add utf-8 character encoding

This commit is contained in:
Pyfisch 2015-10-17 13:41:15 +02:00
parent 2aa215fd5c
commit 21f1730f9d
2 changed files with 49 additions and 45 deletions

View File

@ -1,6 +1,6 @@
[package] [package]
name = "charsets" name = "charsets"
version = "0.1.0" version = "0.1.1"
authors = ["Pyfisch <pyfisch@gmail.com>"] authors = ["Pyfisch <pyfisch@gmail.com>"]
description = "An enum representing all charset names commonly used." description = "An enum representing all charset names commonly used."
readme = "README.md" readme = "README.md"

View File

@ -22,12 +22,12 @@ pub use self::Charset::*;
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub enum Error { pub enum Error {
/// Parsing as a charset failed. /// Parsing as a charset failed.
Invalid Invalid,
} }
impl ErrorTrait for Error { impl ErrorTrait for Error {
fn description(&self) -> &str { fn description(&self) -> &str {
return "The given charset is invalid" return "The given charset is invalid";
} }
} }
@ -46,7 +46,7 @@ pub type Result<T> = ::std::result::Result<T, Error>;
/// ///
/// See http://www.iana.org/assignments/character-sets/character-sets.xhtml /// See http://www.iana.org/assignments/character-sets/character-sets.xhtml
#[derive(Clone, Debug, Eq, Ord, PartialOrd)] #[derive(Clone, Debug, Eq, Ord, PartialOrd)]
pub enum Charset{ pub enum Charset {
/// US ASCII /// US ASCII
UsAscii, UsAscii,
/// ISO-8859-1 /// ISO-8859-1
@ -95,45 +95,47 @@ pub enum Charset{
Big5, Big5,
/// KOI8-R /// KOI8-R
Koi8R, Koi8R,
/// UTF-8
Utf8,
/// An arbitrary charset specified as a string /// An arbitrary charset specified as a string
Unregistered(String) Unregistered(String),
} }
const MAPPING: [(Charset, &'static str); 24] = [ const MAPPING: [(Charset, &'static str); 25] = [(UsAscii, "US-ASCII"),
(UsAscii, "US-ASCII"), (Iso88591, "ISO-8859-1"),
(Iso88591, "ISO-8859-1"), (Iso88592, "ISO-8859-2"),
(Iso88592, "ISO-8859-2"), (Iso88593, "ISO-8859-3"),
(Iso88593, "ISO-8859-3"), (Iso88594, "ISO-8859-4"),
(Iso88594, "ISO-8859-4"), (Iso88595, "ISO-8859-5"),
(Iso88595, "ISO-8859-5"), (Iso88596, "ISO-8859-6"),
(Iso88596, "ISO-8859-6"), (Iso88597, "ISO-8859-7"),
(Iso88597, "ISO-8859-7"), (Iso88598, "ISO-8859-8"),
(Iso88598, "ISO-8859-8"), (Iso88599, "ISO-8859-9"),
(Iso88599, "ISO-8859-9"), (Iso885910, "ISO-8859-10"),
(Iso885910, "ISO-8859-10"), (ShiftJis, "Shift-JIS"),
(ShiftJis, "Shift-JIS"), (EucJp, "EUC-JP"),
(EucJp, "EUC-JP"), (Iso2022Kr, "ISO-2022-KR"),
(Iso2022Kr, "ISO-2022-KR"), (EucKr, "EUC-KR"),
(EucKr, "EUC-KR"), (Iso2022Jp, "ISO-2022-JP"),
(Iso2022Jp, "ISO-2022-JP"), (Iso2022Jp2, "ISO-2022-JP-2"),
(Iso2022Jp2, "ISO-2022-JP-2"), (Iso88596E, "ISO-8859-6-E"),
(Iso88596E, "ISO-8859-6-E"), (Iso88596I, "ISO-8859-6-I"),
(Iso88596I, "ISO-8859-6-I"), (Iso88598E, "ISO-8859-8-E"),
(Iso88598E, "ISO-8859-8-E"), (Iso88598I, "ISO-8859-8-I"),
(Iso88598I, "ISO-8859-8-I"), (Gb2312, "GB2312"),
(Gb2312, "GB2312"), (Big5, "5"),
(Big5, "5"), (Koi8R, "KOI8-R"),
(Koi8R, "KOI8-R") (Utf8, "utf-8")];
];
impl Charset { impl Charset {
fn name(&self) -> &str { fn name(&self) -> &str {
if let &Unregistered(ref s) = self { if let &Unregistered(ref s) = self {
return &s[..] return &s[..];
} }
MAPPING.iter() MAPPING.iter()
.find(|&&(ref variant, _)| self == variant) .find(|&&(ref variant, _)| self == variant)
.map(|&(_, name)| name).unwrap() .map(|&(_, name)| name)
.unwrap()
} }
} }
@ -147,9 +149,9 @@ impl FromStr for Charset {
type Err = ::Error; type Err = ::Error;
fn from_str(s: &str) -> ::Result<Charset> { fn from_str(s: &str) -> ::Result<Charset> {
Ok(MAPPING.iter() Ok(MAPPING.iter()
.find(|&&(_, ref name)| name.eq_ignore_ascii_case(s)) .find(|&&(_, ref name)| name.eq_ignore_ascii_case(s))
.map(|&(ref variant, _)| variant.to_owned()) .map(|&(ref variant, _)| variant.to_owned())
.unwrap_or(Unregistered(s.to_owned()))) .unwrap_or(Unregistered(s.to_owned())))
} }
} }
@ -179,9 +181,10 @@ impl PartialEq for Charset {
(&Iso88598I, &Iso88598I) | (&Iso88598I, &Iso88598I) |
(&Gb2312, &Gb2312) | (&Gb2312, &Gb2312) |
(&Big5, &Big5) | (&Big5, &Big5) |
(&Koi8R, &Koi8R) => true, (&Koi8R, &Koi8R) |
(&Utf8, &Utf8) => true,
(&Unregistered(ref s), &Unregistered(ref t)) => s.eq_ignore_ascii_case(t), (&Unregistered(ref s), &Unregistered(ref t)) => s.eq_ignore_ascii_case(t),
_ => false _ => false,
} }
} }
} }
@ -192,11 +195,11 @@ mod tests {
#[test] #[test]
fn test_parse() { fn test_parse() {
assert_eq!(UsAscii,"us-ascii".parse().unwrap()); assert_eq!(UsAscii, "us-ascii".parse().unwrap());
assert_eq!(UsAscii,"US-Ascii".parse().unwrap()); assert_eq!(UsAscii, "US-Ascii".parse().unwrap());
assert_eq!(UsAscii,"US-ASCII".parse().unwrap()); assert_eq!(UsAscii, "US-ASCII".parse().unwrap());
assert_eq!(ShiftJis,"Shift-JIS".parse().unwrap()); assert_eq!(ShiftJis, "Shift-JIS".parse().unwrap());
assert_eq!(Unregistered("ABCD".to_owned()),"abcd".parse().unwrap()); assert_eq!(Unregistered("ABCD".to_owned()), "abcd".parse().unwrap());
} }
#[test] #[test]
@ -209,6 +212,7 @@ mod tests {
fn test_cmp() { fn test_cmp() {
assert!(Iso88593 == Iso88593); assert!(Iso88593 == Iso88593);
assert!(UsAscii != Iso88593); assert!(UsAscii != Iso88593);
assert_eq!(Unregistered("foobar".to_owned()), Unregistered("FOOBAR".to_owned())); assert_eq!(Unregistered("foobar".to_owned()),
Unregistered("FOOBAR".to_owned()));
} }
} }