From 55d595ac9f23c652b2a30efcdeffed199dcd7358 Mon Sep 17 00:00:00 2001 From: mars Date: Sat, 10 Feb 2024 16:58:03 -0700 Subject: [PATCH] Move parsing to parse module --- src/main.rs | 256 +----------------------------------------------- src/parse.rs | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 273 insertions(+), 255 deletions(-) create mode 100644 src/parse.rs diff --git a/src/main.rs b/src/main.rs index cdcf179..f92eea3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,262 +16,8 @@ // You should have received a copy of the GNU Affero General Public License // along with Saul. If not, see <https://www.gnu.org/licenses/>. -use std::{ - borrow::Cow, - io::{self, BufRead}, -}; - -/// Configuration of a particular language. Affects parsing and output. -pub struct Language { - /// A string that appears before all commented lines. - pub comment: Cow<'static, str>, -} - -impl Language { - /// The default Rust language config. - pub const RUST: Self = Self { - comment: Cow::Borrowed("// "), - }; - - /// Extracts a header (as a list of lines) from an input in this language. - /// - /// Don't reply on the state of the reader being consistent after this function - /// is called. - pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> { - let mut header = Vec::new(); - let mut is_first_line = true; - - for line in f.lines() { - let line = line?; - - if is_first_line && line.starts_with("#!") { - is_first_line = false; - continue; - } - - is_first_line = false; - - let Some(content) = line.strip_prefix(self.comment.as_ref()) else { - break; - }; - - header.push(content.trim().to_string()); - } - - Ok(header) - } -} - -/// A source file's header information. -pub struct Header { - /// The copyrights on this source file. - pub copyrights: Vec<Copyright>, - - /// The SPDX license identifier that this source file is covered under. - pub spdx: Option<String>, - - /// This header's body, as a list of lines.
- pub body: Vec<String>, -} - -impl Header { - /// Parses a header from a list of comment-less lines. - pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> { - let mut src = src.into_iter().peekable(); - let mut copyrights = Vec::new(); - let mut spdx = None; - - while let Some(line) = src.peek() { - match Copyright::parse(line.as_str()) { - Ok(copyright) => { - copyrights.push(copyright); - src.next(); - } - Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => { - break; - } - Err(err) => { - return Err(HeaderError::Copyright(err)); - } - } - } - - if let Some(line) = src.peek() { - if let Some(body) = line.strip_prefix("SPDX-License-Identifier: ") { - spdx = Some(body.to_string()); - src.next(); - } - } - - let body: Vec<_> = src.collect(); - - Ok(Self { - copyrights, - spdx, - body, - }) - } -} - -/// An error that occurred during header parsing. -#[derive(Clone, Copy, Debug)] -pub enum HeaderError { - Copyright(CopyrightError), -} - -/// A single copyright notice on a source file. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Copyright { - /// The name of the copyright holder. - pub holder: String, - - /// The first year of copyright holding. - pub first_year: usize, - - /// The last year of copyright holding. - /// - /// May be the same as `first_year`, in which case this is omitted in - /// formatting. - pub last_year: usize, -} - -impl Copyright { - /// Attempts to parse a copyright notice from a string.
- pub fn parse(src: &str) -> Result<Self, CopyrightError> { - let src = src.trim(); - - if src.is_empty() { - return Err(CopyrightError::Empty); - } - - static VALID_PREFIXES: &'static [&'static str] = - &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "]; - - let mut valid_prefixes = VALID_PREFIXES.iter(); - - let body = loop { - let Some(test_prefix) = valid_prefixes.next() else { - // options are exhausted, we didn't find a valid prefix - return Err(CopyrightError::InvalidPrefix); - }; - - if let Some(body) = src.strip_prefix(*test_prefix) { - break body; - } - }; - - // retrieve years range and copyright holder - let (years, holder) = body.split_once(" ").ok_or(CopyrightError::MissingHolder)?; - - let holder = holder.to_string(); - - // attempt to parse single given year - if let Ok(year) = years.parse() { - return Ok(Copyright { - holder, - first_year: year, - last_year: year, - }); - } - - let Some((first_year, last_year)) = years.split_once("-") else { - return Err(CopyrightError::MalformedYear); - }; - - let first_year = first_year - .parse() - .map_err(|_| CopyrightError::MalformedYear)?; - - let last_year = last_year - .parse() - .map_err(|_| CopyrightError::MalformedYear)?; - - Ok(Copyright { - holder, - first_year, - last_year, - }) - } -} - -/// An error in copyright parsing or validation.
-#[derive(Clone, Copy, Debug)] -pub enum CopyrightError { - Empty, - InvalidPrefix, - MissingHolder, - MalformedYear, -} +mod parse; fn main() { println!("Hello, world!"); } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn header() { - let src = b"// header content\n// header content 2\nbody"; - let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); - assert_eq!(lines, vec!["header content", "header content 2"]); - } - - #[test] - fn skip_header_shebang() { - let src = b"#!/bin/sh\n// header content\nbody goes here"; - let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); - assert_eq!(lines, vec!["header content"]); - } - - #[test] - fn parse_copyright() { - let src = "Copyright (c) 2024 Marceline Cramer"; - let cr = Copyright::parse(src).unwrap(); - - assert_eq!( - cr, - Copyright { - holder: "Marceline Cramer".into(), - first_year: 2024, - last_year: 2024, - } - ); - } - - #[test] - fn parse_copyright_year_range() { - let src = "Copyright (c) 2023-2024 Marceline Cramer"; - let cr = Copyright::parse(src).unwrap(); - - assert_eq!( - cr, - Copyright { - holder: "Marceline Cramer".into(), - first_year: 2023, - last_year: 2024, - } - ); - } - - #[test] - fn parse_header() { - let src = b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here"; - let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); - let header = Header::parse(lines).unwrap(); - - assert_eq!( - header.copyrights, - vec![Copyright { - holder: "Marceline Cramer".into(), - first_year: 2024, - last_year: 2024, - }] - ); - - assert_eq!(header.spdx, Some("AGPL-3.0-or-later".to_string())); - - assert_eq!(header.body, vec!["body here"]); - } -} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..a2213c8 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,272 @@ +// Copyright (c) 2024 Marceline Cramer +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This file is part of 
Saul. +// +// Saul is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// Saul is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Saul. If not, see <https://www.gnu.org/licenses/>. + +use std::{ + borrow::Cow, + io::{self, BufRead}, +}; + +/// Configuration of a particular language. Affects parsing and output. +pub struct Language { + /// A string that appears before all commented lines. + pub comment: Cow<'static, str>, +} + +impl Language { + /// The default Rust language config. + pub const RUST: Self = Self { + comment: Cow::Borrowed("// "), + }; + + /// Extracts a header (as a list of lines) from an input in this language. + /// + /// Don't rely on the state of the reader being consistent after this function + /// is called. + pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> { + let mut header = Vec::new(); + let mut is_first_line = true; + + for line in f.lines() { + let line = line?; + + if is_first_line && line.starts_with("#!") { + is_first_line = false; + continue; + } + + is_first_line = false; + + let Some(content) = line.strip_prefix(self.comment.as_ref()) else { + break; + }; + + header.push(content.trim().to_string()); + } + + Ok(header) + } +} +/// A source file's header information. +pub struct Header { + /// The copyrights on this source file. + pub copyrights: Vec<Copyright>, + + /// The SPDX license identifier that this source file is covered under. + pub spdx: Option<String>, + + /// This header's body, as a list of lines.
+ pub body: Vec<String>, +} + +impl Header { + /// Parses a header from a list of comment-less lines. + pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> { + let mut src = src.into_iter().peekable(); + let mut copyrights = Vec::new(); + let mut spdx = None; + + while let Some(line) = src.peek() { + match Copyright::parse(line.as_str()) { + Ok(copyright) => { + copyrights.push(copyright); + src.next(); + } + Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => { + break; + } + Err(err) => { + return Err(HeaderError::Copyright(err)); + } + } + } + + if let Some(line) = src.peek() { + if let Some(body) = line.strip_prefix("SPDX-License-Identifier: ") { + spdx = Some(body.to_string()); + src.next(); + } + } + + let body: Vec<_> = src.collect(); + + Ok(Self { + copyrights, + spdx, + body, + }) + } +} + +/// An error that occurred during header parsing. +#[derive(Clone, Copy, Debug)] +pub enum HeaderError { + Copyright(CopyrightError), +} + +/// A single copyright notice on a source file. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Copyright { + /// The name of the copyright holder. + pub holder: String, + + /// The first year of copyright holding. + pub first_year: usize, + + /// The last year of copyright holding. + /// + /// May be the same as `first_year`, in which case this is omitted in + /// formatting. + pub last_year: usize, +} + +impl Copyright { + /// Attempts to parse a copyright notice from a string.
+ pub fn parse(src: &str) -> Result<Self, CopyrightError> { + let src = src.trim(); + + if src.is_empty() { + return Err(CopyrightError::Empty); + } + + static VALID_PREFIXES: &'static [&'static str] = + &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "]; + + let mut valid_prefixes = VALID_PREFIXES.iter(); + + let body = loop { + let Some(test_prefix) = valid_prefixes.next() else { + // options are exhausted, we didn't find a valid prefix + return Err(CopyrightError::InvalidPrefix); + }; + + if let Some(body) = src.strip_prefix(*test_prefix) { + break body; + } + }; + + // retrieve years range and copyright holder + let (years, holder) = body.split_once(" ").ok_or(CopyrightError::MissingHolder)?; + + let holder = holder.to_string(); + + // attempt to parse single given year + if let Ok(year) = years.parse() { + return Ok(Copyright { + holder, + first_year: year, + last_year: year, + }); + } + + let Some((first_year, last_year)) = years.split_once("-") else { + return Err(CopyrightError::MalformedYear); + }; + + let first_year = first_year + .parse() + .map_err(|_| CopyrightError::MalformedYear)?; + + let last_year = last_year + .parse() + .map_err(|_| CopyrightError::MalformedYear)?; + + Ok(Copyright { + holder, + first_year, + last_year, + }) + } +} + +/// An error in copyright parsing or validation.
+#[derive(Clone, Copy, Debug)] +pub enum CopyrightError { + Empty, + InvalidPrefix, + MissingHolder, + MalformedYear, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn header() { + let src = b"// header content\n// header content 2\nbody"; + let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); + assert_eq!(lines, vec!["header content", "header content 2"]); + } + + #[test] + fn skip_header_shebang() { + let src = b"#!/bin/sh\n// header content\nbody goes here"; + let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); + assert_eq!(lines, vec!["header content"]); + } + + #[test] + fn parse_copyright() { + let src = "Copyright (c) 2024 Marceline Cramer"; + let cr = Copyright::parse(src).unwrap(); + + assert_eq!( + cr, + Copyright { + holder: "Marceline Cramer".into(), + first_year: 2024, + last_year: 2024, + } + ); + } + + #[test] + fn parse_copyright_year_range() { + let src = "Copyright (c) 2023-2024 Marceline Cramer"; + let cr = Copyright::parse(src).unwrap(); + + assert_eq!( + cr, + Copyright { + holder: "Marceline Cramer".into(), + first_year: 2023, + last_year: 2024, + } + ); + } + + #[test] + fn parse_header() { + let src = b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here"; + let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap(); + let header = Header::parse(lines).unwrap(); + + assert_eq!( + header.copyrights, + vec![Copyright { + holder: "Marceline Cramer".into(), + first_year: 2024, + last_year: 2024, + }] + ); + + assert_eq!(header.spdx, Some("AGPL-3.0-or-later".to_string())); + + assert_eq!(header.body, vec!["body here"]); + } +}