Move parsing to parse module

This commit is contained in:
mars 2024-02-10 16:58:03 -07:00
parent 4df0e276ff
commit 55d595ac9f
2 changed files with 273 additions and 255 deletions

View File

@ -16,262 +16,8 @@
// You should have received a copy of the GNU Affero General Public License
// along with Saul. If not, see <https://www.gnu.org/licenses/>.
use std::{
borrow::Cow,
io::{self, BufRead},
};
/// Configuration of a particular language. Affects parsing and output.
pub struct Language {
    /// A string that appears before all commented lines.
    pub comment: Cow<'static, str>,
}

impl Language {
    /// The default Rust language config.
    pub const RUST: Self = Self {
        comment: Cow::Borrowed("// "),
    };

    /// Extracts a header (as a list of lines) from an input in this language.
    ///
    /// A `#!` shebang on the very first line is skipped. Each following line
    /// that starts with [`Language::comment`] contributes one header line, with
    /// the comment marker removed and surrounding whitespace trimmed. A line
    /// made up of only the comment marker without its trailing whitespace
    /// (e.g. a bare `//`) is kept as an empty header line, so blank comment
    /// lines do not cut the header short. The first line that is not a comment
    /// ends the header.
    ///
    /// Don't rely on the state of the reader being consistent after this
    /// function is called.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading lines from `f`.
    pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> {
        let comment = self.comment.as_ref();
        // The marker as it appears on blank comment lines, where editors
        // usually strip the trailing whitespace.
        let bare_comment = comment.trim_end();
        let mut header = Vec::new();

        for (index, line) in f.lines().enumerate() {
            let line = line?;

            // only the very first line may be a shebang
            if index == 0 && line.starts_with("#!") {
                continue;
            }

            let content = match line.strip_prefix(comment) {
                Some(content) => content,
                // an all-whitespace marker must not turn blank lines into header lines
                None if !bare_comment.is_empty() && line.trim_end() == bare_comment => "",
                None => break,
            };

            header.push(content.trim().to_string());
        }

        Ok(header)
    }
}
/// Header information extracted from the top of a source file.
pub struct Header {
    /// Copyright notices found at the start of the header, in source order.
    pub copyrights: Vec<Copyright>,

    /// The SPDX license identifier covering this source file, if one was given.
    pub spdx: Option<String>,

    /// The remaining header lines, one entry per line.
    pub body: Vec<String>,
}

impl Header {
    /// Builds a header out of lines whose comment markers were already removed.
    ///
    /// Leading lines are consumed as copyright notices until one is empty or
    /// does not start with a copyright prefix. If the next line starts with
    /// `SPDX-License-Identifier: `, the rest of that line becomes
    /// [`Header::spdx`]. Every remaining line ends up in [`Header::body`].
    ///
    /// # Errors
    ///
    /// Returns [`HeaderError::Copyright`] when a line starts with a copyright
    /// prefix but the rest of the notice cannot be parsed.
    pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> {
        let mut lines = src.into_iter().peekable();

        // collect the leading run of copyright notices
        let mut copyrights = Vec::new();
        loop {
            let Some(candidate) = lines.peek() else {
                break;
            };

            let parsed = Copyright::parse(candidate);
            match parsed {
                // not a notice at all: the copyright section is over
                Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => break,
                Err(other) => return Err(HeaderError::Copyright(other)),
                Ok(notice) => {
                    copyrights.push(notice);
                    lines.next();
                }
            }
        }

        // an optional SPDX line may directly follow the notices
        let spdx = lines
            .peek()
            .and_then(|line| line.strip_prefix("SPDX-License-Identifier: "))
            .map(|identifier| identifier.to_string());
        if spdx.is_some() {
            lines.next();
        }

        Ok(Self {
            copyrights,
            spdx,
            body: lines.collect(),
        })
    }
}
/// An error that occurred during header parsing.
#[derive(Clone, Copy, Debug)]
pub enum HeaderError {
    /// A copyright notice in the header could not be parsed.
    Copyright(CopyrightError),
}

impl From<CopyrightError> for HeaderError {
    /// Wraps a copyright parsing error so it can be propagated with `?`.
    fn from(err: CopyrightError) -> Self {
        Self::Copyright(err)
    }
}

impl std::fmt::Display for HeaderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `CopyrightError` derives `Debug`, so that is what gets printed here
            Self::Copyright(err) => write!(f, "invalid copyright notice: {err:?}"),
        }
    }
}

impl std::error::Error for HeaderError {}
/// A single copyright notice on a source file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Copyright {
    /// The name of the copyright holder.
    pub holder: String,

    /// The first year of copyright holding.
    pub first_year: usize,

    /// The last year of copyright holding.
    ///
    /// May be the same as `first_year`, in which case this is omitted in
    /// formatting.
    pub last_year: usize,
}

impl Copyright {
    /// Attempts to parse a copyright notice from a string.
    ///
    /// The notice must start with one of `© `, `Copyright © `,
    /// `Copyright (c) ` or `Copyright (C) `, followed by a single year
    /// (`2024`) or a year range (`2023-2024`), a space, and the holder's name.
    /// Surrounding whitespace is ignored, as are extra spaces before the years
    /// or before the holder.
    ///
    /// # Errors
    ///
    /// - [`CopyrightError::Empty`] if `src` is empty or only whitespace.
    /// - [`CopyrightError::InvalidPrefix`] if `src` starts with none of the
    ///   accepted prefixes.
    /// - [`CopyrightError::MissingHolder`] if no space-separated holder follows
    ///   the years.
    /// - [`CopyrightError::MalformedYear`] if the year or either end of the
    ///   year range is not a number.
    pub fn parse(src: &str) -> Result<Self, CopyrightError> {
        const VALID_PREFIXES: &[&str] =
            &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "];

        let src = src.trim();
        if src.is_empty() {
            return Err(CopyrightError::Empty);
        }

        // the first prefix that matches wins; extra spaces after it are dropped
        let body = VALID_PREFIXES
            .iter()
            .find_map(|prefix| src.strip_prefix(*prefix))
            .ok_or(CopyrightError::InvalidPrefix)?
            .trim_start();

        // retrieve years range and copyright holder
        let (years, holder) = body.split_once(' ').ok_or(CopyrightError::MissingHolder)?;
        // repeated spaces between the years and the holder are not part of the name
        let holder = holder.trim_start().to_string();

        let parse_year =
            |text: &str| text.parse::<usize>().map_err(|_| CopyrightError::MalformedYear);

        // a lone year counts as a range that starts and ends in that year
        let (first_year, last_year) = match years.split_once('-') {
            Some((first, last)) => (parse_year(first)?, parse_year(last)?),
            None => {
                let year = parse_year(years)?;
                (year, year)
            }
        };

        Ok(Copyright {
            holder,
            first_year,
            last_year,
        })
    }
}

/// An error in copyright parsing or validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CopyrightError {
    /// The input was empty or contained only whitespace.
    Empty,
    /// The input did not start with an accepted copyright prefix.
    InvalidPrefix,
    /// No copyright holder followed the years.
    MissingHolder,
    /// The year or year range was not made of valid numbers.
    MalformedYear,
}

impl std::fmt::Display for CopyrightError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Empty => "copyright notice is empty",
            Self::InvalidPrefix => "copyright notice does not start with a recognized prefix",
            Self::MissingHolder => "copyright notice is missing a holder",
            Self::MalformedYear => "copyright notice has a malformed year or year range",
        };
        f.write_str(message)
    }
}

impl std::error::Error for CopyrightError {}
mod parse;
/// Program entry point; prints a placeholder greeting.
fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the header lines of `src` using the Rust language config.
    fn rust_header_lines(mut src: &[u8]) -> Vec<String> {
        Language::RUST.read_header(&mut src).unwrap()
    }

    /// Builds the copyright notice that the parsing tests expect.
    fn marceline(first_year: usize, last_year: usize) -> Copyright {
        Copyright {
            holder: "Marceline Cramer".into(),
            first_year,
            last_year,
        }
    }

    #[test]
    fn header() {
        let lines = rust_header_lines(b"// header content\n// header content 2\nbody");
        assert_eq!(lines, vec!["header content", "header content 2"]);
    }

    #[test]
    fn skip_header_shebang() {
        let lines = rust_header_lines(b"#!/bin/sh\n// header content\nbody goes here");
        assert_eq!(lines, vec!["header content"]);
    }

    #[test]
    fn parse_copyright() {
        let parsed = Copyright::parse("Copyright (c) 2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, marceline(2024, 2024));
    }

    #[test]
    fn parse_copyright_year_range() {
        let parsed = Copyright::parse("Copyright (c) 2023-2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, marceline(2023, 2024));
    }

    #[test]
    fn parse_header() {
        let lines = rust_header_lines(
            b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here",
        );
        let Header {
            copyrights,
            spdx,
            body,
        } = Header::parse(lines).unwrap();

        assert_eq!(copyrights, vec![marceline(2024, 2024)]);
        assert_eq!(spdx, Some("AGPL-3.0-or-later".to_string()));
        assert_eq!(body, vec!["body here"]);
    }
}

272
src/parse.rs Normal file
View File

@ -0,0 +1,272 @@
// Copyright (c) 2024 Marceline Cramer
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This file is part of Saul.
//
// Saul is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// Saul is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with Saul. If not, see <https://www.gnu.org/licenses/>.
use std::{
borrow::Cow,
io::{self, BufRead},
};
/// Configuration of a particular language. Affects parsing and output.
pub struct Language {
    /// A string that appears before all commented lines.
    pub comment: Cow<'static, str>,
}

impl Language {
    /// The default Rust language config.
    pub const RUST: Self = Self {
        comment: Cow::Borrowed("// "),
    };

    /// Extracts a header (as a list of lines) from an input in this language.
    ///
    /// A `#!` shebang on the very first line is skipped. Each following line
    /// that starts with [`Language::comment`] contributes one header line, with
    /// the comment marker removed and surrounding whitespace trimmed. A line
    /// made up of only the comment marker without its trailing whitespace
    /// (e.g. a bare `//`) is kept as an empty header line, so blank comment
    /// lines do not cut the header short. The first line that is not a comment
    /// ends the header.
    ///
    /// Don't rely on the state of the reader being consistent after this
    /// function is called.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading lines from `f`.
    pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> {
        let comment = self.comment.as_ref();
        // The marker as it appears on blank comment lines, where editors
        // usually strip the trailing whitespace.
        let bare_comment = comment.trim_end();
        let mut header = Vec::new();

        for (index, line) in f.lines().enumerate() {
            let line = line?;

            // only the very first line may be a shebang
            if index == 0 && line.starts_with("#!") {
                continue;
            }

            let content = match line.strip_prefix(comment) {
                Some(content) => content,
                // an all-whitespace marker must not turn blank lines into header lines
                None if !bare_comment.is_empty() && line.trim_end() == bare_comment => "",
                None => break,
            };

            header.push(content.trim().to_string());
        }

        Ok(header)
    }
}
/// Header information extracted from the top of a source file.
pub struct Header {
    /// Copyright notices found at the start of the header, in source order.
    pub copyrights: Vec<Copyright>,

    /// The SPDX license identifier covering this source file, if one was given.
    pub spdx: Option<String>,

    /// The remaining header lines, one entry per line.
    pub body: Vec<String>,
}

impl Header {
    /// Builds a header out of lines whose comment markers were already removed.
    ///
    /// Leading lines are consumed as copyright notices until one is empty or
    /// does not start with a copyright prefix. If the next line starts with
    /// `SPDX-License-Identifier: `, the rest of that line becomes
    /// [`Header::spdx`]. Every remaining line ends up in [`Header::body`].
    ///
    /// # Errors
    ///
    /// Returns [`HeaderError::Copyright`] when a line starts with a copyright
    /// prefix but the rest of the notice cannot be parsed.
    pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> {
        let mut lines = src.into_iter().peekable();

        // collect the leading run of copyright notices
        let mut copyrights = Vec::new();
        loop {
            let Some(candidate) = lines.peek() else {
                break;
            };

            let parsed = Copyright::parse(candidate);
            match parsed {
                // not a notice at all: the copyright section is over
                Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => break,
                Err(other) => return Err(HeaderError::Copyright(other)),
                Ok(notice) => {
                    copyrights.push(notice);
                    lines.next();
                }
            }
        }

        // an optional SPDX line may directly follow the notices
        let spdx = lines
            .peek()
            .and_then(|line| line.strip_prefix("SPDX-License-Identifier: "))
            .map(|identifier| identifier.to_string());
        if spdx.is_some() {
            lines.next();
        }

        Ok(Self {
            copyrights,
            spdx,
            body: lines.collect(),
        })
    }
}
/// An error that occurred during header parsing.
#[derive(Clone, Copy, Debug)]
pub enum HeaderError {
    /// A copyright notice in the header could not be parsed.
    Copyright(CopyrightError),
}

impl From<CopyrightError> for HeaderError {
    /// Wraps a copyright parsing error so it can be propagated with `?`.
    fn from(err: CopyrightError) -> Self {
        Self::Copyright(err)
    }
}

impl std::fmt::Display for HeaderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `CopyrightError` derives `Debug`, so that is what gets printed here
            Self::Copyright(err) => write!(f, "invalid copyright notice: {err:?}"),
        }
    }
}

impl std::error::Error for HeaderError {}
/// A single copyright notice on a source file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Copyright {
    /// The name of the copyright holder.
    pub holder: String,

    /// The first year of copyright holding.
    pub first_year: usize,

    /// The last year of copyright holding.
    ///
    /// May be the same as `first_year`, in which case this is omitted in
    /// formatting.
    pub last_year: usize,
}

impl Copyright {
    /// Attempts to parse a copyright notice from a string.
    ///
    /// The notice must start with one of `© `, `Copyright © `,
    /// `Copyright (c) ` or `Copyright (C) `, followed by a single year
    /// (`2024`) or a year range (`2023-2024`), a space, and the holder's name.
    /// Surrounding whitespace is ignored, as are extra spaces before the years
    /// or before the holder.
    ///
    /// # Errors
    ///
    /// - [`CopyrightError::Empty`] if `src` is empty or only whitespace.
    /// - [`CopyrightError::InvalidPrefix`] if `src` starts with none of the
    ///   accepted prefixes.
    /// - [`CopyrightError::MissingHolder`] if no space-separated holder follows
    ///   the years.
    /// - [`CopyrightError::MalformedYear`] if the year or either end of the
    ///   year range is not a number.
    pub fn parse(src: &str) -> Result<Self, CopyrightError> {
        const VALID_PREFIXES: &[&str] =
            &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "];

        let src = src.trim();
        if src.is_empty() {
            return Err(CopyrightError::Empty);
        }

        // the first prefix that matches wins; extra spaces after it are dropped
        let body = VALID_PREFIXES
            .iter()
            .find_map(|prefix| src.strip_prefix(*prefix))
            .ok_or(CopyrightError::InvalidPrefix)?
            .trim_start();

        // retrieve years range and copyright holder
        let (years, holder) = body.split_once(' ').ok_or(CopyrightError::MissingHolder)?;
        // repeated spaces between the years and the holder are not part of the name
        let holder = holder.trim_start().to_string();

        let parse_year =
            |text: &str| text.parse::<usize>().map_err(|_| CopyrightError::MalformedYear);

        // a lone year counts as a range that starts and ends in that year
        let (first_year, last_year) = match years.split_once('-') {
            Some((first, last)) => (parse_year(first)?, parse_year(last)?),
            None => {
                let year = parse_year(years)?;
                (year, year)
            }
        };

        Ok(Copyright {
            holder,
            first_year,
            last_year,
        })
    }
}

/// An error in copyright parsing or validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CopyrightError {
    /// The input was empty or contained only whitespace.
    Empty,
    /// The input did not start with an accepted copyright prefix.
    InvalidPrefix,
    /// No copyright holder followed the years.
    MissingHolder,
    /// The year or year range was not made of valid numbers.
    MalformedYear,
}

impl std::fmt::Display for CopyrightError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Empty => "copyright notice is empty",
            Self::InvalidPrefix => "copyright notice does not start with a recognized prefix",
            Self::MissingHolder => "copyright notice is missing a holder",
            Self::MalformedYear => "copyright notice has a malformed year or year range",
        };
        f.write_str(message)
    }
}

impl std::error::Error for CopyrightError {}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the header lines of `src` using the Rust language config.
    fn rust_header_lines(mut src: &[u8]) -> Vec<String> {
        Language::RUST.read_header(&mut src).unwrap()
    }

    /// Builds the copyright notice that the parsing tests expect.
    fn marceline(first_year: usize, last_year: usize) -> Copyright {
        Copyright {
            holder: "Marceline Cramer".into(),
            first_year,
            last_year,
        }
    }

    #[test]
    fn header() {
        let lines = rust_header_lines(b"// header content\n// header content 2\nbody");
        assert_eq!(lines, vec!["header content", "header content 2"]);
    }

    #[test]
    fn skip_header_shebang() {
        let lines = rust_header_lines(b"#!/bin/sh\n// header content\nbody goes here");
        assert_eq!(lines, vec!["header content"]);
    }

    #[test]
    fn parse_copyright() {
        let parsed = Copyright::parse("Copyright (c) 2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, marceline(2024, 2024));
    }

    #[test]
    fn parse_copyright_year_range() {
        let parsed = Copyright::parse("Copyright (c) 2023-2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, marceline(2023, 2024));
    }

    #[test]
    fn parse_header() {
        let lines = rust_header_lines(
            b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here",
        );
        let Header {
            copyrights,
            spdx,
            body,
        } = Header::parse(lines).unwrap();

        assert_eq!(copyrights, vec![marceline(2024, 2024)]);
        assert_eq!(spdx, Some("AGPL-3.0-or-later".to_string()));
        assert_eq!(body, vec!["body here"]);
    }
}