Move parsing to parse module
This commit is contained in:
parent
0950a086b2
commit
e379147b03
256
src/main.rs
256
src/main.rs
|
@ -16,262 +16,8 @@
|
|||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with Saul. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
io::{self, BufRead},
|
||||
};
|
||||
|
||||
/// Configuration of a particular language. Affects parsing and output.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Language {
    /// A string that appears before all commented lines.
    pub comment: Cow<'static, str>,
}

impl Language {
    /// The default Rust language config.
    pub const RUST: Self = Self {
        comment: Cow::Borrowed("// "),
    };

    /// Extracts a header (as a list of lines) from an input in this language.
    ///
    /// A shebang (`#!`) on the very first line is skipped. Reading stops at the
    /// first line that is not a comment. Every returned line has the comment
    /// prefix removed and its surrounding whitespace trimmed.
    ///
    /// A line holding only the comment marker without its trailing whitespace
    /// (for example a bare `//`) is kept as an empty header line instead of
    /// ending the header, so paragraph breaks inside a license header survive.
    ///
    /// Don't rely on the state of the reader being consistent after this
    /// function is called.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading lines from `f`.
    pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> {
        let prefix: &str = self.comment.as_ref();
        // The marker as it appears once trailing whitespace has been stripped.
        let bare_marker = prefix.trim_end();

        let mut header = Vec::new();

        for (index, line) in f.lines().enumerate() {
            let line = line?;

            // Only the very first line may be a shebang.
            if index == 0 && line.starts_with("#!") {
                continue;
            }

            let content = match line.strip_prefix(prefix) {
                Some(content) => content,
                // Guard against a whitespace-only prefix, whose bare form is
                // empty and would otherwise match every blank line.
                None if !bare_marker.is_empty() && line.trim_end() == bare_marker => "",
                None => break,
            };

            header.push(content.trim().to_string());
        }

        Ok(header)
    }
}
|
||||
|
||||
/// A source file's header information.
|
||||
pub struct Header {
|
||||
/// The copyrights on this source file.
|
||||
pub copyrights: Vec<Copyright>,
|
||||
|
||||
/// The SPDX license identifier that this source file is covered under.
|
||||
pub spdx: Option<String>,
|
||||
|
||||
/// This header's body, as a list of lines.
|
||||
pub body: Vec<String>,
|
||||
}
|
||||
|
||||
impl Header {
|
||||
/// Parses a header from a list of comment-less lines.
|
||||
pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> {
|
||||
let mut src = src.into_iter().peekable();
|
||||
let mut copyrights = Vec::new();
|
||||
let mut spdx = None;
|
||||
|
||||
while let Some(line) = src.peek() {
|
||||
match Copyright::parse(line.as_str()) {
|
||||
Ok(copyright) => {
|
||||
copyrights.push(copyright);
|
||||
src.next();
|
||||
}
|
||||
Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => {
|
||||
break;
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(HeaderError::Copyright(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(line) = src.peek() {
|
||||
if let Some(body) = line.strip_prefix("SPDX-License-Identifier: ") {
|
||||
spdx = Some(body.to_string());
|
||||
src.next();
|
||||
}
|
||||
}
|
||||
|
||||
let body: Vec<_> = src.collect();
|
||||
|
||||
Ok(Self {
|
||||
copyrights,
|
||||
spdx,
|
||||
body,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that occurred during header parsing.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum HeaderError {
|
||||
Copyright(CopyrightError),
|
||||
}
|
||||
|
||||
/// A single copyright notice on a source file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Copyright {
    /// The name of the copyright holder.
    pub holder: String,

    /// The first year of copyright holding.
    pub first_year: usize,

    /// The last year of copyright holding.
    ///
    /// May be the same as `first_year`, in which case this is omitted in
    /// formatting.
    pub last_year: usize,
}

impl Copyright {
    /// Attempts to parse a copyright notice from a string.
    ///
    /// The accepted shape is `<prefix> <year>[-<year>] <holder>`, where the
    /// prefix is one of `©`, `Copyright ©`, `Copyright (c)` or
    /// `Copyright (C)`. Surrounding whitespace and extra spaces between the
    /// fields are ignored.
    ///
    /// # Errors
    ///
    /// - [`CopyrightError::Empty`] if `src` is blank.
    /// - [`CopyrightError::InvalidPrefix`] if no known prefix matches.
    /// - [`CopyrightError::MissingHolder`] if nothing follows the years.
    /// - [`CopyrightError::MalformedYear`] if a year is not a number or the
    ///   range ends before it starts.
    pub fn parse(src: &str) -> Result<Self, CopyrightError> {
        const VALID_PREFIXES: &[&str] =
            &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "];

        let src = src.trim();

        if src.is_empty() {
            return Err(CopyrightError::Empty);
        }

        let body = VALID_PREFIXES
            .iter()
            .find_map(|&prefix| src.strip_prefix(prefix))
            .ok_or(CopyrightError::InvalidPrefix)?
            .trim_start();

        // The years never contain a space, so the first one separates them
        // from the holder.
        let (years, holder) = body.split_once(' ').ok_or(CopyrightError::MissingHolder)?;
        let holder = holder.trim_start().to_string();

        let parse_year = |year: &str| {
            year.parse::<usize>()
                .map_err(|_| CopyrightError::MalformedYear)
        };

        let (first_year, last_year) = match years.split_once('-') {
            Some((first, last)) => (parse_year(first)?, parse_year(last)?),
            None => {
                let year = parse_year(years)?;
                (year, year)
            }
        };

        // A range that runs backwards cannot describe a holding period.
        if last_year < first_year {
            return Err(CopyrightError::MalformedYear);
        }

        Ok(Self {
            holder,
            first_year,
            last_year,
        })
    }
}

/// An error in copyright parsing or validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CopyrightError {
    /// The input was empty or only whitespace.
    Empty,

    /// The input does not start with a recognized copyright prefix.
    InvalidPrefix,

    /// No holder follows the year or year range.
    MissingHolder,

    /// A year is not a valid number, or the range ends before it starts.
    MalformedYear,
}

impl std::fmt::Display for CopyrightError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Empty => "copyright notice is empty",
            Self::InvalidPrefix => "copyright notice does not start with a recognized prefix",
            Self::MissingHolder => "copyright notice is missing a holder",
            Self::MalformedYear => "copyright notice has a malformed year or year range",
        };

        f.write_str(message)
    }
}

impl std::error::Error for CopyrightError {}
|
||||
mod parse;
|
||||
|
||||
/// Program entry point; for now it only prints a fixed greeting.
fn main() {
    println!("Hello, world!");
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the header of an in-memory source using the Rust language config.
    fn rust_header(src: &[u8]) -> Vec<String> {
        let mut reader = src;
        Language::RUST.read_header(&mut reader).unwrap()
    }

    /// The notice every copyright test expects, for the given year span.
    fn expected_notice(first_year: usize, last_year: usize) -> Copyright {
        Copyright {
            holder: String::from("Marceline Cramer"),
            first_year,
            last_year,
        }
    }

    #[test]
    fn header() {
        let lines = rust_header(b"// header content\n// header content 2\nbody");
        assert_eq!(lines, vec!["header content", "header content 2"]);
    }

    #[test]
    fn skip_header_shebang() {
        let lines = rust_header(b"#!/bin/sh\n// header content\nbody goes here");
        assert_eq!(lines, vec!["header content"]);
    }

    #[test]
    fn parse_copyright() {
        let parsed = Copyright::parse("Copyright (c) 2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, expected_notice(2024, 2024));
    }

    #[test]
    fn parse_copyright_year_range() {
        let parsed = Copyright::parse("Copyright (c) 2023-2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, expected_notice(2023, 2024));
    }

    #[test]
    fn parse_header() {
        let lines = rust_header(
            b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here",
        );
        let Header {
            copyrights,
            spdx,
            body,
        } = Header::parse(lines).unwrap();

        assert_eq!(copyrights, vec![expected_notice(2024, 2024)]);
        assert_eq!(spdx, Some("AGPL-3.0-or-later".to_string()));
        assert_eq!(body, vec!["body here"]);
    }
}
|
||||
|
|
|
@ -0,0 +1,272 @@
|
|||
// Copyright (c) 2024 Marceline Cramer
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This file is part of Saul.
|
||||
//
|
||||
// Saul is free software: you can redistribute it and/or modify it under the
|
||||
// terms of the GNU Affero General Public License as published by the Free
|
||||
// Software Foundation, either version 3 of the License, or (at your option) any
|
||||
// later version.
|
||||
//
|
||||
// Saul is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with Saul. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
io::{self, BufRead},
|
||||
};
|
||||
|
||||
/// Configuration of a particular language. Affects parsing and output.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Language {
    /// A string that appears before all commented lines.
    pub comment: Cow<'static, str>,
}

impl Language {
    /// The default Rust language config.
    pub const RUST: Self = Self {
        comment: Cow::Borrowed("// "),
    };

    /// Extracts a header (as a list of lines) from an input in this language.
    ///
    /// A shebang (`#!`) on the very first line is skipped. Reading stops at the
    /// first line that is not a comment. Every returned line has the comment
    /// prefix removed and its surrounding whitespace trimmed.
    ///
    /// A line holding only the comment marker without its trailing whitespace
    /// (for example a bare `//`) is kept as an empty header line instead of
    /// ending the header, so paragraph breaks inside a license header survive.
    ///
    /// Don't rely on the state of the reader being consistent after this
    /// function is called.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading lines from `f`.
    pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> {
        let prefix: &str = self.comment.as_ref();
        // The marker as it appears once trailing whitespace has been stripped.
        let bare_marker = prefix.trim_end();

        let mut header = Vec::new();

        for (index, line) in f.lines().enumerate() {
            let line = line?;

            // Only the very first line may be a shebang.
            if index == 0 && line.starts_with("#!") {
                continue;
            }

            let content = match line.strip_prefix(prefix) {
                Some(content) => content,
                // Guard against a whitespace-only prefix, whose bare form is
                // empty and would otherwise match every blank line.
                None if !bare_marker.is_empty() && line.trim_end() == bare_marker => "",
                None => break,
            };

            header.push(content.trim().to_string());
        }

        Ok(header)
    }
}
|
||||
/// A source file's header information.
|
||||
pub struct Header {
|
||||
/// The copyrights on this source file.
|
||||
pub copyrights: Vec<Copyright>,
|
||||
|
||||
/// The SPDX license identifier that this source file is covered under.
|
||||
pub spdx: Option<String>,
|
||||
|
||||
/// This header's body, as a list of lines.
|
||||
pub body: Vec<String>,
|
||||
}
|
||||
|
||||
impl Header {
|
||||
/// Parses a header from a list of comment-less lines.
|
||||
pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> {
|
||||
let mut src = src.into_iter().peekable();
|
||||
let mut copyrights = Vec::new();
|
||||
let mut spdx = None;
|
||||
|
||||
while let Some(line) = src.peek() {
|
||||
match Copyright::parse(line.as_str()) {
|
||||
Ok(copyright) => {
|
||||
copyrights.push(copyright);
|
||||
src.next();
|
||||
}
|
||||
Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => {
|
||||
break;
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(HeaderError::Copyright(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(line) = src.peek() {
|
||||
if let Some(body) = line.strip_prefix("SPDX-License-Identifier: ") {
|
||||
spdx = Some(body.to_string());
|
||||
src.next();
|
||||
}
|
||||
}
|
||||
|
||||
let body: Vec<_> = src.collect();
|
||||
|
||||
Ok(Self {
|
||||
copyrights,
|
||||
spdx,
|
||||
body,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that occurred during header parsing.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum HeaderError {
|
||||
Copyright(CopyrightError),
|
||||
}
|
||||
|
||||
/// A single copyright notice on a source file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Copyright {
    /// The name of the copyright holder.
    pub holder: String,

    /// The first year of copyright holding.
    pub first_year: usize,

    /// The last year of copyright holding.
    ///
    /// May be the same as `first_year`, in which case this is omitted in
    /// formatting.
    pub last_year: usize,
}

impl Copyright {
    /// Attempts to parse a copyright notice from a string.
    ///
    /// The accepted shape is `<prefix> <year>[-<year>] <holder>`, where the
    /// prefix is one of `©`, `Copyright ©`, `Copyright (c)` or
    /// `Copyright (C)`. Surrounding whitespace and extra spaces between the
    /// fields are ignored.
    ///
    /// # Errors
    ///
    /// - [`CopyrightError::Empty`] if `src` is blank.
    /// - [`CopyrightError::InvalidPrefix`] if no known prefix matches.
    /// - [`CopyrightError::MissingHolder`] if nothing follows the years.
    /// - [`CopyrightError::MalformedYear`] if a year is not a number or the
    ///   range ends before it starts.
    pub fn parse(src: &str) -> Result<Self, CopyrightError> {
        const VALID_PREFIXES: &[&str] =
            &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "];

        let src = src.trim();

        if src.is_empty() {
            return Err(CopyrightError::Empty);
        }

        let body = VALID_PREFIXES
            .iter()
            .find_map(|&prefix| src.strip_prefix(prefix))
            .ok_or(CopyrightError::InvalidPrefix)?
            .trim_start();

        // The years never contain a space, so the first one separates them
        // from the holder.
        let (years, holder) = body.split_once(' ').ok_or(CopyrightError::MissingHolder)?;
        let holder = holder.trim_start().to_string();

        let parse_year = |year: &str| {
            year.parse::<usize>()
                .map_err(|_| CopyrightError::MalformedYear)
        };

        let (first_year, last_year) = match years.split_once('-') {
            Some((first, last)) => (parse_year(first)?, parse_year(last)?),
            None => {
                let year = parse_year(years)?;
                (year, year)
            }
        };

        // A range that runs backwards cannot describe a holding period.
        if last_year < first_year {
            return Err(CopyrightError::MalformedYear);
        }

        Ok(Self {
            holder,
            first_year,
            last_year,
        })
    }
}

/// An error in copyright parsing or validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CopyrightError {
    /// The input was empty or only whitespace.
    Empty,

    /// The input does not start with a recognized copyright prefix.
    InvalidPrefix,

    /// No holder follows the year or year range.
    MissingHolder,

    /// A year is not a valid number, or the range ends before it starts.
    MalformedYear,
}

impl std::fmt::Display for CopyrightError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Empty => "copyright notice is empty",
            Self::InvalidPrefix => "copyright notice does not start with a recognized prefix",
            Self::MissingHolder => "copyright notice is missing a holder",
            Self::MalformedYear => "copyright notice has a malformed year or year range",
        };

        f.write_str(message)
    }
}

impl std::error::Error for CopyrightError {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the header of an in-memory source using the Rust language config.
    fn rust_header(src: &[u8]) -> Vec<String> {
        let mut reader = src;
        Language::RUST.read_header(&mut reader).unwrap()
    }

    /// The notice every copyright test expects, for the given year span.
    fn expected_notice(first_year: usize, last_year: usize) -> Copyright {
        Copyright {
            holder: String::from("Marceline Cramer"),
            first_year,
            last_year,
        }
    }

    #[test]
    fn header() {
        let lines = rust_header(b"// header content\n// header content 2\nbody");
        assert_eq!(lines, vec!["header content", "header content 2"]);
    }

    #[test]
    fn skip_header_shebang() {
        let lines = rust_header(b"#!/bin/sh\n// header content\nbody goes here");
        assert_eq!(lines, vec!["header content"]);
    }

    #[test]
    fn parse_copyright() {
        let parsed = Copyright::parse("Copyright (c) 2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, expected_notice(2024, 2024));
    }

    #[test]
    fn parse_copyright_year_range() {
        let parsed = Copyright::parse("Copyright (c) 2023-2024 Marceline Cramer").unwrap();
        assert_eq!(parsed, expected_notice(2023, 2024));
    }

    #[test]
    fn parse_header() {
        let lines = rust_header(
            b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here",
        );
        let Header {
            copyrights,
            spdx,
            body,
        } = Header::parse(lines).unwrap();

        assert_eq!(copyrights, vec![expected_notice(2024, 2024)]);
        assert_eq!(spdx, Some("AGPL-3.0-or-later".to_string()));
        assert_eq!(body, vec!["body here"]);
    }
}
|
Loading…
Reference in New Issue