// Copyright (c) 2024 Marceline Cramer
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This file is part of Saul.
//
// Saul is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// Saul is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with Saul. If not, see <https://www.gnu.org/licenses/>.

//! Extraction and parsing of source file headers: leading comment lines,
//! copyright notices, and SPDX license identifiers.

use std::{
    borrow::Cow,
    error::Error,
    fmt,
    io::{self, BufRead},
};

/// Configuration of a particular language. Affects parsing and output.
#[derive(Clone, Debug)]
pub struct Language {
    /// A string that appears before all commented lines.
    pub comment: Cow<'static, str>,
}

impl Language {
    /// The default Rust language config.
    pub const RUST: Self = Self {
        comment: Cow::Borrowed("//"),
    };

    /// Extracts a header (as a list of lines) from an input in this language.
    ///
    /// A shebang (`#!`) on the very first line is skipped. After that, lines
    /// are collected for as long as they start with [`Language::comment`]; the
    /// comment prefix is removed and the remainder is trimmed of surrounding
    /// whitespace. The first line that does not start with the comment prefix
    /// ends the header.
    ///
    /// Don't rely on the state of the reader being consistent after this
    /// function is called.
    ///
    /// # Errors
    ///
    /// Returns any I/O error produced while reading lines from `f`.
    pub fn read_header(&self, f: &mut impl BufRead) -> io::Result<Vec<String>> {
        let mut header = Vec::new();

        for (index, line) in f.lines().enumerate() {
            let line = line?;

            // A shebang is only recognized on the first line of the input.
            if index == 0 && line.starts_with("#!") {
                continue;
            }

            // The first non-comment line terminates the header.
            let Some(content) = line.strip_prefix(self.comment.as_ref()) else {
                break;
            };

            header.push(content.trim().to_string());
        }

        Ok(header)
    }
}

/// A source file's header information.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Header {
    /// The copyrights on this source file.
    pub copyrights: Vec<Copyright>,

    /// The SPDX license identifier that this source file is covered under.
    pub spdx: Option<String>,

    /// This header's body, as a list of lines.
    pub body: Vec<String>,
}

impl Header {
    /// Parses a header from a list of comment-less lines.
    ///
    /// The expected layout is: zero or more copyright notices, then an
    /// optional `SPDX-License-Identifier: ` line, then the body. Every line
    /// after the copyright/SPDX section is kept verbatim in [`Header::body`].
    ///
    /// # Errors
    ///
    /// Returns [`HeaderError::Copyright`] when a line starts with a valid
    /// copyright prefix but the rest of the notice cannot be parsed.
    pub fn parse(src: Vec<String>) -> Result<Self, HeaderError> {
        const SPDX_PREFIX: &str = "SPDX-License-Identifier: ";

        let mut src = src.into_iter().peekable();
        let mut copyrights = Vec::new();

        // Consume leading copyright notices. Lines are only peeked so that the
        // first non-copyright line stays available for the SPDX/body steps.
        while let Some(line) = src.peek() {
            match Copyright::parse(line) {
                Ok(copyright) => {
                    copyrights.push(copyright);
                    src.next();
                }
                // Not a copyright line at all: the copyright section is over.
                Err(CopyrightError::Empty | CopyrightError::InvalidPrefix) => break,
                // Looked like a copyright line but was malformed.
                Err(err) => return Err(HeaderError::Copyright(err)),
            }
        }

        let spdx = src
            .peek()
            .and_then(|line| line.strip_prefix(SPDX_PREFIX))
            .map(str::to_string);
        if spdx.is_some() {
            src.next();
        }

        Ok(Self {
            copyrights,
            spdx,
            body: src.collect(),
        })
    }
}

/// An error that occurred during header parsing.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HeaderError {
    /// A copyright notice in the header was malformed.
    Copyright(CopyrightError),
}

impl fmt::Display for HeaderError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Copyright(err) => write!(f, "invalid copyright notice: {err}"),
        }
    }
}

impl Error for HeaderError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::Copyright(err) => Some(err),
        }
    }
}

/// A single copyright notice on a source file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Copyright {
    /// The name of the copyright holder.
    pub holder: String,

    /// The first year of copyright holding.
    pub first_year: usize,

    /// The last year of copyright holding.
    ///
    /// May be the same as `first_year`, in which case this is omitted in
    /// formatting.
    pub last_year: usize,
}

impl Copyright {
    /// Attempts to parse a copyright notice from a string.
    ///
    /// The accepted shape is `<prefix><year>[-<year>] <holder>`, where
    /// `<prefix>` is one of `"© "`, `"Copyright © "`, `"Copyright (c) "`, or
    /// `"Copyright (C) "`. Surrounding whitespace on `src` is ignored, as is
    /// any extra whitespace between the years and the holder.
    ///
    /// # Errors
    ///
    /// - [`CopyrightError::Empty`] if `src` is empty or only whitespace.
    /// - [`CopyrightError::InvalidPrefix`] if no accepted prefix matches.
    /// - [`CopyrightError::MissingHolder`] if nothing follows the years.
    /// - [`CopyrightError::MalformedYear`] if a year is not a valid number.
    pub fn parse(src: &str) -> Result<Self, CopyrightError> {
        const VALID_PREFIXES: &[&str] =
            &["© ", "Copyright © ", "Copyright (c) ", "Copyright (C) "];

        let src = src.trim();
        if src.is_empty() {
            return Err(CopyrightError::Empty);
        }

        let body = VALID_PREFIXES
            .iter()
            .find_map(|prefix| src.strip_prefix(*prefix))
            .ok_or(CopyrightError::InvalidPrefix)?;

        // retrieve years range and copyright holder
        let (years, holder) = body
            .split_once(' ')
            .ok_or(CopyrightError::MissingHolder)?;

        // `src` was trimmed above, so only leading whitespace (from multiple
        // separating spaces) can remain on the holder.
        let holder = holder.trim_start().to_string();

        let parse_year = |year: &str| {
            year.parse::<usize>()
                .map_err(|_| CopyrightError::MalformedYear)
        };

        // A `-` can never appear in a valid single unsigned year, so its
        // presence unambiguously selects the range form.
        let (first_year, last_year) = match years.split_once('-') {
            Some((first, last)) => (parse_year(first)?, parse_year(last)?),
            None => {
                let year = parse_year(years)?;
                (year, year)
            }
        };

        Ok(Self {
            holder,
            first_year,
            last_year,
        })
    }
}

/// An error in copyright parsing or validation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CopyrightError {
    /// The input was empty or contained only whitespace.
    Empty,

    /// The input did not start with any accepted copyright prefix.
    InvalidPrefix,

    /// No copyright holder followed the year(s).
    MissingHolder,

    /// A year (or either end of a year range) was not a valid number.
    MalformedYear,
}

impl fmt::Display for CopyrightError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Empty => "copyright notice is empty",
            Self::InvalidPrefix => "copyright notice does not start with a recognized prefix",
            Self::MissingHolder => "copyright notice is missing a holder",
            Self::MalformedYear => "copyright notice has a malformed year or year range",
        })
    }
}

impl Error for CopyrightError {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn header() {
        let src = b"// header content\n// header content 2\nbody";
        let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap();
        assert_eq!(lines, vec!["header content", "header content 2"]);
    }

    #[test]
    fn skip_header_shebang() {
        let src = b"#!/bin/sh\n// header content\nbody goes here";
        let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap();
        assert_eq!(lines, vec!["header content"]);
    }

    #[test]
    fn parse_copyright() {
        let src = "Copyright (c) 2024 Marceline Cramer";
        let cr = Copyright::parse(src).unwrap();

        assert_eq!(
            cr,
            Copyright {
                holder: "Marceline Cramer".into(),
                first_year: 2024,
                last_year: 2024,
            }
        );
    }

    #[test]
    fn parse_copyright_year_range() {
        let src = "Copyright (c) 2023-2024 Marceline Cramer";
        let cr = Copyright::parse(src).unwrap();

        assert_eq!(
            cr,
            Copyright {
                holder: "Marceline Cramer".into(),
                first_year: 2023,
                last_year: 2024,
            }
        );
    }

    #[test]
    fn parse_copyright_extra_spacing() {
        let cr = Copyright::parse("Copyright (c) 2024   Marceline Cramer").unwrap();
        assert_eq!(cr.holder, "Marceline Cramer");
    }

    #[test]
    fn parse_header() {
        let src = b"#!/bin/sh\n// Copyright (c) 2024 Marceline Cramer\n// SPDX-License-Identifier: AGPL-3.0-or-later\n// body here";
        let lines = Language::RUST.read_header(&mut src.as_slice()).unwrap();
        let header = Header::parse(lines).unwrap();

        assert_eq!(
            header.copyrights,
            vec![Copyright {
                holder: "Marceline Cramer".into(),
                first_year: 2024,
                last_year: 2024,
            }]
        );

        assert_eq!(header.spdx, Some("AGPL-3.0-or-later".to_string()));
        assert_eq!(header.body, vec!["body here"]);
    }
}