Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions gix-config/src/parse/format.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
//! Reformat a git-config file with normalized, sanitized whitespace.
//!
//! This operates purely on the syntactic [event stream](crate::parse::Events) of a single
//! file. `include`/`includeIf` directives are *never* resolved here - those are only acted upon
//! when constructing a [`File`](crate::File) - so the formatter is "flat" by construction.
//!
//! Values, comments and section headers are reproduced verbatim; only insignificant whitespace,
//! newlines and the `=` separator are rewritten according to [`Options`](crate::parse::format::Options).

use bstr::BString;

use crate::parse::{self, Event};

/// How key/value lines beneath a section header are indented.
///
/// This is a small plain value (unit variants or a single `usize`), so it is `Copy` and can be
/// passed around freely without cloning.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Indentation {
    /// A single horizontal tab per line - git's de-facto writer style.
    Tab,
    /// The given number of spaces per line.
    Spaces(usize),
    /// No indentation at all.
    None,
}

/// Which newline sequence to write between lines.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Newline {
/// Use the first newline sequence found in the input - `\r\n` if it contains a carriage
/// return, `\n` otherwise - falling back to `\n` if the input has no newline at all.
Detect,
/// Always use a Unix newline (`\n`), regardless of what the input uses.
Lf,
/// Always use a Windows newline (`\r\n`), regardless of what the input uses.
CrLf,
}

/// Options controlling [`normalize()`].
///
/// The defaults are intentionally conservative: they tidy the common sources of noise (stray
/// indentation, spacing around `=`, trailing whitespace, missing final newline) and never cap
/// runs of blank lines unless asked to, leaving the substance of the file untouched.
///
/// Note that trailing whitespace at the end of a line is always removed - it is never significant
/// in git-config syntax - so there is no option to retain it.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Options {
/// How to indent key/value lines and standalone comment lines that follow a section header.
/// Lines before the first section header are never indented.
pub indentation: Indentation,
/// If `true`, place a single space on each side of the `=` separator (`a = b`);
/// if `false`, emit a bare `=` (`a=b`).
pub spaces_around_separator: bool,
/// Which newline sequence to emit between lines.
pub newline: Newline,
/// If `true`, ensure that non-empty output ends with a newline. Empty output stays empty.
pub ensure_trailing_newline: bool,
/// If `Some(n)`, cap runs of consecutive blank lines at `n`. `None` (the default) applies
/// no cap.
pub max_consecutive_blank_lines: Option<usize>,
}

impl Default for Options {
    /// Conservative defaults: two-space indentation, ` = ` as separator, newline style detected
    /// from the input, a guaranteed trailing newline and no cap on blank lines.
    fn default() -> Self {
        Self {
            max_consecutive_blank_lines: None,
            ensure_trailing_newline: true,
            newline: Newline::Detect,
            spaces_around_separator: true,
            indentation: Indentation::Spaces(2),
        }
    }
}

/// Reformat the single git-config file in `input`, normalizing its whitespace according to
/// `options`.
///
/// Only the syntactic event stream of `input` is consulted, so includes are never resolved.
/// Values, comments and section headers are carried over unchanged, while insignificant
/// whitespace, newlines and the `=` separator are rewritten.
///
/// # Errors
///
/// Returns a [`parse::Error`] if `input` is not a syntactically valid git-config file.
pub fn normalize(input: &[u8], options: &Options) -> Result<BString, parse::Error> {
    let parsed = parse::Events::from_bytes(input, None)?;
    let flat_events = parsed.into_vec();
    let formatted = normalize_events(&flat_events, options);
    Ok(formatted)
}

/// Determine the newline sequence used by `events`.
///
/// Only the first [`Event::Newline`] is inspected: if it contains a carriage return the result
/// is `\r\n`, otherwise `\n`. Without any newline event, `\n` is returned.
fn detect_newline(events: &[Event<'_>]) -> &'static [u8] {
    const LF: &[u8] = b"\n";
    const CRLF: &[u8] = b"\r\n";

    events
        .iter()
        .find_map(|event| match event {
            Event::Newline(bytes) => Some(if bytes.contains(&b'\r') { CRLF } else { LF }),
            _ => None,
        })
        .unwrap_or(LF)
}

/// Render `events` back into bytes, synthesizing all insignificant whitespace according to `opts`.
///
/// * Standalone whitespace events are dropped; indentation, the spacing around `=` and the space
///   before an inline comment are re-created deterministically.
/// * Section headers, key names, values and comments are written with `Event::write_to()`.
/// * A line-continuation span (from `ValueNotDone` through its closing `ValueDone`) is written
///   event by event without any rewriting.
/// * Each newline event is re-emitted using the configured newline sequence. If
///   `opts.max_consecutive_blank_lines` is `Some(n)`, a run of newlines is capped at `n` blank
///   lines; the run is tracked across events so that whitespace-only lines, whose whitespace is
///   dropped, count as blank lines as well.
/// * If `opts.ensure_trailing_newline` is set, a newline is appended to non-empty output only when
///   it does not already end with one. Trailing blank lines are therefore kept, subject to the
///   blank-line cap above.
///
/// Returns the formatted file as a `BString`.
fn normalize_events(events: &[Event<'_>], opts: &Options) -> BString {
    let newline: &[u8] = match opts.newline {
        Newline::Detect => detect_newline(events),
        Newline::Lf => b"\n",
        Newline::CrLf => b"\r\n",
    };
    let indent: Vec<u8> = match opts.indentation {
        Indentation::Tab => vec![b'\t'],
        Indentation::Spaces(n) => vec![b' '; n],
        Indentation::None => Vec::new(),
    };
    // `n + 1` newline sequences in a row produce `n` blank lines.
    let max_newlines_per_run = opts.max_consecutive_blank_lines.map(|n| n.saturating_add(1));

    let mut out: Vec<u8> = Vec::with_capacity(events.len() * 8);
    let mut in_section = false;
    let mut line_has_content = false;
    // Newline sequences emitted since the last line that had content.
    let mut newlines_in_run = 0usize;
    let mut i = 0;

    while i < events.len() {
        match &events[i] {
            // Standalone, insignificant whitespace is dropped; we synthesize whitespace
            // deterministically around the structural events below.
            Event::Whitespace(_) => {
                i += 1;
            }
            Event::SectionHeader(_) => {
                events[i].write_to(&mut out).expect("write to Vec is infallible");
                in_section = true;
                line_has_content = true;
                i += 1;
            }
            Event::SectionValueName(_) => {
                if in_section && !line_has_content {
                    out.extend_from_slice(&indent);
                }
                events[i].write_to(&mut out).expect("write to Vec is infallible");
                line_has_content = true;
                i += 1;
            }
            Event::KeyValueSeparator => {
                if opts.spaces_around_separator {
                    out.extend_from_slice(b" = ");
                } else {
                    out.push(b'=');
                }
                line_has_content = true;
                i += 1;
            }
            Event::Value(_) | Event::ValueDone(_) => {
                events[i].write_to(&mut out).expect("write to Vec is infallible");
                line_has_content = true;
                i += 1;
            }
            // A line-continuation span: emit everything verbatim through the closing `ValueDone`,
            // so whitespace that the parser folded into the continued value is never touched.
            Event::ValueNotDone(_) => {
                loop {
                    let is_done = matches!(events[i], Event::ValueDone(_));
                    events[i].write_to(&mut out).expect("write to Vec is infallible");
                    i += 1;
                    // The second condition guards against a span that is cut off by the end of
                    // the event stream.
                    if is_done || i >= events.len() {
                        break;
                    }
                }
                line_has_content = true;
            }
            Event::Comment(_) => {
                if line_has_content {
                    // Inline comment trailing a value/header: one space before the marker.
                    out.push(b' ');
                } else if in_section {
                    out.extend_from_slice(&indent);
                }
                events[i].write_to(&mut out).expect("write to Vec is infallible");
                line_has_content = true;
                i += 1;
            }
            Event::Newline(n) => {
                if line_has_content {
                    // Content was written since the previous newline: a new run begins here.
                    newlines_in_run = 0;
                }
                let mut count = n.iter().filter(|&&b| b == b'\n').count();
                if let Some(max_newlines) = max_newlines_per_run {
                    // Cap against what the current run already emitted, not per event: dropped
                    // whitespace-only lines split one visual run into several newline events.
                    count = count.min(max_newlines.saturating_sub(newlines_in_run));
                }
                for _ in 0..count {
                    out.extend_from_slice(newline);
                }
                newlines_in_run += count;
                line_has_content = false;
                i += 1;
            }
        }
    }

    // Only add what is missing: stripping existing trailing newlines would silently remove
    // trailing blank lines even though no blank-line cap was requested.
    if opts.ensure_trailing_newline && !out.is_empty() && !out.ends_with(b"\n") {
        out.extend_from_slice(newline);
    }

    out.into()
}
2 changes: 2 additions & 0 deletions gix-config/src/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub use events_type::{Events, FrontMatterEvents};
mod comment;
mod error;
///
pub mod format;
///
pub mod section;

#[cfg(test)]
Expand Down
185 changes: 185 additions & 0 deletions gix-config/tests/config/parse/format.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
use gix_config::parse::{
Events,
format::{self, Indentation, Newline, Options},
};

/// Format `input` with the default [`Options`] and return the result as a `String`.
///
/// Panics if `input` is not a valid config or the output is not valid UTF-8.
fn norm(input: &str) -> String {
    let formatted: Vec<u8> = format::normalize(input.as_bytes(), &Options::default())
        .expect("valid config")
        .into();
    String::from_utf8(formatted).expect("utf8")
}

/// Collect (section, name, value) triples from a config's event stream so two configs can be
/// compared for *meaning* rather than bytes.
///
/// A triple is emitted once its key is complete, which is when the next key starts, the next
/// section header starts, or the input ends. Keys without a value yield an empty value string.
/// Panics if `input` is not a valid config.
fn semantic_triples(input: &str) -> Vec<(String, String, String)> {
    use gix_config::parse::Event;

    let events = Events::from_str(input).expect("valid").into_vec();
    let mut out = Vec::new();
    let mut section = String::new();
    let mut pending_name: Option<String> = None;
    let mut value = String::new();
    for ev in &events {
        match ev {
            Event::SectionHeader(h) => {
                // Flush before switching sections, otherwise the last key of the previous
                // section would be attributed to the new one.
                if let Some(name) = pending_name.take() {
                    out.push((section.clone(), name, std::mem::take(&mut value)));
                }
                section = h.to_bstring().to_string();
            }
            Event::SectionValueName(_) => {
                if let Some(name) = pending_name.take() {
                    out.push((section.clone(), name, std::mem::take(&mut value)));
                }
                pending_name = Some(ev.to_bstr_lossy().to_string());
                value.clear();
            }
            // All pieces of a (possibly continued) value are concatenated in order.
            Event::Value(_) | Event::ValueDone(_) | Event::ValueNotDone(_) => {
                value.push_str(&ev.to_bstr_lossy().to_string());
            }
            _ => {}
        }
    }
    if let Some(name) = pending_name.take() {
        out.push((section, name, value));
    }
    out
}

#[test]
fn default_policy_basic() {
    // 4-space indent collapses to the 2-space default; trailing whitespace and tight `=` are fixed.
    let input = "[core]\n    editor=vim \n";
    assert_eq!(norm(input), "[core]\n  editor = vim\n");
}

#[test]
fn meaning_is_preserved() {
    // Each sample must yield the same (section, name, value) triples before and after formatting.
    let samples = [
        "[core]\n editor=vim\n",
        "[remote \"origin\"]\n\turl = https://example.com/x.git\n",
        "[a]\nx=1\ny = 2\n[b]\nz=3\n",
        "[user]\n\tname = A B ; trailing comment\n",
    ];
    for input in samples {
        let after = semantic_triples(&norm(input));
        let before = semantic_triples(input);
        assert_eq!(after, before, "formatting must not change meaning for: {input:?}");
    }
}

#[test]
fn line_continuation_value_is_untouched() {
    // Leading whitespace on a continued line belongs to the value, so the formatter must not
    // alter it.
    let input = "[alias]\nsave = \"!f() { \\\n git status; \\\n}; f\"\n";
    let formatted = norm(input);
    let after = semantic_triples(&formatted);
    let before = semantic_triples(input);
    assert_eq!(after, before, "continuation value bytes must be preserved");
}

#[test]
fn trailing_backslash_at_eof() {
    // A dangling continuation marker at the very end of the input must neither panic nor
    // change the parsed key/value data.
    let input = "[core]\na=hello\\";
    let formatted = norm(input);
    assert_eq!(semantic_triples(&formatted), semantic_triples(input));
}

#[test]
fn implicit_boolean_key_keeps_no_separator() {
    // A key without `=` must stay without one; only its indentation is normalized to the
    // 2-space default.
    let input = "[core]\n autocrlf\n";
    assert_eq!(norm(input), "[core]\n  autocrlf\n");
}

#[test]
fn comments_are_preserved() {
    // Covers a comment before any section, a standalone one inside a section and an inline one.
    let formatted = norm("; top comment\n[core]\n# inner\n\teditor = vim ; inline\n");
    for fragment in ["; top comment", "# inner", "; inline"] {
        assert!(formatted.contains(fragment));
    }
}

#[test]
fn quoted_subsection_and_value_verbatim() {
    // Escapes inside a quoted subsection name and a quoted value must survive formatting.
    let input = "[test \"sub \\\"x\\\"\"]\n\tpath = \"C:\\\\root\"\n";
    let after = semantic_triples(&norm(input));
    let before = semantic_triples(input);
    assert_eq!(after, before);
}

#[test]
fn crlf_is_detected_and_normalized() {
    // With `Newline::Detect` (the default) the input's `\r\n` style is kept, while indentation
    // and separator spacing are normalized.
    let input = "[core]\r\n editor=vim\r\n";
    assert_eq!(norm(input), "[core]\r\n  editor = vim\r\n");
}

#[test]
fn blank_lines_left_alone_by_default() {
    // Without a blank-line cap both blank lines between the sections must survive.
    let input = "[a]\nx = 1\n\n\n[b]\ny = 2\n";
    assert_eq!(norm(input), "[a]\n  x = 1\n\n\n[b]\n  y = 2\n");
}

#[test]
fn blank_lines_collapsed_when_requested() {
    // Three blank lines in the input are capped at the single one that was requested.
    let opts = Options {
        max_consecutive_blank_lines: Some(1),
        ..Options::default()
    };
    let out = format::normalize("[a]\nx = 1\n\n\n\n[b]\ny = 2\n".as_bytes(), &opts).unwrap();
    assert_eq!(
        String::from_utf8(out.into()).unwrap(),
        "[a]\n  x = 1\n\n[b]\n  y = 2\n"
    );
}

#[test]
fn spaces_around_separator_can_be_disabled() {
    // No indentation and a bare `=` yield the most compact form.
    let compact = Options {
        indentation: Indentation::None,
        spaces_around_separator: false,
        ..Options::default()
    };
    let formatted: Vec<u8> = format::normalize(b"[core]\n editor = vim\n", &compact)
        .unwrap()
        .into();
    assert_eq!(String::from_utf8(formatted).unwrap(), "[core]\neditor=vim\n");
}

#[test]
fn tab_indentation_option() {
    // Keys beneath a header are indented with exactly one tab.
    let tabbed = Options {
        indentation: Indentation::Tab,
        ..Options::default()
    };
    let formatted: Vec<u8> = format::normalize(b"[core]\n editor=vim\n", &tabbed)
        .unwrap()
        .into();
    assert_eq!(String::from_utf8(formatted).unwrap(), "[core]\n\teditor = vim\n");
}

#[test]
fn no_indentation_option() {
    // Keys beneath a header start in the first column.
    let flush_left = Options {
        indentation: Indentation::None,
        ..Options::default()
    };
    let formatted: Vec<u8> = format::normalize(b"[core]\n editor=vim\n", &flush_left)
        .unwrap()
        .into();
    assert_eq!(String::from_utf8(formatted).unwrap(), "[core]\neditor = vim\n");
}

#[test]
fn force_lf_newline() {
    // `\r\n` input is rewritten to `\n`; indentation follows the 2-space default.
    let opts = Options {
        newline: Newline::Lf,
        ..Options::default()
    };
    let out = format::normalize("[core]\r\n editor = vim\r\n".as_bytes(), &opts).unwrap();
    assert_eq!(String::from_utf8(out.into()).unwrap(), "[core]\n  editor = vim\n");
}

#[test]
fn force_crlf_newline() {
    // `\n` input is rewritten to `\r\n`; indentation follows the 2-space default.
    let opts = Options {
        newline: Newline::CrLf,
        ..Options::default()
    };
    let out = format::normalize("[core]\n editor = vim\n".as_bytes(), &opts).unwrap();
    assert_eq!(String::from_utf8(out.into()).unwrap(), "[core]\r\n  editor = vim\r\n");
}

#[test]
fn idempotent() {
    // Formatting already formatted output must not change it any further.
    let first_pass = norm("[core]\n editor=vim\n[remote \"o\"]\nurl = x\n");
    let second_pass = norm(&first_pass);
    assert_eq!(first_pass, second_pass);
}
1 change: 1 addition & 0 deletions gix-config/tests/config/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::borrow::Cow;
use gix_config::parse::{Event, Events, Section};

mod error;
mod format;
mod from_bytes;
mod section;

Expand Down
Loading