Strip consecutive empty lines in a Go writer

I've got a Go text/template that renders a file; however, I've found it difficult to structure the template cleanly while preserving the line breaks in the output.

I'd like to have additional, unnecessary newlines in the template to make it more readable, but strip them from the output. Any group of newlines more than a normal paragraph break should be condensed to a normal paragraph break, e.g.

lines with



too many breaks should become lines with

normal paragraph breaks.

The string is potentially too large to store safely in memory, so I want to keep it as an output stream.

My first attempt:

// condensingWriter wraps writer and is meant to suppress a blank chunk that
// directly follows another blank chunk.
type condensingWriter struct {
    // writer receives every chunk that is not suppressed.
    writer io.Writer
    // lastLineIsEmpty is intended to record whether the previous chunk was blank.
    lastLineIsEmpty bool
}

// Write forwards b to the wrapped writer unless both b and the previous chunk
// are blank, in which case it reports 0 bytes written and a nil error.
//
// NOTE(review): each call treats the whole of b as a single line; nothing
// here splits b on newline characters.
// NOTE(review): the receiver is a value, so c is a copy of the caller's
// struct and the assignment in the deferred function changes only that copy.
func (c condensingWriter) Write(b []byte) (n int, err error){
    // b counts as blank when it contains nothing but whitespace.
    thisLineIsEmpty := strings.TrimSpace(string(b)) == ""
    // Deferred so the flag is stored only after the decision below was made
    // using the previous value.
    defer func(){
        c.lastLineIsEmpty = thisLineIsEmpty
    }()
    if c.lastLineIsEmpty && thisLineIsEmpty{
        // Second blank chunk in a row: drop it.
        return 0, nil
    } else {
        return c.writer.Write(b)
    }
}

This doesn't work because I naively assumed that it would buffer on newline characters, but it doesn't.

Any suggestions on how to get this to work?

The general idea is you'll have to look for consecutive newlines anywhere in the input slice and if such cases exist, skip over all but the first newline character.

Additionally, you have to track whether the last byte written was a newline, so the next call to Write will know to eliminate a newline if necessary. You were on the right track by adding a bool to your writer type. However, you'll want to use a pointer receiver instead of a value receiver here, otherwise you'll be modifying a copy of the struct.

You would want to change

func (c condensingWriter) Write(b []byte)

to

func (c *condensingWriter) Write(b []byte)

You could try something like this. You'll have to test with larger inputs to make sure it handles all cases correctly.

package main

import (
    "bytes"
    "io"
    "os"
)

// Newline is the line-feed byte that separates lines in the stream.
var Newline byte = '\n'

// ReduceNewlinesWriter is an io.Writer that forwards everything it receives
// to w, except that any run of three or more consecutive '\n' bytes is
// condensed to exactly two (a line end followed by one blank line). The run
// length is tracked across calls, so the output does not depend on how the
// input happens to be split into Write calls.
type ReduceNewlinesWriter struct {
	w io.Writer
	// newlineRun is the number of consecutive '\n' bytes at the end of the
	// output forwarded so far, never more than the number kept per run.
	newlineRun int
}

// Write forwards b to the underlying writer, dropping every newline that
// would extend a run of newlines beyond two.
//
// On success it returns len(b), nil: dropped newlines count as consumed,
// which is what the io.Writer contract requires and what keeps io.Copy and
// io.WriteString from reporting a short write. If the underlying writer
// fails, Write returns that error together with the number of bytes of b
// that were consumed before the failure. An empty b is a no-op.
func (r *ReduceNewlinesWriter) Write(b []byte) (int, error) {
	const maxRun = 2 // newlines kept per run: the line end plus one blank line

	done := 0 // bytes of b consumed so far, whether forwarded or dropped
	for done < len(b) {
		rest := b[done:]

		// Split the head of rest into a stretch of text followed by a run of
		// newlines. Either part may be empty, but never both.
		text := bytes.IndexByte(rest, '\n')
		if text < 0 {
			text = len(rest)
		}
		run := 0
		for text+run < len(rest) && rest[text+run] == '\n' {
			run++
		}

		// Newlines already at the end of the output only count against this
		// run if no text separates them from it.
		prev := r.newlineRun
		if text > 0 {
			prev = 0
		}
		keep := maxRun - prev
		if keep > run {
			keep = run
		}

		// The text and the newlines that survive are contiguous in b, so
		// they go out in a single call to the underlying writer.
		if chunk := rest[:text+keep]; len(chunk) > 0 {
			n, err := r.w.Write(chunk)
			if err == nil && n < len(chunk) {
				err = io.ErrShortWrite
			}
			if err != nil {
				// Record what actually reached the output before bailing out.
				switch {
				case n > text:
					r.newlineRun = prev + n - text
				case n > 0:
					r.newlineRun = 0
				}
				return done + n, err
			}
		}
		r.newlineRun = prev + keep
		done += text + run
	}
	return len(b), nil
}

func main() {
    r := ReduceNewlinesWriter{
        w: os.Stdout,
    }
    io.WriteString(&r, "this






has
multiple


newline



characters")
}

Inspired by zmb's approach, I've come up with the following package:

// Package striplines strips runs of consecutive empty lines from an output stream.
package striplines

import (
  "io"
  "strings"
)

// Striplines wraps an output stream and strips runs of consecutive blank
// lines: of each run only the first blank line is forwarded. A line counts as
// blank when it contains nothing but whitespace. Blank lines at the very
// start of the stream are dropped entirely, because the line "before" the
// first one is treated as blank.
//
// Input is buffered until a complete line is available, so you must call
// Close once all data has been written; otherwise a final line without a
// trailing newline never reaches Writer.
//
// Striplines implements io.Writer and io.Closer, and therefore
// io.WriteCloser.
type Striplines struct {
	Writer      io.Writer // destination for the filtered output
	lastLine    []byte    // most recent complete line, whether forwarded or dropped
	currentLine []byte    // bytes received since the last newline
}

// Compile-time check that the io.WriteCloser contract is satisfied.
var _ io.WriteCloser = (*Striplines)(nil)

// Write splits p into lines, prepending any partial line left over from
// earlier calls, and forwards each complete line unless it is a blank line
// that directly follows another blank line. Bytes after the last newline are
// kept until a later Write completes the line or Close flushes them.
//
// On success it returns len(p), nil: buffered and dropped bytes count as
// consumed, as the io.Writer contract requires. If the underlying writer
// fails, it returns the number of bytes of p that belong to lines handled
// before the failing one, and leaves the internal state as it was before
// that line.
func (w *Striplines) Write(p []byte) (int, error) {
	s := string(p) // one copy up front; lines are cut from it without copying
	done := 0      // bytes of p that belong to fully handled lines
	for done < len(s) {
		i := strings.IndexByte(s[done:], '\n')
		if i < 0 {
			// No newline left: remember the partial line for later.
			w.currentLine = append(w.currentLine, s[done:]...)
			break
		}
		end := done + i + 1
		line := string(w.currentLine) + s[done:end]
		if _, err := w.writeLn(line); err != nil {
			return done, err
		}
		// Reuse the existing buffers instead of allocating one per line.
		w.lastLine = append(w.lastLine[:0], line...)
		w.currentLine = w.currentLine[:0]
		done = end
	}
	return len(p), nil
}

// Close flushes the last, unterminated line (if any) into the underlying
// writer. It does not close the underlying writer. Calling Close again
// without intervening writes is a no-op.
func (w *Striplines) Close() error {
	if len(w.currentLine) == 0 {
		return nil
	}
	_, err := w.writeLn(string(w.currentLine))
	if err == nil {
		w.currentLine = w.currentLine[:0]
	}
	return err
}

// writeLn forwards line to the underlying writer unless both line and the
// previously seen line are blank. It returns whatever the underlying Write
// returns, or 0, nil when the line is dropped.
func (w *Striplines) writeLn(line string) (n int, err error) {
	if strings.TrimSpace(string(w.lastLine)) == "" && strings.TrimSpace(line) == "" {
		return 0, nil
	}
	return w.Writer.Write([]byte(line))
}

See it in action here: http://play.golang.org/p/t8BGPUMYhb