Package tar

import "archive/tar"
Overview
Index
Examples
Documentation

Overview

Package tar implements access to tar archives.

Tape archives (tar) are a file format for storing a sequence of files that can be read and written in a streaming manner. This package aims to cover most variations of the format, including those produced by GNU and BSD tar tools.

Create and add some files to the archive.

Code:

// Create and add some files to the archive.
// The archive is built in memory: buf receives everything tw writes.
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
var files = []struct {
    Name, Body string
}{
    {"readme.txt", "This archive contains some text files."},
    {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"},
    {"todo.txt", "Get animal handling license."},
}
for _, file := range files {
    // Size must equal the number of body bytes written below: writing more
    // than Size bytes yields ErrWriteTooLong, and an entry that is not fully
    // written makes the next WriteHeader or Close return an error.
    hdr := &tar.Header{
        Name: file.Name,
        Mode: 0600,
        Size: int64(len(file.Body)),
    }
    if err := tw.WriteHeader(hdr); err != nil {
        log.Fatal(err)
    }
    if _, err := tw.Write([]byte(file.Body)); err != nil {
        log.Fatal(err)
    }
}
// Close flushes the padding of the last entry and writes the archive footer.
if err := tw.Close(); err != nil {
    log.Fatal(err)
}

// Open and iterate through the files in the archive.
tr := tar.NewReader(&buf)
for {
    // Next advances to the next entry (including the first one).
    hdr, err := tr.Next()
    if err == io.EOF {
        break // End of archive
    }
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("Contents of %s:\n", hdr.Name)
    // tr acts as an io.Reader over the current entry only; it reports io.EOF
    // at the end of that entry, which is where io.Copy stops.
    if _, err := io.Copy(os.Stdout, tr); err != nil {
        log.Fatal(err)
    }
    fmt.Println()
}

Output:

Contents of readme.txt:
This archive contains some text files.
Contents of gopher.txt:
Gopher names:
George
Geoffrey
Gonzo
Contents of todo.txt:
Get animal handling license.

A sparse file can efficiently represent a large file that is mostly empty. When packing an archive, Header.DetectSparseHoles can be used to populate the sparse map, while Header.PunchSparseHoles can be used to create a sparse file on disk when extracting an archive.

Code:

// Create the source sparse file.
src, err := ioutil.TempFile("", "sparse.db")
if err != nil {
    log.Fatal(err)
}
// Deferred calls run last-in-first-out, so the file is closed before it is
// removed.
defer os.Remove(src.Name()) // Best-effort cleanup
defer func() {
    if err := src.Close(); err != nil {
        log.Fatal(err)
    }
}()
// Set the file size to 10e6 bytes, then write ten 1e3-byte runs of the digits
// '0'..'9'. Each run is preceded by a relative seek of 1e6-1e3 bytes, so the
// last run ends exactly at offset 10e6. Whether the skipped regions are
// stored as real holes depends on the operating system and filesystem.
if err := src.Truncate(10e6); err != nil {
    log.Fatal(err)
}
for i := 0; i < 10; i++ {
    if _, err := src.Seek(1e6-1e3, io.SeekCurrent); err != nil {
        log.Fatal(err)
    }
    if _, err := src.Write(bytes.Repeat([]byte{'0' + byte(i)}, 1e3)); err != nil {
        log.Fatal(err)
    }
}

// Create an archive and pack the source sparse file to it.
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
fi, err := src.Stat()
if err != nil {
    log.Fatal(err)
}
// FileInfoHeader does not populate SparseHoles; DetectSparseHoles does that.
hdr, err := tar.FileInfoHeader(fi, "")
if err != nil {
    log.Fatal(err)
}
// DetectSparseHoles must run before WriteHeader so the sparse map is part of
// the serialized header. It also resets the file offset of src to zero, which
// is why the io.Copy below reads the file from the beginning.
if err := hdr.DetectSparseHoles(src); err != nil {
    log.Fatal(err)
}
if err := tw.WriteHeader(hdr); err != nil {
    log.Fatal(err)
}
if _, err := io.Copy(tw, src); err != nil {
    log.Fatal(err)
}
if err := tw.Close(); err != nil {
    log.Fatal(err)
}

// Create the destination sparse file.
dst, err := ioutil.TempFile("", "sparse.db")
if err != nil {
    log.Fatal(err)
}
defer os.Remove(dst.Name()) // Best-effort cleanup
defer func() {
    if err := dst.Close(); err != nil {
        log.Fatal(err)
    }
}()

// Open the archive and extract the sparse file into the destination file.
tr := tar.NewReader(&buf)
hdr, err = tr.Next()
if err != nil {
    log.Fatal(err)
}
// PunchSparseHoles must run before the content is copied: it discards the
// current contents of dst, prepares the holes listed in hdr.SparseHoles
// (on supported systems), and resets the file offset to zero.
if err := hdr.PunchSparseHoles(dst); err != nil {
    log.Fatal(err)
}
if _, err := io.Copy(dst, tr); err != nil {
    log.Fatal(err)
}

// Verify that the sparse files are identical.
// Both files are read back in full by name and compared via their MD5 sums.
want, err := ioutil.ReadFile(src.Name())
if err != nil {
    log.Fatal(err)
}
got, err := ioutil.ReadFile(dst.Name())
if err != nil {
    log.Fatal(err)
}
fmt.Printf("Src MD5: %08x\n", md5.Sum(want))
fmt.Printf("Dst MD5: %08x\n", md5.Sum(got))

Output:

Src MD5: 33820d648d42cb3da2515da229149f74
Dst MD5: 33820d648d42cb3da2515da229149f74

The SparseHoles can be manually constructed without Header.DetectSparseHoles.

Code:

// Define a sparse file to add to the archive.
// This sparse file contains 5 data fragments and 4 hole fragments.
// The logical size of the file is 16 KiB, while the physical size of the
// file is only 3 KiB (not counting the header data).
hdr := &tar.Header{
    Name: "sparse.db",
    Size: 16384,
    // Holes are listed in ascending order, do not overlap, and stay within
    // Size; everything between them is data.
    SparseHoles: []tar.SparseEntry{
        // Data fragment at 0..1023
        {Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535
        // Data fragment at 1536..2047
        {Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583
        // Data fragment at 3584..4095
        {Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679
        // Data fragment at 7680..8191
        {Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871
        // Data fragment at 15872..16383
    },
}

// The regions marked as a sparse hole are filled with NUL-bytes.
// The total length of the body content must match the specified Size field.
// Each NUL run below has the same length as the corresponding hole above.
body := "" +
    strings.Repeat("A", 1024) +
    strings.Repeat("\x00", 1024-512) +
    strings.Repeat("B", 512) +
    strings.Repeat("\x00", 2048-512) +
    strings.Repeat("C", 512) +
    strings.Repeat("\x00", 4096-512) +
    strings.Repeat("D", 512) +
    strings.Repeat("\x00", 8192-512) +
    strings.Repeat("E", 512)

h := md5.Sum([]byte(body))
fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)

// Create a new archive and write the sparse file.
// The full logical body, holes included, is passed to Write.
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
if err := tw.WriteHeader(hdr); err != nil {
    log.Fatal(err)
}
if _, err := tw.Write([]byte(body)); err != nil {
    log.Fatal(err)
}
if err := tw.Close(); err != nil {
    log.Fatal(err)
}

// Open and iterate through the files in the archive.
tr := tar.NewReader(&buf)
for {
    // hdr, body and h declared inside this loop shadow the outer variables
    // of the same names; the outer values are left untouched.
    hdr, err := tr.Next()
    if err == io.EOF {
        break
    }
    if err != nil {
        log.Fatal(err)
    }
    // Holes are read back as NUL-bytes, so body has the full logical size.
    body, err := ioutil.ReadAll(tr)
    if err != nil {
        log.Fatal(err)
    }

    h := md5.Sum([]byte(body))
    fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
    // As the expected output shows, the SparseHoles read back carry one more
    // entry than was written: a zero-length entry at offset 16384.
    fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
}

Output:

Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
Write SparseHoles of sparse.db:
	[{1024 512} {2048 1536} {4096 3584} {8192 7680}]

Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
Read SparseHoles of sparse.db:
	[{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}]

Index

Constants
Variables
type Format
func (f Format) String() string
type Header
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error)
func (h *Header) DetectSparseHoles(f *os.File) (err error)
func (h *Header) FileInfo() os.FileInfo
func (h *Header) PunchSparseHoles(f *os.File) (err error)
type Reader
func NewReader(r io.Reader) *Reader
func (tr *Reader) Next() (*Header, error)
func (tr *Reader) Read(b []byte) (int, error)
func (tr *Reader) WriteTo(w io.Writer) (int64, error)
type SparseEntry
type Writer
func NewWriter(w io.Writer) *Writer
func (tw *Writer) Close() error
func (tw *Writer) Flush() error
func (tw *Writer) ReadFrom(r io.Reader) (int64, error)
func (tw *Writer) Write(b []byte) (int, error)
func (tw *Writer) WriteHeader(hdr *Header) error

Examples

Package (Minimal)
Package (SparseAutomatic)
Package (SparseManual)

Documentation

Constants

// Type flags for Header.Typeflag.
// Each flag is a single character constant.
const (
    // Type '0' indicates a regular file.
    TypeReg  = '0'
    TypeRegA = '\x00' // NUL byte; for legacy support only, use TypeReg instead

    // Type '1' to '6' are header-only flags and may not have a data body.
    TypeLink    = '1' // Hard link
    TypeSymlink = '2' // Symbolic link
    TypeChar    = '3' // Character device node
    TypeBlock   = '4' // Block device node
    TypeDir     = '5' // Directory
    TypeFifo    = '6' // FIFO node

    // Type '7' is reserved.
    TypeCont = '7'

    // Type 'x' is used by the PAX format to store key-value records that
    // are only relevant to the next file.
    // This package transparently handles these types.
    TypeXHeader = 'x'

    // Type 'g' is used by the PAX format to store key-value records that
    // are relevant to all subsequent files.
    // This package only supports parsing and composing such headers,
    // but does not currently support persisting the global state across files.
    TypeXGlobalHeader = 'g'

    // Type 'S' indicates a sparse file in the GNU format.
    // Header.SparseHoles should be populated when using this type.
    TypeGNUSparse = 'S'

    // Types 'L' and 'K' are used by the GNU format for a meta file
    // used to store the path or link name for the next file.
    // This package transparently handles these types.
    TypeGNULongName = 'L'
    TypeGNULongLink = 'K'
)

Type flags for Header.Typeflag.

const (
    // NOTE(review): this declaration lists two names but three values, which
    // is not a valid Go constant declaration. It looks like an artifact of
    // how this listing was produced rather than part of the package API;
    // confirm against the package source before relying on it.
    C2, C3 = 1, 2, 3
)

Variables

// Sentinel errors of the package. Every message carries the "tar: " prefix.
var (
    // ErrHeader reports an invalid tar header.
    ErrHeader          = errors.New("tar: invalid tar header")
    // ErrWriteTooLong is returned by Writer.Write when more than Header.Size
    // bytes are written after WriteHeader, and for any Write to a header-only
    // entry type such as TypeLink or TypeDir.
    ErrWriteTooLong    = errors.New("tar: write too long")
    // ErrFieldTooLong reports a header field that is too long.
    ErrFieldTooLong    = errors.New("tar: header field too long")
    // ErrWriteAfterClose reports a write attempted after the archive was closed.
    ErrWriteAfterClose = errors.New("tar: write after close")
)

type Format

type Format int

Format represents the tar archive format.

The original tar format was introduced in Unix V7. Since then, there have been multiple competing formats attempting to standardize or extend the V7 format to overcome its limitations. The most common formats are the USTAR, PAX, and GNU formats, each with their own advantages and limitations.

The following table captures the capabilities of each format:

                  |  USTAR |       PAX |       GNU
------------------+--------+-----------+----------
Name              |   256B | unlimited | unlimited
Linkname          |   100B | unlimited | unlimited
Size              | uint33 | unlimited |    uint89
Mode              | uint21 |    uint21 |    uint57
Uid/Gid           | uint21 | unlimited |    uint57
Uname/Gname       |    32B | unlimited |       32B
ModTime           | uint33 | unlimited |     int89
AccessTime        |    n/a | unlimited |     int89
ChangeTime        |    n/a | unlimited |     int89
Devmajor/Devminor | uint21 |    uint21 |    uint57
------------------+--------+-----------+----------
string encoding   |  ASCII |     UTF-8 |    binary
sub-second times  |     no |       yes |        no
sparse files      |     no |       yes |       yes

The table's upper portion shows the Header fields, where each format reports the maximum number of bytes allowed for each string field and the integer type used to store each numeric field (where timestamps are stored as the number of seconds since the Unix epoch).

The table's lower portion shows specialized features of each format, such as supported string encodings, support for sub-second timestamps, or support for sparse files.

// Constants to identify various tar formats.
const (

    // FormatUnknown indicates that the format is unknown.
    //
    // NOTE(review): no initializer expression is shown for this first
    // constant; presumably this listing omits an unexported leading entry
    // that defines the values. Confirm against the package source.
    FormatUnknown Format

    // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
    //
    // While this format is compatible with most tar readers,
    // the format has several limitations making it unsuitable for some usages.
    // Most notably, it cannot support sparse files, files larger than 8GiB,
    // filenames larger than 256 characters, and non-ASCII filenames.
    //
    // Reference:
    //	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
    FormatUSTAR

    // FormatPAX represents the PAX header format defined in POSIX.1-2001.
    //
    // PAX extends USTAR by writing a special file with Typeflag TypeXHeader
    // preceding the original header. This file contains a set of key-value
    // records, which are used to overcome USTAR's shortcomings, in addition to
    // providing the ability to have sub-second resolution for timestamps.
    //
    // Some newer formats add their own extensions to PAX by defining their
    // own keys and assigning certain semantic meaning to the associated values.
    // For example, sparse file support in PAX is implemented using keys
    // defined by the GNU manual (e.g., "GNU.sparse.map").
    //
    // Reference:
    //	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
    FormatPAX

    // FormatGNU represents the GNU header format.
    //
    // The GNU header format is older than the USTAR and PAX standards and
    // is not compatible with them. The GNU format supports
    // arbitrary file sizes, filenames of arbitrary encoding and length,
    // sparse files, and other features.
    //
    // It is recommended that PAX be chosen over GNU unless the target
    // application can only parse GNU formatted archives.
    //
    // Reference:
    //	http://www.gnu.org/software/tar/manual/html_node/Standard.html
    FormatGNU
)

Constants to identify various tar formats.

func Format.String

func (f Format) String() string

type Header

// Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
    Typeflag byte // Type of header entry (should be TypeReg for most files)

    Name     string // Path name of entry
    Linkname string // Target name of link (valid for TypeLink or TypeSymlink)

    Size  int64  // Logical file size in bytes
    Mode  int64  // Permission and mode bits
    Uid   int    // User ID of owner
    Gid   int    // Group ID of owner
    Uname string // User name of owner
    Gname string // Group name of owner

    // If the Format is unspecified, then Writer.WriteHeader rounds ModTime
    // to the nearest second and ignores the AccessTime and ChangeTime fields.
    //
    // To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
    // To use sub-second resolution, specify the Format as PAX.
    ModTime    time.Time // Modification time
    AccessTime time.Time // Access time (requires either PAX or GNU support)
    ChangeTime time.Time // Change time (requires either PAX or GNU support)

    Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
    Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)

    // SparseHoles represents a sequence of holes in a sparse file.
    //
    // A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
    // If TypeGNUSparse is set, then the format is GNU, otherwise
    // the format is PAX (by using GNU-specific PAX records).
    //
    // A sparse file consists of fragments of data, intermixed with holes
    // (described by this field). A hole is semantically a block of NUL-bytes,
    // but does not actually exist within the tar file.
    // The holes must be sorted in ascending order,
    // not overlap with each other, and not extend past the specified Size.
    SparseHoles []SparseEntry

    // Xattrs stores extended attributes as PAX records under the
    // "SCHILY.xattr." namespace.
    //
    // The following are semantically equivalent:
    //  h.Xattrs[key] = value
    //  h.PAXRecords["SCHILY.xattr."+key] = value
    //
    // When Writer.WriteHeader is called, the contents of Xattrs will take
    // precedence over those in PAXRecords.
    //
    // Deprecated: Use PAXRecords instead.
    Xattrs map[string]string

    // PAXRecords is a map of PAX extended header records.
    //
    // User-defined records should have keys of the following form:
    //	VENDOR.keyword
    // Where VENDOR is some namespace in all uppercase, and keyword may
    // not contain the '=' character (e.g., "GOLANG.pkg.version").
    // The key and value should be non-empty UTF-8 strings.
    //
    // When Writer.WriteHeader is called, PAX records derived from
    // the other fields in Header take precedence over PAXRecords.
    PAXRecords map[string]string

    // Format specifies the format of the tar header.
    //
    // This is set by Reader.Next as a best-effort guess at the format.
    // Since the Reader liberally reads some non-compliant files,
    // it is possible for this to be FormatUnknown.
    //
    // If the format is unspecified when Writer.WriteHeader is called,
    // then it uses the first format (in the order of USTAR, PAX, GNU)
    // capable of encoding this Header (see tar.Format).
    Format Format
}

A Header represents a single header in a tar archive. Some fields may not be populated.

For forward compatibility, users that retrieve a Header from Reader.Next, mutate it in some ways, and then pass it back to Writer.WriteHeader should do so by creating a new Header and copying the fields that they are interested in preserving.

func FileInfoHeader

func FileInfoHeader(fi os.FileInfo, link string) (*Header, error)

FileInfoHeader creates a partially-populated Header from fi. If fi describes a symlink, FileInfoHeader records link as the link target. If fi describes a directory, a slash is appended to the name.

Since os.FileInfo's Name method only returns the base name of the file it describes, it may be necessary to modify Header.Name to provide the full path name of the file.

This function does not populate Header.SparseHoles; for sparse file support, additionally call Header.DetectSparseHoles.

func Header.DetectSparseHoles

func (h *Header) DetectSparseHoles(f *os.File) (err error)

DetectSparseHoles searches for holes within f to populate SparseHoles on supported operating systems and filesystems. The file offset is cleared to zero.

When packing a sparse file, DetectSparseHoles should be called prior to serializing the header to the archive with Writer.WriteHeader.

func Header.FileInfo

func (h *Header) FileInfo() os.FileInfo

FileInfo returns an os.FileInfo for the Header.

func Header.PunchSparseHoles

func (h *Header) PunchSparseHoles(f *os.File) (err error)

PunchSparseHoles destroys the contents of f, and prepares a sparse file (on supported operating systems and filesystems) with holes punched according to SparseHoles. The file offset is cleared to zero.

When extracting a sparse file, PunchSparseHoles should be called prior to populating the content of a file with Reader.WriteTo.

type Reader

type Reader struct {
    // contains filtered or unexported fields
}

Reader provides sequential access to the contents of a tar archive. Reader.Next advances to the next file in the archive (including the first), and then Reader can be treated as an io.Reader to access the file's data.

func NewReader

func NewReader(r io.Reader) *Reader

NewReader creates a new Reader reading from r.

func Reader.Next

func (tr *Reader) Next() (*Header, error)

Next advances to the next entry in the tar archive. The Header.Size determines how many bytes can be read for the next file. Any remaining data in the current file is automatically discarded.

io.EOF is returned at the end of the input.

func Reader.Read

func (tr *Reader) Read(b []byte) (int, error)

Read reads from the current file in the tar archive. It returns (0, io.EOF) when it reaches the end of that file, until Next is called to advance to the next file.

If the current file is sparse, then the regions marked as a hole are read back as NUL-bytes.

Calling Read on special types like TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what the Header.Size claims.

func Reader.WriteTo

func (tr *Reader) WriteTo(w io.Writer) (int64, error)

WriteTo writes the content of the current file to w. The number of bytes written matches the number of remaining bytes in the current file.

If the current file is sparse and w is an io.WriteSeeker, then WriteTo uses Seek to skip past holes defined in Header.SparseHoles, assuming that skipped regions are filled with NULs. This always writes the last byte to ensure w is the right size.

type SparseEntry

type SparseEntry struct{ Offset, Length int64 }

SparseEntry represents a Length-sized fragment at Offset in the file.

type Writer

type Writer struct {
    // contains filtered or unexported fields
}

Writer provides sequential writing of a tar archive. Writer.WriteHeader begins a new file with the provided Header, and then Writer can be treated as an io.Writer to supply that file's data.

func NewWriter

func NewWriter(w io.Writer) *Writer

NewWriter creates a new Writer writing to w.

func Writer.Close

func (tw *Writer) Close() error

Close closes the tar archive by flushing the padding, and writing the footer. If the current file (from a prior call to WriteHeader) is not fully written, then this returns an error.

func Writer.Flush

func (tw *Writer) Flush() error

Flush finishes writing the current file's block padding. The current file must be fully written before Flush can be called.

Deprecated: This is unnecessary as the next call to WriteHeader or Close will implicitly flush out the file's padding.

func Writer.ReadFrom

func (tw *Writer) ReadFrom(r io.Reader) (int64, error)

ReadFrom populates the content of the current file by reading from r. The number of bytes read must match the number of remaining bytes in the current file.

If the current file is sparse and r is an io.ReadSeeker, then ReadFrom uses Seek to skip past holes defined in Header.SparseHoles, assuming that skipped regions are all NULs. This always reads the last byte to ensure r is the right size.

func Writer.Write

func (tw *Writer) Write(b []byte) (int, error)

Write writes to the current file in the tar archive. Write returns the error ErrWriteTooLong if more than Header.Size bytes are written after WriteHeader.

If the current file is sparse, then the regions marked as a hole must be written as NUL-bytes.

Calling Write on special types like TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless of what the Header.Size claims.

func Writer.WriteHeader

func (tw *Writer) WriteHeader(hdr *Header) error

WriteHeader writes hdr and prepares to accept the file's contents. The Header.Size determines how many bytes can be written for the next file. If the current file is not fully written, then this returns an error. This implicitly flushes any padding necessary before writing the header.