168 lines
5.0 KiB
C
168 lines
5.0 KiB
C
#include "tar.h"
|
|
#include "byteio.h"
|
|
#include "string.h"
|
|
#include "util.h"
|
|
|
|
/* Multiplier applied to the number of parsed entries to obtain the capacity
 * passed to fixed_dict_init when tar_parse builds the name lookup table. */
#define ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR 2.0
|
|
|
|
/* File types:
 * '0' or (ASCII NUL) Normal file
 * '1' Hard link
 * '2' Symbolic link
 * '3' Character special
 * '4' Block special
 * '5' Directory
 * '6' FIFO
 * '7' Contiguous file
 * 'g' Global extended header with metadata (POSIX.1-2001)
 * 'x' Extended header with metadata for the next file in the archive (POSIX.1-2001)
 * 'A'-'Z' Vendor specific extensions (POSIX.1-1988)
 */
|
|
|
|
/* Values of the header `file_type` byte that the parser distinguishes */
#define TAR_TYPE_FILE         '0' /* Normal file */
#define TAR_TYPE_DIRECTORY    '5' /* Directory */
#define TAR_TYPE_PAX_HEADER_X 'x' /* Extended header for the next file in the archive */
#define TAR_TYPE_PAX_HEADER_G 'g' /* Global extended header */
|
|
|
|
PACK(struct tar_header {
|
|
/* Pre-posix */
|
|
u8 file_name[100];
|
|
u8 file_mode[8];
|
|
u8 owner_id[8];
|
|
u8 group_id[8];
|
|
u8 file_size[12];
|
|
u8 last_modified[12];
|
|
u8 checksum[8];
|
|
|
|
/* Both */
|
|
u8 file_type;
|
|
u8 linked_file_name[100];
|
|
|
|
/* UStar */
|
|
u8 ustar_indicator[6];
|
|
u8 ustar_version[2];
|
|
u8 owner_user_name[32];
|
|
u8 owner_group_name[32];
|
|
u8 device_major_number[8];
|
|
u8 device_minor_number[8];
|
|
u8 file_name_prefix[155];
|
|
u8 padding[12];
|
|
});
|
|
|
|
/* Convert the octal text of a tar numeric header field to an integer.
 *
 * Leading spaces are skipped, and conversion stops at the first byte that is
 * not an octal digit ('0'-'7'). Tar numeric fields may be space padded and are
 * terminated by a space or NUL, so bytes past the digits must not be folded
 * into the value.
 *
 * Returns 0 when `str` is empty or holds no octal digits. Values wider than
 * 64 bits wrap (unsigned arithmetic).
 */
INTERNAL u64 str_oct_to_u64(struct string str)
{
    u64 i = 0;

    /* Skip space padding in front of the digits */
    while (i < str.len && str.text[i] == ' ') {
        ++i;
    }

    u64 n = 0;
    for (; i < str.len; ++i) {
        /* Bytes below '0' wrap around to a huge value, so a single
         * `> 7` comparison rejects everything outside '0'-'7'. */
        u64 digit = (u64)(str.text[i]) - '0';
        if (digit > 7) {
            break;
        }
        n = (n * 8) + digit;
    }
    return n;
}
|
|
|
|
/* `prefix` will be prepended to all file names in the archive
|
|
*
|
|
* NOTE: The resulting archive merely points into the supplied tar data. No
|
|
* copying is done. Accessing the archive assumes that the data it's derived
|
|
* from is valid (AKA open if from a file / memory map).
|
|
*/
|
|
struct tar_archive tar_parse(struct arena *arena, struct buffer data, struct string prefix)
|
|
{
|
|
__prof;
|
|
|
|
struct tar_archive archive = { 0 };
|
|
struct byte_reader br = br_create_from_buffer(data);
|
|
|
|
u64 num_files = 0;
|
|
while (br_bytes_left(&br) > 1024) {
|
|
|
|
struct tar_header header = { 0 };
|
|
br_read_to_struct(&br, &header);
|
|
|
|
if (!string_eq(STRING_FROM_ARRAY(header.ustar_indicator), STR("ustar\0"))) {
|
|
/* Invalid header */
|
|
ASSERT(false);
|
|
continue;
|
|
}
|
|
|
|
if (header.file_name_prefix[0] != 0) {
|
|
/* Header file name prefix not supported */
|
|
ASSERT(false);
|
|
continue;
|
|
}
|
|
|
|
struct string file_size_oct_str = { .len = 11, .text = header.file_size };
|
|
|
|
u64 file_size = str_oct_to_u64(file_size_oct_str);
|
|
struct buffer file_data = {
|
|
.size = file_size,
|
|
.data = br_read_raw(&br, file_size)
|
|
};
|
|
|
|
/* Skip sector padding */
|
|
u64 remaining = (512 - (file_size % 512)) % 512;
|
|
br_seek(&br, remaining);
|
|
|
|
b32 is_dir = header.file_type == TAR_TYPE_DIRECTORY;
|
|
if (!is_dir && header.file_type != TAR_TYPE_FILE) {
|
|
/* Unsupported type */
|
|
ASSERT(header.file_type == TAR_TYPE_PAX_HEADER_X ||
|
|
header.file_type == TAR_TYPE_PAX_HEADER_G);
|
|
continue;
|
|
}
|
|
|
|
struct string file_name_cstr = string_from_cstr((char *)header.file_name);
|
|
if (file_name_cstr.len >= 2) {
|
|
/* Chop off './' prefix */
|
|
file_name_cstr.len -= 2;
|
|
file_name_cstr.text += 2;
|
|
}
|
|
struct string file_name = string_cat(arena, prefix, file_name_cstr);
|
|
|
|
struct tar_entry *entry = arena_push(arena, struct tar_entry);
|
|
*entry = (struct tar_entry) {
|
|
.is_dir = is_dir,
|
|
.file_name = file_name,
|
|
.buff = file_data
|
|
};
|
|
|
|
entry->next = archive.head;
|
|
archive.head = entry;
|
|
++num_files;
|
|
}
|
|
|
|
/* Build lookup table */
|
|
archive.lookup = fixed_dict_init(arena, (u64)((f64)num_files * ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR));
|
|
for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
|
|
fixed_dict_set(arena, &archive.lookup, entry->file_name, entry);
|
|
}
|
|
|
|
/* Build hierarchy */
|
|
/* NOTE: This is a separate pass because tar entry order is not guaranteed
|
|
* (IE file entries may be encountered before their parent directory entry) */
|
|
for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
|
|
/* Enter into hierarchy */
|
|
if (!entry->is_dir) {
|
|
/* Find parent entry */
|
|
struct tar_entry *parent_entry = NULL;
|
|
for (struct string parent_dir_name = entry->file_name; parent_dir_name.len > 0; --parent_dir_name.len) {
|
|
if (parent_dir_name.text[parent_dir_name.len - 1] == '/') {
|
|
parent_entry = fixed_dict_get(&archive.lookup, parent_dir_name);
|
|
break;
|
|
}
|
|
}
|
|
/* Insert child into parent's list */
|
|
if (parent_entry) {
|
|
entry->next_child = parent_entry->next_child;
|
|
parent_entry->next_child = entry;
|
|
}
|
|
}
|
|
}
|
|
|
|
return archive;
|
|
}
|
|
|
|
struct tar_entry *tar_get(const struct tar_archive *archive, struct string name)
|
|
{
|
|
return fixed_dict_get(&archive->lookup, name);
|
|
}
|