power_play/src/tar.c

168 lines
5.0 KiB
C

#include "tar.h"
#include "byteio.h"
#include "string.h"
#include "util.h"
/* The archive lookup table is created with a capacity of
 * (number of parsed entries * this factor); see tar_parse. */
#define ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR 2.0
/* File types (value of the header's single-byte `file_type` field):
 * '0' or (ASCII NUL) Normal file
 * '1' Hard link
 * '2' Symbolic link
 * '3' Character special
 * '4' Block special
 * '5' Directory
 * '6' FIFO
 * '7' Contiguous file
 * 'g' Global extended header with metadata (POSIX.1-2001)
 * 'x' Extended header with metadata for the next file in the archive (POSIX.1-2001)
 * 'A'-'Z' Vendor specific extensions (POSIX.1-1988)
 *
 * Only the types named below are referenced by tar_parse: files and
 * directories become entries, the two pax header types are skipped.
 */
#define TAR_TYPE_FILE '0'
#define TAR_TYPE_DIRECTORY '5'
#define TAR_TYPE_PAX_HEADER_X 'x'
#define TAR_TYPE_PAX_HEADER_G 'g'
/* One tar header record, read verbatim from the archive by tar_parse via
 * br_read_to_struct. The field sizes below add up to 512 bytes, so the
 * declaration order and array lengths must not be changed.
 * NOTE(review): PACK is defined outside this file; presumably it removes
 * compiler padding so the struct matches the on-disk layout - confirm. */
PACK(struct tar_header {
/* Pre-posix */
/* Entry path. tar_parse reads it as a NUL-terminated C string. */
u8 file_name[100];
u8 file_mode[8];
u8 owner_id[8];
u8 group_id[8];
/* Size of the entry's data as octal ASCII text; tar_parse decodes the
 * first 11 bytes with str_oct_to_u64. */
u8 file_size[12];
u8 last_modified[12];
/* NOTE: tar_parse does not verify the checksum. */
u8 checksum[8];
/* Both */
/* One of the file type characters (see TAR_TYPE_*). */
u8 file_type;
u8 linked_file_name[100];
/* UStar */
/* Must equal "ustar\0" for tar_parse to accept the header. */
u8 ustar_indicator[6];
u8 ustar_version[2];
u8 owner_user_name[32];
u8 owner_group_name[32];
u8 device_major_number[8];
u8 device_minor_number[8];
/* tar_parse rejects headers whose first byte here is non-zero
 * (name prefixes are not supported). */
u8 file_name_prefix[155];
/* Brings the record up to 512 bytes. */
u8 padding[12];
});
/* Decodes an octal ASCII number (as stored in tar header fields) into a u64.
 *
 * Leading spaces are skipped and decoding stops at the first byte that is
 * not an octal digit ('0'-'7'), so fields that are space padded or that are
 * terminated early by a space / NUL decode to the number they contain
 * instead of garbage. An empty or digit-less string yields 0.
 *
 * NOTE: No overflow detection is done; more than 21 significant octal
 * digits wrap around. */
INTERNAL u64 str_oct_to_u64(struct string str)
{
    u64 i = 0;

    /* Skip leading padding */
    while (i < str.len && str.text[i] == ' ') {
        ++i;
    }

    u64 n = 0;
    for (; i < str.len; ++i) {
        /* Unsigned wrap-around makes bytes below '0' compare greater than 7
         * as well, so a single test rejects everything outside '0'-'7'. */
        u64 digit = (u64)(str.text[i]) - '0';
        if (digit > 7) {
            break;
        }
        n = (n * 8) + digit;
    }
    return n;
}
/* Parses an in-memory tar archive into a `tar_archive`.
 *
 * `prefix` will be prepended to all file names in the archive (after a
 * leading "./" has been removed from the stored name, if one is present).
 *
 * Entries, their names and the lookup table are allocated from `arena`.
 * Only regular file ('0') and directory ('5') records produce entries; pax
 * extended headers are skipped, and any other type trips an ASSERT and is
 * skipped. Headers without the "ustar\0" magic or with a non-empty name
 * prefix field are rejected the same way.
 *
 * Returns the archive: a linked list of entries (most recently parsed
 * first), a name -> entry lookup table, and per-directory child lists
 * containing the files (not sub-directories) directly inside each directory.
 *
 * NOTE: The resulting archive merely points into the supplied tar data. No
 * copying is done. Accessing the archive assumes that the data it's derived
 * from is valid (AKA open if from a file / memory map).
 */
struct tar_archive tar_parse(struct arena *arena, struct buffer data, struct string prefix)
{
    __prof;

    struct tar_archive archive = ZI;
    struct byte_reader br = br_create_from_buffer(data);

    u64 num_files = 0;
    /* NOTE(review): the 1024 byte threshold presumably leaves room for the
     * two zero-filled 512 byte records that terminate an archive - confirm. */
    while (br_bytes_left(&br) > 1024) {
        struct tar_header header = ZI;
        br_read_to_struct(&br, &header);

        if (header.file_name[0] == 0 && header.ustar_indicator[0] == 0) {
            /* Blank record (no name, no magic). Archive writers may pad the
             * archive with zero-filled records, so skip these quietly
             * instead of reporting them as invalid headers. */
            continue;
        }

        if (!string_eq(STRING_FROM_ARRAY(header.ustar_indicator), STR("ustar\0"))) {
            /* Invalid header */
            ASSERT(false);
            continue;
        }

        if (header.file_name_prefix[0] != 0) {
            /* Header file name prefix not supported */
            ASSERT(false);
            continue;
        }

        /* Only the first 11 bytes of the 12 byte size field are decoded */
        struct string file_size_oct_str = { .len = 11, .text = header.file_size };
        u64 file_size = str_oct_to_u64(file_size_oct_str);

        if (file_size > br_bytes_left(&br)) {
            /* The header claims more data than the buffer holds (truncated
             * or corrupt archive). Stop rather than hand out an entry whose
             * buffer extends past the end of `data`. */
            ASSERT(false);
            break;
        }

        struct buffer file_data = {
            .size = file_size,
            .data = br_read_raw(&br, file_size)
        };

        /* Skip sector padding */
        u64 remaining = (512 - (file_size % 512)) % 512;
        br_seek(&br, remaining);

        b32 is_dir = header.file_type == TAR_TYPE_DIRECTORY;
        if (!is_dir && header.file_type != TAR_TYPE_FILE) {
            /* Unsupported type */
            ASSERT(header.file_type == TAR_TYPE_PAX_HEADER_X ||
                   header.file_type == TAR_TYPE_PAX_HEADER_G);
            continue;
        }

        /* The name is only NUL terminated when it is shorter than the field,
         * so measure it with the field size as an upper bound instead of
         * treating it as an unbounded C string. */
        u64 name_len = 0;
        while (name_len < sizeof(header.file_name) && header.file_name[name_len] != 0) {
            ++name_len;
        }
        struct string file_name_cstr = { .len = name_len, .text = header.file_name };

        if (file_name_cstr.len >= 2 &&
            file_name_cstr.text[0] == '.' &&
            file_name_cstr.text[1] == '/') {
            /* Chop off './' prefix (only when the name really starts with it) */
            file_name_cstr.len -= 2;
            file_name_cstr.text += 2;
        }

        /* `header` lives on the stack; string_cat copies the name into the arena */
        struct string file_name = string_cat(arena, prefix, file_name_cstr);

        struct tar_entry *entry = arena_push(arena, struct tar_entry);
        *entry = (struct tar_entry) {
            .is_dir = is_dir,
            .file_name = file_name,
            .buff = file_data
        };

        /* Prepend to the archive's entry list */
        entry->next = archive.head;
        archive.head = entry;
        ++num_files;
    }

    /* Build lookup table */
    archive.lookup = fixed_dict_init(arena, (u64)((f64)num_files * ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR));
    for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
        fixed_dict_set(arena, &archive.lookup, entry->file_name, entry);
    }

    /* Build hierarchy */
    /* NOTE: This is a separate pass because tar entry order is not guaranteed
     * (IE file entries may be encountered before their parent directory entry) */
    for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
        /* Enter into hierarchy (only files are linked into a parent) */
        if (!entry->is_dir) {
            /* Find parent entry: shrink the name until it ends in '/', which
             * leaves the name of the containing directory */
            struct tar_entry *parent_entry = NULL;
            for (struct string parent_dir_name = entry->file_name; parent_dir_name.len > 0; --parent_dir_name.len) {
                if (parent_dir_name.text[parent_dir_name.len - 1] == '/') {
                    parent_entry = fixed_dict_get(&archive.lookup, parent_dir_name);
                    break;
                }
            }
            /* Insert child into parent's list */
            if (parent_entry) {
                entry->next_child = parent_entry->next_child;
                parent_entry->next_child = entry;
            }
        }
    }

    return archive;
}
/* Looks up the entry registered under `name` in the archive's lookup table.
 *
 * `name` must be the full entry name as stored by tar_parse, i.e. including
 * the prefix that was passed to it. The result is whatever fixed_dict_get
 * returns for the key (tar_parse treats a missing key as NULL). */
struct tar_entry *tar_get(const struct tar_archive *archive, struct string name)
{
    struct tar_entry *found = fixed_dict_get(&archive->lookup, name);
    return found;
}