#include "tar.h" #include "byteio.h" #include "string.h" #include "util.h" #define ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR 2.0 /* File types: * '0' or (ASCII NUL) Normal file * '1' Hard link * '2' Symbolic link * '3' Character special * '4' Block special * '5' Directory * '6' FIFO * '7' Contiguous file * 'g' Global extended header with meta data(POSIX.1 - 2001) * 'x' Extended header with metadata for the next file in the archive(POSIX.1 - 2001) * 'A'-'Z' Vendor specific extensions(POSIX.1 - 1988) */ #define TAR_TYPE_FILE '0' #define TAR_TYPE_DIRECTORY '5' #define TAR_TYPE_PAX_HEADER_X 'x' #define TAR_TYPE_PAX_HEADER_G 'g' PACK(struct tar_header { /* Pre-posix */ u8 file_name[100]; u8 file_mode[8]; u8 owner_id[8]; u8 group_id[8]; u8 file_size[12]; u8 last_modified[12]; u8 checksum[8]; /* Both */ u8 file_type; u8 linked_file_name[100]; /* UStar */ u8 ustar_indicator[6]; u8 ustar_version[2]; u8 owner_user_name[32]; u8 owner_group_name[32]; u8 device_major_number[8]; u8 device_minor_number[8]; u8 file_name_prefix[155]; u8 padding[12]; }); INTERNAL u64 str_oct_to_u64(struct string str) { u64 n = 0; for (u64 i = 0; i < str.len; ++i) { n *= 8; n += (u64)(str.text[i]) - '0'; } return n; } /* `prefix` will be prepended to all file names in the archive * * NOTE: The resulting archive merely points into the supplied tar data. No * copying is done. Accessing the archive assumes that the data it's derived * from is valid (AKA open if from a file / memory map). */ struct tar_archive tar_parse(struct arena *arena, struct buffer data, struct string prefix) { __prof; struct tar_archive archive = { 0 }; struct byte_reader br = br_create_from_buffer(data); u64 num_files = 0; while (br_bytes_left(&br) > 1024) { struct tar_header header = { 0 }; br_read_to_struct(&br, &header); if (!string_eq(STRING_FROM_ARRAY(header.ustar_indicator), STR("ustar\0"))) { /* Invalid header */ ASSERT(false); continue; } if (header.file_name_prefix[0] != 0) { /* Header file name prefix not supported */ ASSERT(false); continue; } struct string file_size_oct_str = { .len = 11, .text = header.file_size }; u64 file_size = str_oct_to_u64(file_size_oct_str); struct buffer file_data = { .size = file_size, .data = br_read_raw(&br, file_size) }; /* Skip sector padding */ u64 remaining = (512 - (file_size % 512)) % 512; br_seek(&br, remaining); b32 is_dir = header.file_type == TAR_TYPE_DIRECTORY; if (!is_dir && header.file_type != TAR_TYPE_FILE) { /* Unsupported type */ ASSERT(header.file_type == TAR_TYPE_PAX_HEADER_X || header.file_type == TAR_TYPE_PAX_HEADER_G); continue; } struct string file_name_cstr = string_from_cstr((char *)header.file_name); if (file_name_cstr.len >= 2) { /* Chop off './' prefix */ file_name_cstr.len -= 2; file_name_cstr.text += 2; } struct string file_name = string_cat(arena, prefix, file_name_cstr); struct tar_entry *entry = arena_push(arena, struct tar_entry); *entry = (struct tar_entry) { .is_dir = is_dir, .file_name = file_name, .buff = file_data }; entry->next = archive.head; archive.head = entry; ++num_files; } /* Build lookup table */ archive.lookup = fixed_dict_init(arena, (u64)((f64)num_files * ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR)); for (struct tar_entry *entry = archive.head; entry; entry = entry->next) { fixed_dict_set(arena, &archive.lookup, entry->file_name, entry); } /* Build hierarchy */ /* NOTE: This is a separate pass because tar entry order is not guaranteed * (IE file entries may be encountered before their parent directory entry) */ for (struct tar_entry *entry = archive.head; entry; entry = entry->next) { /* Enter into hierarchy */ if (!entry->is_dir) { /* Find parent entry */ struct tar_entry *parent_entry = NULL; for (struct string parent_dir_name = entry->file_name; parent_dir_name.len > 0; --parent_dir_name.len) { if (parent_dir_name.text[parent_dir_name.len - 1] == '/') { parent_entry = fixed_dict_get(&archive.lookup, parent_dir_name); break; } } /* Insert child into parent's list */ if (parent_entry) { entry->next_child = parent_entry->next_child; parent_entry->next_child = entry; } } } return archive; } struct tar_entry *tar_get(const struct tar_archive *archive, struct string name) { return fixed_dict_get(&archive->lookup, name); }