#include "tar.h" #include "bitbuff.h" #include "string.h" #include "util.h" #define ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR 2.0 /* File types: * '0' or (ASCII NUL) Normal file * '1' Hard link * '2' Symbolic link * '3' Character special * '4' Block special * '5' Directory * '6' FIFO * '7' Contiguous file * 'g' Global extended header with meta data(POSIX.1 - 2001) * 'x' Extended header with metadata for the next file in the archive(POSIX.1 - 2001) * 'A'-'Z' Vendor specific extensions(POSIX.1 - 1988) */ #define TAR_TYPE_FILE '0' #define TAR_TYPE_DIRECTORY '5' #define TAR_TYPE_PAX_HEADER_X 'x' #define TAR_TYPE_PAX_HEADER_G 'g' PACK(struct tar_header { /* Pre-posix */ u8 file_name[100]; u8 file_mode[8]; u8 owner_id[8]; u8 group_id[8]; u8 file_size[12]; u8 last_modified[12]; u8 checksum[8]; /* Both */ u8 file_type; u8 linked_file_name[100]; /* UStar */ u8 ustar_indicator[6]; u8 ustar_version[2]; u8 owner_user_name[32]; u8 owner_group_name[32]; u8 device_major_number[8]; u8 device_minor_number[8]; u8 file_name_prefix[155]; u8 padding[12]; }); INTERNAL u64 str_oct_to_u64(struct string str) { u64 n = 0; for (u64 i = 0; i < str.len; ++i) { n *= 8; n += (u64)(str.text[i]) - '0'; } return n; } /* `prefix` will be prepended to all file names in the archive * * NOTE: The resulting archive merely points into the supplied tar data, no * copying is done. Accessing the archive assumes that the data string is still valid. */ struct tar_archive tar_parse(struct arena *arena, struct string data, struct string prefix) { __prof; struct tar_archive archive = ZI; struct bitbuff bb = bitbuff_from_string(data); struct bitbuff_reader br = br_from_bitbuff_no_debug(&bb); u64 num_files = 0; while (br_num_bytes_left(&br) > 1024) { struct tar_header header = ZI; br_read_bytes(&br, STRING_FROM_STRUCT(&header)); if (!string_eq(STRING_FROM_ARRAY(header.ustar_indicator), LIT("ustar\0"))) { /* Invalid header */ ASSERT(0); continue; } if (header.file_name_prefix[0] != 0) { /* Header file name prefix not supported */ ASSERT(0); continue; } struct string file_size_oct_str = { .len = 11, .text = header.file_size }; u64 file_size = str_oct_to_u64(file_size_oct_str); u8 *file_data_ptr = br_read_bytes_raw(&br, file_size); if (!file_data_ptr) { file_size = 0; } struct string file_data = STRING(file_size, file_data_ptr); /* Skip sector padding */ u64 remaining = (512 - (file_size % 512)) % 512; br_seek_bytes(&br, remaining); b32 is_dir = header.file_type == TAR_TYPE_DIRECTORY; if (!is_dir && header.file_type != TAR_TYPE_FILE) { /* Unsupported type */ ASSERT(header.file_type == TAR_TYPE_PAX_HEADER_X || header.file_type == TAR_TYPE_PAX_HEADER_G); continue; } struct string file_name_cstr = string_from_cstr_no_limit((char *)header.file_name); if (file_name_cstr.len >= 2) { /* Chop off './' prefix */ file_name_cstr.len -= 2; file_name_cstr.text += 2; } struct string file_name = string_cat(arena, prefix, file_name_cstr); struct tar_entry *entry = arena_push(arena, struct tar_entry); entry->is_dir = is_dir; entry->file_name = file_name; entry->data = file_data; entry->next = archive.head; archive.head = entry; ++num_files; } /* Build lookup table */ archive.lookup = dict_init(arena, (u64)((f64)num_files * ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR)); for (struct tar_entry *entry = archive.head; entry; entry = entry->next) { u64 hash = hash_fnv64(HASH_FNV64_BASIS, entry->file_name); dict_set(arena, archive.lookup, hash, (u64)entry); } /* Build hierarchy */ /* NOTE: This is a separate pass because tar entry order is not guaranteed * (IE file entries may be encountered before their parent directory entry) */ for (struct tar_entry *entry = archive.head; entry; entry = entry->next) { /* Enter into hierarchy */ if (!entry->is_dir) { /* Find parent entry */ struct tar_entry *parent_entry = 0; for (struct string parent_dir_name = entry->file_name; parent_dir_name.len > 0; --parent_dir_name.len) { if (parent_dir_name.text[parent_dir_name.len - 1] == '/') { u64 hash = hash_fnv64(HASH_FNV64_BASIS, parent_dir_name); parent_entry = (struct tar_entry *)dict_get(archive.lookup, hash); break; } } /* Insert child into parent's list */ if (parent_entry) { entry->next_child = parent_entry->next_child; parent_entry->next_child = entry; } } } return archive; } struct tar_entry *tar_get(struct tar_archive *archive, struct string name) { u64 hash = hash_fnv64(HASH_FNV64_BASIS, name); return (struct tar_entry *)dict_get(archive->lookup, hash); }