167 lines
5.0 KiB
C
167 lines
5.0 KiB
C
#include "tar.h"
|
|
#include "bitbuff.h"
|
|
#include "string.h"
|
|
#include "util.h"
|
|
|
|
/* Multiplier applied to the parsed entry count when sizing the archive's
 * lookup table (see the fixed_dict_init call in tar_parse). */
#define ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR 2.0
|
|
|
|
/* File types:
 * '0' or (ASCII NUL) Normal file
 * '1' Hard link
 * '2' Symbolic link
 * '3' Character special
 * '4' Block special
 * '5' Directory
 * '6' FIFO
 * '7' Contiguous file
 * 'g' Global extended header with metadata (POSIX.1-2001)
 * 'x' Extended header with metadata for the next file in the archive (POSIX.1-2001)
 * 'A'-'Z' Vendor specific extensions (POSIX.1-1988)
 */
|
|
|
|
/* Type-flag values of the tar header `file_type` byte that this parser
 * refers to by name. */
enum {
    TAR_TYPE_FILE         = '0', /* Normal file */
    TAR_TYPE_DIRECTORY    = '5', /* Directory */
    TAR_TYPE_PAX_HEADER_X = 'x', /* Extended header for the next file */
    TAR_TYPE_PAX_HEADER_G = 'g'  /* Global extended header */
};
|
|
|
|
PACK(struct tar_header {
|
|
/* Pre-posix */
|
|
u8 file_name[100];
|
|
u8 file_mode[8];
|
|
u8 owner_id[8];
|
|
u8 group_id[8];
|
|
u8 file_size[12];
|
|
u8 last_modified[12];
|
|
u8 checksum[8];
|
|
|
|
/* Both */
|
|
u8 file_type;
|
|
u8 linked_file_name[100];
|
|
|
|
/* UStar */
|
|
u8 ustar_indicator[6];
|
|
u8 ustar_version[2];
|
|
u8 owner_user_name[32];
|
|
u8 owner_group_name[32];
|
|
u8 device_major_number[8];
|
|
u8 device_minor_number[8];
|
|
u8 file_name_prefix[155];
|
|
u8 padding[12];
|
|
});
|
|
|
|
/* Converts an octal ASCII number, as stored in tar header numeric fields,
 * to a u64.
 *
 * Leading space padding is skipped and conversion stops at the first byte
 * that is not an octal digit ('0'-'7'). Tar numeric fields may be terminated
 * by NUL or space before the end of the field; treating those bytes as
 * digits would corrupt the result. A string with no leading octal digits
 * yields 0.
 */
INTERNAL u64 str_oct_to_u64(struct string str)
{
    u64 i = 0;

    /* Skip leading space padding */
    while (i < str.len && str.text[i] == ' ') {
        ++i;
    }

    u64 n = 0;
    for (; i < str.len; ++i) {
        u8 c = (u8)str.text[i];
        if (c < '0' || c > '7') {
            /* Terminator (NUL / space) or garbage: the number ends here */
            break;
        }
        n = (n * 8) + (u64)(c - '0');
    }
    return n;
}
|
|
|
|
/* `prefix` will be prepended to all file names in the archive
|
|
*
|
|
* NOTE: The resulting archive merely points into the supplied tar data, no
|
|
* copying is done. Accessing the archive assumes that the data string is still valid.
|
|
*/
|
|
struct tar_archive tar_parse(struct arena *arena, struct string data, struct string prefix)
|
|
{
|
|
__prof;
|
|
|
|
struct tar_archive archive = ZI;
|
|
struct bitbuff bb = bitbuff_from_string(data);
|
|
struct bitbuff_reader br = br_from_bitbuff_no_debug(&bb);
|
|
|
|
u64 num_files = 0;
|
|
while (br_num_bytes_left(&br) > 1024) {
|
|
|
|
struct tar_header header = ZI;
|
|
br_read_bytes(&br, STRING_FROM_STRUCT(&header));
|
|
|
|
if (!string_eq(STRING_FROM_ARRAY(header.ustar_indicator), LIT("ustar\0"))) {
|
|
/* Invalid header */
|
|
ASSERT(false);
|
|
continue;
|
|
}
|
|
|
|
if (header.file_name_prefix[0] != 0) {
|
|
/* Header file name prefix not supported */
|
|
ASSERT(false);
|
|
continue;
|
|
}
|
|
|
|
struct string file_size_oct_str = { .len = 11, .text = header.file_size };
|
|
|
|
u64 file_size = str_oct_to_u64(file_size_oct_str);
|
|
u8 *file_data_ptr = br_read_bytes_raw(&br, file_size);
|
|
if (!file_data_ptr) {
|
|
file_size = 0;
|
|
}
|
|
struct string file_data = STRING(file_size, file_data_ptr);
|
|
|
|
/* Skip sector padding */
|
|
u64 remaining = (512 - (file_size % 512)) % 512;
|
|
br_seek_bytes(&br, remaining);
|
|
|
|
b32 is_dir = header.file_type == TAR_TYPE_DIRECTORY;
|
|
if (!is_dir && header.file_type != TAR_TYPE_FILE) {
|
|
/* Unsupported type */
|
|
ASSERT(header.file_type == TAR_TYPE_PAX_HEADER_X ||
|
|
header.file_type == TAR_TYPE_PAX_HEADER_G);
|
|
continue;
|
|
}
|
|
|
|
struct string file_name_cstr = string_from_cstr_no_limit((char *)header.file_name);
|
|
if (file_name_cstr.len >= 2) {
|
|
/* Chop off './' prefix */
|
|
file_name_cstr.len -= 2;
|
|
file_name_cstr.text += 2;
|
|
}
|
|
struct string file_name = string_cat(arena, prefix, file_name_cstr);
|
|
|
|
struct tar_entry *entry = arena_push(arena, struct tar_entry);
|
|
entry->is_dir = is_dir;
|
|
entry->file_name = file_name;
|
|
entry->data = file_data;
|
|
|
|
entry->next = archive.head;
|
|
archive.head = entry;
|
|
++num_files;
|
|
}
|
|
|
|
/* Build lookup table */
|
|
archive.lookup = fixed_dict_init(arena, (u64)((f64)num_files * ARCHIVE_LOOKUP_TABLE_CAPACITY_FACTOR));
|
|
for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
|
|
fixed_dict_set(arena, &archive.lookup, entry->file_name, entry);
|
|
}
|
|
|
|
/* Build hierarchy */
|
|
/* NOTE: This is a separate pass because tar entry order is not guaranteed
|
|
* (IE file entries may be encountered before their parent directory entry) */
|
|
for (struct tar_entry *entry = archive.head; entry; entry = entry->next) {
|
|
/* Enter into hierarchy */
|
|
if (!entry->is_dir) {
|
|
/* Find parent entry */
|
|
struct tar_entry *parent_entry = NULL;
|
|
for (struct string parent_dir_name = entry->file_name; parent_dir_name.len > 0; --parent_dir_name.len) {
|
|
if (parent_dir_name.text[parent_dir_name.len - 1] == '/') {
|
|
parent_entry = fixed_dict_get(&archive.lookup, parent_dir_name);
|
|
break;
|
|
}
|
|
}
|
|
/* Insert child into parent's list */
|
|
if (parent_entry) {
|
|
entry->next_child = parent_entry->next_child;
|
|
parent_entry->next_child = entry;
|
|
}
|
|
}
|
|
}
|
|
|
|
return archive;
|
|
}
|
|
|
|
struct tar_entry *tar_get(const struct tar_archive *archive, struct string name)
|
|
{
|
|
return fixed_dict_get(&archive->lookup, name);
|
|
}
|