#include <zlib.h>
#include <bzlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <talloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <attr/xattr.h>
#include <time.h>
#include <errno.h>
#include "cfile.h"
Data Structures | |
| struct | cfile_struct |
| the structure of the actual file handle information we pass around More... | |
| struct | size_xattr_struct |
| The structure used in the extended attributes to store uncompressed file sizes and the associated time stamp. More... | |
Defines | |
| #define | CFILE_BUFFER_SIZE 1024 |
| #define | isafullline(line, len) ((line)[(len-1)] == '\n' || (line)[(len-1)] == '\r') |
Typedefs | |
| typedef enum filetype_enum | CFile_type |
| A set of the types of file we recognise. | |
Enumerations | |
| enum | filetype_enum { UNCOMPRESSED, GZIPPED, BZIPPED } |
Functions | |
| static void | bzip_attempt_store (CFile *fp, off_t size) |
| Attempt to store the file size in the extended user attributes. | |
| static int | cf_destroyclose (CFile *fp) |
| close the file when the file pointer is destroyed. | |
| static void | finalise_open (CFile *fp) |
| The common things we do with a file handle when it's being opened. | |
| static int | bz_fgetc (CFile *fp) |
| An implementation of fgetc for bzip2 files. | |
| static CFile_type | file_extension_type (const char *name) |
| Detect the file type from its extension. | |
| CFile * | cfopen (const char *name, const char *mode) |
| Open a file for reading or writing. | |
| CFile * | cfdopen (int filedesc, const char *mode) |
| Open a file from a file descriptor. | |
| static off_t | bzip_calculate_size (CFile *fp) |
| Calculate the size of a bzip2 file by running it through bzcat. | |
| static int | bzip_attribute_size (CFile *fp) |
| Give the uncompressed file size, or 0 if errors. | |
| off_t | cfsize (CFile *fp) |
| Returns the _uncompressed_ file size. | |
| int | cfeof (CFile *fp) |
| Returns true if we've reached the end of the file being read. | |
| char * | cfgets (CFile *fp, char *str, int len) |
| Get a string from the file, up to a maximum length or newline. | |
| char * | cfgetline (CFile *fp, char *line, int *maxline) |
| Read a full line from the file, regardless of length. | |
| int | cvfprintf (CFile *fp, const char *fmt, va_list ap) |
| Print a formatted string to the file, from another function. | |
| int | cfprintf (CFile *fp, const char *fmt,...) |
| Print a formatted string to the file. | |
| int | cfread (CFile *fp, void *ptr, size_t size, size_t num) |
| Read a block of data from the file. | |
| int | cfwrite (CFile *fp, const void *ptr, size_t size, size_t num) |
| Write a block of data to the file. | |
| int | cfflush (CFile *fp) |
| Flush the file's output buffer. | |
| int | cfclose (CFile *fp) |
| Close the given file handle. | |
Variables | |
| void * | pwlib_context = NULL |
| #define CFILE_BUFFER_SIZE 1024 |
The size of the character buffer for reading lines from bzip2 files.
This isn't really a file cache, just a way of saving us single-byte calls to bzread.
| #define isafullline | ( | line, | |||
| len | ) | ((line)[(len-1)] == '\n' || (line)[(len-1)] == '\r') |
Macro to check whether the line is terminated by a newline or equivalent
A set of the types of file we recognise.
| enum filetype_enum |
| static int bz_fgetc | ( | CFile * | fp | ) | [static] |
An implementation of fgetc for bzip2 files.
bzlib does not implement any of the 'low level' string functions. In order to support treating a bzip2 file as a 'real' file, we need to provide fgets (for the cfgetline function, if nothing else). The stdio.c implementation relies on fgetc to get one character at a time, but this would be inefficient if done as continued one-byte reads from bzlib. So we use the buffer pointer to store chunks of the file to read from.
| fp | The file to read from. |
| static void bzip_attempt_store | ( | CFile * | fp, | |
| off_t | size | |||
| ) | [static] |
Attempt to store the file size in the extended user attributes.
If we've had to calculate the uncompressed file size the hard way, then it's worth saving this. This routine attempts to do so. If we can't, the value is discarded and the user will have to wait for the file size to be calculated afresh each time.
| fp | The file handle to check. | |
| size | The uncompressed size of the file in bytes. |
| static int bzip_attribute_size | ( | CFile * | fp | ) | [static] |
Give the uncompressed file size, or 0 if errors.
This function checks whether we:
| fp | The file handle to check. |
| static off_t bzip_calculate_size | ( | CFile * | fp | ) | [static] |
Calculate the size of a bzip2 file by running it through bzcat.
The only way to get the uncompressed size of a bzip2 file, if there's no other information about it, is to count every character. Here we run it through bzcat and wc -c, which some might argue was horribly inefficient - but these tools are designed for the job, whereas we'd have to run it through a buffer here anyway.
If we have extended attributes, we can try to cache this value in them (see below).
| fp | The file whose size needs calculating. |
| static int cf_destroyclose | ( | CFile * | fp | ) | [static] |
close the file when the file pointer is destroyed.
This function is given to talloc_set_destructor so that, when the user does a talloc_free on the file handle or any context that contained it, the file that was opened with that handle is automatically closed.
| fp | The file handle that is being destroyed. Thanks to improvements in the talloc library, this is now a typed pointer (it was formerly a void pointer that we had to cast). |
| int cfclose | ( | CFile * | fp | ) |
Close the given file handle.
This function frees the memory allocated for the file handle and closes the associated file.
| fp | The file handle to close. |
| CFile* cfdopen | ( | int | filedesc, | |
| const char * | mode | |||
| ) |
Open a file from a file descriptor.
Allows you to open the file specified by the given file descriptor, with the same mode options as a regular file. Originally necessary to allow access to stdin and stdout, but with the current handling of "-" by cfopen this should be mostly unnecessary.
| filedesc | An integer file descriptor number. | |
| mode | The mode to open the file in ("r" for read, "w" for write). |
| int cfeof | ( | CFile * | fp | ) |
Returns true if we've reached the end of the file being read.
This mostly passes through the state of the lower-level's EOF checking. But bzlib doesn't seem to correctly return BZ_STREAM_END when the stream has actually reached its end, so we have to check another way - whether the last buffer read was zero bytes long.
| fp | The file handle to check. |
| int cfflush | ( | CFile * | fp | ) |
Flush the file's output buffer.
This function flushes any data passed to write or printf but not yet written to disk. If the file is being read, it has no effect.
| fp | The file handle to flush. |
| char* cfgetline | ( | CFile * | fp, | |
| char * | line, | |||
| int * | maxline | |||
| ) |
Read a full line from the file, regardless of length.
Of course, with fgets you can't always guarantee you've read an entire line. You have to know the length of the longest line, in advance, in order to read each line from the file in one call. cfgetline solves this problem by progressively extending the string you pass until the entire line has been read. To do this it uses talloc_realloc, and a variable which holds the length of the line allocated so far. If you haven't initialised the line beforehand, cfgetline will do so (allocating it against the file pointer's context). If you have, then the magic of talloc_realloc allocates the new space against the context that you originally allocated your buffer against. So to speak.
In normal usage, this 'buffer' will expand but never contract. It expands to half again its current size, so if you have a very long line lurking in your input somewhere, then it's going to set the buffer size for all the lines after it. If you're concerned by this wasting a lot of memory, then set the length negative (while keeping its absolute size). This will signal to cfgetline to shrink the line buffer after this line has been read. For example, if your line buffer is currently 1024 and you want it to shrink, then set it to -1024 before calling cfgetline. In reality, this is almost never going to be a problem.
| fp | The file handle to read from. | |
| line | A character array to read the line into, and optionally extend. | |
| maxline | A pointer to an integer which will contain the length of the string currently allocated. |
| char* cfgets | ( | CFile * | fp, | |
| char * | str, | |||
| int | len | |||
| ) |
Get a string from the file, up to a maximum length or newline.
For gzipped and uncompressed files, this simply uses their respective library's fgets implementation. Since bzlib doesn't provide such a function, we have to copy the implementation from stdio.c and use it here, referring to our own bz_fgetc function.
| fp | The file handle to read from. | |
| str | An array of characters to read the file contents into. | |
| len | The maximum length, plus one, of the string to read. In other words, if this is 10, then fgets will read a maximum of nine characters from the file. The character after the last character read is always set to \0 to terminate the string. The newline character is kept on the line if there was room to read it. |
| CFile* cfopen | ( | const char * | name, | |
| const char * | mode | |||
| ) |
Open a file for reading or writing.
Open the given file using the given mode. Opens the file and returns a CFile handle to it. Mode must start with 'r' or 'w' to read or write (respectively) - other modes are not expected to work.
| name | The name of the file to open. If this is "-", then stdin is read from or stdout is written to, as appropriate (both being used uncompressed.) | |
| mode | "r" to specify reading, "w" for writing. |
| int cfprintf | ( | CFile * | fp, | |
| const char * | fmt, | |||
| ... | ||||
| ) |
Print a formatted string to the file.
The standard fprintf implementation. For bzip2 and gzip files this allocates a temporary buffer for each call. This might seem inefficient, but otherwise we have the fgets problem all over again...
| fp | The file handle to write to. | |
| fmt | The format string to print. | |
| ... | Any other variables to be printed using the format string. |
| int cfread | ( | CFile * | fp, | |
| void * | ptr, | |||
| size_t | size, | |||
| size_t | num | |||
| ) |
Read a block of data from the file.
Reads a given number of structures of a specified size from the file into the memory pointer given. The destination memory must be allocated first. Some read functions only specify one size; we use two here because that's what fread requires (and it's better for the programmer anyway IMHO).
| fp | The file handle to read from. | |
| ptr | The memory to write into. | |
| size | The size of each structure in bytes. | |
| num | The number of structures to read. |
| off_t cfsize | ( | CFile * | fp | ) |
Returns the _uncompressed_ file size.
The common way of reporting your progress through reading a file is as a proportion of the uncompressed size. But a simple stat of the compressed file will give you a much lower figure. So here we extract the size of the uncompressed content of the file. Naturally this process is easy with uncompressed files. It's also fairly easy with gzip files - the size is a 32-bit little-endian signed int (I think) at the end of the file. Unfortunately, bzip2 files do not carry this information, so we have to read the entire file through bzcat and wc -c. This is easier than reading it directly, although it then relies on the availability of those two binaries, and may therefore make this routine not portable. I'm not sure if this introduces any security holes in this library. Unfortunately, correspondence with Julian Seward has confirmed that there's no other way of determining the exact uncompressed file size, as it's not stored in the bzip2 file itself.
HOWEVER: we can save the next call to cfsize on this file a considerable amount of work if we save the size in a filesystem extended attribute. Because rewriting an existing file does a truncate rather than delete the inode, the attribute may get out of sync with the actual file. So we also write the current time as a timestamp on that data. If the file's mtime is greater than that timestamp, then the data is out of date and must be recalculated. Make sure your file system has the user_xattr option set if you want to use this feature!
| fp | The file handle to check |
| int cfwrite | ( | CFile * | fp, | |
| const void * | ptr, | |||
| size_t | size, | |||
| size_t | num | |||
| ) |
Write a block of data to the file.
Writes a given number of structures of a specified size into the file from the memory pointer given.
| fp | The file handle to write into. | |
| ptr | The memory to read from. | |
| size | The size of each structure in bytes. | |
| num | The number of structures to write. |
| int cvfprintf | ( | CFile * | fp, | |
| const char * | fmt, | |||
| va_list | ap | |||
| ) |
Print a formatted string to the file, from another function.
The standard vfprintf implementation. For those people that have to receive a '...' argument in their own function and send it to a CFile.
| fp | The file handle to write to. | |
| fmt | The format string to print. | |
| ap | The compiled va_list of parameters to print. |
| static CFile_type file_extension_type | ( | const char * | name | ) | [static] |
Detect the file type from its extension.
A common routine to detect the file type from its extension. This probably also detects a file with a name like 'foo.gzbar'.
| name | The name of the file to check. |
| static void finalise_open | ( | CFile * | fp | ) | [static] |
The common things we do with a file handle when it's being opened.
This function sets the remaining fields that are common to all files. We have this as a separate function because it's called from various parts of cfopen and also from cfdopen.
| fp | The file handle to finalise. |
| void* pwlib_context = NULL |
1.4.7