libmobi
C library for handling MOBI format ebook documents
Macros | Functions
util.h File Reference
#include "config.h"
#include "mobi.h"
#include "memory.h"
#include "buffer.h"
#include "read.h"
#include "compression.h"
#include <zlib.h>

Go to the source code of this file.

Macros

#define strdup   mobi_strdup
 strdup replacement
 
#define m_uncompress   uncompress
 
#define m_crc32   crc32
 
#define M_OK   Z_OK
 
#define UNUSED(x)   (void)(x)
 
#define AUDI_MAGIC   "AUDI"
 Magic numbers of records.
 
#define CDIC_MAGIC   "CDIC"
 
#define CMET_MAGIC   "CMET"
 
#define EXTH_MAGIC   "EXTH"
 
#define FDST_MAGIC   "FDST"
 
#define FONT_MAGIC   "FONT"
 
#define HUFF_MAGIC   "HUFF"
 
#define IDXT_MAGIC   "IDXT"
 
#define INDX_MAGIC   "INDX"
 
#define LIGT_MAGIC   "LIGT"
 
#define MOBI_MAGIC   "MOBI"
 
#define ORDT_MAGIC   "ORDT"
 
#define RESC_MAGIC   "RESC"
 
#define SRCS_MAGIC   "SRCS"
 
#define TAGX_MAGIC   "TAGX"
 
#define VIDE_MAGIC   "VIDE"
 
#define BOUNDARY_MAGIC   "BOUNDARY"
 
#define EOF_MAGIC   "\xe9\x8e\r\n"
 
#define REPLICA_MAGIC   "%MOP"
 
#define EPOCH_MAC_DIFF   2082844800UL
 Difference in seconds between epoch time and mac time.
 
#define PALMDB_HEADER_LEN   78
 
#define PALMDB_NAME_SIZE_MAX   32
 
#define PALMDB_RECORD_INFO_SIZE   8
 
#define PALMDB_ATTRIBUTE_DEFAULT   0
 
#define PALMDB_VERSION_DEFAULT   0
 
#define PALMDB_MODNUM_DEFAULT   0
 
#define PALMDB_APPINFO_DEFAULT   0
 
#define PALMDB_SORTINFO_DEFAULT   0
 
#define PALMDB_TYPE_DEFAULT   "BOOK"
 
#define PALMDB_CREATOR_DEFAULT   "MOBI"
 
#define PALMDB_NEXTREC_DEFAULT   0
 
#define RECORD0_HEADER_LEN   16
 
#define RECORD0_TEXT_SIZE_MAX   4096
 
#define RECORD0_FULLNAME_SIZE_MAX   1024
 
#define CDIC_HEADER_LEN   16
 
#define CDIC_RECORD_MAXCNT   1024
 
#define HUFF_CODELEN_MAX   16
 
#define HUFF_HEADER_LEN   24
 
#define HUFF_RECORD_MAXCNT   1024
 
#define HUFF_RECORD_MINSIZE   2584
 
#define FONT_HEADER_LEN   24
 
#define MEDIA_HEADER_LEN   12
 
#define FONT_SIZEMAX   (50 * 1024 * 1024)
 
#define RAWTEXT_SIZEMAX   0xfffffff
 
#define MOBI_HEADER_V2_SIZE   0x18
 
#define MOBI_HEADER_V3_SIZE   0x74
 
#define MOBI_HEADER_V4_SIZE   0xd0
 
#define MOBI_HEADER_V5_SIZE   0xe4
 
#define MOBI_HEADER_V6_SIZE   0xe4
 
#define MOBI_HEADER_V6_EXT_SIZE   0xe8
 
#define MOBI_HEADER_V7_SIZE   0xe4
 
#define max(a, b)   ((a) > (b) ? (a) : (b))
 
#define min(a, b)   ((a) < (b) ? (a) : (b))
 
#define ARRAYSIZE(arr)   (sizeof(arr) / sizeof(arr[0]))
 
#define MOBI_TITLE_SIZEMAX   1024
 

Functions

int mobi_bitcount (const uint8_t byte)
 Get number of bits set in a given byte. More...
 
MOBI_RET mobi_delete_record_by_seqnumber (MOBIData *m, const size_t num)
 Delete palm database record with given sequential number from MOBIData structure. More...
 
MOBI_RET mobi_swap_mobidata (MOBIData *m)
 Swap KF7 and KF8 MOBIData structures in a hybrid file. More...
 
char * mobi_strdup (const char *s)
 strdup replacement More...
 
bool mobi_is_cp1252 (const MOBIData *m)
 Check if document's text is cp1252 encoded. More...
 
bool mobi_has_drmkey (const MOBIData *m)
 Check if DRM key is set for the document. More...
 
bool mobi_has_drmcookies (const MOBIData *m)
 Check if DRM cookies are set for the document. More...
 
MOBI_RET mobi_cp1252_to_utf8 (char *output, const char *input, size_t *outsize, const size_t insize)
 Convert cp1252 encoded string to utf-8. More...
 
MOBI_RET mobi_utf8_to_cp1252 (char *output, const char *input, size_t *outsize, const size_t insize)
 Convert utf-8 encoded string to cp1252. More...
 
uint8_t mobi_ligature_to_cp1252 (const uint8_t byte1, const uint8_t byte2)
 Decode ligature to cp1252. More...
 
uint16_t mobi_ligature_to_utf16 (const uint32_t byte1, const uint32_t byte2)
 Decode ligature to utf-16. More...
 
MOBIFiletype mobi_determine_resource_type (const MOBIPdbRecord *record)
 Get resource type (image, font) by checking its magic header. More...
 
MOBIFiletype mobi_determine_flowpart_type (const MOBIRawml *rawml, const size_t part_number)
 Get file type of given part with number [part_number]. More...
 
MOBI_RET mobi_base32_decode (uint32_t *decoded, const char *encoded)
 Decode positive number from base 32 to base 10. More...
 
MOBIFiletype mobi_get_resourcetype_by_uid (const MOBIRawml *rawml, const size_t uid)
 Get MOBIFiletype type of MOBIPart resource record with given unique id. More...
 
uint32_t mobi_get_exthsize (const MOBIData *m)
 Get size of serialized exth record including padding. More...
 
uint32_t mobi_get_drmsize (const MOBIData *m)
 Get size of serialized DRM data. More...
 
uint16_t mobi_get_records_count (const MOBIData *m)
 Get count of palm database records. More...
 
void mobi_remove_zeros (unsigned char *buffer, size_t *len)
 Remove null characters from char buffer. More...
 
MOBI_RET mobi_add_audio_resource (MOBIPart *part)
 Replace part data with decoded audio data. More...
 
MOBI_RET mobi_add_video_resource (MOBIPart *part)
 Replace part data with decoded video data. More...
 
MOBI_RET mobi_add_font_resource (MOBIPart *part)
 Replace part data with decoded font data. More...
 
MOBI_RET mobi_set_fullname (MOBIData *m, const char *fullname)
 Set ebook full name stored in Record 0 at offset given in MOBI header. More...
 
MOBI_RET mobi_set_pdbname (MOBIData *m, const char *name)
 Set palm database name. More...
 
void mobi_free_internals (MOBIData *m)
 Free internals. More...
 
uint32_t mobi_get32be (const unsigned char buf[4])
 Convert char buffer to 32-bit unsigned integer big endian. More...
 
uint32_t mobi_get32le (const unsigned char buf[4])
 Convert char buffer to 32-bit unsigned integer little endian. More...
 

Detailed Description

Copyright (c) 2014 Bartek Fabiszewski http://www.fabiszewski.net

This file is part of libmobi. Licensed under LGPL, either version 3, or any later. See http://www.gnu.org/licenses/

Function Documentation

◆ mobi_add_audio_resource()

MOBI_RET mobi_add_audio_resource ( MOBIPart *  part)

Replace part data with decoded audio data.

Parameters
[in,out]partMOBIPart structure containing audio resource, decoded part type will be set in the structure
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_add_font_resource()

MOBI_RET mobi_add_font_resource ( MOBIPart *  part)

Replace part data with decoded font data.

Parameters
[in,out]partMOBIPart structure containing font resource, decoded part type will be set in the structure
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_add_video_resource()

MOBI_RET mobi_add_video_resource ( MOBIPart *  part)

Replace part data with decoded video data.

Parameters
[in,out]partMOBIPart structure containing video resource, decoded part type will be set in the structure
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_base32_decode()

MOBI_RET mobi_base32_decode ( uint32_t *  decoded,
const char *  encoded 
)

Decode positive number from base 32 to base 10.

Base 32 characters must be upper case. Maximal supported value is VVVVVV.

Parameters
[in,out]decodedBase 10 output number
[in]encodedBase 32 input number
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_bitcount()

int mobi_bitcount ( const uint8_t  byte)

Get number of bits set in a given byte.

Parameters
[in]byteA byte
Returns
Number of bits set

◆ mobi_cp1252_to_utf8()

MOBI_RET mobi_cp1252_to_utf8 ( char *  output,
const char *  input,
size_t *  outsize,
const size_t  insize 
)

Convert cp1252 encoded string to utf-8.

Maximum length of output string is 3 * (input string length) + 1. Output string will be null terminated (even if truncated).

Parameters
[in,out]outputOutput string
[in]inputInput string
[in,out]outsizeSize of the allocated output buffer, will be set to output string length (without null terminator) on return
[in]insizeLength of the input string.
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_delete_record_by_seqnumber()

MOBI_RET mobi_delete_record_by_seqnumber ( MOBIData *  m,
const size_t  num 
)

Delete palm database record with given sequential number from MOBIData structure.

Parameters
[in,out]mMOBIData structure with loaded data
[in]numSequential number
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_determine_flowpart_type()

MOBIFiletype mobi_determine_flowpart_type ( const MOBIRawml *  rawml,
const size_t  part_number 
)

Get file type of given part with number [part_number].

Parameters
[in]rawmlMOBIRawml parsed records structure
[in]part_numberSequential number of the part within rawml structure
Returns
MOBIFiletype file type

◆ mobi_determine_resource_type()

MOBIFiletype mobi_determine_resource_type ( const MOBIPdbRecord *  record)

Get resource type (image, font) by checking its magic header.

Parameters
[in]recordMOBIPdbRecord structure containing unknown record type
Returns
MOBIFiletype file type, T_UNKNOWN if not determined, T_BREAK if end of records mark found

◆ mobi_free_internals()

void mobi_free_internals ( MOBIData *  m)

Free internals.

Parameters
[in,out]mMOBIData structure with raw data and metadata

◆ mobi_get32be()

uint32_t mobi_get32be ( const unsigned char  buf[4])

Convert char buffer to 32-bit unsigned integer big endian.

Parameters
[in]bufInput buffer
Returns
Converted value

◆ mobi_get32le()

uint32_t mobi_get32le ( const unsigned char  buf[4])

Convert char buffer to 32-bit unsigned integer little endian.

Parameters
[in]bufInput buffer
Returns
Converted value

◆ mobi_get_drmsize()

uint32_t mobi_get_drmsize ( const MOBIData *  m)

Get size of serialized DRM data.

Parameters
[in]mMOBIData structure
Returns
Size of DRM data, zero on failure

◆ mobi_get_exthsize()

uint32_t mobi_get_exthsize ( const MOBIData *  m)

Get size of serialized exth record including padding.

Parameters
[in]mMOBIData structure
Returns
Size of exth record, zero on failure

◆ mobi_get_records_count()

uint16_t mobi_get_records_count ( const MOBIData *  m)

Get count of palm database records.

Parameters
[in]mMOBIData structure
Returns
Count of records, zero on failure

◆ mobi_get_resourcetype_by_uid()

MOBIFiletype mobi_get_resourcetype_by_uid ( const MOBIRawml *  rawml,
const size_t  uid 
)

Get MOBIFiletype type of MOBIPart resource record with given unique id.

Parameters
[in]rawmlMOBIRawml structure with loaded data
[in]uidUnique id
Returns
MOBIFiletype file type of the resource, T_UNKNOWN on failure

◆ mobi_has_drmcookies()

bool mobi_has_drmcookies ( const MOBIData *  m)

Check if DRM cookies are set for the document.

Parameters
[in]mMOBIData structure with loaded Record(s) 0 headers
Returns
true or false

◆ mobi_has_drmkey()

bool mobi_has_drmkey ( const MOBIData *  m)

Check if DRM key is set for the document.

Parameters
[in]mMOBIData structure with loaded Record(s) 0 headers
Returns
true or false

◆ mobi_is_cp1252()

bool mobi_is_cp1252 ( const MOBIData *  m)

Check if document's text is cp1252 encoded.

Parameters
[in]mMOBIData structure holding document data and metadata
Returns
True or false

◆ mobi_ligature_to_cp1252()

uint8_t mobi_ligature_to_cp1252 ( const uint8_t  byte1,
const uint8_t  byte2 
)

Decode ligature to cp1252.

Some Latin ligatures are encoded in indices to facilitate search. They are listed in the LIGT header, but it seems every LIGT header contains the same 5 ligatures (even if not all of them are used). So, instead of parsing the header, we use static replacements. Invalid control characters are skipped.

Parameters
[in]byte1First byte - control character
[in]byte2Second byte of the ligature
Returns
Ligature in cp1252 encoding, zero if not found

◆ mobi_ligature_to_utf16()

uint16_t mobi_ligature_to_utf16 ( const uint32_t  byte1,
const uint32_t  byte2 
)

Decode ligature to utf-16.

Parameters
[in]byte1First byte - control character, should be <= 5
[in]byte2Second byte of the ligature
Returns
Ligature in utf-16 encoding, uni_replacement if not found

◆ mobi_remove_zeros()

void mobi_remove_zeros ( unsigned char *  buffer,
size_t *  len 
)

Remove null characters from char buffer.

Parameters
[in,out]bufferCharacter buffer
[in,out]lenSize of buffer, will be updated with new length

◆ mobi_set_fullname()

MOBI_RET mobi_set_fullname ( MOBIData *  m,
const char *  fullname 
)

Set ebook full name stored in Record 0 at offset given in MOBI header.

Parameters
[in,out]mMOBIData structure with loaded data
[in]fullnameZero terminated full name string to be stored
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_set_pdbname()

MOBI_RET mobi_set_pdbname ( MOBIData *  m,
const char *  name 
)

Set palm database name.

Parameters
[in,out]mMOBIData structure with loaded data
[in]nameName
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_strdup()

char* mobi_strdup ( const char *  s)

strdup replacement

Returned pointer must be freed by caller

Parameters
[in]sInput string
Returns
Duplicated string

◆ mobi_swap_mobidata()

MOBI_RET mobi_swap_mobidata ( MOBIData *  m)

Swap KF7 and KF8 MOBIData structures in a hybrid file.

MOBIData structures form a circular linked list in case of hybrid files. By default the KF8 structure is the first one in the list. This function puts the KF7 structure in first place, so that it is used by default.

Parameters
[in,out]mMOBIData structure
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_utf8_to_cp1252()

MOBI_RET mobi_utf8_to_cp1252 ( char *  output,
const char *  input,
size_t *  outsize,
const size_t  insize 
)

Convert utf-8 encoded string to cp1252.

Characters out of range will be replaced with substitute character

Parameters
[in,out]outputOutput string
[in]inputInput string
[in,out]outsizeSize of the allocated output buffer, will be set to output string length (without null terminator) on return
[in]insizeLength of the input string.
Returns
MOBI_RET status code (on success MOBI_SUCCESS)