libmobi
C library for handling MOBI format ebook documents
Macros | Functions
index.c File Reference

Functions to parse index records. More...

#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "index.h"
#include "util.h"
#include "memory.h"
#include "debug.h"
#include "buffer.h"

Macros

#define _GNU_SOURCE   1
 
#define __USE_BSD   /* for strdup on linux/glibc */
 

Functions

size_t mobi_indx_get_label (unsigned char *output, MOBIBuffer *buf, const size_t length, const size_t has_ligatures)
 Read index entry label from buffer pointing at index record data. More...
 
size_t mobi_ordt_getbuffer (const MOBIOrdt *ordt, MOBIBuffer *buf, uint16_t *offset)
 Get encoded character from dictionary index. The characters are offsets into the ORDT table. More...
 
uint16_t mobi_ordt_lookup (const MOBIOrdt *ordt, const uint16_t offset)
 Fetch UTF-16 value from ORDT2 table. More...
 
size_t mobi_getstring_ordt (const MOBIOrdt *ordt, MOBIBuffer *buf, unsigned char *output, size_t length)
 Get UTF-8 string from buffer, decoded by lookups in ORDT2 table. More...
 
MOBI_RET mobi_parse_indx (const MOBIPdbRecord *indx_record, MOBIIndx *indx, MOBITagx *tagx, MOBIOrdt *ordt)
 Parser of INDX record. More...
 
MOBI_RET mobi_parse_index (const MOBIData *m, MOBIIndx *indx, const size_t indx_record_number)
 Parser of a set of index records. More...
 
MOBI_RET mobi_get_indxentry_tagvalue (uint32_t *tagvalue, const MOBIIndexEntry *entry, const unsigned tag_arr[])
 Get a value of tag[tagid][tagindex] for given index entry. More...
 
size_t mobi_get_indxentry_tagarray (uint32_t **tagarr, const MOBIIndexEntry *entry, const size_t tagid)
 Get array of tagvalues of tag[tagid] for given index entry. More...
 
uint32_t mobi_get_orth_entry_offset (const MOBIIndexEntry *entry)
 Get entry start offset for the orth entry. More...
 
uint32_t mobi_get_orth_entry_length (const MOBIIndexEntry *entry)
 Get text length for the orth entry. More...
 
bool mobi_indx_has_tag (const MOBIIndx *indx, const size_t tagid)
 Check if given tagid is present in the index. More...
 
char * mobi_get_cncx_string (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset)
 Get compiled index entry string. More...
 
char * mobi_get_cncx_string_utf8 (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset, MOBIEncoding cncx_encoding)
 Get compiled index entry string, converted to utf8 encoding. More...
 
char * mobi_get_cncx_string_flat (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset, const size_t length)
 Get flat index entry string. More...
 
MOBI_RET mobi_decode_infl (unsigned char *decoded, int *decoded_size, const unsigned char *rule)
 Decode compiled infl index entry. More...
 
size_t mobi_trie_get_inflgroups (char **infl_strings, MOBITrie *const root, const char *string)
 Get all matches for given string from trie structure. More...
 
MOBI_RET mobi_trie_insert_infl (MOBITrie **root, const MOBIIndx *indx, size_t i)
 Insert reversed inflection string for given entry into trie structure. More...
 

Detailed Description

Functions to parse index records.

Copyright (c) 2020 Bartek Fabiszewski http://www.fabiszewski.net

This file is part of libmobi. Licensed under LGPL, either version 3, or any later. See http://www.gnu.org/licenses/

Function Documentation

◆ mobi_decode_infl()

MOBI_RET mobi_decode_infl ( unsigned char *  decoded,
int *  decoded_size,
const unsigned char *  rule 
)

Decode compiled infl index entry.

Buffer decoded must be initialized with basic index entry. Basic index entry will be transformed into inflected form, based on compiled rule. Min. size of input buffer (decoded) must be INDX_INFLBUF_SIZEMAX + 1

Parameters
[in,out]decodedDecoded entry string
[in,out]decoded_sizeDecoded entry size
[in]ruleCompiled rule
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_get_cncx_string()

char* mobi_get_cncx_string ( const MOBIPdbRecord *  cncx_record,
const uint32_t  cncx_offset 
)

Get compiled index entry string.

Allocates memory for the string. Must be freed by caller.

Parameters
[in]cncx_recordMOBIPdbRecord structure with cncx record
[in]cncx_offsetOffset of string entry from the beginning of the record
Returns
Entry string or null if malloc failed

◆ mobi_get_cncx_string_flat()

char* mobi_get_cncx_string_flat ( const MOBIPdbRecord *  cncx_record,
const uint32_t  cncx_offset,
const size_t  length 
)

Get flat index entry string.

Allocates memory for the string. Must be freed by caller.

Parameters
[in]cncx_recordMOBIPdbRecord structure with cncx record
[in]cncx_offsetOffset of string entry from the beginning of the record
[in]lengthLength of the string to be extracted
Returns
Entry string

◆ mobi_get_cncx_string_utf8()

char* mobi_get_cncx_string_utf8 ( const MOBIPdbRecord *  cncx_record,
const uint32_t  cncx_offset,
MOBIEncoding  cncx_encoding 
)

Get compiled index entry string, converted to utf8 encoding.

Allocates memory for the string. Must be freed by caller.

Parameters
[in]cncx_recordMOBIPdbRecord structure with cncx record
[in]cncx_offsetOffset of string entry from the beginning of the record
[in]cncx_encodingEncoding
Returns
Entry string or null if malloc failed

◆ mobi_get_indxentry_tagarray()

size_t mobi_get_indxentry_tagarray ( uint32_t **  tagarr,
const MOBIIndexEntry *  entry,
const size_t  tagid 
)

Get array of tagvalues of tag[tagid] for given index entry.

Parameters
[in,out]tagarrPointer to tagvalues array
[in]entryIndex entry to be searched for the value
[in]tagidId of the tag
Returns
Size of the array (zero on failure)

◆ mobi_get_indxentry_tagvalue()

MOBI_RET mobi_get_indxentry_tagvalue ( uint32_t *  tagvalue,
const MOBIIndexEntry *  entry,
const unsigned  tag_arr[] 
)

Get a value of tag[tagid][tagindex] for given index entry.

Parameters
[in,out]tagvalueWill be set to a tag value
[in]entryIndex entry to be searched for the value
[in]tag_arrArray: tag_arr[0] = tagid, tag_arr[1] = tagindex
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_getstring_ordt()

size_t mobi_getstring_ordt ( const MOBIOrdt *  ordt,
MOBIBuffer *  buf,
unsigned char *  output,
size_t  length 
)

Get UTF-8 string from buffer, decoded by lookups in ORDT2 table.

Parameters
[in]ordtMOBIOrdt structure (ORDT data and metadata)
[in,out]bufMOBIBuffer structure with input string
[in,out]outputOutput buffer (INDX_LABEL_SIZEMAX + 1 bytes)
[in]lengthLength of input string contained in buf
Returns
Length of output string (without null terminator)

◆ mobi_indx_get_label()

size_t mobi_indx_get_label ( unsigned char *  output,
MOBIBuffer *  buf,
const size_t  length,
const size_t  has_ligatures 
)

Read index entry label from buffer pointing at index record data.

Parameters
[in,out]outputOutput buffer (INDX_LABEL_SIZEMAX + 1 bytes)
[in,out]bufMOBIBuffer structure, offset pointing at index entry label
[in]lengthNumber of bytes to be read
[in]has_ligaturesDecode ligatures if true
Returns
Length of output string (without null terminator); on error, buf->error is set to a MOBI_RET status

◆ mobi_indx_has_tag()

bool mobi_indx_has_tag ( const MOBIIndx *  indx,
const size_t  tagid 
)

Check if given tagid is present in the index.

Parameters
[in]indxIndex MOBIIndx structure
[in]tagidId of the tag
Returns
True on success, false otherwise

◆ mobi_ordt_getbuffer()

size_t mobi_ordt_getbuffer ( const MOBIOrdt *  ordt,
MOBIBuffer *  buf,
uint16_t *  offset 
)

Get encoded character from dictionary index. The characters are offsets into the ORDT table.

Parameters
[in]ordtMOBIOrdt structure (ORDT data and metadata)
[in,out]bufMOBIBuffer structure with index data
[in,out]offsetValue read from buffer
Returns
Number of bytes read (zero in case of error)

◆ mobi_ordt_lookup()

uint16_t mobi_ordt_lookup ( const MOBIOrdt *  ordt,
const uint16_t  offset 
)

Fetch UTF-16 value from ORDT2 table.

Parameters
[in]ordtMOBIOrdt structure (ORDT data and metadata)
[in]offsetOffset in ORDT2 table
Returns
UTF-16 code point

◆ mobi_parse_index()

MOBI_RET mobi_parse_index ( const MOBIData *  m,
MOBIIndx *  indx,
const size_t  indx_record_number 
)

Parser of a set of index records.

Parameters
[in]mMOBIData structure containing MOBI file metadata and data
[in,out]indxMOBIIndx structure to be filled with parsed entries
[in]indx_record_numberNumber of the first record of the set
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_parse_indx()

MOBI_RET mobi_parse_indx ( const MOBIPdbRecord *  indx_record,
MOBIIndx *  indx,
MOBITagx *  tagx,
MOBIOrdt *  ordt 
)

Parser of INDX record.

Parameters
[in]indx_recordMOBIPdbRecord structure with INDX record
[in,out]indxMOBIIndx structure to be filled with parsed entries
[in,out]tagxMOBITagx structure, will be filled with parsed TAGX section data if present in the INDX record, otherwise TAGX data will be used to parse the record
[in,out]ordtMOBIOrdt structure, will be filled with parsed ORDT sections
Returns
MOBI_RET status code (on success MOBI_SUCCESS)

◆ mobi_trie_get_inflgroups()

size_t mobi_trie_get_inflgroups ( char **  infl_strings,
MOBITrie *const  root,
const char *  string 
)

Get all matches for given string from trie structure.

Matches are made against the reversed string and all its substrings.

Parameters
[in,out]infl_stringsArray of returned strings
[in]rootRoot node of the tree
[in]stringString to be matched against the trie
Returns
Number of returned strings

◆ mobi_trie_insert_infl()

MOBI_RET mobi_trie_insert_infl ( MOBITrie **  root,
const MOBIIndx *  indx,
size_t  i 
)

Insert reversed inflection string for given entry into trie structure.

Parameters
[in,out]rootRoot node of the tree, created if NULL
[in]indxMOBIIndx infl index records
[in]iIndex entry number
Returns
MOBI_RET status code (on success MOBI_SUCCESS)