39#ifndef DOCTOTEXT_C_API_H
40#define DOCTOTEXT_C_API_H
51 #if defined(_WIN32) && !defined(_WIN64)
52 #define DOCTOTEXT_CALL __cdecl
54 #define DOCTOTEXT_CALL
59typedef struct DocToTextItem DocToTextItem;
340DllExport
unsigned int
DllExport DocToTextWriter *DOCTOTEXT_CALL doctotext_create_plain_text_writer()
Creates PlainTextWriter. PlainTextWriter writes parsed data from callbacks as plain text.
DllExport DocToTextParsingChain *DOCTOTEXT_CALL doctotext_connect_parsing_chain_to_exporter(DocToTextParsingChain *parsing_chain, DocToTextExporter *exporter)
Adds exporter to the parsing chain.
DllExport const char *DOCTOTEXT_CALL doctotext_info_get_plain_text(DocToTextInfo *info)
Returns parsed text from DocToTextInfo.
struct DocToTextParser DocToTextParser
DllExport DocToTextWriter *DOCTOTEXT_CALL doctotext_create_html_writer()
Creates HtmlWriter. HtmlWriter writes parsed data from callbacks as HTML. Example of usage:
DllExport void DOCTOTEXT_CALL doctotext_add_uint_parameter(DocToTextParameters *parameters, const char *name, unsigned int value)
Adds unsigned int parameter to parser parameters.
DllExport DocToTextParsingChain *DOCTOTEXT_CALL doctotext_connect_importer_to_exporter(DocToTextImporter *importer, DocToTextExporter *exporter)
Creates connection between importer and exporter and returns DocToTextParsingChain which contains all...
struct DocToTextParserManager DocToTextParserManager
DllExport void DOCTOTEXT_CALL doctotext_simple_extractor_add_callback_function(DocToTextSimpleExtractor *extractor, void(*callback)(DocToTextInfo *, void *data), void *data)
Adds a callback function to be called during parsing. Example of usage:
DllExport unsigned int DOCTOTEXT_CALL doctotext_info_get_uint_attribute(DocToTextInfo *info, const char *attribute_name)
Returns attribute value as an unsigned integer from DocToTextInfo.
DllExport void DOCTOTEXT_CALL doctotext_writer_write_header(DocToTextWriter *writer, FILE *out_stream)
Writes the beginning of text from callbacks to the output stream.
DllExport void DOCTOTEXT_CALL doctotext_parser_add_callback_on_new_node(DocToTextParser *parser, void(*callback)(DocToTextInfo *, void *data), void *data)
Adds a new function to execute when a new node is parsed. A node is a part of a hierarchical structure....
DllExport void DOCTOTEXT_CALL doctotext_writer_write(DocToTextWriter *writer, DocToTextInfo *info, FILE *out_stream)
Converts text from callback to html format.
DllExport DocToTextParserManager *DOCTOTEXT_CALL doctotext_init_parser_manager(const char *path_to_plugins)
Creates new parser manager with all available parsers.
DllExport DocToTextParser *DOCTOTEXT_CALL doctotext_parser_manager_get_parser_by_extension(DocToTextParserManager *parser_manager, const char *format)
Returns proper parser for given format. The format is defined by file extension. Example of usage:
DllExport void DOCTOTEXT_CALL doctotext_free_parser(DocToTextParser *parser)
Frees parser. Remember not to use function free(). DocToTextParser is allocated using operator new (f...
struct DocToTextInfo DocToTextInfo
struct DocToTextParameters DocToTextParameters
DllExport const char *DOCTOTEXT_CALL doctotext_simple_extractor_get_plain_text(DocToTextSimpleExtractor *extractor)
Gets parsed plain text from a DocToTextSimpleExtractor object.
DllExport void DOCTOTEXT_CALL doctotext_parser_add_parameters(DocToTextParser *parser, DocToTextParameters *parameters)
Adds DocToTextParameters to the parser. Every parser recursively passes DocToTextParameters to another pars...
DllExport void DOCTOTEXT_CALL doctotext_writer_write_footer(DocToTextWriter *writer, FILE *out_stream)
Writes the end of text from callbacks to the output stream.
DllExport void DOCTOTEXT_CALL doctotext_info_set_skip(DocToTextInfo *info, bool skip)
Sets skip flag in DocToTextInfo. If skip is true then current node will be skipped....
struct DocToTextWriter DocToTextWriter
DllExport DocToTextExporter *DOCTOTEXT_CALL doctotext_create_html_exporter(FILE *output_stream)
Creates a new DocToTextExporter object. This object is used to export parsed data to output as a html...
DllExport void DOCTOTEXT_CALL doctotext_free_transformer(DocToTextTransformer *transformer)
Frees transformer and all resources allocated by the transformer. Remember not to use function free()...
DllExport void DOCTOTEXT_CALL doctotext_free_writer(DocToTextWriter *writer)
Frees HtmlWriter. DocToTextWriter is allocated using operator new (from C++) and is supposed to be de...
DllExport void DOCTOTEXT_CALL doctotext_add_float_parameter(DocToTextParameters *parameters, const char *name, float value)
Adds float parameter to parser parameters.
DllExport DocToTextExporter *DOCTOTEXT_CALL doctotext_create_plain_text_exporter(FILE *output_stream)
Creates a new DocToTextExporter object. This object is used to export parsed data to output as a plai...
DllExport void DOCTOTEXT_CALL doctotext_info_set_cancel_parser(DocToTextInfo *info, bool cancel)
Sets cancel flag in DocToTextInfo. If cancel is true then the parsing chain will be stopped....
DllExport void DOCTOTEXT_CALL doctotext_parser_parse(DocToTextParser *parser)
Starts parsing loaded data. The data comes from a file or from a buffer.
struct DocToTextTransformer DocToTextTransformer
struct DocToTextSimpleExtractor DocToTextSimpleExtractor
DllExport void DOCTOTEXT_CALL doctotext_free_parsing_chain(DocToTextParsingChain *parsing_chain)
Frees parsing_chain and all resources allocated by the parsing chain. Remember not to use function fr...
DllExport DocToTextImporter *DOCTOTEXT_CALL doctotext_create_importer_from_file_name(DocToTextParserManager *manager, const char *file_name)
Creates a new DocToTextImporter object. This object is used to import a file and parse it using avail...
DllExport char ** doctotext_parser_manager_get_available_formats(DocToTextParserManager *parser_manager, unsigned int *formats_number)
struct DocToTextExporter DocToTextExporter
DllExport DocToTextImporter *DOCTOTEXT_CALL doctotext_create_importer_from_stream(DocToTextParserManager *manager, FILE *input_stream)
Creates a new DocToTextImporter object. This object is used to import data from an input stream and pa...
DllExport void DOCTOTEXT_CALL doctotext_parsing_chain_set_input(DocToTextParsingChain *parsing_chain, FILE *input_stream)
Adds input stream to the parsing chain. This function starts parsing chain.
DllExport DocToTextTransformer *DOCTOTEXT_CALL doctotext_create_transfomer(void(*callback)(DocToTextInfo *, void *data), void *data)
Creates a new DocToTextTransformer object. This object is used to transform parsed data....
struct DocToTextParsingChain DocToTextParsingChain
DllExport const char *DOCTOTEXT_CALL doctotext_info_get_tag_name(DocToTextInfo *info)
DllExport void DOCTOTEXT_CALL doctotext_add_string_parameter(DocToTextParameters *parameters, const char *name, const char *value)
Adds const char* parameter to parser parameters.
DllExport DocToTextParameters *DOCTOTEXT_CALL doctotext_create_parameter()
Creates new empty DocToTextParameters. In next step we can pass to DocToTextParameters required param...
DllExport void DOCTOTEXT_CALL doctotext_free_exporter(DocToTextExporter *exporter)
Frees exporter and all resources allocated by the exporter. Remember not to use function free()....
DllExport DocToTextParsingChain *DOCTOTEXT_CALL doctotext_connect_parsing_chain_to_transformer(DocToTextParsingChain *parsing_chain, DocToTextTransformer *transformer)
Adds transformer to the parsing chain.
DllExport const char *DOCTOTEXT_CALL doctotext_info_get_string_attribute(DocToTextInfo *info, const char *attribute_name)
Returns attribute value as a string from DocToTextInfo.
DllExport void DOCTOTEXT_CALL doctotext_free_importer(DocToTextImporter *importer)
Frees importer and all resources allocated by the importer. DocToTextImporter is allocated using oper...
struct DocToTextImporter DocToTextImporter
DllExport DocToTextParsingChain *DOCTOTEXT_CALL doctotext_connect_importer_to_transformer(DocToTextImporter *importer, DocToTextTransformer *transformer)
Creates connection between importer and transformer and returns DocToTextParsingChain which contains ...
DllExport DocToTextSimpleExtractor *DOCTOTEXT_CALL doctotext_create_simple_extractor(const char *file_name)
Creates a new DocToTextSimpleExtractor object. Example:
DllExport void DOCTOTEXT_CALL doctotext_add_int_parameter(DocToTextParameters *parameters, const char *name, int value)
Adds int parameter to parser parameters.