How to convert a bitmap to PIX in memory?

Tesseract does not work reliably when given a raw bitmap: it works on some inputs but fails on others. Yet it works fine on identical images when they are supplied as a Leptonica PIX.

How can I convert a bitmap to a PIX in memory?

One idea that came to mind was to use leptonica pixReadMem ():

00724 /*---------------------------------------------------------------------*
00725  *                            Read from memory                         *
00726  *---------------------------------------------------------------------*/
00727 /*!
00728  *  pixReadMem()
00729  *
00730  *      Input:  data (const; encoded)
00731  *              datasize (size of data)
00732  *      Return: pix, or null on error
00733  *
00734  *  Notes:
00735  *      (1) This is a variation of pixReadStream(), where the data is read
00736  *          from a memory buffer rather than a file.
00737  *      (2) On windows, this will only read tiff formatted files from
00738  *          memory.  For other formats, it requires fmemopen(3).
00739  *          Attempts to read those formats will fail at runtime.
00740  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
00741  *          the format.  That determines the constraint here.
00742  */

So now I need to find a way to build a TIFF in memory from a bitmap in memory, but I don't know how.

+4
source share
1 answer

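The code below takes Leptonica's BMP reader and replaces its fread/fseek calls with versions that read from a memory buffer instead of a file.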

- , . , leptonica .

#include <cstring>

#include <leptonica/allheaders.h>
#include "leptonica_hack.h"
#include "bmp.h"

/* Read cursor shared by fake_fread() and fake_fseek(): the byte offset of
 * the next unread byte of the in-memory "file".  Because it is an int,
 * buffers larger than INT_MAX bytes are not supported. */
int fake_file_tracker = 0;

/*
 *  fake_fread()
 *
 *      Input:  ptr     (destination; must have room for size * count bytes)
 *              size    (size in bytes of one item)
 *              count   (number of items requested)
 *              buffer  (contents of the in-memory "file")
 *              buflen  (number of bytes in buffer)
 *      Return: number of complete items copied; this is less than count
 *              (possibly 0) when the end of the buffer is reached
 *
 *  Notes:
 *      (1) This mimics fread(), reading from buffer at the position held
 *          in the global fake_file_tracker and advancing that position.
 *      (2) On a short read all remaining bytes are still copied to ptr
 *          (including a trailing partial item) and the cursor is left at
 *          the end of the buffer, so later reads return 0.
 *      (3) Returns 0 without touching the cursor if ptr or buffer is
 *          NULL, or if size or count is 0.
 */
size_t fake_fread ( void * ptr, size_t size, size_t count, char * buffer, size_t buflen)
{
    size_t  pos, remaining, nbytes;

    if (!ptr || !buffer || size == 0 || count == 0)
        return 0;

        /* A negative cursor is treated like end-of-data */
    if (fake_file_tracker < 0 || (size_t)fake_file_tracker >= buflen)
        return 0;

    pos = (size_t)fake_file_tracker;
    remaining = buflen - pos;

        /* Equivalent to (size * count > remaining), but cannot overflow */
    if (count > remaining / size)
    {
            /* Short read: the item count must be computed from the bytes
             * that were actually available, not from the updated cursor. */
        (void)memcpy(ptr, buffer + pos, remaining);
        fake_file_tracker = (int)buflen;
        return remaining / size;
    }

    nbytes = size * count;
    (void)memcpy(ptr, buffer + pos, nbytes);
    fake_file_tracker = (int)(pos + nbytes);
    return count;
}

/*
 *  fake_fseek()
 *
 *      Input:  buffer  (contents of the in-memory "file"; not accessed)
 *              offset  (byte offset, added to origin)
 *              origin  (base position in bytes; 0 means start of buffer)
 *              buflen  (number of bytes in buffer)
 *      Return: 0 if the cursor was moved; -1 if the target position is
 *              negative or beyond the end of the buffer (cursor unchanged)
 *
 *  Notes:
 *      (1) This sets the global read cursor fake_file_tracker to
 *          origin + offset.  Note that origin is a numeric base, not a
 *          SEEK_SET/SEEK_CUR/SEEK_END selector; passing 0 gives the
 *          usual "seek from start" behavior.
 *      (2) Seeking to exactly buflen (end of data) is allowed, as with
 *          fseek(); a following fake_fread() then returns 0.
 *      (3) A failed seek is reported with a nonzero return so that
 *          callers can detect it instead of silently reading from the
 *          old position.
 */
int fake_fseek ( char * buffer, long int offset, int origin, size_t buflen)
{
    unsigned long long  target;

    (void)buffer;  /* only the shared cursor is modified */

    if (offset < 0 && origin < 0)
        return -1;

    if (offset >= 0 && origin >= 0) {
            /* Add in an unsigned type so the sum cannot overflow */
        target = (unsigned long long)offset + (unsigned long long)origin;
    } else {
            /* Operands of opposite sign cannot overflow when added */
        long int  sum = offset + origin;
        if (sum < 0)
            return -1;
        target = (unsigned long long)sum;
    }

    if (target > buflen)
        return -1;

    fake_file_tracker = (int)target;
    return 0;
}


/*!
 *  pixReadBmpFromBuffer()
 *
 *      Input:  fp      (uncompressed BMP file contents held in memory)
 *              buflen  (number of bytes in fp)
 *      Return: pix, or null on error
 *
 *  Notes:
 *      (1) This is a BMP stream reader (the original header named it
 *          pixReadStreamBmp()) with the file reads replaced by
 *          fake_fread() / fake_fseek(), which read from the memory
 *          buffer through the global cursor fake_file_tracker.  The
 *          cursor is reset on entry, so two decodes must not be
 *          interleaved.
 *      (2) Compressed BMP data is rejected.  24 bpp input produces a
 *          32 bpp pix.  A 1 bpp image with a colormap is returned with
 *          the colormap removed.
 *      (3) Here are references on the bmp file format:
 *          http://en.wikipedia.org/wiki/BMP_file_format
 *          http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html
 */
PIX *
pixReadBmpFromBuffer(char  *fp, size_t buflen)
{
l_uint16   sval = 0;
l_uint32   ival = 0;
l_int16    bfType, bfSize, bfFill1, bfReserved1, bfReserved2;
l_int16    offset, bfFill2, biPlanes, depth, d;
l_int32    biSize, width, height, xres, yres, compression;
l_int32    imagebytes, biClrUsed, biClrImportant;
l_uint8   *colormapBuf = NULL;
l_int32    colormapEntries;
l_int32    fileBpl, extrabytes, readerror;
l_int32    pixWpl, pixBpl;
l_int32    i, j, k;
l_uint8    pel[4];
l_uint8   *data;
l_uint32  *line, *pword;
PIX        *pix, *pixt;
PIXCMAP   *cmap;

    PROCNAME("pixReadBmpFromBuffer");

        /* Start reading at the beginning of the buffer */
    fake_file_tracker = 0;

    if (!fp)
        return (PIX *)ERROR_PTR("fp not defined", procName, NULL);

        /* The header fields below are read without checking each
         * return value, so make sure up front that the buffer is
         * large enough to hold both headers. */
    if (buflen < (size_t)(BMP_FHBYTES + BMP_IHBYTES))
        return (PIX *)ERROR_PTR("buffer too small for bmp headers",
                                procName, NULL);

        /* Read bitmap file header (as 16-bit fields) */
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfType = convertOnBigEnd16(sval);
    if (bfType != BMP_ID)
        return (PIX *)ERROR_PTR("not bmf format", procName, NULL);

    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfSize = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfFill1 = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfReserved1 = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfReserved2 = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    offset = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    bfFill2 = convertOnBigEnd16(sval);

        /* Read bitmap info header */
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    biSize = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    width = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    height = convertOnBigEnd32(ival);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    biPlanes = convertOnBigEnd16(sval);
    (void)fake_fread(&sval, 1, 2, fp, buflen);
    depth = convertOnBigEnd16(sval);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    compression = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    imagebytes = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    xres = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    yres = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    biClrUsed = convertOnBigEnd32(ival);
    (void)fake_fread(&ival, 1, 4, fp, buflen);
    biClrImportant = convertOnBigEnd32(ival);

    if (compression != 0)
        return (PIX *)ERROR_PTR("cannot read compressed BMP files",
                                procName,NULL);

        /* A little sanity checking.  The imagebytes for uncompressed
         * images is either 0 or the size of the file data.
         * (The fact that it can be 0 is perhaps some legacy glitch).
         * The header values come from the input data, so also make
         * sure the row and image sizes fit in an l_int32 before
         * doing arithmetic with them. */
    if (width < 1)
        return (PIX *)ERROR_PTR("width < 1", procName,NULL);
    if (height < 1)
        return (PIX *)ERROR_PTR("height < 1", procName,NULL);
    if (depth < 1 || depth > 32)
        return (PIX *)ERROR_PTR("depth not in [1 ... 32]", procName,NULL);
    if ((long long)width * depth > 0x7fffffffLL - 31)
        return (PIX *)ERROR_PTR("width too large", procName,NULL);
    fileBpl = 4 * ((width * depth + 31)/32);
    if ((long long)fileBpl * height > 0x7fffffffLL)
        return (PIX *)ERROR_PTR("image too large", procName,NULL);
    if (imagebytes != 0 && imagebytes != fileBpl * height)
        return (PIX *)ERROR_PTR("invalid imagebytes", procName,NULL);
    if (offset < BMP_FHBYTES + BMP_IHBYTES)
        return (PIX *)ERROR_PTR("invalid offset: too small", procName,NULL);
    if (offset > BMP_FHBYTES + BMP_IHBYTES + 4 * 256)
        return (PIX *)ERROR_PTR("invalid offset: too large", procName,NULL);
    if ((size_t)offset >= buflen)
        return (PIX *)ERROR_PTR("invalid offset: beyond buffer",
                                procName,NULL);

        /* Handle the colormap: it fills the space between the
         * headers and the start of the image data */
    colormapEntries = (offset - BMP_FHBYTES - BMP_IHBYTES) / sizeof(RGBA_QUAD);
    if (colormapEntries > 0) {
        if ((colormapBuf = (l_uint8 *)CALLOC(colormapEntries,
                                             sizeof(RGBA_QUAD))) == NULL)
            return (PIX *)ERROR_PTR("colormapBuf alloc fail", procName, NULL );

            /* Read colormap */
        if (fake_fread(colormapBuf, sizeof(RGBA_QUAD), colormapEntries,
                       fp, buflen) != (size_t)colormapEntries) {
            FREE(colormapBuf);
            return (PIX *)ERROR_PTR( "colormap read fail", procName, NULL);
        }
    }

        /* Make a 32 bpp pix if depth is 24 bpp */
    d = depth;
    if (depth == 24)
        d = 32;
    if ((pix = pixCreate(width, height, d)) == NULL) {
            /* colormapBuf has not been handed to a colormap yet */
        if (colormapBuf)
            FREE(colormapBuf);
        return (PIX *)ERROR_PTR( "pix not made", procName, NULL);
    }
    pixSetXRes(pix, (l_int32)((l_float32)xres / 39.37 + 0.5));  /* to ppi */
    pixSetYRes(pix, (l_int32)((l_float32)yres / 39.37 + 0.5));  /* to ppi */
    pixWpl = pixGetWpl(pix);
    pixBpl = 4 * pixWpl;

    cmap = NULL;
    if (colormapEntries > 256)
        L_WARNING("more than 256 colormap entries!", procName);
    if (colormapEntries > 0) {  /* import the colormap to the pix cmap */
        if ((cmap = pixcmapCreate(L_MIN(d, 8))) == NULL) {
            FREE(colormapBuf);
            pixDestroy(&pix);
            return (PIX *)ERROR_PTR("cmap not made", procName, NULL);
        }
        FREE(cmap->array);  /* remove generated cmap array */
        cmap->array  = (void *)colormapBuf;  /* and replace */
        cmap->n = L_MIN(colormapEntries, 256);
    }
    pixSetColormap(pix, cmap);

        /* Seek to the start of the bitmap in the buffer */
    if (fake_fseek(fp, offset, 0, buflen) != 0) {
        pixDestroy(&pix);
        return (PIX *)ERROR_PTR("BMP seek fail", procName, NULL);
    }

        /* BMP rows are read into the pix starting with its last row
         * and moving up */
    if (depth != 24) {  /* typ. 1 or 8 bpp */
        data = (l_uint8 *)pixGetData(pix) + pixBpl * (height - 1);
        for (i = 0; i < height; i++) {
            if (fake_fread(data, 1, fileBpl, fp, buflen) != (size_t)fileBpl) {
                pixDestroy(&pix);
                return (PIX *)ERROR_PTR("BMP read fail", procName, NULL);
            }
            data -= pixBpl;
        }
    }
    else {  /*  24 bpp file; 32 bpp pix
             *  Note: for bmp files, pel[0] is blue, pel[1] is green,
             *  and pel[2] is red.  This is opposite to the storage
             *  in the pix, which puts the red pixel in the 0 byte,
             *  the green in the 1 byte and the blue in the 2 byte.
             *  Note also that all words are endian flipped after
             *  assignment on L_LITTLE_ENDIAN platforms.
             *
             *  We can then make these assignments for little endians:
             *      SET_DATA_BYTE(pword, 1, pel[0]);      blue
             *      SET_DATA_BYTE(pword, 2, pel[1]);      green
             *      SET_DATA_BYTE(pword, 3, pel[2]);      red
             *  This looks like:
             *          3  (R)     2  (G)        1  (B)        0
             *      |-----------|------------|-----------|-----------|
             *  and after byte flipping:
             *           3          2  (B)     1  (G)        0  (R)
             *      |-----------|------------|-----------|-----------|
             *
             *  For big endians we set:
             *      SET_DATA_BYTE(pword, 2, pel[0]);      blue
             *      SET_DATA_BYTE(pword, 1, pel[1]);      green
             *      SET_DATA_BYTE(pword, 0, pel[2]);      red
             *  This looks like:
             *          0  (R)     1  (G)        2  (B)        3
             *      |-----------|------------|-----------|-----------|
             *  so in both cases we get the correct assignment in the PIX.
             *
             *  Can we do a platform-independent assignment?
             *  Yes, set the bytes without using macros:
             *      *((l_uint8 *)pword) = pel[2];           red
             *      *((l_uint8 *)pword + 1) = pel[1];       green
             *      *((l_uint8 *)pword + 2) = pel[0];       blue
             *  For little endians, before flipping, this looks again like:
             *          3  (R)     2  (G)        1  (B)        0
             *      |-----------|------------|-----------|-----------|
             */
        readerror = 0;
        extrabytes = fileBpl - 3 * width;  /* row padding in the file */
        line = pixGetData(pix) + pixWpl * (height - 1);
        for (i = 0; i < height && !readerror; i++) {
            for (j = 0; j < width; j++) {
                pword = line + j;
                if (fake_fread(pel, 1, 3, fp, buflen) != 3) {
                        /* Stop at once; pel holds no valid pixel */
                    readerror = 1;
                    break;
                }
                *((l_uint8 *)pword + COLOR_RED) = pel[2];
                *((l_uint8 *)pword + COLOR_GREEN) = pel[1];
                *((l_uint8 *)pword + COLOR_BLUE) = pel[0];
            }
                /* Skip the padding; missing padding after the last
                 * row is tolerated, so the result is not checked */
            for (k = 0; k < extrabytes && !readerror; k++)
                (void)fake_fread(pel, 1, 1, fp, buflen);
            line -= pixWpl;
        }
        if (readerror) {
            pixDestroy(&pix);
            return (PIX *)ERROR_PTR("BMP read fail", procName, NULL);
        }
    }

    pixEndianByteSwap(pix);

        /* ----------------------------------------------
         * The bmp colormap determines the values of black
         * and white pixels for binary in the following way:
         * if black = 1 (255), white = 0
         *      255, 255, 255, 0, 0, 0, 0, 0
         * if black = 0, white = 1 (255)
         *      0, 0, 0, 0, 255, 255, 255, 0
         * We have no need for a 1 bpp pix with a colormap!
         * ---------------------------------------------- */
    if (depth == 1 && cmap) {
        pixt = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
        pixDestroy(&pix);
        pix = pixt;  /* rename */
    }

    return pix;
}
+3

Source: https://habr.com/ru/post/1535633/


All Articles