Using stdio to read and sort data in Linux

As the name says, I need to write a small program to read data from standard input, sort and send to standard output. The program should take 1 argument, which tells it how long it takes for one record (in bytes). This is how I test it:

printf 'D\x00C\x00\x00B\x00A' | ./binsort 2 | od -c

The above should output something like:

0000000  \0   A  \0   B   C  \0   D  \0
0000010

Here is what I still have ( binsort.c):

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

using namespace std;


void print_usage()
{
        printf("%s\n", "Usage: ");
}


int compare (const void * a, const void * b) // the compare function for qsort... might need some work
{
  return ( *(int*)a - *(int*)b );
}


int main(int argc, char *argv[])
{
        if (argc != 2 || stdin == NULL) // check for argument and piped input
        {
                print_usage();
                exit(EXIT_FAILURE);
        }

        int entry_size = atoi(argv[1]);

        if (entry_size <= 0 || entry_size >= INT_MAX) // check for valid range of entry size
        {
                print_usage();
                exit(EXIT_FAILURE);
        }

        char *input = new char[entry_size]; // to hold single record

        while (fgets(input, entry_size, stdin) != NULL)
        {
                printf("%s", input); // output single record we just read
        }

        exit(EXIT_SUCCESS);
}

Then compile with g++ binsort.c -o binsort.

The above compiles but does not output the data sent to it printf. It should output it in 2-byte chunks ... for example D\0 C\0 \0B \0A... but it is not.

qsort malloc/realloc -allocated. , , , . ?

P.S. , ... - .

+3
6

scanf() printf(). , . , fread() fwrite(). , , (, ) . - , , . - 4 , , 4 . memcmp().

:

char *input = NULL;
size_t input_size = 0;
int num_items = 0;
int entry_size;

int compare_func(const void *e1, const void *e2)
{
  return memcmp(e1, e2, entry_size);
}

int main(int argc, char **argv)
{
   // ...
  char *datum = malloc(entry_size);  // check for NULL
  input_size = 4096;
  input = malloc(input_size);  // check for NULL

  while(1)
  {
    if(fread(datum, 1, entry_size, stdin) < entry_size)
      break;
    size_t new_size = (num_items + 1) * entry_size;
    if(new_size > input_size)
    {
      input = realloc(input, input_size * 2);  // check for NULL
      input_size *= 2;
    }
    memcpy(input + num_items * entry_size, datum, entry_size);
    num_items++;
  }

  qsort(input, num_items, entry_size, compare_func);

  fwrite(input, entry_size, num_items, stdout);

  return 0;
}
+5

fgets fprintf ( C char -arrays, \0).

fread fwrite . qsort , , entry_size, void int, , , .

+2

, ( , , ):

  • -, . , , . : , , - .. Ad nauseum... , (. ).

  • , . , . , ?

  • , .

, ?


:

  • K & R? - c c++? . . OO , c++. , c++.

  • ++, STL . - std::vector<char[2]> ( ) <algorithm> .

  • , -. , , IO.

+2

printf \0 .

read() write() :

    int res;
    int count;
    while (1) {
            count = 0;
            while(count < entry_size) {
                    res = read(STDIN_FILENO, input + count, entry_size - count);
                    if (res <= 0)
                            exit(errno);
                    count += res;
            }
            count = 0;
            while(res) {
                    res = write(STDOUT_FILENO, input + count, entry_size - count);
                    if (res < 0)
                            exit(errno);
                    count += res;
            }
    }
+2

P.S. , ... - .

, Linux, "?

:.

% printf 'D\x00C\x00\x00B\x00A' | sort -z | od -c
0000000  \0   A  \0   B  \0   C  \0   D  \0
0000011

FSF/GNU -z:

-z, --zero-terminated
        end lines with 0 byte, not newline


:

, , ...

, STL.

struct FUNCTOR ( stl:: sort()). , , ostream_iterator <string> (cout, "\n" ), .

#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>

using namespace std;

   /* ifstream won't set EOF-state until we try to read past the end-of-file.*/
   /* (Makes for lots of grief trying to read in files...)                   */
inline bool
IsStreamStillGood( istream & theStream )
{
  return theStream && (theStream . peek() != EOF);
}

template<class TYPE> inline void DELETE( TYPE * x)
{
  delete x;
  x = (TYPE *) NULL;
}

struct FUNCTOR
{
  bool operator()(const string & x, const string & y) { return x < y; }
};


int
main(int argc, char **argv)
{
  istream *       stream;
  vector<string>  v;

  UASSERT( argc, >, 1 );

  const int recordSize = atoi( argv[1] );
  char      buffer [ recordSize + 1 ];

  UASSERT( recordSize, >, 0 );


  if ( argc > 2 )
    stream = new ifstream( argv[2] );
  else
    stream = & cin;


  while ( IsStreamStillGood( * stream ) )
  {
    stream-> read( buffer, recordSize );
    v.push_back( string( buffer, stream->gcount() ) );
  }

  UASSERT( v.back().size(), ==, size_t(recordSize) );


  FUNCTOR functor;
  sort( v.begin(), v.end(), functor );

  copy( v.begin(), v.end(), ostream_iterator<string>(cout) );


  if ( argc > 2 )
    DELETE(stream);
}


(), :

STL-: ('\ 0'), - ? - char [], , .

char c [10];, c [0] = '\ 0';. c [1] c [9]. , . ( , , .)

c c- , . 1 9? C , NULL.

, printf (% s), scanf (% s), strncat(), strncpy(), strncmp() .., NULL ('\ 0'), .

++ std::string . , , , : myString.append(10, '\ 0');

, stream- > read (buffer, recordSize), (). , ('\ 0') . . recordSize .

v.push_back (string (buffer, stream- > gcount())), , stream- > gcount() (). , ('\ 0') . stream- > gcount() .

, < (const string &, const string &), string:: compare(), , . Nulls ('\ 0') .

Now, if we try to use v.back (). c_str (), well, then we have no length, so the zeros will confuse us. But while we are using a string object (e.g. v.back ()) containing both data and length, we are good.

This leads us to a conclusion. And again we print the string, not myString.c_str (), so all the characters in the string are printed. Zeros ('\ 0') are included.

+2
source

Thank you all for the great offers! Just for the record here is completed binsort.c, which does what is expected.

#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include <limits.h>

using namespace std;


int num_items = 0;
size_t input_size = 0;
int entry_size = 0;


void print_usage()
{
        printf("%s\n", "Usage: <binary data> | ./binsort [RECORD_LENGTH]");
        printf("%s\n", "For example: printf 'D\\x00C\\x00\\x00B\\x00A' | ./binsort 2 | od -c");
}


int compare (const void * a, const void * b)
{
        return memcmp(a, b, entry_size);
}


int main(int argc, char *argv[])
{
        if (argc != 2 || stdin == NULL)
        {
                print_usage();
                exit(EXIT_FAILURE);
        }

        entry_size = atoi(argv[1]);

        if (entry_size <= 0 || entry_size >= INT_MAX)
        {
                print_usage();
                exit(EXIT_FAILURE);
        }

        char *input = NULL;
        char *datum = (char*) malloc(entry_size);
        if (datum == NULL)
                exit(EXIT_FAILURE);

        while(1)
        {
                int read_size = fread(datum, 1, entry_size, stdin);

                if (read_size == 0)
                        break;

                if(read_size < entry_size)
                {
                        while(read_size < entry_size)
                        {
                                memcpy(datum, '\0', 1);
                                read_size++;
                        }
                        break;
                }

                size_t new_size = (num_items + 1) * entry_size;
                if(new_size > input_size)
                {
                        input = (char*) realloc(input, new_size);
                        if (input == NULL)
                                exit(EXIT_FAILURE);
                        input_size = new_size;
                }
                memcpy(input + num_items * entry_size, datum, entry_size);
                num_items++;
        }

        qsort(input, num_items, entry_size, compare);

        fwrite(input, entry_size, num_items, stdout);

        exit(EXIT_SUCCESS);
}
+1
source

Source: https://habr.com/ru/post/1702941/


All Articles