When reading and writing files in C, you have two main approaches:

  1. Using stdio functions like fopen(), getc(), putc().
  2. Using direct syscalls like open(), read(), and write().

Both approaches can copy files, but there is a difference in performance.

Example 1: stdio-based file copy

#include <stdio.h>
#include <stdlib.h>

void filecopy(FILE *ifp, FILE *ofp);

/*
 * cat-style driver built on stdio.
 *
 * With no arguments, copies stdin to stdout. Otherwise copies each file named
 * on the command line to stdout, in order.
 *
 * Returns 0 on success. Exits with status 1 (after printing a diagnostic to
 * stderr) if a file cannot be opened or if writing to stdout failed.
 */
int main(int argc, char *argv[]) {
    if (argc == 1) {
        filecopy(stdin, stdout);
    } else {
        while (--argc > 0) {
            const char *path = *++argv;
            FILE *fp = fopen(path, "r");
            if (fp == NULL) {
                /* Say which file failed and why instead of exiting silently. */
                perror(path);
                exit(1);
            }
            filecopy(fp, stdout);
            fclose(fp);
        }
    }

    /*
     * filecopy() cannot report output failures, so check the stream's error
     * flag here. Flushing first surfaces errors that would otherwise only
     * occur during the implicit flush at exit.
     */
    if (fflush(stdout) == EOF || ferror(stdout)) {
        fputs("error writing to stdout\n", stderr);
        return 1;
    }
    return 0;
}

/*
 * Copy bytes one at a time from ifp to ofp until getc() returns EOF
 * (end of input or a read error). Neither stream is closed or flushed.
 */
void filecopy(FILE *ifp, FILE *ofp) {
    for (;;) {
        /* int, not char: EOF must stay distinguishable from the byte 0xFF. */
        int ch = getc(ifp);
        if (ch == EOF) {
            break;
        }
        putc(ch, ofp);
    }
}

Example 2: syscall-based file copy

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define BUF_SIZE 4096

void filecopy(FILE *ifp, FILE *ofp);

/*
 * Copy everything readable from fd to standard output with read()/write().
 *
 * name is used only as the prefix of the diagnostic printed when reading
 * fails. Returns 0 once end-of-file is reached, or -1 after printing a
 * diagnostic to stderr if read() or write() fails.
 */
static int copy_fd_to_stdout(int fd, const char *name) {
    char buf[BUF_SIZE];

    for (;;) {
        ssize_t r = read(fd, buf, sizeof buf);
        if (r < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted before any data arrived: retry */
            }
            perror(name);
            return -1;
        }
        if (r == 0) {
            return 0;       /* end of file */
        }

        /* write() may accept fewer bytes than requested, so keep going until
         * the whole chunk has been handed over. */
        size_t tot_written = 0;
        while (tot_written < (size_t) r) {
            /* The result must stay signed: stored in a size_t, the -1 error
             * return becomes a huge positive count and is never detected. */
            ssize_t w = write(STDOUT_FILENO, buf + tot_written,
                              (size_t) r - tot_written);
            if (w < 0) {
                if (errno == EINTR) {
                    continue;
                }
                perror("stdout");
                return -1;
            }
            tot_written += (size_t) w;
        }
    }
}

/*
 * cat-style driver built on raw syscalls.
 *
 * With no arguments, copies stdin to stdout via filecopy(). Otherwise copies
 * each file named on the command line to stdout, in order.
 *
 * Returns 0 on success. Terminates with status 1 (after a diagnostic on
 * stderr) if a file cannot be opened or a read/write fails.
 */
int main(int argc, char *argv[]) {
    if (argc == 1) {
        filecopy(stdin, stdout);
        return 0;
    }

    while (--argc > 0) {
        const char *path = *++argv;
        int fd = open(path, O_RDONLY);
        if (fd < 0) {
            perror(path);
            exit(1);
        }

        int rc = copy_fd_to_stdout(fd, path);
        close(fd);          /* release the descriptor before the next file */
        if (rc < 0) {
            return 1;
        }
        /* Reaching EOF on one file moves on to the next argument rather than
         * ending the program. */
    }

    return 0;
}

/*
 * Stream the contents of ifp into ofp one byte at a time, stopping when
 * getc() reports EOF (end of input or a read error). The caller keeps
 * ownership of both streams; nothing is closed or flushed here.
 */
void filecopy(FILE *ifp, FILE *ofp) {
    /* Held as int so that EOF cannot collide with a valid byte value. */
    int byte = getc(ifp);
    while (byte != EOF) {
        putc(byte, ofp);
        byte = getc(ifp);
    }
}

Performance comparison

Using a 100 MB file:

$ time ./stdlib_cat bigfile > /dev/null
0.21s user 0.02s system 0.236 total

$ time ./sys_cat bigfile > /dev/null
0.01s user 0.02s system 0.025 total

Why the difference?

perf stat shows:

Metric             stdio    syscalls
CPU instructions   4.7B     3M
Branch misses      0        0.1M
CPU cycles         571M     5.5M