When reading and writing files in C, you have two main approaches:
- Standard I/O (stdio) library functions: fopen(), getc(), putc().
- System calls: open(), read(), and write().

Both approaches can copy files, but there is a difference in performance.
Example 1: stdio-based file copy
#include <stdio.h>
#include <stdlib.h>
void filecopy(FILE *ifp, FILE *ofp);
/*
 * stdio-based cat: copies every file named on the command line to stdout,
 * or copies stdin to stdout when no file argument is given.
 *
 * Returns 0 on success. Exits with status 1 (after printing a diagnostic to
 * stderr) if a file cannot be opened, if reading a file fails, or if writing
 * to stdout fails.
 */
int main(int argc, char *argv[]) {
    if (argc == 1) {
        filecopy(stdin, stdout);
    } else {
        while (--argc > 0) {
            const char *path = *++argv;
            FILE *fp = fopen(path, "r");
            if (fp == NULL) {
                /* Say which file failed and why instead of exiting silently. */
                perror(path);
                exit(1);
            }
            filecopy(fp, stdout);
            /* getc() returns EOF for both end-of-file and read errors;
             * the stream's error flag tells the two apart. */
            if (ferror(fp)) {
                fprintf(stderr, "%s: read error\n", path);
                fclose(fp);
                exit(1);
            }
            fclose(fp);
        }
    }
    /* filecopy() returns void, so output failures are only visible through
     * stdout's error flag or a failing flush. */
    if (ferror(stdout) || fflush(stdout) == EOF) {
        fprintf(stderr, "stdout: write error\n");
        exit(1);
    }
    return 0;
}
/*
 * Copies ifp to ofp one character at a time.
 *
 * Reads with getc() until it returns EOF and passes every character read to
 * putc(). The result of putc() is not checked, and EOF caused by a read
 * error is not distinguished from end-of-file.
 */
void filecopy(FILE *ifp, FILE *ofp) {
    for (;;) {
        int ch = getc(ifp);
        if (ch == EOF) {
            break;
        }
        putc(ch, ofp);
    }
}
- This version reads from one FILE * and writes to another FILE *.
- The default buffer size of a FILE * is typically 4096 bytes.

Example 2: syscall-based file copy
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define BUF_SIZE 4096
void filecopy(FILE *ifp, FILE *ofp);
/*
 * Copies everything readable from in_fd to out_fd with read()/write(),
 * BUF_SIZE bytes at a time.
 *
 * Returns 0 when read() reports end-of-file, or -1 if read() or write()
 * fails with anything other than EINTR (errno is left as set by that call).
 */
static int copy_fd(int in_fd, int out_fd) {
    char buf[BUF_SIZE];

    for (;;) {
        ssize_t nread = read(in_fd, buf, sizeof buf);
        if (nread < 0) {
            if (errno == EINTR) {
                continue; /* interrupted before any data arrived: retry */
            }
            return -1;
        }
        if (nread == 0) {
            return 0; /* end of file */
        }

        /* write() may accept fewer bytes than requested; loop until the
         * whole chunk has been written. */
        size_t done = 0;
        while (done < (size_t) nread) {
            /* Must be ssize_t: storing the result in size_t would make the
             * -1 error return impossible to detect. */
            ssize_t nwritten = write(out_fd, buf + done, (size_t) nread - done);
            if (nwritten < 0) {
                if (errno == EINTR) {
                    continue;
                }
                return -1;
            }
            done += (size_t) nwritten;
        }
    }
}

/*
 * Syscall-based cat: copies every file named on the command line to
 * standard output, or copies standard input when no file argument is given.
 * All copying goes through file descriptors, never through FILE * streams.
 *
 * Returns 0 on success and 1 if a copy fails; exits with status 1 if a file
 * cannot be opened. A diagnostic is printed to stderr in each failure case.
 */
int main(int argc, char *argv[]) {
    if (argc == 1) {
        if (copy_fd(STDIN_FILENO, STDOUT_FILENO) < 0) {
            perror("stdin");
            return 1;
        }
        return 0;
    }

    while (--argc > 0) {
        const char *path = *++argv;
        int fd = open(path, O_RDONLY);
        if (fd < 0) {
            perror(path);
            exit(1);
        }

        int rc = copy_fd(fd, STDOUT_FILENO);
        int saved_errno = errno; /* close() may overwrite errno */
        close(fd);               /* release the descriptor before the next file */
        if (rc < 0) {
            errno = saved_errno;
            perror(path);
            return 1;
        }
        /* On success, fall through to the next argument rather than
         * returning after the first file. */
    }
    return 0;
}
/*
 * Character-by-character stream copy: each value returned by getc(ifp) is
 * handed to putc() on ofp until getc() yields EOF. No error checking is
 * performed on either stream.
 */
void filecopy(FILE *ifp, FILE *ofp) {
    int next = getc(ifp);
    while (next != EOF) {
        putc(next, ofp);
        next = getc(ifp);
    }
}
Performance comparison
Using a 100 MB file:
$ time ./stdlib_cat bigfile > /dev/null
0.21s user 0.02s system 0.236 total
$ time ./sys_cat bigfile > /dev/null
0.01s user 0.02s system 0.025 total
Why the difference?
- getc() and putc() involve additional checks and function-call overhead on every byte.
- The stdio version works through FILE * structures (stdin, stdout). We only use STDIN_FILENO and STDOUT_FILENO in the syscall version.

perf stat shows:
| Metric | stdio | syscalls |
|---|---|---|
| CPU instructions | 4.7B | 3M |
| Branch misses | 0 | 0.1M |
| CPU cycles | 571M | 5.5M |