/* This program lists each file named on its command line together with its
   size.  It does this by opening each file, lseeking to the end, and printing
   the value of the file pointer.  Because it has to open() the file, it
   cannot report the size of a file that the user is not allowed to read. */

#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>

int main(int argc, char **argv)
{
  int i;
  int fd;
  off_t size;

  for (i = 1; i < argc; i++) {
    fd = open(argv[i], O_RDONLY);
    if (fd < 0) {
      printf("Couldn't open %s\n", argv[i]);
    } else {
      /* Seeking to offset 0 relative to the end returns the file's size. */
      size = lseek(fd, (off_t) 0, SEEK_END);
      if (size == (off_t) -1) {
        /* Not every descriptor is seekable; don't print -1 as if it were a size. */
        printf("Couldn't lseek %s\n", argv[i]);
      } else {
        /* off_t is not the same type on every system, so cast it to match %lld. */
        printf("%10lld %s\n", (long long) size, argv[i]);
      }
      close(fd);
    }
  }
  return 0;
}
What it does is attempt to open each file, and then seek to the end of the file to figure out the size. This works ok:
UNIX> ls -l txt/input*.txt
-rw-r--r-- 1 plank staff 185 Feb 13 11:22 txt/input1.txt
-rw-r--r-- 1 plank staff 179 Feb 13 11:22 txt/input2.txt
UNIX> bin/ls1 txt/input*.txt # It reports the same size as ls above
       185 txt/input1.txt
       179 txt/input2.txt
UNIX>
Here's a problem, though:
UNIX> rm -rf txt/myfile.txt
UNIX> make txt/myfile.txt # The makefile creates txt/myfile.txt, and sets its protection to nothing.
echo "Hi" > txt/myfile.txt
chmod 0 txt/myfile.txt
UNIX> ls -l txt/myfile.txt # You can list it with ls.
---------- 1 plank staff 3 Feb 13 11:24 txt/myfile.txt
UNIX> bin/ls1 txt/myfile.txt # But bin/ls1 fails on it.
Couldn't open txt/myfile.txt
UNIX>
Since ls1 couldn't open "txt/myfile.txt", it couldn't print out its size. This is unfortunate, but it also points out why we need the "stat" function -- there are things about a file that it would be nice to know, even if we're not allowed to access the file itself.
To reiterate, the stat system call gives you information about a file's inode. It can do this as long as the user has permission to get to the directory that contains the file.
Read the man page for stat. The stat struct is defined in /usr/include/sys/stat.h, and is roughly:
/* Approximate layout of struct stat, the record that stat() fills in
   with the information it reports about a file. */
struct stat {
  mode_t st_mode; /* File mode (see mknod(2)) */
  ino_t st_ino; /* Inode number */
  dev_t st_dev; /* ID of device containing */
  /* a directory entry for this file */
  dev_t st_rdev; /* ID of device */
  /* This entry is defined only for */
  /* char special or block special files */
  nlink_t st_nlink; /* Number of links */
  uid_t st_uid; /* User ID of the file's owner */
  gid_t st_gid; /* Group ID of the file's group */
  off_t st_size; /* File size in bytes */
  time_t st_atime; /* Time of last access */
  time_t st_mtime; /* Time of last data modification */
  time_t st_ctime; /* Time of last file status change */
  /* Times are measured in seconds since the epoch */
  long st_blksize; /* Preferred I/O block size */
  long st_blocks; /* Number of 512 byte blocks allocated*/
}; |
The confusing types are mostly ints, longs, and shorts. I.e. from /usr/include/sys/types.h: (This was from 1996. off_t is now a long long, usually. I'm keeping the typedefs below just to show that using types like off_t still does not guarantee that your code will port cleanly over time. So be it.)
/* Historical typedefs from /usr/include/sys/types.h (circa 1996), kept for
   illustration only; the underlying types differ on current systems. */
typedef unsigned long ino_t; /* inode number */
typedef short dev_t; /* device ID */
typedef long off_t; /* file size in bytes */
typedef unsigned short uid_t; /* user ID */
typedef unsigned short gid_t; /* group ID */ |
And from /usr/include/sys/stdtypes.h:
/* Historical typedefs from /usr/include/sys/stdtypes.h, kept for
   illustration only. */
typedef unsigned short mode_t; /* file mode bits */
typedef short nlink_t; /* links to a file */
typedef long time_t; /* value = secs since epoch */ |
Once you have read this man page, it should be trivial to change ls1.c to work correctly using stat instead of open/lseek. This is in src/ls2.c:
/* This is a program which lists files and their sizes to standard output.
   The files are specified on the command line arguments.  It uses stat to
   see if the files exist, and to determine the files' sizes.  Unlike
   opening the file, stat works even when the file itself is unreadable. */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
  int i;
  struct stat buf;
  int exists;

  for (i = 1; i < argc; i++) {
    exists = stat(argv[i], &buf);
    if (exists < 0) {
      fprintf(stderr, "%s not found\n", argv[i]);
    } else {
      /* st_size is an off_t, whose width varies by system; cast it so that
         the argument really is the long long that %lld expects. */
      printf("%10lld %s\n", (long long) buf.st_size, argv[i]);
    }
  }
  return 0;
}
UNIX> bin/ls2 txt/* 185 txt/input1.txt 179 txt/input2.txt 3 txt/myfile.txt UNIX>
For example, the program src/mtime.c prints the modification times of the files given on the command line, in seconds. It uses st_mtime, which on most machines is a macro defined in <sys/stat.h>:
/* This program prints the modification time, in seconds since the epoch,
   of each file named on the command line.  It reads the time from the
   st_mtime field that stat() fills in. */

#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
  int i;
  struct stat buf;

  for (i = 1; i < argc; i++) {
    if (stat(argv[i], &buf) != 0) {
      printf("Couldn't stat %s\n", argv[i]);
    } else {
      /* time_t is not guaranteed to be a long, so cast it to a type whose
         printf conversion is known. */
      printf("%s %lld\n", argv[i], (long long) buf.st_mtime);
    }
  }
  return 0;
}
Here we create, and then modify, the file fred.txt, and show how st_mtime changes:
UNIX> echo "Fred" > fred.txt # Create fred.txt UNIX> bin/mtime fred.txt # Show its modification time. fred.txt 1645028284 UNIX> touch fred.txt # I waited 5 seconds before doing this UNIX> bin/mtime fred.txt # This is reflected in its modification time. fred.txt 1645028289 UNIX> rm fred.txt UNIX>
/* Approximate layout of struct dirent, the directory-entry record that
   readdir() returns a pointer to. */
/* NOTE(review): many systems declare d_name as a char array inside the
   struct rather than as a pointer -- confirm against your <dirent.h>. */
struct dirent {
  off_t d_off; /* offset of next disk dir entry */
  unsigned long d_fileno; /* file number of entry */
  unsigned short d_reclen; /* length of this record */
  char *d_name; /* name */
}; |
src/ls3.c tweaks ls2.c to read from the current directory ("."), and print out all files and their sizes:
/* This is a program which lists files and their sizes to standard output.
   The files are the entries of the current directory, identified by using
   opendir/readdir/closedir.  Entries are printed in whatever order
   readdir() returns them. */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <dirent.h>

int main(int argc, char **argv)
{
  struct stat buf;
  int exists;
  DIR *d;
  struct dirent *de;

  d = opendir(".");
  if (d == NULL) {
    fprintf(stderr, "Couldn't open \".\"\n");
    exit(1);
  }

  /* readdir() returns NULL once there are no more entries. */
  for (de = readdir(d); de != NULL; de = readdir(d)) {
    exists = stat(de->d_name, &buf);
    if (exists < 0) {
      fprintf(stderr, "%s not found\n", de->d_name);
    } else {
      /* st_size is an off_t; cast it so the argument matches %lld. */
      printf("%s %lld\n", de->d_name, (long long) buf.st_size);
    }
  }

  closedir(d);
  return 0;
}
UNIX> ( cd txt ; ../bin/ls3 ) # You'll note that the files are not in any particular order. . 192 .. 320 input1.txt 185 input2.txt 179 .keep 0 myfile.txt 3 UNIX>
First, formatting output. What we'd like to see is something like:
.                 192
..                352
input1.txt        185
input2.txt        179
.keep               0
myfile.txt          3

In order to do this, we need to know how long the longest filename is. And we need to know it before we print out any filenames. So, what we do is read all the directory entries into a linked list, calculating the maximum filename length along the way. After doing that, we traverse the list, and print the output in a nice format. Look closely at the printf() statement, and read the man page on printf() so that you can figure out why it works. This is src/ls4.c:
/* This is the same as ls3.c except the input is formatted so that all the filenames are padded to the size of the largest one. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> #include <dirent.h> #include "dllist.h" int main(int argc, char **argv) { struct stat buf; int exists; DIR *d; struct dirent *de; Dllist files, tmp; int maxlen; d = opendir("."); if (d == NULL) { fprintf(stderr, "Couldn't open \".\"\n"); exit(1); } maxlen = 0; files = new_dllist(); for (de = readdir(d); de != NULL; de = readdir(d)) { /* List all fo the files and store in a linked list. */ dll_append(files, new_jval_s(strdup(de->d_name))); if (strlen(de->d_name) > maxlen) maxlen = strlen(de->d_name); /* Maintain the size of the longers filename. */ } closedir(d); dll_traverse(tmp, files) { /* Now traverse the list and call stat() on each file to determine its size. */ exists = stat(tmp->val.s, &buf); if (exists < 0) { fprintf(stderr, "%s not found\n", tmp->val.s); } else { printf("%*s %10lld\n", -maxlen, tmp->val.s, buf.st_size); } } return 0; } |
Why did I use strdup in the dll_append() call instead of de->d_name? The answer is subtle. The man page doesn't tell you anything about how the struct that readdir() returns is allocated. All that you can really assume is that until you make the next call to readdir() or closedir(), the value of what readdir() returns is ok. If we knew that readdir() mallocs space for the "struct dirent" that it returns, and that that space isn't free'd until the user calls free(), then we could readily put de->d_name into our dllist, and not worry about anything. However, with no such assurances from the man page, we have to call strdup(). For example, opendir/readdir/closedir could be implemented as follows: