Sunday, February 1, 2009

Getting a simple directory listing in Linux

There are often many ways to do the same things under Linux; getting a listing of a given directory falls under that category. The easiest way is to issue the command, 'ls -l', in the shell. Alternatively, if you don't mind a bit of work, you could write your own program.

Lately, I have been reading Steve D. Pate's UNIX Filesystems -- Evolution, Design, and Implementation. I am at the section in the book (pp.21-22) where the author implements a simple version of the Unix style ls command. The book itself was published in 2003 and, as things stand in 2009, the code example he lists will not even compile. Here is the code:
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/dirent.h>
#include <sys/unistd.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <pwd.h>
#include <grp.h>

#define BUFSZ 1024

main()
{
    struct dirent *dir;
    struct stat st;
    struct passwd *pw;
    struct group *grp;
    char buf[BUFSZ], *bp, *ftime;
    int dfd, fd, nread;

    dfd = open(".", O_RDONLY);
    bzero(buf, BUFSZ);
    while (nread = getdents(dfd, (struct dirent *) &buf, BUFSZ) != 0) {
        bp = buf;
        dir = (struct dirent *) buf;
        do {
            if (dir->d_reclen != 0) {
                stat(dir->d_name, &st);
                ftime = ctime(&st.st_mtime);
                ftime[16] = '\0';
                ftime += 4;
                pw = getpwuid(st.st_uid);
                grp = getgrgid(st.st_gid);
                perms(st.st_mode);
                printf("%3d %-8s %-7s %9d %s %s\n",
                       st.st_nlink, pw->pw_name, grp->gr_name,
                       st.st_size, ftime, dir->d_name);
            }
            bp = bp + dir->d_reclen;
            dir = (struct dirent *) (bp);
        } while (dir->d_ino != 0);
        bzero(buf, BUFSZ);
    }
}
Here is what I get when I try to compile it on my machine:

kermit@fastbox ~/cprogs/fs $ gcc -Wall -o pate_ls pate_ls.c
pate_ls.c:3:24: error: sys/dirent.h: No such file or directory
pate_ls.c:14: warning: return type defaults to ‘int’
pate_ls.c: In function ‘main’:
pate_ls.c:23: warning: implicit declaration of function ‘bzero’
pate_ls.c:23: warning: incompatible implicit declaration of built-in function ‘bzero’
pate_ls.c:24: warning: implicit declaration of function ‘getdents’
pate_ls.c:24: warning: suggest parentheses around assignment used as truth value
pate_ls.c:28: error: dereferencing pointer to incomplete type
pate_ls.c:29: error: dereferencing pointer to incomplete type
pate_ls.c:30: warning: implicit declaration of function ‘ctime’
pate_ls.c:30: warning: assignment makes pointer from integer without a cast
pate_ls.c:35: warning: implicit declaration of function ‘perms’
pate_ls.c:36: warning: implicit declaration of function ‘printf’
pate_ls.c:36: warning: incompatible implicit declaration of built-in function ‘printf’
pate_ls.c:38: error: dereferencing pointer to incomplete type
pate_ls.c:38: warning: format ‘%3d’ expects type ‘int’, but argument 2 has type ‘__nlink_t’
pate_ls.c:38: warning: format ‘%9d’ expects type ‘int’, but argument 5 has type ‘__off_t’
pate_ls.c:40: error: dereferencing pointer to incomplete type
pate_ls.c:42: error: dereferencing pointer to incomplete type
pate_ls.c:20: warning: unused variable ‘fd’
pate_ls.c:45: warning: control reaches end of non-void function
kermit@fastbox ~/cprogs/fs/tmp $


That's a lot to look at! What we need to do is make some changes, so that the code will compile cleanly. Before dealing with the errors and warnings though, we will first look at the only 'major' change to the code, which will have a bonus side effect of eliminating some of the other problems. What we are going to do, is get rid of the call to getdents. If you look at your man page for getdents, you will notice the following:

"This is not the function you are interested in. Look at readdir(3) for the POSIX conforming C library interface. This page documents the bare kernel system call interface."


I am not sure if getdents was ever able to be used under Linux, such as is demonstrated above, but it certainly is not now. Instead, if we want to read a directory, we can use the readdir system call. Furthermore, instead of using the open system call on a directory, we can use the opendir system call that is provided under Linux. By using readdir, we can get rid of a few variables, notably the buf array, which also eliminates the need for the call to bzero. (Incidentally, if you read the man page for bzero, you will notice that it is being deprecated in favour of the memset function). Let's have a look at the original code again, with some comments annotating some of the problems with regards to building it under Linux:
#include <sys/types.h> 
#include <sys/stat.h> 
#include <sys/dirent.h>         /* Does not exist under some   
                                 * (all?) Linux distributions 
                                 */ 
#include <sys/unistd.h> 
#include <fcntl.h> 
#include <unistd.h> 
#include <errno.h> 
#include <pwd.h> 
#include <grp.h> 
 
#define BUFSZ 1024 
 
main() 
{                               /* Main returns an int */ 
    struct dirent *dir; 
    struct stat st; 
    struct passwd *pw; 
    struct group *grp; 
    char buf[BUFSZ], *bp, *ftime; 
    int dfd, fd, nread; 
 
    dfd = open(".", O_RDONLY); 
    bzero(buf, BUFSZ);          /* #include <strings.h> for bzero, 
                                 * but we don't need to use 
                                 * bzero (or memset) if we use 
                                 * readdir instead of getdents 
                                 */ 
    while (nread = getdents(dfd, (struct dirent *) &buf, BUFSZ) != 0) { 
        /* We can use readdir instead 
         * of getdents.  Example will 
         * follow later. 
         */ 
        bp = buf; 
        dir = (struct dirent *) buf; 
        do { 
            if (dir->d_reclen != 0) { 
                stat(dir->d_name, &st); 
                ftime = ctime(&st.st_mtime);    /* #include <time.h> 
                                                 * for ctime()  
                                                 */ 
                ftime[16] = '\0'; 
                ftime += 4; 
                pw = getpwuid(st.st_uid); 
                grp = getgrgid(st.st_gid); 
                perms(st.st_mode); 
                printf("%3d %-8s %-7s %9d %s %s\n", 
                       /* #include <stdio.h> 
                        * for printf()  
                        */ 
                       st.st_nlink, pw->pw_name, grp->gr_name, 
                       st.st_size, ftime, dir->d_name); 
            } 
            bp = bp + dir->d_reclen; 
            dir = (struct dirent *) (bp); 
        } while (dir->d_ino != 0); 
        bzero(buf, BUFSZ); 
    } 
    /* Should return something, as main returns an int */ 
} 
And finally, have a look at what the new program, with the changes, looks like:
#include <sys/types.h>          /* opendir, stat, closedir */
#include <dirent.h>             /* opendir, readdir, closedir */
#include <errno.h>              /* perror */
#include <stdlib.h>             /* exit */
#include <sys/stat.h>           /* stat */
#include <unistd.h>             /* stat */
#include <time.h>               /* ctime */
#include <pwd.h>                /* getpwuid */
#include <grp.h>                /* getgrgid */
#include <stdio.h>              /* printf */

int main(void)
{
    DIR *dp;
    struct dirent *dirp;
    struct stat st;
    struct passwd *pw;
    struct group *grp;
    char *ftime;

    if ((dp = opendir(".")) == NULL) {
        printf("Cannot open this directory\n");
        perror("opendir");
        exit(EXIT_FAILURE);
    }

    while ((dirp = readdir(dp)) != NULL) {
        if (dirp->d_reclen != 0) {
            stat(dirp->d_name, &st);
            ftime = ctime(&st.st_mtime);
            ftime[16] = '\0';
            ftime += 4;
            pw = getpwuid(st.st_uid);
            grp = getgrgid(st.st_gid);
            printf("%3zu %-8s %-7s %9zu %s %s\n",
                   st.st_nlink, pw->pw_name, grp->gr_name,
                   st.st_size, ftime, dirp->d_name);
        }
    }

    closedir(dp);
    return 0;
}
It compiles cleanly on my system (kernel 2.6.27, gcc 4.1.2). Note that this implementation is very 'bare bones.' It lists everything in the directory where it is executed (provided the directory permission settings allows it to do so). This includes 'hidden' dot files (.somefile), as well as the current directory, dot (.), and the parent directory, dot-dot (..). Now that you have a working version of the program, I would encourage you to experiment with it, and do the other exercises such as making it accept user input so as to be able to perform long and short listings (and even the -a option to show hidden files) etc. Finally, I have included a diff of the two files, in order to give an idea of what has been changed:

1,9c1,10
< #include <sys/types.h> 
< #include <sys/stat.h> 
< #include <sys/dirent.h> 
< #include <sys/unistd.h> 
< #include <fcntl.h> 
< #include <unistd.h> 
< #include <errno.h> 
< #include <pwd.h> 
< #include <grp.h> 
--- 
> #include <sys/types.h>         /* opendir, stat, closedir */
> #include <dirent.h>            /* opendir, readdir, closedir */
> #include <errno.h>             /* perror */
> #include <stdlib.h>            /* exit */
> #include <sys/stat.h>          /* stat */
> #include <unistd.h>            /* stat */
> #include <time.h>              /* ctime */
> #include <pwd.h>               /* getpwuid */
> #include <grp.h>               /* getgrgid */
> #include <stdio.h>             /* printf */
---
< #define BUFSZ 1024
---
13c14
< main()
---
> int main(void)
15c16,17
<     struct dirent *dir;
---
>     DIR *dp;
>     struct dirent *dirp;
19,20c21
<     char buf[BUFSZ], *bp, *ftime;
<     int dfd, fd, nread;
---
>     char *ftime;
22,43c23,26
<     dfd = open(".", O_RDONLY);
<     bzero(buf, BUFSZ);
<     while (nread = getdents(dfd, (struct dirent *) &buf, BUFSZ) != 0) {
<         bp = buf;
<         dir = (struct dirent *) buf;
<         do {
<             if (dir->d_reclen != 0) {
<                 stat(dir->d_name, &st);
<                 ftime = ctime(&st.st_mtime);
<                 ftime[16] = '\0';
<                 ftime += 4;
<                 pw = getpwuid(st.st_uid);
<                 grp = getgrgid(st.st_gid);
<                 perms(st.st_mode);
<                 printf("%3d %-8s %-7s %9d %s %s\n",
<                        st.st_nlink, pw->pw_name, grp->gr_name,
<                        st.st_size, ftime, dir->d_name);
<             }
<             bp = bp + dir->d_reclen;
<             dir = (struct dirent *) (bp);
<         } while (dir->d_ino != 0);
<         bzero(buf, BUFSZ);
---
>     if ((dp = opendir(".")) == NULL) {
>         printf("Cannot open this directory\n");
>         perror("opendir");
>         exit(EXIT_FAILURE);
44a28,44
>
>     while ((dirp = readdir(dp)) != NULL) {
>         if (dirp->d_reclen != 0) {
>             stat(dirp->d_name, &st);
>             ftime = ctime(&st.st_mtime);
>             ftime[16] = '\0';
>             ftime += 4;
>             pw = getpwuid(st.st_uid);
>             grp = getgrgid(st.st_gid);
>             printf("%3zu %-8s %-7s %9zu %s %s\n",
>                    st.st_nlink, pw->pw_name, grp->gr_name,
>                    st.st_size, ftime, dirp->d_name);
>         }
>     }
>
>     closedir(dp);
>     return 0;

1 comment:

  1. Generates "Segmentation fault" on GEHC/CTT Linux 6.2.9.
    Bare minimum works:

    /*
    gcc ls1.c -o ls1; ls1
    */
    #include /* opendir, stat, closedir */
    #include /* opendir, readdir, closedir */
    #include /* perror */
    #include /* exit */
    #include /* stat */
    #include /* stat */
    #include /* ctime */
    #include /* getpwuid */
    #include /* getgrgid */
    #include /* printf */

    int main(void)
    {
    DIR *dp;
    struct dirent *dirp;
    struct stat st;
    struct passwd *pw;
    struct group *grp;
    char *ftime;

    if ((dp = opendir(".")) == NULL) {
    printf("Cannot open this directory\n");
    perror("opendir");
    exit(EXIT_FAILURE);
    }

    while ((dirp = readdir(dp)) != NULL) {
    if (dirp->d_reclen != 0) {
    stat(dirp->d_name, &st);
    printf("%s\n", dirp->d_name);
    }
    }

    closedir(dp);
    return 0;
    }

    ReplyDelete