I've been learning C from the K&R book for the past few months. To pull it all together, I decided to write a program that has (part of) the same functionality as one of the commands in my favorite data and statistics program.
It ended up being a headache, but I learned a lot in doing so. Does it make me a masochist if I like coding in C?
If anybody cares to take a look at the code, I would really appreciate hearing about ways to improve it. The print_table function in particular seems wonky, and there must be a better way to implement it.
Code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/* Capacity in bytes (terminator included) of the text buffer in each cell. */
#define MAXLENGTH 20
/* Number of rows in the table built by print_table. */
#define ROWS 21
/* Number of distinct byte values; size of the per-byte counter array. */
#define CHARS 256
/* Number of leading lines whose individual lengths are reported. */
#define LINES 5
/* Byte value of the ASCII DEL character, used as an index into the counters. */
#define DEL 127
/* Number of scalar cells listed in create_strings' members[] array. */
#define MEMBERS 14
/* printf format for one table row of four strings: a 24-wide left-aligned
 * label, a 15-wide right-aligned value, a 19-wide left-aligned label and a
 * 15-wide right-aligned value. */
#define FORMAT " %-24s%15s %-19s%15s\n"
/* One reported statistic: a numeric value together with its display text. */
typedef struct {
/* The numeric value of the statistic. */
unsigned long n;
/* Text shown in the table for this statistic (filled in by create_strings,
 * or directly by count_chars for the yes/no entry). */
char s[MAXLENGTH];
} cell;
/* All statistics gathered for one input file. */
struct filestats {
/* Occurrences of each byte value, indexed by the byte. */
cell counts[CHARS];
/* Number of "\r\n" pairs. */
cell windows;
/* Number of '\r' bytes not counted as part of a "\r\n" pair. */
cell mac;
/* Number of '\n' bytes not counted as part of a "\r\n" pair. */
cell unix;
/* Bytes 1-31 other than '\n', '\r' and '\t'. */
cell ctl;
/* Bytes 128-159 and 255. */
cell extctl;
/* Bytes for which isupper() is true. */
cell uc;
/* Bytes for which islower() is true. */
cell lc;
/* Bytes for which isdigit() is true. */
cell digit;
/* Remaining isgraph() bytes, excluding the comma. */
cell special;
/* Bytes 160-254. */
cell extended;
/* Sum of all per-byte counts, i.e. the number of bytes read. */
cell filesize;
/* Lengths recorded by count_chars for the lines at the start of the file. */
cell lengths[LINES];
/* Shortest line length recorded by count_chars. */
cell lmin;
/* Longest line length recorded by count_chars. */
cell lmax;
/* Number of line terminators: windows + mac + unix. */
cell lnum;
/* n is 1 when the last byte read was '\r' or '\n', else 0; s is "yes"/"no". */
cell eoleof;
/* Points at one of the literals "BINARY", "EXTENDED ASCII" or "ASCII". */
char * format;
};
/* One printed table row: two label/value pairs, in the order a, b, c, d.
 * print_table initializes rows positionally, so the member order matters. */
struct row {
/* Left-hand label. */
char * a;
/* Left-hand value. */
char * b;
/* Right-hand label. */
char * c;
/* Right-hand value. */
char * d;
};
/* Reads every byte of fp and fills the per-byte and per-line counters. */
int count_chars(FILE *fp, struct filestats *stats);
/* Derives the character-class totals and the file format from the counters. */
int other_stats(struct filestats *stats);
/* Produces the display text for every numeric statistic. */
void create_strings(struct filestats *st);
/* Prints the report table to standard output. */
int print_table(struct filestats *st);
/*
 * Entry point: analyzes the file named by the first command-line argument
 * and prints a table of character and line statistics.
 *
 * Returns 0 on success and 1 when no file was given, the file cannot be
 * opened, or reading it fails. Diagnostics go to stderr so they do not mix
 * with the report on stdout.
 */
int main(int argc, char *argv[])
{
    struct filestats stats = {0};
    const char *path;
    FILE *fp;
    int status;

    if (argc < 2) {
        fprintf(stderr, "invalid file specification\n");
        return 1;
    }

    path = argv[1];
    /* Binary mode so that "\r\n" pairs reach the counters untranslated. */
    fp = fopen(path, "rb");
    if (fp == NULL) {
        fprintf(stderr, "%s: can't open %s\n", argv[0], path);
        return 1;
    }

    status = count_chars(fp, &stats);
    fclose(fp);
    if (status != 0) {
        fprintf(stderr, "%s: error reading %s\n", argv[0], path);
        return 1;
    }

    other_stats(&stats);
    create_strings(&stats);
    print_table(&stats);
    return 0;
}
/*
 * Reads fp to end-of-file and accumulates the raw statistics in *stats:
 * per-byte counts, the number of "\r\n" pairs, the lengths of the first
 * LINES lines, the minimum and maximum line length, and whether the file
 * ends with a line terminator.
 *
 * A line ends at '\r' or '\n'; a '\n' that directly follows a '\r' is counted
 * as a Windows pair and does not end a second line. Each recorded length is
 * the number of non-terminator bytes plus one (the counter starts at 1).
 * Bytes after the last terminator are counted per byte but are not recorded
 * as a line.
 *
 * stats must be zero-initialized by the caller.
 * Returns 0 on success, -1 if a read error occurred on fp.
 */
int count_chars(FILE *fp, struct filestats *stats)
{
    /* int, not char: EOF must stay distinguishable from the byte 0xFF. */
    int c;
    int prev = '\0';
    unsigned long len = 1;
    int line = 0;

    while ((c = getc(fp)) != EOF) {
        /* getc yields the byte as an unsigned char value, so 0 <= c < CHARS. */
        ++stats->counts[c].n;

        if (c != '\n' && c != '\r') {
            ++len;
        } else if (prev == '\r' && c == '\n') {
            ++stats->windows.n;
        } else {
            /* Only the first LINES lines have a slot in lengths[]. */
            if (line < LINES)
                stats->lengths[line++].n = len;
            /* lmin.n == 0 means no line has been recorded yet. */
            if (len < stats->lmin.n || stats->lmin.n == 0)
                stats->lmin.n = len;
            if (len > stats->lmax.n)
                stats->lmax.n = len;
            len = 1;
        }
        prev = c;
    }

    stats->eoleof.n = (prev == '\r' || prev == '\n');
    snprintf(stats->eoleof.s, sizeof stats->eoleof.s, "%s",
             stats->eoleof.n ? "yes" : "no");

    return ferror(fp) ? -1 : 0;
}
/*
 * Derives the summary statistics from the per-byte counts in *stats:
 * total size, character-class totals, line-terminator totals and the
 * file format string. Always returns 0.
 *
 * Each byte value is assigned to at most one class, tested in this order:
 * control (1-31 except \n, \r, \t), extended control (128-159, 255),
 * upper case, lower case, digit, other graphic characters except the comma,
 * extended (160-254). Byte 0, DEL, blank, tab, comma, '\r' and '\n' belong to
 * no class; they are reported from their individual counts.
 */
int other_stats(struct filestats *stats)
{
    int i;
    unsigned long count;

    for (i = 0; i < CHARS; ++i) {
        count = stats->counts[i].n;
        stats->filesize.n += count;

        /* The <ctype.h> functions need an unsigned char value (or EOF); i is
         * already in 0..255, so it is passed as is rather than through a
         * plain char, which may be negative for bytes above 127. */
        if (i > 0 && i < 32 && i != '\n' && i != '\r' && i != '\t')
            stats->ctl.n += count;
        else if ((i >= 128 && i <= 159) || i == 255)
            stats->extctl.n += count;
        else if (isupper(i))
            stats->uc.n += count;
        else if (islower(i))
            stats->lc.n += count;
        else if (isdigit(i))
            stats->digit.n += count;
        else if (isgraph(i) && i != ',')
            stats->special.n += count;
        else if (i >= 160 && i <= 254)
            stats->extended.n += count;
    }

    /* Every "\r\n" pair contributed one '\r' and one '\n' to the counts. */
    stats->mac.n = stats->counts['\r'].n - stats->windows.n;
    stats->unix.n = stats->counts['\n'].n - stats->windows.n;
    stats->lnum.n = stats->windows.n + stats->mac.n + stats->unix.n;

    /* Any control byte, NUL or DEL makes the file binary. */
    if (stats->ctl.n != 0 || stats->extctl.n != 0
        || stats->counts['\0'].n != 0 || stats->counts[DEL].n != 0)
        stats->format = "BINARY";
    else
        stats->format = (stats->extended.n != 0) ? "EXTENDED ASCII" : "ASCII";

    return 0;
}
void convert(cell *);
void create_strings(struct filestats* st)
{
int i;
cell *members[] = {
&st->windows, &st->mac, &st->unix, &st->ctl, &st->extctl, &st->uc,
&st->lc, &st->digit, &st->special, &st->extended, &st->filesize,
&st->lmin, &st->lmax, &st->lnum
};
for (i = 0; i < CHARS; ++i)
convert(&st->counts[i]);
for (i = 0; i < LINES; ++ i) {
convert(&st->lengths[i]);
if (!st->lengths[i].n)
strncpy(st->lengths[i].s, ".", MAXLENGTH);
}
for (i = 0; i < MEMBERS; ++ i)
convert(members[i]);
}
/* Writes the decimal representation of n into s. */
void itoa(unsigned long n, char *s);
/* Reverses the string s in place. */
void reverse(char *s);
/* Inserts thousands separators into the digit string s, in place. */
void add_commas(char *s);

/*
 * Fills x->s with the decimal text of x->n, with thousands separators.
 *
 * The text is built in a scratch buffer large enough for any unsigned long
 * (at most 3 digits and 1 comma per byte of the type, plus the terminator)
 * and then copied with a bound, so a value too wide for x->s is truncated
 * instead of overrunning the cell.
 */
void convert(cell *x)
{
    char text[4 * sizeof(unsigned long) + 2];

    itoa(x->n, text);
    add_commas(text);
    snprintf(x->s, sizeof x->s, "%s", text);
}
int print_table(struct filestats* st)
{
struct row *r, table[ROWS] = {
"Line-end characters", "", "Line length (tab=1)", "",
" \\r\\n (Windows)", st->windows.s, " minimum", st->lmin.s,
" \\r by itself (Mac)", st->mac.s, " maximum", st->lmax.s,
" \\n by itself (Unix)", st->unix.s, "", "",
"Space/separator characte", "rs ", "Number of lines", //
st->lnum.s,
" [blank]", st->counts[' '].s, " EOL at EOF?", st->eoleof.s,
" [tab]", st->counts['\t'].s, "", "",
" [comma] (,)", st->counts[','].s, "Length of first 5 l", //
"ines ",
"Control characters", "", " Line 1", st->lengths[0].s,
" binary 0", st->counts['\0'].s, " Line 2", st->lengths[1].s,
" CTL excl. \\r, \\n, \\t", st->ctl.s, " Line 3", st->lengths[2].s,
" DEL", st->counts[DEL].s, " Line 4", st->lengths[3].s,
" Extended (128-159,255)", st->extctl.s, " Line 5", st->lengths[4].s,
"ASCII printable", "", "", "",
" A-Z", st->uc.s, "", "",
" a-z", st->lc.s, "File format", st->format,
" 0-9", st->digit.s, "", "",
" Special (!@#$ etc.)", st->special.s, "", "",
" Extended (160-254)", st->extended.s, "", "",
"", "---------------", "", "",
"Total", st->filesize.s, "", ""
};
int i;
char *string_b[16], *string_c[18];
printf("\n");
for (i = 0; i < ROWS; ++i) {
r = &table[i];
printf(FORMAT, r->a, r->b, r->c, r->d);
}
}
/*
 * Writes the decimal representation of n into s as a NUL-terminated string.
 * Digits are produced least-significant first and then reversed in place.
 * s must be large enough for all digits plus the terminator.
 */
void itoa(unsigned long n, char * s)
{
    char *out = s;

    for (;;) {
        *out++ = n % 10 + '0';
        n /= 10;
        if (n == 0)
            break;
    }
    *out = '\0';

    reverse(s);
}
/*
 * Reverses the NUL-terminated string s in place.
 * Strings of length 0 or 1 are left untouched; returning early also avoids
 * forming a pointer before the start of s when the string is empty.
 */
void reverse(char * s)
{
    size_t len = strlen(s);
    char *lo, *hi;
    char tmp;

    if (len < 2)
        return;

    for (lo = s, hi = s + len - 1; lo < hi; ++lo, --hi) {
        tmp = *lo;
        *lo = *hi;
        *hi = tmp;
    }
}
/*
 * Inserts a comma between every group of three digits of the digit string s,
 * counting from the right, e.g. "1234567" becomes "1,234,567". The edit is
 * done in place: the buffer behind s must have room for (strlen(s) - 1) / 3
 * additional characters. Strings shorter than four characters, including
 * the empty string, are left unchanged.
 */
void add_commas(char * s)
{
    size_t len = strlen(s);
    size_t commas, src, dst, digits;

    if (len < 4)
        return;

    commas = (len - 1) / 3;
    src = len;            /* one past the last unread character */
    dst = len + commas;   /* one past the last unwritten position */
    s[dst] = '\0';

    /* Copy from the right so nothing is overwritten before it is read; once
     * dst catches up with src, every comma has been placed and the rest of
     * the string is already where it belongs. */
    for (digits = 0; dst > src; ) {
        s[--dst] = s[--src];
        if (++digits % 3 == 0 && src > 0)
            s[--dst] = ',';
    }
}
Here is what the output looks like when you run the program on the source file.
Code:
Line-end characters Line length (tab=1)
\r\n (Windows) 236 minimum 1
\r by itself (Mac) 0 maximum 74
\n by itself (Unix) 0
Space/separator characters Number of lines 236
[blank] 662 EOL at EOF? yes
[tab] 276
[comma] (,) 130 Length of first 5 lines
Control characters Line 1 19
binary 0 0 Line 2 19
CTL excl. \r, \n, \t 0 Line 3 20
DEL 0 Line 4 21
Extended (128-159,255) 0 Line 5 16
ASCII printable
A-Z 204
a-z 2,549 File format ASCII
0-9 107
Special (!@#$ etc.) 1,210
Extended (160-254) 0
---------------
Total 5,610