Here's a detab utility.
This should be bread-and-butter code. In fact, it is tricky to ensure that a
malicious user cannot cause a buffer overrun; hence the maxmemory() routine.
getline() is also difficult.
/*
  Utility to replace tabs in text files with spaces.
  Can either do a blind replace or replace with user-specified tab stops.
  By Malcolm McLean, 2009
*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
  count the occurrences of ch in str
*/
/*
  Count the characters of a string that compare equal to a given value.
  Params: str - NUL-terminated string to scan
          ch  - character value to look for
  Returns: number of characters before the terminating NUL that equal ch.
           The terminator itself is never counted.
*/
size_t chcount(const char *str, int ch)
{
  const char *p;
  size_t hits = 0;

  for (p = str; *p != '\0'; p++) {
    if (*p == ch)
      hits++;
  }
  return hits;
}
/*
  is a string an integer?
*/
/*
  Test whether a string is a decimal integer that fits in an int.
  Params: str - candidate string, may be NULL
  Returns: 1 if strtol() consumes the whole of str and the value lies in
           INT_MIN..INT_MAX, 0 otherwise (including NULL and "").
  Notes: leading white space and an optional sign are accepted because
         strtol() accepts them.
*/
int integral(char *str)
{
  char *end;
  long x;

  if(!str || !*str)
    return 0;
  /* strtol() signals out-of-range input only through errno; the value
     comparison below cannot catch it when long is no wider than int */
  errno = 0;
  x = strtol(str, &end, 10);
  if(errno == ERANGE)
    return 0;
  if(x > INT_MAX || x < INT_MIN)
    return 0;
  /* reject strings with no digits or with trailing junk */
  if(end == str || *end)
    return 0;
  return 1;
}
/*
  check that an array of integers is ascending
*/
/*
  Check that an array of integers is strictly ascending.
  Params: x - the array
          N - number of elements, must be greater than zero (asserted)
  Returns: 1 if every element is greater than its predecessor, 0 if any
           element is less than or equal to the one before it.
*/
int ascending(int *x, int N)
{
  int idx = 1;

  assert(N > 0);
  while (idx < N) {
    if (x[idx - 1] >= x[idx])
      return 0;
    idx++;
  }
  return 1;
}
/*
  get line from file
  Params: fp - input file
  Returns: malloced line, 0 on EOF
*/
/*
  Read one line of arbitrary length from a stream.
  Params: fp - input stream
  Returns: a malloc()ed NUL-terminated string which the caller must free.
           The trailing newline is kept when one was read. Returns 0 when
           the stream is at end of file and no characters were read.
  Notes: prints "Out of memory" and exits if the buffer cannot be
         allocated or grown.
         NOTE(review): this name is also used by POSIX <stdio.h> with a
         different signature; builds that expose POSIX declarations may
         need the function renamed.
*/
char *getline(FILE *fp)
{
  size_t capacity = 128;
  size_t used = 0;
  size_t grown;
  char *line;
  char *bigger;
  int c;

  line = malloc(capacity);
  if (line == NULL)
    goto out_of_memory;

  for (;;) {
    c = fgetc(fp);
    if (c == EOF)
      break;

    /* grow before storing so there is always room for the terminator */
    if (used >= capacity - 2) {
      grown = capacity + capacity / 10;
      if (grown < capacity)      /* size_t wrapped around */
        goto out_of_memory;
      bigger = realloc(line, grown);
      if (bigger == NULL)
        goto out_of_memory;
      line = bigger;
      capacity = grown;
    }

    line[used++] = c;
    if (c == '\n')
      break;
  }

  line[used] = 0;
  if (used == 0 && c == EOF) {
    free(line);
    return 0;
  }
  return line;

out_of_memory:
  free(line);
  fprintf(stderr, "Out of memory\n");
  exit(EXIT_FAILURE);
}
/*
  calculate maximum amount of memory needed to hold tabbed line
*/
/*
  Compute an upper bound on the buffer size needed for a detabbed line.
  Params: str     - the line containing tabs
          Ntabs   - number of entries in tablist
          tablist - tab stop positions
  Returns: strlen(str) + last stop + 1, plus (last stop - previous stop)
           for every tab character in str. The previous stop is taken as
           0 when there is only one entry.
  Notes: prints "Out of memory" and exits if the size computation would
         wrap around in size_t.
*/
size_t maxmemory(char *str, int Ntabs, int *tablist)
{
  int laststop = tablist[Ntabs - 1];
  int prevstop = 0;
  size_t ntabchars;
  size_t padding;
  size_t total;
  size_t grown;

  if (Ntabs > 1)
    prevstop = tablist[Ntabs - 2];

  /* worst case 1: the whole string sits after the last explicit stop */
  total = strlen(str);
  grown = total + laststop + 1;
  if (grown < total)
    goto too_big;
  total = grown;

  /* worst case 2: every tab expands by a full repeat interval */
  ntabchars = chcount(str, '\t');
  if (ntabchars == 0)
    return total;

  padding = (size_t) (laststop - prevstop) * ntabchars;
  if (padding / ntabchars != (size_t) (laststop - prevstop))
    goto too_big;                /* the multiplication wrapped */
  grown = total + padding;
  if (grown < total)
    goto too_big;                /* the addition wrapped */
  return grown;

too_big:
  fprintf(stderr, "Out of memory\n");
  exit(EXIT_FAILURE);
}
/*
  get the position of the tab stop for a tab at pos
  Notes: the last tab is considered to be a repeat
*/
/*
  Find the tab stop that a tab character at column pos advances to.
  Params: pos     - current output column
          Ntabs   - number of entries in tablist, must be > 0 (asserted)
          tablist - tab stop positions, expected in ascending order
  Returns: the first stop strictly greater than pos.
  Notes: beyond the listed stops, the interval between the last two
         entries repeats (measured from 0 when there is only one entry).
         The result is always greater than pos so that a tab emits at
         least one space.
*/
int gettabposition(int pos, int Ntabs, int *tablist)
{
  int i;
  int last;
  int step;

  assert(Ntabs > 0);
  for(i=0;i<Ntabs;i++)
    if(tablist[i] > pos)
      return tablist[i];

  /* no listed stop lies beyond pos, so pos >= last from here on */
  last = tablist[Ntabs - 1];
  step = last - ((Ntabs > 1) ? tablist[Ntabs - 2] : 0);
  /* a zero or negative interval would never get past pos; fall back to
     advancing a single column */
  if(step < 1)
    step = 1;
  /* first repeated stop strictly beyond pos */
  return last + ((pos - last) / step + 1) * step;
}
/*
  remove tabs from a line, replacing with spaces
*/
/*
  Copy a line, expanding each tab into spaces up to its tab stop.
  Params: out     - destination buffer; the caller must make it large
                    enough for the expanded line and its terminator
          in      - NUL-terminated source line
          Ntabs   - number of entries in tablist, must be > 0 (asserted)
          tablist - tab stop positions
  Returns: always 0.
*/
int detabline(char *out, const char *in, int Ntabs, int *tablist)
{
  const char *src;
  int col = 0;
  int stop;

  assert(Ntabs > 0);
  for (src = in; *src != '\0'; src++) {
    if (*src == '\t') {
      /* pad with spaces up to the stop chosen for this column */
      stop = gettabposition(col, Ntabs, tablist);
      while (col < stop)
        out[col++] = ' ';
      continue;
    }
    out[col++] = *src;
  }
  out[col] = 0;
  return 0;
}
/*
  detab a file
*/
/*
  Detab a whole stream using a list of tab stops.
  Params: out     - output stream
          in      - input stream, read line by line until getline()
                    returns 0
          Ntabs   - number of entries in tablist
          tablist - tab stop positions
  Notes: prints "Out of memory" and exits if the work buffer cannot be
         allocated or grown.
*/
void detab(FILE *out, FILE *in, int Ntabs, int *tablist)
{
  char *buff;
  char *temp;
  char *line;
  size_t buffsize = 1024;
  size_t needed;

  buff = malloc(buffsize);
  if(!buff)
  {
    fprintf(stderr, "Out of memory\n");
    exit(EXIT_FAILURE);
  }
  while( (line = getline(in)) )
  {
    /* maxmemory() scans the whole line, so evaluate it once per line */
    needed = maxmemory(line, Ntabs, tablist);
    if(buffsize < needed)
    {
      temp = realloc(buff, needed);
      if(!temp)
      {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
      }
      buff = temp;
      buffsize = needed;
    }
    detabline(buff, line, Ntabs, tablist);
    fputs(buff, out);
    free(line);
  }
  free(buff);
}
/*
  detab by simply replacing tabs with spaces
*/
/*
  Detab a stream by replacing every tab with a fixed run of spaces.
  Params: out     - output stream
          in      - input stream, read until EOF
          Nspaces - number of spaces written for each tab; zero or a
                    negative value removes the tabs
*/
void detabsimple(FILE *out, FILE *in, int Nspaces)
{
  int c;
  int remaining;

  for (c = fgetc(in); c != EOF; c = fgetc(in)) {
    if (c != '\t') {
      fputc(c, out);
      continue;
    }
    for (remaining = Nspaces; remaining > 0; remaining--)
      fputc(' ', out);
  }
}
/*
  get list of tabs from argument list
*/
/*
  Read the leading run of integer arguments into an array.
  Params: args  - NULL-terminated array of argument strings
          Ntabs - return for the number of integers read; set to 0 on
                  failure
  Returns: malloc()ed array of *Ntabs integers which the caller must free,
           or 0 if the first argument is not an integer or memory runs out.
  Notes: parsing stops at the first argument that integral() rejects.
*/
int *gettablist(char **args, int *Ntabs)
{
  int N;
  int i;
  int *answer;

  *Ntabs = 0;
  for(N=0;args[N];N++)
    if(!integral(args[N]))
      break;
  if(N == 0)
    return 0;

  answer = malloc(N * sizeof *answer);
  if(!answer)
    return 0;
  /* every args[i] below N has already been validated by integral() */
  for(i=0;i<N;i++)
    answer[i] = (int) strtol(args[i], 0, 10);
  /* report the count; callers depend on it to walk the array */
  *Ntabs = N;
  return answer;
}
/*
  get list of tabs from argument list, with checking
*/
/*
  Read and validate the tab list from the argument list.
  Params: args  - NULL-terminated array of argument strings
          Ntabs - return for the number of tab stops
  Returns: malloc()ed array of tab stops, each reduced by one from the
           value given on the command line.
  Notes: prints a message and exits if gettablist() returns no list, if
         the first stop is not positive, or if the stops are not in
         ascending order.
*/
int *getcheckedtablist(char **args, int *Ntabs)
{
  int *stops = gettablist(args, Ntabs);
  const char *problem = 0;
  int k;

  if (!stops)
    problem = "Bad tab list\n";
  else if (stops[0] <= 0)
    problem = "tabs must be positive integers\n";
  else if (!ascending(stops, *Ntabs))
    problem = "tabs must be in ascending order\n";

  if (problem) {
    fputs(problem, stderr);
    exit(EXIT_FAILURE);
  }

  /* shift every stop down by one */
  for (k = 0; k < *Ntabs; ++k)
    stops[k] -= 1;
  return stops;
}
/*
  print out usage message
*/
/*
  Print the usage message to stdout and exit with EXIT_FAILURE.
  Does not return.
*/
void usage(void)
{
  static const char *const text[] = {
    "detab - replace tabs with spaces\n",
    "Usage [options] <filename>\n",
    "Options: -tab <tablist, ...>\n",
    " -s <Nspaces>\n",
    " -tab, list of tab positions (1-based). Last tab repeats.\n",
    " -s -simply replace tabs with given number of spaces\n",
    " (Note -s 0 will remove all the tabs)\n"
  };
  size_t i;

  for (i = 0; i < sizeof text / sizeof text[0]; i++)
    fputs(text[i], stdout);
  exit(EXIT_FAILURE);
}
/*
  main function, detab a text file by replacing tabs with spaces
*/
/*
  Program entry point: detab a text file, or stdin when no file is named.
  Command lines handled:
    -s <Nspaces> [file]       replace each tab with Nspaces spaces
    -tab <stop> ... [file]    expand tabs to the listed tab stops
  Anything else prints the usage message and exits.
  Returns: 0 on success; error paths exit with EXIT_FAILURE.
*/
int main(int argc, char **argv)
{
  FILE *fpin = 0;
  int Nspaces;
  int Ntabs = 0;
  int *tablist;

  if(argc == 1)
    usage();
  if(!strcmp(argv[1], "-s"))
  {
    /* argv[argc] is a null pointer, so a missing count is rejected by
       integral() */
    if(!integral(argv[2]))
      usage();
    Nspaces = (int) strtol(argv[2], 0, 10);
    if(Nspaces < 0)
    {
      fprintf(stderr, "negative number of spaces %d\n", Nspaces);
      exit(EXIT_FAILURE);
    }
    if(argc == 3)
      fpin = stdin;
    else if(argc == 4)
    {
      /* the file name is the fourth argument, argv[3] */
      fpin = fopen(argv[3], "r");
      if(!fpin)
      {
        fprintf(stderr, "Error opening %s\n", argv[3]);
        exit(EXIT_FAILURE);
      }
    }
    else
      usage();
    detabsimple(stdout, fpin, Nspaces);
    if(fpin != stdin)
      fclose(fpin);
  }
  else if(!strcmp(argv[1], "-tab"))
  {
    tablist = getcheckedtablist(argv + 2, &Ntabs);
    /* the tab stops occupy argv[2] .. argv[Ntabs + 1]; an optional file
       name follows them */
    if(argc == Ntabs + 2)
      fpin = stdin;
    else if(argc == Ntabs + 3)
    {
      fpin = fopen(argv[Ntabs + 2], "r");
      if(!fpin)
      {
        fprintf(stderr, "Error opening %s\n", argv[Ntabs+2]);
        exit(EXIT_FAILURE);
      }
    }
    else
      usage();
    detab(stdout, fpin, Ntabs, tablist);
    free(tablist);
    if(fpin != stdin)
      fclose(fpin);
  }
  else
    usage();
  fflush(stdout);
  return 0;
}