Theme Graphic
Theme Graphic

Protein Folding

Triumphs and woes of a PhD student working on the protein folding problem. Fortran 77 (yes, really), Message Passing Interface (MPI), C,...

Subscribe

Author

I'm an ex-games programmer, now a PhD student at Leeds University studying the protein folding problem. Read my books "MiniBasic, how to write a script interpreter" and "Basic Algorithms". You can also read my MRes thesis on fuzzy logic trees.

Archive

Tags

Posted on Friday, August 07, 2009 at 6:06 AM

detab utility in C


Here's a detab utility.

This should be bread-and-butter code. In fact, it is tricky to ensure that a malicious user cannot cause a buffer overrun; hence the maxmemory() routine. getline() is also difficult.





 /*
  Utility to replace tabs in text files with spaces.
  Can either do a blind replace or replace with user-specified tab stops
  By Malcolm McLean, 2009
 */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
  chcount - count the occurrences of a character in a string
  Params: str - NUL-terminated string to scan
          ch - character to look for
  Returns: number of characters in str that compare equal to ch
 */
size_t chcount(const char *str, int ch)
{
  const char *p;
  size_t hits = 0;

  for (p = str; *p != '\0'; p++) {
    if (*p == ch)
      hits++;
  }
  return hits;
}

/* is a string an integer? */
/*
  integral - test whether a string holds a decimal integer that fits in an int
  Params: str - string to test (may be a null pointer)
  Returns: 1 if the whole string parses as a base-10 integer in the range
           INT_MIN..INT_MAX, 0 otherwise (null and empty strings give 0)
  Notes: parsing is done by strtol(), so leading white space and an
         optional sign are accepted.
 */
int integral(const char *str)
{
  char *end;
  long x;

  if (!str || !*str)
    return 0;

  /* strtol() signals overflow only through errno. Without this check an
     over-long number is clamped to LONG_MAX/LONG_MIN, which slips through
     the range test below wherever long and int have the same width. */
  errno = 0;
  x = strtol(str, &end, 10);
  if (errno == ERANGE)
    return 0;

  /* no digits were consumed at all */
  if (end == str)
    return 0;

  if (x > INT_MAX || x < INT_MIN)
    return 0;

  /* trailing junk after the number */
  if (*end)
    return 0;

  return 1;
}

/* check that an array of integers is ascending */
/*
  ascending - check that an array of integers is strictly ascending
  Params: x - the array
          N - number of elements, must be greater than zero (asserted)
  Returns: 1 if every element is greater than its predecessor, else 0
 */
int ascending(int *x, int N)
{
  int prev;
  int k;

  assert(N > 0);

  prev = x[0];
  for (k = 1; k < N; k++) {
    /* equal neighbours count as not ascending */
    if (x[k] <= prev)
      return 0;
    prev = x[k];
  }
  return 1;
}

/*
  get line from file
  Params: fp - input file
  Returns: malloced line, 0 on EOF
 */
/*
  getline - read one line of arbitrary length from a file
  Params: fp - input file
  Returns: malloced NUL-terminated line, including the trailing newline
           when one was read; 0 when nothing could be read before EOF.
           The caller frees the result.
  Notes: prints "Out of memory" to stderr and exits if the buffer cannot
         be allocated or grown.
         NOTE(review): POSIX <stdio.h> declares a different function that
         is also called getline(); builds that enable the POSIX
         declarations may report a conflicting declaration.
 */
char *getline(FILE *fp)
{
  size_t capacity = 128;
  size_t length = 0;
  char *line;
  int ch;

  line = malloc(capacity);
  if (!line)
    goto out_of_memory;

  for (;;) {
    ch = fgetc(fp);
    if (ch == EOF)
      break;

    /* keep room for this character plus the terminating NUL */
    if (length >= capacity - 2) {
      size_t grown = capacity + capacity / 10;
      char *bigger;

      /* size_t wrapped round */
      if (grown < capacity)
        goto out_of_memory;
      bigger = realloc(line, grown);
      if (!bigger)
        goto out_of_memory;
      line = bigger;
      capacity = grown;
    }

    line[length++] = ch;
    if (ch == '\n')
      break;
  }
  line[length] = 0;

  /* nothing read and the stream is exhausted: report EOF */
  if (length == 0 && ch == EOF) {
    free(line);
    return 0;
  }
  return line;

out_of_memory:
  free(line);
  fprintf(stderr, "Out of memory\n");
  exit(EXIT_FAILURE);
}

/* calculate maximum amount of memory needed to hold tabbed line */
/*
  maxmemory - calculate maximum amount of memory needed to hold a
              detabbed line
  Params: str - the line, still containing its tabs
          Ntabs - number of entries in tablist
          tablist - tab stop positions
  Returns: strlen(str) + last stop + 1, plus one repeat interval
           (last stop minus the one before it, or minus 0 when there is
           only one stop) for every tab in the line
  Notes: prints "Out of memory" to stderr and exits if the size does not
         fit in a size_t.
 */
size_t maxmemory(char *str, int Ntabs, int *tablist)
{
  int last;
  int penultimate;
  size_t step;
  size_t tabs_in_line;
  size_t extra;
  size_t total;

  last = tablist[Ntabs - 1];
  penultimate = (Ntabs > 1) ? tablist[Ntabs - 2] : 0;
  step = (size_t) (last - penultimate);

  /* assume the whole string sits after the last tab stop */
  total = strlen(str);
  if (total + last + 1 < total)
    goto too_big;
  total += last + 1;

  /* now assume that all the tabs are stacked up after the string */
  tabs_in_line = chcount(str, '\t');
  if (tabs_in_line == 0)
    return total;

  extra = step * tabs_in_line;
  /* dividing back detects a wrapped multiplication */
  if (extra / tabs_in_line != step)
    goto too_big;
  if (total + extra < total)
    goto too_big;
  return total + extra;

too_big:
  fprintf(stderr, "Out of memory\n");
  exit(EXIT_FAILURE);
}

/*
  get the position of a tab at pos
  Notes: the last tab is considered to be a repeat
 */
/*
  gettabposition - get the column a tab at position pos moves to
  Params: pos - current column (0-based)
          Ntabs - number of entries in tablist, must be > 0 (asserted)
          tablist - ascending tab stop columns (0-based)
  Returns: the first tab stop strictly greater than pos
  Notes: past the end of the list the last interval (last stop minus the
         one before it, or minus 0 when there is only one stop) repeats
         indefinitely. If that interval is not positive there is nothing
         to repeat and pos is returned unchanged, so the tab expands to
         nothing instead of looping forever.
 */
int gettabposition(int pos, int Ntabs, int *tablist)
{
  int i;
  int last, penultimate;
  int step;

  assert(Ntabs > 0);

  for (i = 0; i < Ntabs; i++)
    if (tablist[i] > pos)
      return tablist[i];

  last = tablist[Ntabs - 1];
  penultimate = (Ntabs > 1) ? tablist[Ntabs - 2] : 0;
  step = last - penultimate;

  /* a single stop at column 0 gives a zero interval */
  if (step <= 0)
    return pos;

  /* Here last <= pos. Pick the first repeated stop strictly beyond pos,
     so that a tab sitting exactly on a repeated stop still advances,
     matching the strict comparison used for the listed stops above. */
  return last + ((pos - last) / step + 1) * step;
}

/* remove tabs from a line, replacing with spaces */
/*
  detabline - remove tabs from a line, replacing with spaces
  Params: out - output buffer; the caller must make it large enough
          in - NUL-terminated input line
          Ntabs - number of entries in tablist, must be > 0 (asserted)
          tablist - tab stop positions
  Returns: always 0
 */
int detabline(char *out, const char *in, int Ntabs, int *tablist)
{
  const char *src;
  int col = 0;
  int stop;

  assert(Ntabs > 0);

  for (src = in; *src; src++) {
    if (*src == '\t') {
      /* pad with spaces up to the stop chosen for this column */
      stop = gettabposition(col, Ntabs, tablist);
      while (col < stop)
        out[col++] = ' ';
    } else {
      out[col++] = *src;
    }
  }
  out[col] = 0;

  return 0;
}

/* detab a file */
/*
  detab - detab a file using a list of tab stops
  Params: out - output file
          in - input file
          Ntabs - number of entries in tablist
          tablist - tab stop positions
  Notes: reads the input one line at a time with getline(), expands each
         line with detabline() into a buffer sized by maxmemory(), and
         writes the result to out. Prints "Out of memory" to stderr and
         exits if the working buffer cannot be allocated or grown.
 */
void detab(FILE *out, FILE *in, int Ntabs, int *tablist)
{
  char *buff;
  char *temp;
  char *line;
  size_t buffsize = 1024;
  size_t needed;

  buff = malloc(buffsize);
  if (!buff) {
    fprintf(stderr, "Out of memory\n");
    exit(EXIT_FAILURE);
  }

  while ((line = getline(in))) {
    /* work out the worst-case size once per line */
    needed = maxmemory(line, Ntabs, tablist);
    if (buffsize < needed) {
      temp = realloc(buff, needed);
      if (!temp) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
      }
      buff = temp;
      buffsize = needed;
    }

    detabline(buff, line, Ntabs, tablist);
    fputs(buff, out);
    free(line);
  }

  free(buff);
}

/* detab by simply replacing tabs with spaces */
/*
  detabsimple - detab by simply replacing each tab with a fixed number
                of spaces
  Params: out - output file
          in - input file
          Nspaces - spaces written per tab (zero or less writes none)
 */
void detabsimple(FILE *out, FILE *in, int Nspaces)
{
  int c;
  int remaining;

  for (c = fgetc(in); c != EOF; c = fgetc(in)) {
    if (c != '\t') {
      fputc(c, out);
      continue;
    }
    for (remaining = Nspaces; remaining > 0; remaining--)
      fputc(' ', out);
  }
}

/* get list of tabs from argument list */
/*
  gettablist - get list of tabs from argument list
  Params: args - null-terminated array of argument strings
          Ntabs - return for the number of tabs read
  Returns: malloced array holding the leading run of integer arguments
           converted to int, or 0 (with *Ntabs set to 0) if the first
           argument is not an integer or memory runs out. The caller
           frees the result.
 */
int *gettablist(char **args, int *Ntabs)
{
  int N;
  int i;
  int *answer;

  /* the list ends at the first argument that is not an integer */
  for (N = 0; args[N]; N++)
    if (!integral(args[N]))
      break;

  *Ntabs = 0;
  if (N == 0)
    return 0;

  answer = malloc(N * sizeof *answer);
  if (!answer)
    return 0;

  /* every entry has already been validated by integral() */
  for (i = 0; i < N; i++)
    answer[i] = (int) strtol(args[i], NULL, 10);

  *Ntabs = N;
  return answer;
}

/* get list of tabs from argument list, with checking */
/*
  getcheckedtablist - get list of tabs from argument list, with checking
  Params: args - null-terminated array of argument strings
          Ntabs - return for the number of tabs read
  Returns: malloced array of tab stops, each reduced by one from the
           value given on the command line
  Notes: prints a message to stderr and exits if no list could be read,
         if the first tab is not positive, or if the tabs are not in
         ascending order.
 */
int *getcheckedtablist(char **args, int *Ntabs)
{
  const char *complaint = 0;
  int *stops;
  int k;

  stops = gettablist(args, Ntabs);
  if (!stops)
    complaint = "Bad tab list\n";
  else if (stops[0] <= 0)
    complaint = "tabs must be positive integers\n";
  else if (!ascending(stops, *Ntabs))
    complaint = "tabs must be in ascending order\n";

  if (complaint) {
    fputs(complaint, stderr);
    exit(EXIT_FAILURE);
  }

  /* shift every stop down by one */
  for (k = 0; k < *Ntabs; k++)
    stops[k]--;

  return stops;
}

/* print out usage message */
/*
  usage - print out the usage message on stdout and exit with
          EXIT_FAILURE
 */
void usage(void)
{
  static const char *const text[] = {
    "detab - replace tabs with spaces\n",
    "Usage [options] <filename>\n",
    "Options: -tab <tablist, ...>\n",
    " -s <Nspaces>\n",
    " -tab, list of tab positions (1-based). Last tab repeats.\n",
    " -s -simply replace tabs with given number of spaces\n",
    " (Note -s 0 will remove all the tabs)\n"
  };
  size_t k;

  for (k = 0; k < sizeof text / sizeof text[0]; k++)
    fputs(text[k], stdout);

  exit(EXIT_FAILURE);
}

/* main function, detab a text file by replacing tabs with spaces */
/*
  main - detab a text file by replacing tabs with spaces
  Usage: detab -s <Nspaces> [filename]
         detab -tab <tab> [<tab> ...] [filename]
  Notes: reads stdin when no filename is given and always writes to
         stdout. Any other argument pattern prints the usage message
         and exits.
 */
int main(int argc, char **argv)
{
  FILE *fpin = NULL;
  int Nspaces;
  int Ntabs;
  int *tablist;

  /* argv[1] is examined below, so it must exist */
  if (argc < 2)
    usage();

  if (!strcmp(argv[1], "-s")) {
    /* argv[2] is the null terminator when argc == 2; integral() rejects it */
    if (!integral(argv[2]))
      usage();
    Nspaces = (int) strtol(argv[2], NULL, 10);
    if (Nspaces < 0) {
      fprintf(stderr, "negative number of spaces %d\n", Nspaces);
      exit(EXIT_FAILURE);
    }

    if (argc == 3)
      fpin = stdin;
    else if (argc == 4) {
      /* the file name is the fourth argument, argv[3]; argv[4] is the
         null terminator when argc == 4 */
      fpin = fopen(argv[3], "r");
      if (!fpin) {
        fprintf(stderr, "Error opening %s\n", argv[3]);
        exit(EXIT_FAILURE);
      }
    } else
      usage();

    detabsimple(stdout, fpin, Nspaces);
    if (fpin != stdin)
      fclose(fpin);
  } else if (!strcmp(argv[1], "-tab")) {
    tablist = getcheckedtablist(argv + 2, &Ntabs);

    /* argv[2] .. argv[Ntabs + 1] hold the tab list */
    if (argc == Ntabs + 2)
      fpin = stdin;
    else if (argc == Ntabs + 3) {
      fpin = fopen(argv[Ntabs + 2], "r");
      if (!fpin) {
        fprintf(stderr, "Error opening %s\n", argv[Ntabs + 2]);
        exit(EXIT_FAILURE);
      }
    } else
      usage();

    detab(stdout, fpin, Ntabs, tablist);
    if (fpin != stdin)
      fclose(fpin);
    free(tablist);
  } else
    usage();

  fflush(stdout);
  return 0;
}
Bookmark: Submit To Digg Submit To reddit Submit To del.icio.us Bookmark With StumbleUpon Bookmark With FaceBook Bookmark With Google Bookmarks   Share: Share By Email By Email

0 comments on "detab utility in C"
No comments posted yet.

Leave A Comment
Subject:


Comment:
   Bold Italic Underline          Code Link Image Horizontal Rule


Because you do not have or are not logged in to your Programmer's Heaven account, please enter your name.

Name:


To help prevent comment SPAM, please enter the magic code '31' in the box:




Posting Rules
Please follow these rules when posting comments on blog posts.
  • Do not post anything that is racist, hate speech or of a sexual or adult nature.
  • Do not post or link to anything that infringes copyright laws.
  • Posting about security or legal topics is fine so long as you are not glorifying or encouraging people to perform illegal activities.
  • Both the author of this blog and the Programmer's Heaven administrators may delete any inappropriate comments without notice at their own discretion.
 

Recent Jobs

Official Programmer's Heaven Blogs
Web Hosting | Browser and Social Games | Gadgets

Popular resources on Programmersheaven.com
Assembly | Basic | C | C# | C++ | Delphi | Flash | Java | JavaScript | Pascal | Perl | PHP | Python | Ruby | Visual Basic
© Copyright 2011 Programmersheaven.com - All rights reserved.
Reproduction in whole or in part, in any form or medium without express written permission is prohibited.
Violators of this policy may be subject to legal action. Please read our Terms Of Use and Privacy Statement for more information.
Operated by CommunityHeaven, a BootstrapLabs company.