// This project is the result of combined effort of Aravinda.C, Deepak.A.P,Kamal.S,Mahantesh.H.L
// It is a system software project as prescribed by the Bangalore University.
// This project was completed on 1st July,2003.
// All of us are from UVCE,Bangalore, CSE 2004 batch.
#include <iostream>
#include<fstream>
#include<iomanip>
#include<string>
#include<vector>
#include <cstring>
#include<conio.h>
using namespace std;
// Assembler-wide state. The roles of most of these are decided by code that
// is not visible next to the declarations -- confirm against their users.
unsigned int LocCtr = 0x00;
int indicator = 0;
int bpointer = -1;
int type = 0;
int nErr = 0;
int model = -1;
int nSegs = 0;
int CLOC = 0;
int DLOC = 0;
// Output buffers for the exe file: WriteExeFile emits the first cind bytes of
// execodes and the first dind bytes of datacodes.
char execodes[350];
char datacodes[200];
// Fill levels of datacodes (dind) and execodes (cind).
int dind = 0;
int cind = 0;
// Segment register identifiers.
enum segreg { cs = 0, ds, es, ss };
//the various assembler-specific directives
string Keyword[]={ ".exit",".model",".startup",".stack","byte",".code",".data",
"db","dd","dw","dup","end","endm","endp",
"equ","far","macro","near","offset","org",
"proc","ptr","word","small","tiny","local"
};
int Code[] = { 0,1,2,3,4,5,6,7};
// Register name tables. Utilities::reg8/reg16/Segreg return the position of a
// name in these arrays, so the order must follow the 8086 register numbering
// (al/ax = 0 ... bh/di = 7; es, cs, ss, ds = 0..3).
string Byte[] = { "al","cl","dl","bl","ah","ch","dh","bh" };
// Entry 7 is "di": the previous "bi" is not an 8086 register, so "di" could
// never be recognised as a 16-bit register.
string Word[] = { "ax","cx","dx","bx","sp","bp","si","di" };
string Segment[] = { "es","cs","ss","ds"};
//------------------------------------------------------------------------------------------
// General utilities and search functions.
// Utilities has no data members; it groups the register-name lookups and the
// routine that writes the exe file.
class Utilities
{
public:
// Returns the position of the first of a[0..n-1] that equals ele, or -1.
int search(string a[],string ele,int n = 8);
// Position of s in the Byte table (8-bit register names), or -1.
int reg8(const string s);
// Position of s in the Word table (16-bit register names), or -1.
int reg16(const string s);
// Position of s in the Segment table (segment register names), or -1.
int Segreg(const string s);
// Writes the exe header, the data bytes and the code bytes to exefile;
// dloc and cloc are the data and code sizes used for the header fields.
void WriteExeFile(int dloc,int cloc,ofstream &exefile );
};
// Scans a[0..n-1] from the front.
// Returns the position of the first entry equal to ele, or -1 when no entry
// matches (which includes n <= 0).
int Utilities::search(string a[],string ele,int n )
{
    int pos = 0;
    while(pos < n && !(a[pos] == ele))
        ++pos;
    return (pos < n) ? pos : -1;
}
// Returns the position of s in the Byte table (0..7), or -1 when s is not
// one of the 8-bit register names listed there.
int Utilities::reg8(const string s)
{
    return search(Byte,s);
}
// Returns the position of s in the Word table (0..7), or -1 when s is not
// one of the 16-bit register names listed there.
int Utilities::reg16(const string s)
{
    return search(Word,s);
}
// Returns the position of s in the Segment table (0..3), or -1 when s is not
// one of the segment register names listed there.
int Utilities::Segreg(const string s)
{
    return search(Segment,s,4);
}
//write to the exefile
// Emits a DOS "MZ" executable on exefile: a 512-byte header, then the first
// dind bytes of datacodes zero-padded to a 16-byte paragraph, then the first
// cind bytes of execodes.
//   dloc    - data segment size in bytes (used for the header fields)
//   cloc    - code segment size in bytes (used for the header fields)
//   exefile - destination stream
// NOTE(review): assumes exefile was opened in binary mode and that dloc/cloc
// agree with the globals dind/cind -- confirm at the call site.
void Utilities::WriteExeFile(int dloc,int cloc,ofstream & exefile)
{
    // Stores v at p as a 16-bit little-endian word; every header field is one.
    // Explicit casts replace the old brace-initialiser, whose int ->
    // unsigned char narrowing is rejected by C++11 and later.
    struct Word16
    {
        static void put(unsigned char *p,int v)
        {
            p[0] = static_cast<unsigned char>(v & 0xff);
            p[1] = static_cast<unsigned char>((v >> 8) & 0xff);
        }
    };

    const int kParagraph = 16;     // segment granularity
    const int kPage = 512;         // page size used by the header counts
    const int kHeaderBytes = 0x200;

    // Zero bytes that round the data segment up to a paragraph boundary.
    const int rem = (kParagraph - dloc % kParagraph) % kParagraph;
    // Paragraph (relative to the load image) at which the code starts.
    const int cbeg = (dloc + rem) / kParagraph;
    const int total = dloc + rem + cloc;            // image bytes after the header
    // Bytes used in the last page (0 means the last page is full) and the
    // page count including the one-page header. Previously the high byte of
    // the first word was total/512, which is wrong once total reaches 256.
    const int lastPageBytes = total % kPage;
    const int pages = 1 + (total + kPage - 1) / kPage;

    unsigned char header[kHeaderBytes] = {0};       // unused fields stay zero
    header[0x00] = 0x4d;                            // 'M'
    header[0x01] = 0x5a;                            // 'Z'
    Word16::put(header + 0x02, lastPageBytes);      // bytes in last 512-byte page
    Word16::put(header + 0x04, pages);              // total number of 512-byte pages
    Word16::put(header + 0x06, 0x0001);             // one relocation entry
    Word16::put(header + 0x08, 0x0020);             // header size in paragraphs (512 bytes)
    Word16::put(header + 0x0a, 0x0000);             // min extra paragraphs
    Word16::put(header + 0x0c, 0xffff);             // max extra paragraphs
    // 0x0e initial SS, 0x10 initial SP, 0x12 checksum, 0x14 initial IP: all 0
    Word16::put(header + 0x16, cbeg);               // initial CS
    Word16::put(header + 0x18, 0x001e);             // offset of the relocation table
    Word16::put(header + 0x1a, 0x0000);             // overlay number (main program)
    Word16::put(header + 0x1c, 0x0001);             // kept from the original header ("apparently always")
    Word16::put(header + 0x1e, 0x0001);             // relocation entry: offset 1 ...
    Word16::put(header + 0x20, cbeg);               // ... in the code segment

    exefile.write(reinterpret_cast<const char*>(header),kHeaderBytes);

    exefile.write(datacodes,dind);//data segment codes
    // stuff 0 so that the code starts on a 16-byte multiple
    const char zeros[kParagraph] = {0};
    const int dataPad = (kParagraph - dind % kParagraph) % kParagraph;
    exefile.write(zeros,dataPad);

    exefile.write(execodes,cind);//ins execodes
}
// Global Utilities instance.
Utilities utils;
//----------------------------------------------------------------------------------------
// SearchTable is the common base of SymTab and LitTab: it owns the list of
// names and answers whether a given name is present.
class SearchTable{
public:
    bool search(const char*);
protected:
    vector<string> StringTable;   // stored names
};
// Returns true when s equals one of the stored names, false otherwise.
bool SearchTable::search(const char *s)
{
    for(vector<string>::size_type slot=0;slot<StringTable.size();++slot)
    {
        if(StringTable[slot]==s)
            return true;
    }
    return false;
}
//------------------------------------------------------------------------------------------
// Symbol table. The names live in the inherited StringTable; LocTable and
// SegTable are parallel arrays holding, for the name at the same position,
// its location and its segment code.
class SymTab : public SearchTable
{
private:
unsigned LocTable[50]; //stores the location of labels and variables
int SegTable[50]; //indicates the segment,where labels are stored
int Index; // next free position in LocTable/SegTable
public:
// Start with an empty table. Without this Index was only valid for objects
// with static storage; LitTab initialises its Index the same way.
SymTab() : Index(0) { }
// Adds a name with its location and segment code.
void assign(string,unsigned,int);
// Returns the location stored for a name, or 9999 when it is unknown.
unsigned retrieve(string);
// Writes the table to the list file.
void display(ofstream & );
// Replaces the location stored for an existing name.
void load(string,unsigned int );
};
// Stores label s in the symbol table together with its location loc and its
// segment indicator seg.
// The table has a fixed capacity: once it is full the label is reported and
// dropped instead of being written past the end of LocTable/SegTable.
void SymTab::assign(string s,unsigned loc,int seg)
{
    const int capacity = static_cast<int>(sizeof(LocTable)/sizeof(LocTable[0]));
    if(Index < 0 || Index >= capacity)
    {
        cout<<"Error: symbol table full, cannot store '"<<s<<"'\n";
        return;
    }
    StringTable.push_back(s);
    LocTable[Index]=loc;
    SegTable[Index]=seg;
    ++Index;
}
// Looks up label s and returns the location stored for it.
// Returns 9999 when the label is not in the table.
unsigned SymTab::retrieve(string s)
{
    for(vector<string>::size_type slot=0;slot<StringTable.size();++slot)
    {
        if(StringTable[slot]==s)
            return LocTable[slot];
    }
    return 9999;
}
// Replaces the address stored for label s with addr; used to resolve forward
// references within the same segment, where the label has already been
// entered.
// If s is not in the table nothing is changed. Previously the address was
// then written one position past the last entry, which is outside LocTable
// when the table is full.
void SymTab::load(string s,unsigned int addr)
{
    for(vector<string>::size_type slot=0;slot<StringTable.size();++slot)
    {
        if(StringTable[slot]==s)
        {
            LocTable[slot]=addr; // load new address into the LocTable for this label.
            return;
        }
    }
}
// Writes the symbol table to the list file os: a heading, then one row per
// name with its location as four zero-filled hex digits and the segment name
// selected by its segment code. Leaves os in decimal with a space fill.
void SymTab::display(ofstream &os)
{
    const char *segname[]={"undefined","DATA","CODE"};
    os<<"\n ---------------- SYMBOL TABLE ----------------\n"<<endl;
    os<<" NAME LOCATION SEGMENT"<<endl;
    os<<hex;
    for(vector<string>::size_type row=0;row<StringTable.size();++row)
    {
        os<<" "<<StringTable[row]<<"\t\t\t";
        os<<setw(4)<<right<<setfill('0')<<LocTable[row];
        os<<setw(1)<<"\t\t\t"<<segname[SegTable[row]]<<endl;
    }
    os<<dec<<setfill(' ');
}
// The global symbol table. Being a namespace-scope object, its storage
// (including Index) is zero-initialised before it is first used.
SymTab symble;
//---------------------------------------------------------------------------
// Literal table: the names live in the inherited StringTable and ValueTable
// holds, at the same position, the value that belongs to each name.
template <class ValueT>
class LitTab: public SearchTable
{
public:
    LitTab() : Index(0) { }
    // Adds a name together with its value.
    void assign(string,int);
    // Returns the value stored for a name.
    unsigned retrieve(string);
    // Writes the table to the list file.
    void display(ofstream&);
private:
    ValueT ValueTable[30];
    int Index;   // next free position in ValueTable
};
// Stores string s together with its corresponding value val.
// Written as an explicit specialisation ("template<>"), which standard C++
// requires when defining a member of LitTab<int> outside the template.
// The table has a fixed capacity: once it is full the literal is reported and
// dropped instead of being written past the end of ValueTable.
template<>
void LitTab<int>::assign(string s,int val)
{
    const int capacity = static_cast<int>(sizeof(ValueTable)/sizeof(ValueTable[0]));
    if(Index < 0 || Index >= capacity)
    {
        cout<<"Error: literal table full, cannot store '"<<s<<"'\n";
        return;
    }
    StringTable.push_back(s);
    ValueTable[Index++] = val;
}
// Returns the value stored for string s.
// When s is not in the table the result is -1 converted to unsigned (all
// bits set).
// Written as an explicit specialisation ("template<>"), which standard C++
// requires when defining a member of LitTab<int> outside the template.
template<>
unsigned LitTab<int>::retrieve(string s)
{
    for(vector<string>::size_type slot=0;slot<StringTable.size();++slot)
    {
        if(StringTable[slot]==s)
            return ValueTable[slot];
    }
    return static_cast<unsigned>(-1);
}
// Writes the literal table to the list file os: a heading, then one row per
// name with its value as zero-filled hex of width 4. Leaves os in decimal
// with a space fill.
// Written as an explicit specialisation ("template<>"), which standard C++
// requires when defining a member of LitTab<int> outside the template.
template<>
void LitTab<int>::display(ofstream &os)
{
    int i=0;
    os<<"\n---------------- LITERAL TABLE ----------------\n"<<endl;
    os<<" NAME VALUE "<<endl;
    os<<hex;
    for(vector<string>::iterator it=StringTable.begin();it!=StringTable.end();++it,++i)
        os<<" "<<(*it)<<"\t\t\t\t\t"<<setw(4)<<right<<setfill('0')<<ValueTable[i]<<setw(1)<<endl;
    os<<dec<<setfill(' ');
}
// The global literal table (values stored as int).
LitTab<int> literal;
//-----------------------------------------------------------------------------
// One entry of the instruction set: a mnemonic, its type code and up to four
// opcode values. The static functions look entries up in the OpTab array.
class Inset
{
// Stores the six values unchanged in the corresponding members.
public: Inset(const string& n,int t,unsigned op1,unsigned op2,unsigned op3,unsigned op4)
: name(n),type(t),opcode1(op1),opcode2(op2),opcode3(op3),opcode4(op4){ }
// Returns the position of the mnemonic in OpTab plus one, or 0 if absent.
static int IsInstruction(const string& );
// Returns opcode number i (1..4) of the entry with the given 1-based index.
static int getopcode(int index,int i);
// Returns the type code of the entry with the given 1-based index.
static int gettype(int index);
private:
string name; // mnemonic
int type; // type code of the instruction
unsigned opcode1,opcode2,opcode3,opcode4;
};
// instructions and the opcodes are stored as an array of static objects.
// Inset::IsInstruction returns an entry's position plus one, and
// Inset::getopcode / Inset::gettype take that value, so entries are addressed
// by position. The table holds 124 entries; IsInstruction scans positions
// 0..123.
static Inset OpTab[] =
{
// ( mnemonic, type, opcode1, opcode2, opcode3, opcode4 )
// NOTE(review): what opcode2..opcode4 stand for differs per type and is
// decided by the code that calls getopcode -- confirm there.
// type 1: only opcode1 is populated
Inset ("aaa", 1, 0x37 , 0x00 , 0x00 , 0x00 ),
Inset ("aas", 1, 0x3f , 0x00 , 0x00 , 0x00 ),
Inset ("cbw", 1, 0x98 , 0x00 , 0x00 , 0x00 ),
Inset ("clc", 1, 0xf8 , 0x00 , 0x00 , 0x00 ),
Inset ("cld", 1, 0xfc , 0x00 , 0x00 , 0x00 ),
Inset ("cli", 1, 0xfa , 0x00 , 0x00 , 0x00 ),
Inset ("cmc", 1, 0xf5 , 0x00 , 0x00 , 0x00 ),
Inset ("cmpsb", 1, 0xa6 , 0x00 , 0x00 , 0x00 ),
Inset ("cmpsw", 1, 0xa7 , 0x00 , 0x00 , 0x00 ),
Inset ("cwd", 1, 0x99 , 0x00 , 0x00 , 0x00 ),
Inset ("daa", 1, 0x27 , 0x00 , 0x00 , 0x00 ),
Inset ("das", 1, 0x2f , 0x00 , 0x00 , 0x00 ),
Inset ("hlt", 1, 0xf4 , 0x00 , 0x00 , 0x00 ),
Inset ("insb", 1, 0x6c , 0x00 , 0x00 , 0x00 ),
Inset ("insw", 1, 0x6d , 0x00 , 0x00 , 0x00 ),
Inset ("into", 1, 0xce , 0x00 , 0x00 , 0x00 ),
Inset ("iret", 1, 0xcf , 0x00 , 0x00 , 0x00 ),
Inset ("lahf", 1, 0x9f , 0x00 , 0x00 , 0x00 ),
Inset ("lodsb", 1, 0xac , 0x00 , 0x00 , 0x00 ),
Inset ("lodsw", 1, 0xad , 0x00 , 0x00 , 0x00 ),
Inset ("movsb", 1, 0xa4 , 0x00 , 0x00 , 0x00 ),
Inset ("movsw", 1, 0xa5 , 0x00 , 0x00 , 0x00 ),
Inset ("nop", 1, 0x90 , 0x00 , 0x00 , 0x00 ),
Inset ("outsb", 1, 0x6e , 0x00 , 0x00 , 0x00 ),
Inset ("outsw", 1, 0x6f , 0x00 , 0x00 , 0x00 ),
Inset ("popa", 1, 0x61 , 0x00 , 0x00 , 0x00 ),
Inset ("popf", 1, 0x9d , 0x00 , 0x00 , 0x00 ),
Inset ("pusha", 1, 0x60 , 0x00 , 0x00 , 0x00 ),
Inset ("pushf", 1, 0x9c , 0x00 , 0x00 , 0x00 ),
Inset ("sahf", 1, 0x9e , 0x00 , 0x00 , 0x00 ),
Inset ("scasb", 1, 0xae , 0x00 , 0x00 , 0x00 ),
Inset ("scasw", 1, 0xaf , 0x00 , 0x00 , 0x00 ),
Inset ("segcs", 1, 0x2e , 0x00 , 0x00 , 0x00 ),
Inset ("segds", 1, 0x3e , 0x00 , 0x00 , 0x00 ),
Inset ("seges", 1, 0x26 , 0x00 , 0x00 , 0x00 ),
Inset ("segss", 1, 0x36 , 0x00 , 0x00 , 0x00 ),
Inset ("stc", 1, 0xf9 , 0x00 , 0x00 , 0x00 ),
Inset ("std", 1, 0xfd , 0x00 , 0x00 , 0x00 ),
Inset ("sti", 1, 0xfb , 0x00 , 0x00 , 0x00 ),
Inset ("stosb", 1, 0xaa , 0x00 , 0x00 , 0x00 ),
Inset ("stosw", 1, 0xab , 0x00 , 0x00 , 0x00 ),
Inset ("wait", 1, 0x9b , 0x00 , 0x00 , 0x00 ),
Inset ("xlat", 1, 0xd7 , 0x00 , 0x00 , 0x00 ),
// type 2: 0 or 1 operand and 2 bytes
// the second byte is the value supplied by the operand if there is one,
// else it is substituted by 0x0a
Inset ("aad", 2, 0xd5 , 0x0a , 0x00 , 0x00 ),
Inset ("aam", 2, 0xd4 , 0x0a , 0x00 , 0x00 ),
// type 3: 2 operands -- all combinations except segment registers
Inset ("adc", 3, 0x14 , 0x00 , 0xd0 , 0x14 ),
Inset ("add", 3, 0x00 , 0x00 , 0xc0 , 0x04 ),
Inset ("and", 3, 0x24 , 0x00 , 0xe0 , 0x24 ),
Inset ("cmp", 3, 0x3a , 0x00 , 0xf8 , 0x3c ),
Inset ("or", 3, 0x0c , 0x00 , 0xc8 , 0x0c ),
Inset ("sbb", 3, 0x1c , 0x00 , 0xd8 , 0x1c ),
Inset ("sub", 3, 0x2c , 0x00 , 0xe8 , 0x2c ),
Inset ("test", 3, 0xa8 , 0x00 , 0x00 , 0x00 ),
Inset ("xor", 3, 0x34 , 0x00 , 0xf0 , 0x34 ),
// types 4 and 5
Inset ("call", 4, 0xe8 , 0x00 , 0x02 , 0x03 ),
Inset ("jmp", 4, 0xeb , 0x00 , 0x04 , 0x05 ),
Inset ("inc", 5, 0xfe , 0x00 , 0x40 , 0x00 ),
Inset ("dec", 5, 0xfe , 0x00 , 0x48 , 0x00 ),
// type 6: 1 operand ( rm8 or rm16)
Inset ("div", 6, 0xf6 , 0xf0 , 0x00 , 0x00 ),
Inset ("idiv", 6, 0xf6 , 0xf8 , 0x00 , 0x00 ),
Inset ("imul", 6, 0xf6 , 0xe8 , 0x00 , 0x00 ),
Inset ("mul", 6, 0xf6 , 0xe0 , 0x00 , 0x00 ),
Inset ("neg", 6, 0xf6 , 0xd8 , 0x00 , 0x00 ),
Inset ("not", 6, 0xf6 , 0xd0 , 0x00 , 0x00 ),
// types 7 and 8
Inset ("in", 7, 0xe4 , 0xec , 0x00 , 0x00 ),
Inset ("int", 8, 0xcd , 0x00 , 0x00 , 0x00 ),
// type 9: Byte relative
Inset ("ja", 9, 0x77 , 0x00 , 0x00 , 0x00 ),
Inset ("jae", 9, 0x73 , 0x00 , 0x00 , 0x00 ),
Inset ("jb", 9, 0x72 , 0x00 , 0x00 , 0x00 ),
Inset ("jbe", 9, 0x76 , 0x00 , 0x00 , 0x00 ),
Inset ("jc", 9, 0x72 , 0x00 , 0x00 , 0x00 ),
Inset ("jcxz", 9, 0xe3 , 0x00 , 0x00 , 0x00 ),
Inset ("je", 9, 0x74 , 0x00 , 0x00 , 0x00 ),
Inset ("jg", 9, 0x7f , 0x00 , 0x00 , 0x00 ),
Inset ("jge", 9, 0x7d , 0x00 , 0x00 , 0x00 ),
Inset ("jl", 9, 0x7c , 0x00 , 0x00 , 0x00 ),
Inset ("jle", 9, 0x7e , 0x00 , 0x00 , 0x00 ),
Inset ("jna", 9, 0x76 , 0x00 , 0x00 , 0x00 ),
Inset ("jnae", 9, 0x72 , 0x00 , 0x00 , 0x00 ),
Inset ("jnb", 9, 0x73 , 0x00 , 0x00 , 0x00 ),
Inset ("jnbe", 9, 0x77 , 0x00 , 0x00 , 0x00 ),
Inset ("jnc", 9, 0x73 , 0x00 , 0x00 , 0x00 ),
Inset ("jne", 9, 0x75 , 0x00 , 0x00 , 0x00 ),
Inset ("jng", 9, 0x7e , 0x00 , 0x00 , 0x00 ),
Inset ("jnge", 9, 0x7c , 0x00 , 0x00 , 0x00 ),
Inset ("jnl", 9, 0x7d , 0x00 , 0x00 , 0x00 ),
Inset ("jnle", 9, 0x7f , 0x00 , 0x00 , 0x00 ),
Inset ("jno", 9, 0x71 , 0x00 , 0x00 , 0x00 ),
Inset ("jnp", 9, 0x7b , 0x00 , 0x00 , 0x00 ),
Inset ("jns", 9, 0x79 , 0x00 , 0x00 , 0x00 ),
Inset ("jnz", 9, 0x75 , 0x00 , 0x00 , 0x00 ),
Inset ("jo", 9, 0x70 , 0x00 , 0x00 , 0x00 ),
Inset ("jp", 9, 0x7a , 0x00 , 0x00 , 0x00 ),
Inset ("jpe", 9, 0x7a , 0x00 , 0x00 , 0x00 ),
Inset ("jpo", 9, 0x7b , 0x00 , 0x00 , 0x00 ),
Inset ("js", 9, 0x78 , 0x00 , 0x00 , 0x00 ),
Inset ("jz", 9, 0x74 , 0x00 , 0x00 , 0x00 ),
Inset ("loop", 9, 0xe2 , 0x00 , 0x00 , 0x00 ),
Inset ("loope", 9, 0xe1 , 0x00 , 0x00 , 0x00 ),
Inset ("loopne", 9, 0xe0 , 0x00 , 0x00 , 0x00 ),
Inset ("loopnz", 9, 0xe0 , 0x00 , 0x00 , 0x00 ),
Inset ("loopz", 9, 0xe1 , 0x00 , 0x00 , 0x00 ),
// type 11: register to memory
Inset ("lea", 11, 0x8d , 0x00 , 0x00 , 0x00 ),
Inset ("les", 11, 0xc4 , 0x00 , 0x00 , 0x00 ),
Inset ("lds", 11, 0xc5 , 0x00 , 0x00 , 0x00 ),
// types 12 to 15: one type per mnemonic
Inset ("mov", 12, 0xa0 , 0xb0 , 0x8a , 0x8c ),
Inset ("out", 13, 0xe6 , 0xee , 0xff , 0x00 ),
Inset ("push", 14, 0x06 , 0x50 , 0xff , 0x06 ),
Inset ("pop", 15, 0x07 , 0x58 , 0x8f , 0x00 ),
// type 16: rotate and shift
Inset ("rcl", 16, 0xd0 , 0x02 , 0x00 , 0x00 ),
Inset ("rcr", 16, 0xd0 , 0x03 , 0x00 , 0x00 ),
Inset ("sal", 16, 0xd0 , 0x04 , 0x00 , 0x00 ),
Inset ("sar", 16, 0xd0 , 0x07 , 0x00 , 0x00 ),
Inset ("shl", 16, 0xd0 , 0x04 , 0x00 , 0x00 ),
Inset ("shr", 16, 0xd0 , 0x05 , 0x00 , 0x00 ),
Inset ("ror", 16, 0xd0 , 0x01 , 0x00 , 0x00 ),
Inset ("rol", 16, 0xd0 , 0x00 , 0x00 , 0x00 ),
// type 17: prefixes
Inset ("rep", 17, 0xf3 , 0x00 , 0x00 , 0x00 ),
Inset ("repe", 17, 0xf3 , 0x00 , 0x00 , 0x00 ),
Inset ("repne", 17, 0xf2 , 0x00 , 0x00 , 0x00 ),
Inset ("lock", 17, 0xf0 , 0x00 , 0x00 , 0x00 ),
// types 18 and 19
Inset ("ret", 18, 0xc3 , 0xc2 , 0x00 , 0x00 ),
Inset ("retf", 18, 0xcb , 0xca , 0x00 , 0x00 ),
Inset ("xchg", 19, 0x86 , 0x90 , 0x00 , 0x00 )
};
// Looks word up among the mnemonics in OpTab.
// Returns the position of the matching entry plus one (the 1-based index that
// getopcode() and gettype() expect), or 0 when word is not an instruction.
int Inset::IsInstruction(const string &word)
{
    // Take the bound from the table itself, so adding or removing an OpTab
    // entry cannot make the scan read past the end or miss the last entries.
    const int count = static_cast<int>(sizeof(OpTab)/sizeof(OpTab[0]));
    for(int i=0;i<count;i++)
    {
        if(OpTab[i].name==word)
            return i+1;
    }
    return 0;
}
// returns the relevant opcode depending on the op no. requested.
int Inset::getopcode(int index,int i)
{
if(i == 1) return OpTab[index-1].opcode1;
if(i == 2) return OpTab[index-1].opcode2;
if(i == 3) return OpTab[index-1].opcode3;
if(i == 4) return OpTab[index-1].opcode4;
}
// Returns the type code of the OpTab entry with the given 1-based index,
// i.e. the value returned by IsInstruction().
// Returns 0 (no entry uses that type code) when index does not name an entry;
// in particular index 0, the "not an instruction" result of IsInstruction,
// no longer reads before the start of OpTab.
int Inset::gettype(int index)
{
    const int count = static_cast<int>(sizeof(OpTab)/sizeof(OpTab[0]));
    if(index < 1 || index > count)
        return 0;
    return OpTab[index-1].type;
}
//------------------------------------------------------------------------------------
// Macro processor: reads the source file and writes an intermediate file
// ("tmp.txt", opened by the constructor) in which macro invocations are
// replaced by their definitions.
class MacroProcessor
{
private:
ifstream source; // the assembly source being read
ofstream intermediate; // macros are expanded & written to this intermediate file.
// One nameTable entry per macro: its name plus the positions of the
// beginning and the end of its definition in DEFTAB.
class nameTable
{
public:
string name; // macro name
int start; // beginning of the definition in DEFTAB
int end; // end of the definition in DEFTAB
} ;
nameTable NAMTAB[5]; // room for 5 macros
// for local variables
class twoString
{
public:
string original; // original name of the local variable.
string updated; // new name.
};
string DEFTAB[100];// macro definitions are stored in DEFTAB.
string ARGTAB[10]; // stores arguments from the main program.
string PARTAB[10]; // parameter table of the macro currently being expanded.
twoString LOCTAB[10]; // up to 10 local variables.
int nArgs; // number of arguments.
int nPars; // number of parameters.
int nDefs; // number of macro definitions.
int nLocs; // number of local variables.
int Expanding;// indicates whether the line is from DEFTAB or input file.
int defCntr; // counter for the no. of macro definitions.
string tmp[30];// array of strings for tokenizing.
int token_no; // no. of tokens in a line.
string line; // the current line.
char char1,char2; // used for local labels; both start at 'a'.
public:
// Opens the named source file and the intermediate file; exits the program
// if the source file cannot be opened.
MacroProcessor(char* );
// Closes both files.
~MacroProcessor();
// Writes the current line to the intermediate file.
void writeLine();
bool tokenize();
void processLine();
void define(string );
void updateChar();
void initLocTab();
void expand(int j);
void getLine(int i=0);
void macro_process();
};
// The constructor opens the source file `name` for reading and the
// intermediate file "tmp.txt" for writing.
// All macro invocations are replaced by their definitions in that file.
// If either file cannot be opened a message is printed and the program exits
// with status -1.
// Every counter starts at 0 and both label characters at 'a', so no member is
// left indeterminate (nPars, nLocs, Expanding and token_no used to be).
MacroProcessor::MacroProcessor(char* name)
{
    source.open(name);
    if(!source)
    {
        cout<<"Fatal error: Unable to open Source file\n";
        exit(-1);
    }
    intermediate.open("tmp.txt");
    if(!intermediate)
    {
        cout<<"Fatal error: Unable to open intermediate file tmp.txt\n";
        exit(-1);
    }
    nArgs=nPars=nDefs=nLocs=0;
    defCntr=0;
    Expanding=0;
    token_no=0;
    char1='a';
    char2='a';
}
// Destructor closes the source file first and then the intermediate file.
// (The stream members would also close their files when they are destroyed;
// the explicit calls make the order visible.)
MacroProcessor::~MacroProcessor()
{
source.close();
intermediate.close();
}
// Writes every source line to the intermediate file.
void MacroProcessor::writeLine()
{
string s = line;
intermediate<<line<<endl;
if(s.find(".exit")!=-1)