/* C compiler Copyright 1972 Bell Telephone Laboratories, Inc. */ /* * Origin of the space used to store * an expression tree. * Overwrites init and main code. * Note that ospace is at the beginning of the text segment * in both c0 and c1 so addresses pointing into it are * the same in both passes. */ ossiz 250; ospace() {} /* fake */ /* * Enter a keyword into the symbol hash table. * s is the string, t is the internal id. */ init(s, t) char s[]; { extern lookup, symbuf, namsiz; char symbuf[], sp[]; int np[], i; i = namsiz; sp = symbuf; while(i--) if ((*sp++ = *s++)=='\0') --s; np = lookup(); /* Sort is keyword, type is keyword number. */ *np++ = 1; *np = t; } main(argc, argv) int argv[]; { extern init, flush; extern extdef, eof, open, creat; extern fout, fin, error, exit, nerror, tmpfil; if(argc<4) { error("Arg count"); exit(1); } if((fin=open(argv[1],0))<0) { error("Can't find %s", argv[1]); exit(1); } if((fout=creat(argv[2], 017))<0) { error("Can't create %s", argv[2]); exit(1); } tmpfil = argv[3]; /* * These numbers are not arbitrary. * 0-4 are types. * int must be 0, the default type. * >1 aren't integers. * 5-8 (TODO: 9?) are sorts, i.e. storage types. */ init("int", 0); init("char", 1); init("float", 2); init("double", 3); /* init("long", 4); */ init("auto", 5); init("extern", 6); init("static", 7); init("goto", 10); init("return", 11); init("if", 12); init("while", 13); init("else", 14); init("switch", 15); init("case", 16); init("break", 17); init("continue", 18); init("do", 19); init("default", 20); /* End of space that is safe to overwrite after init is done. */ /* A C program is a series of external definitions. */ while(!eof) { extdef(); blkend(); } flush(); flshw(); exit(nerror!=0); } /* * Get the symbol named by the string in symbuf. * If it is not in the hash table yet, enter it. */ lookup() { extern hshtab[], hshsiz, pssiz, symbuf[]; extern hshlen, hshused, exit, error, nwps; auto i, j, np[], sp[], rp[]; i = 0; sp = symbuf; j = nwps; while(j--) i =+ *sp++; if (i<0) i = -i; i =% hshsiz; i =* pssiz; while(*(np = &hshtab[i+4])) { sp = symbuf; j = nwps; while(j--) if (*np++ != *sp++) goto no; return(&hshtab[i]); no: if ((i =+ pssiz) >= hshlen) i = 0; } if(hshused++ > hshsiz) { error("Symbol table overflow"); exit(1); } rp = np = &hshtab[i]; sp = symbuf; j = 4; while(j--) *np++ = 0; j = nwps; while(j--) *np++ = *sp++; return(rp); } /* * Lexical analysis, return token type. * Names will have csym set. * Keywords and constants will have cval set. */ symbol() { extern peeksym, peekc, eof, getchar, subseq, error, line; extern csym[], getstr, symbuf, namsiz, lookup[], ctab, cval; auto b, c; char symbuf[], sp[], ctab[]; /* Return peeked symbol if there has been one */ if (peeksym>=0) { c = peeksym; peeksym = -1; return(c); } /* Get peeked character if possible, or new one */ if (peekc) { c = peekc; peekc = 0; } else if (eof) return(0); else c = getchar(); loop: /* Characters that stand for themselves are returned as-is. */ switch(ctab[c]) { case 125: /* newline */ line++; case 126: /* white space */ c = getchar(); goto loop; case 0: /* EOF */ eof++; return(0); case 40: /* + */ /* + or ++ */ return(subseq(c,40,30)); case 41: /* - */ /* - or -- */ return(subseq(c,41,31)); case 80: /* = */ /* simple = */ if (subseq(' ',0,1)) return(80); c = symbol(); /* =op */ if (c>=40 & c<=49) return(c+30); /* == */ if (c==80) return(60); peeksym = c; return(80); case 63: /* < */ /* << */ if (subseq(c,0,1)) return(46); /* < or <= */ return(subseq('=',63,62)); case 65: /* > */ /* >> */ if (subseq(c,0,1)) return(45); /* > or >= */ return(subseq('=',65,64)); case 34: /* ! */ /* ! or != */ return(subseq('=',34,61)); case 43: /* / */ /* ignore characters in comment */ if (subseq('*',1,0)) return(43); com: c = getchar(); com1: if (c=='\0') { eof++; error("Nonterminated comment"); return(0); } if (c=='\n') line++; if (c!='*') goto com; c = getchar(); if (c!='/') goto com1; c = getchar(); goto loop; case 124: /* number */ cval = 0; if (c=='0') b = 8; else b = 10; while(ctab[c]==124) { cval = cval*b + c -'0'; c = getchar(); } peekc = c; return(21); case 122: /* " */ /* cval set to label number of the string */ return(getstr()); case 121: /* ' */ return(getcc()); case 123: /* letter */ sp = symbuf; while(ctab[c]==123 | ctab[c]==124) { if (sp= 0) printf("%o,", c); printf("0;.even;.text\n"); return(22); } /* * Put a character constant into cval. * Return simple constant token. */ getcc() { extern cval, ncpw; auto c, cc; char cp[]; cval = 0; cp = &cval; cc = 0; while((c=mapch('\'')) >= 0) if(cc++ < ncpw) *cp++ = c; if(cc>ncpw) error("Long character constant"); return(21); } /* Return characters ended by c. Handle escaping. */ mapch(c) { extern peekc, line; auto a; if((a=getchar())==c) return(-1); switch(a) { case '\n': case 0: error("Nonterminated string"); peekc = a; return(-1); case '\\': switch (a=getchar()) { case 't': return('\t'); case 'n': return('\n'); case '0': return('\0'); case 'r': return('\r'); case '\n': line++; return('\n'); } } return(a); } /* * Parse an expression into a tree. * Node stack maintained in cmst. * Nodes pointed to from stack stored in ospace. */ tree() { extern symbol, block, csym[], ctyp, isn, peeksym, opdope[], build, error, cp[], cmst[], space, ospace, cval, ossiz, exit, errflush, cmsiz; auto op[], opst[20], pp[], prst[20], andflg, o, p, ps, os; /* Reset space and stack pointers. */ space = ospace; op = opst; /* operator stack */ pp = prst; /* precedence stack */ cp = cmst; /* operand (cm == complement?) stack */ *op = 200; /* stack EOF */ *pp = 06; andflg = 0; /* operand flag, set when the last element was an operand. */ advanc: switch (o=symbol()) { /* name */ case 20: /* If the name has no storage class yet */ if (*csym==0) if((peeksym=symbol())==6) /* Declare extern if it looks like a function call. */ *csym = 6; /* extern */ else { /* Assume it's a label if it has no value. */ if(csym[2]==0) /* unseen so far */ csym[2] = isn++; } if(*csym==6) /* extern */ /* * Externs must be stored with name since the symbol * table is gone in the next pass. */ *cp++ = block(5,20,csym[1],0,*csym, csym[4],csym[5],csym[6],csym[7]); else /* Everything else just needs the value. */ *cp++ = block(2,20,csym[1],0,*csym,csym[2]); goto tand; /* short constant */ case 21: case21: *cp++ = block(1,21,ctyp,0,cval); goto tand; /* string constant */ case 22: *cp++ = block(1,22,17,0,cval); tand: /* We have just put an operand on the stack. */ if(cp>=cmst+cmsiz) { error("Expression overflow"); exit(1); } /* Two adjacent operands are impossible. */ if (andflg) goto syntax; andflg = 1; goto advanc; /* * The rest are operators. * Unaries are handled here, unambiguous binaries after the switch. */ /* ++, -- */ case 30: case 31: /* * Convert to postfix if there was an operand. * andflg can stay set in this case. */ if (andflg) o =+ 2; goto oponst; /* * NOTE: ~ is not implemented yet, * but already has an entry in the opdope table. */ /* ! */ case 34: /* Postfix ! is illegal. */ if (andflg) goto syntax; goto oponst; /* - */ case 41: /* Unary - */ if (!andflg) { /* Negate a constant immediately. */ peeksym = symbol(); if (peeksym==21) { peeksym = -1; cval = -cval; goto case21; } o = 37; } andflg = 0; goto oponst; /* & */ /* * */ case 47: case 42: if (andflg) /* Binary & and *. */ andflg = 0; else /* Unary */ if(o==47) o = 35; else o = 36; goto oponst; /* ( */ case 6: /* Function call */ if (andflg) { o = symbol(); /* Distinguish general and empty call. */ if (o==7) /* empty */ o = 101; else { /* not empty */ peeksym = o; o = 100; andflg = 0; } } goto oponst; /* ) */ /* ] */ case 5: case 7: if (!andflg) goto syntax; goto oponst; } /* binaries */ if (!andflg) goto syntax; andflg = 0; oponst: /* Check operator o against op on stack. */ /* Get o's precedence. */ p = (opdope[o]>>9) & 077; opon1: /* Precedence on stack. */ ps = *pp; /* * If the new operator binds tighter (higher precedence or * same precedence and right associative) than the last on stack, * push it. */ if (p>ps | p==ps & (opdope[o]&0200)!=0) { /* right-assoc */ putin: switch (o) { /* * Hack for parentheses. * Highest precedence when read * so it will start a new sub-expression, * next to lowest on stack * so only closing parenthesis can end it. */ case 6: /* ( */ case 4: /* [ */ case 100: /* call */ p = 04; } if(op>=opst+20) { /* opstack size */ error("expression overflow"); exit(1); } *++op = o; *++pp = p; goto advanc; } /* * Pop off top operator, combine it with its operands, * and push result onto the operand stack. * The switch handles different kinds of end-of-expression markers * that don't have regular operator syntax. * Mostly it throws parentheses away. */ --pp; switch (os = *op--) { /* EOF */ case 200: peeksym = o; return(*--cp); /* call */ case 100: if (o!=7) goto syntax; build(os); goto advanc; /* mcall */ case 101: /* * Convert to regular call. * Ignore parentheses. */ *cp++ = 0; /* 0 arg call */ os = 100; goto fbuild; /* ( */ case 6: /* * Just pop off opening parenthesis * and don't push closing one. */ if (o!=7) goto syntax; goto advanc; /* [ */ case 4: /* * Same as above but actually do * build an indexing node. */ if (o!=5) goto syntax; build(4); goto advanc; } fbuild: /* * Build tree node and pop operators as long as * they're binding tighter than the current one. */ build(os); goto opon1; syntax: error("Expression syntax"); errflush(o); return(0); } /* Declare a list of names as being of type or storage class 'kw'. */ declare(kw) { extern csym[], symbol, paraml[], parame[]; extern error, cval, errflush, peeksym, exit; int t[], n, o; while((o=symbol())==20) { /* name */ if(kw>=5) { /* type or sort? */ /* Declare storage class, only one possible. */ if(*csym>0) error("%p redeclared", csym[4]); *csym = kw; } else { /* Declare type, only one possible. */ if ((csym[1]&017)!=0) error("%p redeclared", &csym[4]); /* =| is a BUG but should't break anything. */ csym[1] =| csym[1]&0760 | kw; /* Storage class not mentioned. */ if (*csym==0) *csym = -2; } /* Declare vectors or pointers. */ while((o=symbol())==4) { /* [ */ if((o=symbol())==21) { /* const */ /* No prior [] possible. */ if(csym[1]>=020) error("Bad vector"); /* Store vector dimension in symbol. */ csym[3] = cval; o = symbol(); } if (o!=5) /* ] */ goto syntax; /* Add one degree of indirection. */ csym[1] =+ 020; } if(kw==8) { /* parameter */ /* * Append to parameter list. * NOTE: Storage class replaced by pointer/-1. */ *csym = -1; if (paraml==0) paraml = csym; else *parame = csym; parame = csym; } if (o!=9) /* , */ break; } /* * End parameter list with ), * everything else with ; */ if(o==1 & kw!=8 | o==7 & kw==8) return; syntax: error("Declaration syntax"); errflush(o); } /* storage */ regtab 0; efftab 1; cctab 2; sptab 3; symbuf[4]; /* Buffer for symbol name */ pssiz 8; /* Number of words in a hashtable entry */ namsiz 8; /* Number of characters in a symbol name */ nwps 4; /* Number of words per symbol name */ hshused 0; /* Number of hash table entries used */ hshsiz 100; /* Maximum number of hash table entries */ hshlen 800; /* 8*hshsiz */ hshtab[800]; /* Hash table */ space 0; /* Pointer to free space for building trees */ cp 0; /* Stack pointer into cmst */ cmsiz 40; cmst[40]; /* Expression node stack, pointers into ospace */ ctyp 0; /* Type of simple constant (int); never changed */ isn 1; /* Next label number */ swsiz 120; swtab[120]; /* Switch table */ swp 0; /* Current pointer for building switch table */ contlab 0; /* The label a continue jumps to */ brklab 0; /* The label a break jumps to */ deflab 0; /* The default label of a switch */ nreg 4; /* Map of relational operators when operands are flipped */ maprel[] 60,61,64,65,62,63,68,69,66,67; nauto 0; stack 0; peeksym 0177777; /* Peeked symbol (-1 == none) */ peekc 0; /* Peeked character (0 == none) */ eof 0; line 1; csym 0; /* Hash table entry if symbol is a name */ cval 0; /* Constant value if symbol is a constant */ ncpw 2; /* Number of characters per word */ nerror 0; paraml; /* Function parameter list */ parame; /* End of parameter list */ tmpfil;