WebSVN – planix.SVN – Blame – /os/branches/feature_unix/sys/src/libhtml/lex.c

Rev	Author	Line No.	Line
2	-	1	`#include <u.h>`
		2	`#include <libc.h>`
		3	`#include <draw.h>`
		4	`#include <ctype.h>`
		5	`#include <html.h>`
		6	`#include "impl.h"`
		7
		8	`typedef struct TokenSource TokenSource;`
		9	`struct TokenSource`
		10	`{`
		11	`int i; // index of next byte to use`
		12	`uchar* data; // all the data`
		13	`int edata; // data[0:edata] is valid`
		14	`int chset; // one of US_Ascii, etc.`
		15	`int mtype; // TextHtml or TextPlain`
		16	`};`
		17
		18	`enum {`
		19	`EOF = -2,`
		20	`EOB = -1`
		21	`};`
		22
		23	`#define ISNAMCHAR(c) ((c)<256 && (isalpha(c) \|\| isdigit(c) \|\| (c) == '-' \|\| (c) == '.'))`
		24
		25	`#define SMALLBUFSIZE 240`
		26	`#define BIGBUFSIZE 2000`
		27
		28	`// HTML 4.0 tag names.`
		29	`// Keep sorted, and in correspondence with enum in iparse.h.`
		30	`Rune* tagnames[] = {`
		31	`L" ",`
		32	`L"!",`
		33	`L"a",`
		34	`L"abbr",`
		35	`L"acronym",`
		36	`L"address",`
		37	`L"applet",`
		38	`L"area",`
		39	`L"b",`
		40	`L"base",`
		41	`L"basefont",`
		42	`L"bdo",`
		43	`L"big",`
		44	`L"blink",`
		45	`L"blockquote",`
		46	`L"body",`
		47	`L"bq",`
		48	`L"br",`
		49	`L"button",`
		50	`L"caption",`
		51	`L"center",`
		52	`L"cite",`
		53	`L"code",`
		54	`L"col",`
		55	`L"colgroup",`
		56	`L"dd",`
		57	`L"del",`
		58	`L"dfn",`
		59	`L"dir",`
		60	`L"div",`
		61	`L"dl",`
		62	`L"dt",`
		63	`L"em",`
		64	`L"fieldset",`
		65	`L"font",`
		66	`L"form",`
		67	`L"frame",`
		68	`L"frameset",`
		69	`L"h1",`
		70	`L"h2",`
		71	`L"h3",`
		72	`L"h4",`
		73	`L"h5",`
		74	`L"h6",`
		75	`L"head",`
		76	`L"hr",`
		77	`L"html",`
		78	`L"i",`
		79	`L"iframe",`
		80	`L"img",`
		81	`L"input",`
		82	`L"ins",`
		83	`L"isindex",`
		84	`L"kbd",`
		85	`L"label",`
		86	`L"legend",`
		87	`L"li",`
		88	`L"link",`
		89	`L"map",`
		90	`L"menu",`
		91	`L"meta",`
		92	`L"nobr",`
		93	`L"noframes",`
		94	`L"noscript",`
		95	`L"object",`
		96	`L"ol",`
		97	`L"optgroup",`
		98	`L"option",`
		99	`L"p",`
		100	`L"param",`
		101	`L"pre",`
		102	`L"q",`
		103	`L"s",`
		104	`L"samp",`
		105	`L"script",`
		106	`L"select",`
		107	`L"small",`
		108	`L"span",`
		109	`L"strike",`
		110	`L"strong",`
		111	`L"style",`
		112	`L"sub",`
		113	`L"sup",`
		114	`L"table",`
		115	`L"tbody",`
		116	`L"td",`
		117	`L"textarea",`
		118	`L"tfoot",`
		119	`L"th",`
		120	`L"thead",`
		121	`L"title",`
		122	`L"tr",`
		123	`L"tt",`
		124	`L"u",`
		125	`L"ul",`
		126	`L"var"`
		127	`};`
		128
		129	`// HTML 4.0 attribute names.`
		130	`// Keep sorted, and in correspondence with enum in impl.h.`
		131	`Rune* attrnames[] = {`
		132	`L"abbr",`
		133	`L"accept-charset",`
		134	`L"access-key",`
		135	`L"action",`
		136	`L"align",`
		137	`L"alink",`
		138	`L"alt",`
		139	`L"archive",`
		140	`L"axis",`
		141	`L"background",`
		142	`L"bgcolor",`
		143	`L"border",`
		144	`L"cellpadding",`
		145	`L"cellspacing",`
		146	`L"char",`
		147	`L"charoff",`
		148	`L"charset",`
		149	`L"checked",`
		150	`L"cite",`
		151	`L"class",`
		152	`L"classid",`
		153	`L"clear",`
		154	`L"code",`
		155	`L"codebase",`
		156	`L"codetype",`
		157	`L"color",`
		158	`L"cols",`
		159	`L"colspan",`
		160	`L"compact",`
		161	`L"content",`
		162	`L"coords",`
		163	`L"data",`
		164	`L"datetime",`
		165	`L"declare",`
		166	`L"defer",`
		167	`L"dir",`
		168	`L"disabled",`
		169	`L"enctype",`
		170	`L"face",`
		171	`L"for",`
		172	`L"frame",`
		173	`L"frameborder",`
		174	`L"headers",`
		175	`L"height",`
		176	`L"href",`
		177	`L"hreflang",`
		178	`L"hspace",`
		179	`L"http-equiv",`
		180	`L"id",`
		181	`L"ismap",`
		182	`L"label",`
		183	`L"lang",`
		184	`L"link",`
		185	`L"longdesc",`
		186	`L"marginheight",`
		187	`L"marginwidth",`
		188	`L"maxlength",`
		189	`L"media",`
		190	`L"method",`
		191	`L"multiple",`
		192	`L"name",`
		193	`L"nohref",`
		194	`L"noresize",`
		195	`L"noshade",`
		196	`L"nowrap",`
		197	`L"object",`
		198	`L"onblur",`
		199	`L"onchange",`
		200	`L"onclick",`
		201	`L"ondblclick",`
		202	`L"onfocus",`
		203	`L"onkeypress",`
		204	`L"onkeyup",`
		205	`L"onload",`
		206	`L"onmousedown",`
		207	`L"onmousemove",`
		208	`L"onmouseout",`
		209	`L"onmouseover",`
		210	`L"onmouseup",`
		211	`L"onreset",`
		212	`L"onselect",`
		213	`L"onsubmit",`
		214	`L"onunload",`
		215	`L"profile",`
		216	`L"prompt",`
		217	`L"readonly",`
		218	`L"rel",`
		219	`L"rev",`
		220	`L"rows",`
		221	`L"rowspan",`
		222	`L"rules",`
		223	`L"scheme",`
		224	`L"scope",`
		225	`L"scrolling",`
		226	`L"selected",`
		227	`L"shape",`
		228	`L"size",`
		229	`L"span",`
		230	`L"src",`
		231	`L"standby",`
		232	`L"start",`
		233	`L"style",`
		234	`L"summary",`
		235	`L"tabindex",`
		236	`L"target",`
		237	`L"text",`
		238	`L"title",`
		239	`L"type",`
		240	`L"usemap",`
		241	`L"valign",`
		242	`L"value",`
		243	`L"valuetype",`
		244	`L"version",`
		245	`L"vlink",`
		246	`L"vspace",`
		247	`L"width"`
		248	`};`
		249
		250
		251	`// Character entity to unicode character number map.`
		252	`// Keep sorted by name.`
		253	`StringInt chartab[]= {`
		254	`{L"AElig", 198},`
		255	`{L"Aacute", 193},`
		256	`{L"Acirc", 194},`
		257	`{L"Agrave", 192},`
		258	`{L"Alpha", 913},`
		259	`{L"Aring", 197},`
		260	`{L"Atilde", 195},`
		261	`{L"Auml", 196},`
		262	`{L"Beta", 914},`
		263	`{L"Ccedil", 199},`
		264	`{L"Chi", 935},`
		265	`{L"Dagger", 8225},`
		266	`{L"Delta", 916},`
		267	`{L"ETH", 208},`
		268	`{L"Eacute", 201},`
		269	`{L"Ecirc", 202},`
		270	`{L"Egrave", 200},`
		271	`{L"Epsilon", 917},`
		272	`{L"Eta", 919},`
		273	`{L"Euml", 203},`
		274	`{L"Gamma", 915},`
		275	`{L"Iacute", 205},`
		276	`{L"Icirc", 206},`
		277	`{L"Igrave", 204},`
		278	`{L"Iota", 921},`
		279	`{L"Iuml", 207},`
		280	`{L"Kappa", 922},`
		281	`{L"Lambda", 923},`
		282	`{L"Mu", 924},`
		283	`{L"Ntilde", 209},`
		284	`{L"Nu", 925},`
		285	`{L"OElig", 338},`
		286	`{L"Oacute", 211},`
		287	`{L"Ocirc", 212},`
		288	`{L"Ograve", 210},`
		289	`{L"Omega", 937},`
		290	`{L"Omicron", 927},`
		291	`{L"Oslash", 216},`
		292	`{L"Otilde", 213},`
		293	`{L"Ouml", 214},`
		294	`{L"Phi", 934},`
		295	`{L"Pi", 928},`
		296	`{L"Prime", 8243},`
		297	`{L"Psi", 936},`
		298	`{L"Rho", 929},`
		299	`{L"Scaron", 352},`
		300	`{L"Sigma", 931},`
		301	`{L"THORN", 222},`
		302	`{L"Tau", 932},`
		303	`{L"Theta", 920},`
		304	`{L"Uacute", 218},`
		305	`{L"Ucirc", 219},`
		306	`{L"Ugrave", 217},`
		307	`{L"Upsilon", 933},`
		308	`{L"Uuml", 220},`
		309	`{L"Xi", 926},`
		310	`{L"Yacute", 221},`
		311	`{L"Yuml", 376},`
		312	`{L"Zeta", 918},`
		313	`{L"aacute", 225},`
		314	`{L"acirc", 226},`
		315	`{L"acute", 180},`
		316	`{L"aelig", 230},`
		317	`{L"agrave", 224},`
		318	`{L"alefsym", 8501},`
		319	`{L"alpha", 945},`
		320	`{L"amp", 38},`
		321	`{L"and", 8743},`
		322	`{L"ang", 8736},`
		323	`{L"aring", 229},`
		324	`{L"asymp", 8776},`
		325	`{L"atilde", 227},`
		326	`{L"auml", 228},`
		327	`{L"bdquo", 8222},`
		328	`{L"beta", 946},`
		329	`{L"brvbar", 166},`
		330	`{L"bull", 8226},`
		331	`{L"cap", 8745},`
		332	`{L"ccedil", 231},`
		333	`{L"cdots", 8943},`
		334	`{L"cedil", 184},`
		335	`{L"cent", 162},`
		336	`{L"chi", 967},`
		337	`{L"circ", 710},`
		338	`{L"clubs", 9827},`
		339	`{L"cong", 8773},`
		340	`{L"copy", 169},`
		341	`{L"crarr", 8629},`
		342	`{L"cup", 8746},`
		343	`{L"curren", 164},`
		344	`{L"dArr", 8659},`
		345	`{L"dagger", 8224},`
		346	`{L"darr", 8595},`
		347	`{L"ddots", 8945},`
		348	`{L"deg", 176},`
		349	`{L"delta", 948},`
		350	`{L"diams", 9830},`
		351	`{L"divide", 247},`
		352	`{L"eacute", 233},`
		353	`{L"ecirc", 234},`
		354	`{L"egrave", 232},`
		355	`{L"emdash", 8212}, /* non-standard but commonly used */`
		356	`{L"empty", 8709},`
		357	`{L"emsp", 8195},`
		358	`{L"endash", 8211}, /* non-standard but commonly used */`
		359	`{L"ensp", 8194},`
		360	`{L"epsilon", 949},`
		361	`{L"equiv", 8801},`
		362	`{L"eta", 951},`
		363	`{L"eth", 240},`
		364	`{L"euml", 235},`
		365	`{L"euro", 8364},`
		366	`{L"exist", 8707},`
		367	`{L"fnof", 402},`
		368	`{L"forall", 8704},`
		369	`{L"frac12", 189},`
		370	`{L"frac14", 188},`
		371	`{L"frac34", 190},`
		372	`{L"frasl", 8260},`
		373	`{L"gamma", 947},`
		374	`{L"ge", 8805},`
		375	`{L"gt", 62},`
		376	`{L"hArr", 8660},`
		377	`{L"harr", 8596},`
		378	`{L"hearts", 9829},`
		379	`{L"hellip", 8230},`
		380	`{L"iacute", 237},`
		381	`{L"icirc", 238},`
		382	`{L"iexcl", 161},`
		383	`{L"igrave", 236},`
		384	`{L"image", 8465},`
		385	`{L"infin", 8734},`
		386	`{L"int", 8747},`
		387	`{L"iota", 953},`
		388	`{L"iquest", 191},`
		389	`{L"isin", 8712},`
		390	`{L"iuml", 239},`
		391	`{L"kappa", 954},`
		392	`{L"lArr", 8656},`
		393	`{L"lambda", 955},`
		394	`{L"lang", 9001},`
		395	`{L"laquo", 171},`
		396	`{L"larr", 8592},`
		397	`{L"lceil", 8968},`
		398	`{L"ldots", 8230},`
		399	`{L"ldquo", 8220},`
		400	`{L"le", 8804},`
		401	`{L"lfloor", 8970},`
		402	`{L"lowast", 8727},`
		403	`{L"loz", 9674},`
		404	`{L"lrm", 8206},`
		405	`{L"lsaquo", 8249},`
		406	`{L"lsquo", 8216},`
		407	`{L"lt", 60},`
		408	`{L"macr", 175},`
		409	`{L"mdash", 8212},`
		410	`{L"micro", 181},`
		411	`{L"middot", 183},`
		412	`{L"minus", 8722},`
		413	`{L"mu", 956},`
		414	`{L"nabla", 8711},`
		415	`{L"nbsp", 160},`
		416	`{L"ndash", 8211},`
		417	`{L"ne", 8800},`
		418	`{L"ni", 8715},`
		419	`{L"not", 172},`
		420	`{L"notin", 8713},`
		421	`{L"nsub", 8836},`
		422	`{L"ntilde", 241},`
		423	`{L"nu", 957},`
		424	`{L"oacute", 243},`
		425	`{L"ocirc", 244},`
		426	`{L"oelig", 339},`
		427	`{L"ograve", 242},`
		428	`{L"oline", 8254},`
		429	`{L"omega", 969},`
		430	`{L"omicron", 959},`
		431	`{L"oplus", 8853},`
		432	`{L"or", 8744},`
		433	`{L"ordf", 170},`
		434	`{L"ordm", 186},`
		435	`{L"oslash", 248},`
		436	`{L"otilde", 245},`
		437	`{L"otimes", 8855},`
		438	`{L"ouml", 246},`
		439	`{L"para", 182},`
		440	`{L"part", 8706},`
		441	`{L"permil", 8240},`
		442	`{L"perp", 8869},`
		443	`{L"phi", 966},`
		444	`{L"pi", 960},`
		445	`{L"piv", 982},`
		446	`{L"plusmn", 177},`
		447	`{L"pound", 163},`
		448	`{L"prime", 8242},`
		449	`{L"prod", 8719},`
		450	`{L"prop", 8733},`
		451	`{L"psi", 968},`
		452	`{L"quad", 8193},`
		453	`{L"quot", 34},`
		454	`{L"rArr", 8658},`
		455	`{L"radic", 8730},`
		456	`{L"rang", 9002},`
		457	`{L"raquo", 187},`
		458	`{L"rarr", 8594},`
		459	`{L"rceil", 8969},`
		460	`{L"rdquo", 8221},`
		461	`{L"real", 8476},`
		462	`{L"reg", 174},`
		463	`{L"rfloor", 8971},`
		464	`{L"rho", 961},`
		465	`{L"rlm", 8207},`
		466	`{L"rsaquo", 8250},`
		467	`{L"rsquo", 8217},`
		468	`{L"sbquo", 8218},`
		469	`{L"scaron", 353},`
		470	`{L"sdot", 8901},`
		471	`{L"sect", 167},`
		472	`{L"shy", 173},`
		473	`{L"sigma", 963},`
		474	`{L"sigmaf", 962},`
		475	`{L"sim", 8764},`
		476	`{L"sp", 8194},`
		477	`{L"spades", 9824},`
		478	`{L"sub", 8834},`
		479	`{L"sube", 8838},`
		480	`{L"sum", 8721},`
		481	`{L"sup", 8835},`
		482	`{L"sup1", 185},`
		483	`{L"sup2", 178},`
		484	`{L"sup3", 179},`
		485	`{L"supe", 8839},`
		486	`{L"szlig", 223},`
		487	`{L"tau", 964},`
		488	`{L"there4", 8756},`
		489	`{L"theta", 952},`
		490	`{L"thetasym", 977},`
		491	`{L"thinsp", 8201},`
		492	`{L"thorn", 254},`
		493	`{L"tilde", 732},`
		494	`{L"times", 215},`
		495	`{L"trade", 8482},`
		496	`{L"uArr", 8657},`
		497	`{L"uacute", 250},`
		498	`{L"uarr", 8593},`
		499	`{L"ucirc", 251},`
		500	`{L"ugrave", 249},`
		501	`{L"uml", 168},`
		502	`{L"upsih", 978},`
		503	`{L"upsilon", 965},`
		504	`{L"uuml", 252},`
		505	`{L"varepsilon", 8712},`
		506	`{L"varphi", 981},`
		507	`{L"varpi", 982},`
		508	`{L"varrho", 1009},`
		509	`{L"vdots", 8942},`
		510	`{L"vsigma", 962},`
		511	`{L"vtheta", 977},`
		512	`{L"weierp", 8472},`
		513	`{L"xi", 958},`
		514	`{L"yacute", 253},`
		515	`{L"yen", 165},`
		516	`{L"yuml", 255},`
		517	`{L"zeta", 950},`
		518	`{L"zwj", 8205},`
		519	`{L"zwnj", 8204}`
		520	`};`
		521	`#define NCHARTAB (sizeof(chartab)/sizeof(chartab[0]))`
		522
		523	`// Characters Winstart..Winend are those that Windows`
		524	`// uses interpolated into the Latin1 set.`
		525	`// They aren't supposed to appear in HTML, but they do....`
		526	`enum {`
		527	`Winstart = 127,`
		528	`Winend = 159`
		529	`};`
		530
		531	`static int winchars[]= { 8226, // 8226 is a bullet`
		532	`8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,`
		533	`710, 8240, 352, 8249, 338, 8226, 8226, 8226,`
		534	`8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,`
		535	`732, 8482, 353, 8250, 339, 8226, 8226, 376};`
		536
		537	`static StringInt* tagtable; // initialized from tagnames`
		538	`static StringInt* attrtable; // initialized from attrnames`
		539
		540	`static void lexinit(void);`
		541	`static int getplaindata(TokenSource* ts, Token* a, int* pai);`
		542	`static int getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);`
		543	`static int getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai, int findtag);`
		544	`static int gettag(TokenSource* ts, int starti, Token* a, int* pai);`
		545	`static Rune* buftostr(Rune* s, Rune* buf, int j);`
		546	`static int comment(TokenSource* ts);`
		547	`static int findstr(TokenSource* ts, Rune* s);`
		548	`static int ampersand(TokenSource* ts);`
		549	`static int lowerc(int c);`
		550	`static int getchar(TokenSource* ts);`
		551	`static void ungetchar(TokenSource* ts, int c);`
		552	`static void backup(TokenSource* ts, int savei);`
		553	`static void freeinsidetoken(Token* t);`
		554	`static void freeattrs(Attr* ahead);`
		555	`static Attr* newattr(int attid, Rune* value, Attr* link);`
		556	`static int Tconv(Fmt* f);`
		557
		558	`int dbglex = 0;`
		559	`static int lexinited = 0;`
		560
		561	`static void`
		562	`lexinit(void)`
		563	`{`
		564	`tagtable = _makestrinttab(tagnames, Numtags);`
		565	`attrtable = _makestrinttab(attrnames, Numattrs);`
		566	`fmtinstall('T', Tconv);`
		567	`lexinited = 1;`
		568	`}`
		569
		570	`static TokenSource*`
		571	`newtokensource(uchar* data, int edata, int chset, int mtype)`
		572	`{`
		573	`TokenSource* ans;`
		574
		575	`assert(chset == US_Ascii \|\| chset == ISO_8859_1 \|\|`
		576	`chset == UTF_8 \|\| chset == Unicode);`
		577	`ans = (TokenSource*)emalloc(sizeof(TokenSource));`
		578	`ans->i = 0;`
		579	`ans->data = data;`
		580	`ans->edata = edata;`
		581	`ans->chset = chset;`
		582	`ans->mtype = mtype;`
		583	`return ans;`
		584	`}`
		585
		586	`enum {`
		587	`ToksChunk = 500,`
		588	`};`
		589
		590	`// Call this to get the tokens.`
		591	`// The number of returned tokens is returned in *plen.`
		592	`Token*`
		593	`_gettoks(uchar* data, int datalen, int chset, int mtype, int* plen)`
		594	`{`
		595	`TokenSource* ts;`
		596	`Token* a;`
		597	`int alen;`
		598	`int ai;`
		599	`int starti;`
		600	`int c;`
		601	`int tag;`
		602
		603	`if(!lexinited)`
		604	`lexinit();`
		605	`ts = newtokensource(data, datalen, chset, mtype);`
		606	`if(dbglex)`
		607	`fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts->edata);`
		608	`alen = 0;`
		609	`ai = 0;`
		610	`a = 0;`
		611	`if(ts->mtype == TextHtml) {`
		612	`for(;;) {`
		613	`if(alen - ai < ToksChunk/32) {`
		614	`alen += ToksChunk;`
		615	`a = erealloc(a, alensizeof a);`
		616	`}`
		617	`starti = ts->i;`
		618	`c = getchar(ts);`
		619	`if(c < 0)`
		620	`break;`
		621	`if(c == '<') {`
		622	`tag = gettag(ts, starti, a, &ai);`
		623	`if(tag == Tscript \|\| tag == Tstyle) {`
		624	`// special rules for getting Data after....`
		625	`starti = ts->i;`
		626	`c = getchar(ts);`
		627	`tag = getscriptdata(ts, c, starti, a, &ai, tag);`
		628	`}`
		629	`}`
		630	`else`
		631	`tag = getdata(ts, c, starti, a, &ai);`
		632	`if(tag == -1)`
		633	`break;`
		634	`else if(dbglex > 1 && tag != Comment)`
		635	`fprint(2, "lex: got token %T\n", &a[ai-1]);`
		636	`}`
		637	`}`
		638	`else {`
		639	`// plain text (non-html) tokens`
		640	`for(;;) {`
		641	`if(alen - ai < ToksChunk/32) {`
		642	`alen += ToksChunk;`
		643	`a = erealloc(a, alensizeof a);`
		644	`}`
		645	`tag = getplaindata(ts, a, &ai);`
		646	`if(tag == -1)`
		647	`break;`
		648	`if(dbglex > 1)`
		649	`fprint(2, "lex: got token %T\n", &a[ai]);`
		650	`}`
		651	`}`
		652	`free(ts);`
		653	`if(dbglex)`
		654	`fprint(2, "lex: returning %d tokens\n", ai);`
		655	`*plen = ai;`
		656	`if(ai == 0){`
		657	`free(a);`
		658	`a = 0;`
		659	`}`
		660	`return a;`
		661	`}`
		662
		663	`// For case where source isn't HTML.`
		664	`// Just make data tokens, one per line (or partial line,`
		665	`// at end of buffer), ignoring non-whitespace control`
		666	`// characters and dumping \r's.`
		667	`// If find non-empty token, fill in a[pai], bump pai, and return Data.`
		668	`// Otherwise return -1;`
		669	`static int`
		670	`getplaindata(TokenSource* ts, Token* a, int* pai)`
		671	`{`
		672	`Rune* s;`
		673	`int j;`
		674	`int starti;`
		675	`int c;`
		676	`Token* tok;`
		677	`Rune buf[BIGBUFSIZE];`
		678
		679	`s = nil;`
		680	`j = 0;`
		681	`starti = ts->i;`
		682	`for(c = getchar(ts); c >= 0; c = getchar(ts)) {`
		683	`if(c < ' ') {`
		684	`if(isspace(c)) {`
		685	`if(c == '\r') {`
		686	`// ignore it unless no following '\n',`
		687	`// in which case treat it like '\n'`
		688	`c = getchar(ts);`
		689	`if(c != '\n') {`
		690	`if(c >= 0)`
		691	`ungetchar(ts, c);`
		692	`c = '\n';`
		693	`}`
		694	`}`
		695	`}`
		696	`else`
		697	`c = 0;`
		698	`}`
		699	`if(c != 0) {`
		700	`buf[j++] = c;`
		701	`if(j == nelem(buf)-1) {`
		702	`s = buftostr(s, buf, j);`
		703	`j = 0;`
		704	`}`
		705	`}`
		706	`if(c == '\n')`
		707	`break;`
		708	`}`
		709	`s = buftostr(s, buf, j);`
		710	`if(s == nil)`
		711	`return -1;`
		712	`tok = &a[(*pai)++];`
		713	`tok->tag = Data;`
		714	`tok->text = s;`
		715	`tok->attr = nil;`
		716	`tok->starti = starti;`
		717	`return Data;`
		718	`}`
		719
		720	`// Return concatenation of s and buf[0:j]`
		721	`static Rune*`
		722	`buftostr(Rune* s, Rune* buf, int j)`
		723	`{`
		724	`int i;`
		725
		726	`if(s == nil)`
		727	`s = _Strndup(buf, j);`
		728	`else {`
		729	`i = _Strlen(s);`
		730	`s = realloc(s, ( i+j+1)sizeof s);`
		731	`memcpy(&s[i], buf, jsizeof s);`
		732	`s[i+j] = 0;`
		733	`}`
		734	`return s;`
		735	`}`
		736
		737	`// Gather data up to next start-of-tag or end-of-buffer.`
		738	`// Translate entity references (&).`
		739	`// Ignore non-whitespace control characters and get rid of \r's.`
		740	`// If find non-empty token, fill in a[pai], bump pai, and return Data.`
		741	`// Otherwise return -1;`
		742	`static int`
		743	`getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)`
		744	`{`
		745	`Rune* s;`
		746	`int j;`
		747	`int c;`
		748	`Token* tok;`
		749	`Rune buf[SMALLBUFSIZE];`
		750
		751	`s = nil;`
		752	`j = 0;`
		753	`for(c = firstc; c >= 0; c = getchar(ts)){`
		754	`if(c == '&') {`
		755	`c = ampersand(ts);`
		756	`if(c < 0)`
		757	`break;`
		758	`}`
		759	`else if(c < ' ') {`
		760	`if(isspace(c)) {`
		761	`if(c == '\r') {`
		762	`// ignore it unless no following '\n',`
		763	`// in which case treat it like '\n'`
		764	`c = getchar(ts);`
		765	`if(c != '\n') {`
		766	`if(c >= 0)`
		767	`ungetchar(ts, c);`
		768	`c = '\n';`
		769	`}`
		770	`}`
		771	`}`
		772	`else {`
		773	`if(warn)`
		774	`fprint(2, "warning: non-whitespace control character %d ignored\n", c);`
		775	`c = 0;`
		776	`}`
		777	`}`
		778	`else if(c == '<') {`
		779	`ungetchar(ts, c);`
		780	`break;`
		781	`}`
		782	`if(c != 0) {`
		783	`buf[j++] = c;`
		784	`if(j == nelem(buf)-1) {`
		785	`s = buftostr(s, buf, j);`
		786	`j = 0;`
		787	`}`
		788	`}`
		789	`}`
		790	`s = buftostr(s, buf, j);`
		791	`if(s == nil)`
		792	`return -1;`
		793	`tok = &a[(*pai)++];`
		794	`tok->tag = Data;`
		795	`tok->text = s;`
		796	`tok->attr = nil;`
		797	`tok->starti = starti;`
		798	`return Data;`
		799	`}`
		800
		801	`// The rules for lexing scripts are different (ugh).`
		802	`// Gather up everything until see an "</" tagnames[tok] ">"`
		803	`static int`
		804	`getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai, int findtag)`
		805	`{`
		806	`Rune* s;`
		807	`int j;`
		808	`int tstarti;`
		809	`int savei;`
		810	`int c;`
		811	`int tag;`
		812	`int done;`
		813	`Token* tok;`
		814	`Rune buf[BIGBUFSIZE];`
		815
		816	`s = nil;`
		817	`j = 0;`
		818	`tstarti = starti;`
		819	`c = firstc;`
		820	`done = 0;`
		821	`while(c >= 0) {`
		822	`if(c == '<') {`
		823	`// other browsers ignore stuff to end of line after <!`
		824	`savei = ts->i;`
		825	`c = getchar(ts);`
		826	`if(c == '!') {`
		827	`if(comment(ts) == -1)`
		828	`break;`
		829	`if(c == '\r')`
		830	`c = getchar(ts);`
		831	`if(c == '\n')`
		832	`c = getchar(ts);`
		833	`}`
		834	`else if(c >= 0) {`
		835	`backup(ts, savei);`
		836	`tag = gettag(ts, tstarti, a, pai);`
		837	`if(tag == -1)`
		838	`break;`
		839	`if(tag != Comment)`
		840	`(*pai)--;`
		841	`backup(ts, tstarti);`
		842	`if(tag == findtag + RBRA) {`
		843	`done = 1;`
		844	`break;`
		845	`}`
		846	`// here tag was not the one we were looking for, so take as regular data`
		847	`c = getchar(ts);`
		848	`}`
		849	`}`
		850	`if(c < 0)`
		851	`break;`
		852	`if(c != 0) {`
		853	`buf[j++] = c;`
		854	`if(j == nelem(buf)-1) {`
		855	`s = buftostr(s, buf, j);`
		856	`j = 0;`
		857	`}`
		858	`}`
		859	`tstarti = ts->i;`
		860	`c = getchar(ts);`
		861	`}`
		862	`if(done \|\| ts->i == ts->edata) {`
		863	`s = buftostr(s, buf, j);`
		864	`tok = &a[(*pai)++];`
		865	`tok->tag = Data;`
		866	`tok->text = s;`
		867	`tok->attr = nil;`
		868	`tok->starti = starti;`
		869	`return Data;`
		870	`}`
		871	`free(s);`
		872	`backup(ts, starti);`
		873	`return -1;`
		874	`}`
		875
		876	`// We've just seen a '<'. Gather up stuff to closing '>' (if buffer`
		877	`// ends before then, return -1).`
		878	`// If it's a tag, look up the name, gather the attributes, and return`
		879	`// the appropriate token.`
		880	`// Else it's either just plain data or some kind of ignorable stuff:`
		881	`// return Data or Comment as appropriate.`
		882	`// If it's not a Comment, put it in a[pai] and bump pai.`
		883	`static int`
		884	`gettag(TokenSource* ts, int starti, Token* a, int* pai)`
		885	`{`
		886	`int rbra;`
		887	`int ans;`
		888	`Attr* al;`
		889	`int nexti;`
		890	`int c;`
		891	`int ti;`
		892	`int afnd;`
		893	`int attid;`
		894	`int quote;`
		895	`Rune* val;`
		896	`int nv;`
		897	`int i;`
		898	`int tag;`
		899	`Token* tok;`
		900	`Rune buf[BIGBUFSIZE];`
		901
		902	`rbra = 0;`
		903	`nexti = ts->i;`
		904	`tok = &a[*pai];`
		905	`tok->tag = Notfound;`
		906	`tok->text = nil;`
		907	`tok->attr = nil;`
		908	`tok->starti = starti;`
		909	`c = getchar(ts);`
		910	`if(c == '/') {`
		911	`rbra = RBRA;`
		912	`c = getchar(ts);`
		913	`}`
		914	`if(c < 0)`
		915	`goto eob_done;`
		916	`if(c >= 256 \|\| !isalpha(c)) {`
		917	`// not a tag`
		918	`if(c == '!') {`
		919	`ans = comment(ts);`
		920	`if(ans != -1)`
		921	`return ans;`
		922	`goto eob_done;`
		923	`}`
		924	`else {`
		925	`backup(ts, nexti);`
		926	`tok->tag = Data;`
		927	`tok->text = _Strdup(L"<");`
		928	`(*pai)++;`
		929	`return Data;`
		930	`}`
		931	`}`
		932	`// c starts a tagname`
		933	`buf[0] = c;`
		934	`i = 1;`
		935	`while(1) {`
		936	`c = getchar(ts);`
		937	`if(c < 0)`
		938	`goto eob_done;`
		939	`if(!ISNAMCHAR(c))`
		940	`break;`
		941	`// if name is bigger than buf it won't be found anyway...`
		942	`if(i < BIGBUFSIZE)`
		943	`buf[i++] = c;`
		944	`}`
		945	`if(_lookup(tagtable, Numtags, buf, i, &tag))`
		946	`tok->tag = tag + rbra;`
		947	`else`
		948	`tok->text = _Strndup(buf, i); // for warning print, in build`
		949	`// attribute gathering loop`
		950	`al = nil;`
		951	`while(1) {`
		952	`// look for "ws name" or "ws name ws = ws val" (ws=whitespace)`
		953	`// skip whitespace`
		954	`attrloop_continue:`
		955	`while(c < 256 && isspace(c)) {`
		956	`c = getchar(ts);`
		957	`if(c < 0)`
		958	`goto eob_done;`
		959	`}`
		960	`if(c == '>')`
		961	`goto attrloop_done;`
		962	`if(c == '<') {`
		963	`if(warn)`
		964	`fprint(2, "warning: unclosed tag\n");`
		965	`ungetchar(ts, c);`
		966	`goto attrloop_done;`
		967	`}`
		968	`if(c >= 256 \|\| !isalpha(c)) {`
		969	`if(warn)`
		970	`fprint(2, "warning: expected attribute name\n");`
		971	`// skipt to next attribute name`
		972	`while(1) {`
		973	`c = getchar(ts);`
		974	`if(c < 0)`
		975	`goto eob_done;`
		976	`if(c < 256 && isalpha(c))`
		977	`goto attrloop_continue;`
		978	`if(c == '<') {`
		979	`if(warn)`
		980	`fprint(2, "warning: unclosed tag\n");`
		981	`ungetchar(ts, 60);`
		982	`goto attrloop_done;`
		983	`}`
		984	`if(c == '>')`
		985	`goto attrloop_done;`
		986	`}`
		987	`}`
		988	`// gather attribute name`
		989	`buf[0] = c;`
		990	`i = 1;`
		991	`while(1) {`
		992	`c = getchar(ts);`
		993	`if(c < 0)`
		994	`goto eob_done;`
		995	`if(!ISNAMCHAR(c))`
		996	`break;`
		997	`if(i < BIGBUFSIZE-1)`
		998	`buf[i++] = c;`
		999	`}`
		1000	`afnd = _lookup(attrtable, Numattrs, buf, i, &attid);`
		1001	`if(warn && !afnd) {`
		1002	`buf[i] = 0;`
		1003	`fprint(2, "warning: unknown attribute name %S\n", buf);`
		1004	`}`
		1005	`// skip whitespace`
		1006	`while(c < 256 && isspace(c)) {`
		1007	`c = getchar(ts);`
		1008	`if(c < 0)`
		1009	`goto eob_done;`
		1010	`}`
		1011	`if(c != '=') {`
		1012	`if(afnd)`
		1013	`al = newattr(attid, nil, al);`
		1014	`goto attrloop_continue;`
		1015	`}`
		1016	`//# c is '=' here; skip whitespace`
		1017	`while(1) {`
		1018	`c = getchar(ts);`
		1019	`if(c < 0)`
		1020	`goto eob_done;`
		1021	`if(c >= 256 \|\| !isspace(c))`
		1022	`break;`
		1023	`}`
		1024	`quote = 0;`
		1025	`if(c == '\'' \|\| c == '"') {`
		1026	`quote = c;`
		1027	`c = getchar(ts);`
		1028	`if(c < 0)`
		1029	`goto eob_done;`
		1030	`}`
		1031	`val = nil;`
		1032	`nv = 0;`
		1033	`while(1) {`
		1034	`valloop_continue:`
		1035	`if(c < 0)`
		1036	`goto eob_done;`
		1037	`if(c == '>') {`
		1038	`if(quote) {`
		1039	`// c might be part of string (though not good style)`
		1040	`// but if line ends before close quote, assume`
		1041	`// there was an unmatched quote`
		1042	`ti = ts->i;`
		1043	`while(1) {`
		1044	`c = getchar(ts);`
		1045	`if(c < 0)`
		1046	`goto eob_done;`
		1047	`if(c == quote) {`
		1048	`backup(ts, ti);`
		1049	`buf[nv++] = '>';`
		1050	`if(nv == BIGBUFSIZE-1) {`
		1051	`val = buftostr(val, buf, nv);`
		1052	`nv = 0;`
		1053	`}`
		1054	`c = getchar(ts);`
		1055	`goto valloop_continue;`
		1056	`}`
		1057	`if(c == '\n') {`
		1058	`if(warn)`
		1059	`fprint(2, "warning: apparent unmatched quote\n");`
		1060	`backup(ts, ti);`
		1061	`c = '>';`
		1062	`goto valloop_done;`
		1063	`}`
		1064	`}`
		1065	`}`
		1066	`else`
		1067	`goto valloop_done;`
		1068	`}`
		1069	`if(quote) {`
		1070	`if(c == quote) {`
		1071	`c = getchar(ts);`
		1072	`if(c < 0)`
		1073	`goto eob_done;`
		1074	`goto valloop_done;`
		1075	`}`
		1076	`if(c == '\r') {`
		1077	`c = getchar(ts);`
		1078	`goto valloop_continue;`
		1079	`}`
		1080	`if(c == '\t' \|\| c == '\n')`
		1081	`c = ' ';`
		1082	`}`
		1083	`else {`
		1084	`if(c < 256 && isspace(c))`
		1085	`goto valloop_done;`
		1086	`}`
		1087	`if(c == '&') {`
		1088	`c = ampersand(ts);`
		1089	`if(c == -1)`
		1090	`goto eob_done;`
		1091	`}`
		1092	`buf[nv++] = c;`
		1093	`if(nv == BIGBUFSIZE-1) {`
		1094	`val = buftostr(val, buf, nv);`
		1095	`nv = 0;`
		1096	`}`
		1097	`c = getchar(ts);`
		1098	`}`
		1099	`valloop_done:`
		1100	`if(afnd) {`
		1101	`val = buftostr(val, buf, nv);`
		1102	`al = newattr(attid, val, al);`
		1103	`}`
		1104	`}`
		1105
		1106	`attrloop_done:`
		1107	`tok->attr = al;`
		1108	`(*pai)++;`
		1109	`return tok->tag;`
		1110
		1111	`eob_done:`
		1112	`if(warn)`
		1113	`fprint(2, "warning: incomplete tag at end of page\n");`
		1114	`backup(ts, nexti);`
		1115	`tok->tag = Data;`
		1116	`tok->text = _Strdup(L"<");`
		1117	`return Data;`
		1118	`}`
		1119
		1120	`// We've just read a '<!' at position starti,`
		1121	`// so this may be a comment or other ignored section, or it may`
		1122	`// be just a literal string if there is no close before end of file`
		1123	`// (other browsers do that).`
		1124	`// The accepted practice seems to be (note: contrary to SGML spec!):`
		1125	`// If see <!--, look for --> to close, or if none, > to close.`
		1126	`// If see <!(not --), look for > to close.`
		1127	`// If no close before end of file, leave original characters in as literal data.`
		1128	`//`
		1129	`// If we see ignorable stuff, return Comment.`
		1130	`// Else return nil (caller should back up and try again when more data arrives,`
		1131	`// unless at end of file, in which case caller should just make '<' a data token).`
		1132	`static int`
		1133	`comment(TokenSource* ts)`
		1134	`{`
		1135	`int nexti;`
		1136	`int havecomment;`
		1137	`int c;`
		1138
		1139	`nexti = ts->i;`
		1140	`havecomment = 0;`
		1141	`c = getchar(ts);`
		1142	`if(c == '-') {`
		1143	`c = getchar(ts);`
		1144	`if(c == '-') {`
		1145	`if(findstr(ts, L"-->"))`
		1146	`havecomment = 1;`
		1147	`else`
		1148	`backup(ts, nexti);`
		1149	`}`
		1150	`}`
		1151	`if(!havecomment) {`
		1152	`if(c == '>')`
		1153	`havecomment = 1;`
		1154	`else if(c >= 0) {`
		1155	`if(findstr(ts, L">"))`
		1156	`havecomment = 1;`
		1157	`}`
		1158	`}`
		1159	`if(havecomment)`
		1160	`return Comment;`
		1161	`return -1;`
		1162	`}`
		1163
		1164	`// Look for string s in token source.`
		1165	`// If found, return 1, with buffer at next char after s,`
		1166	`// else return 0 (caller should back up).`
		1167	`static int`
		1168	`findstr(TokenSource* ts, Rune* s)`
		1169	`{`
		1170	`int c0;`
		1171	`int n;`
		1172	`int nexti;`
		1173	`int i;`
		1174	`int c;`
		1175
		1176	`c0 = s[0];`
		1177	`n = runestrlen(s);`
		1178	`while(1) {`
		1179	`c = getchar(ts);`
		1180	`if(c < 0)`
		1181	`break;`
		1182	`if(c == c0) {`
		1183	`if(n == 1)`
		1184	`return 1;`
		1185	`nexti = ts->i;`
		1186	`for(i = 1; i < n; i++) {`
		1187	`c = getchar(ts);`
		1188	`if(c < 0)`
		1189	`goto mainloop_done;`
		1190	`if(c != s[i])`
		1191	`break;`
		1192	`}`
		1193	`if(i == n)`
		1194	`return 1;`
		1195	`backup(ts, nexti);`
		1196	`}`
		1197	`}`
		1198	`mainloop_done:`
		1199	`return 0;`
		1200	`}`
		1201
		1202	`// We've just read an '&'; look for an entity reference`
		1203	`// name, and if found, return translated char.`
		1204	`// if there is a complete entity name but it isn't known,`
		1205	`// back up to just past the '&' and return '&'.`
		1206	`// If the entity can't be completed in the current buffer, back up`
		1207	`// to the '&' and return -1.`
		1208	`static int`
		1209	`ampersand(TokenSource* ts)`
		1210	`{`
		1211	`int savei;`
		1212	`int c;`
		1213	`int fnd;`
		1214	`int ans;`
		1215	`int v;`
		1216	`int k;`
		1217	`Rune buf[25];`
		1218
		1219	`savei = ts->i;`
		1220	`c = getchar(ts);`
		1221	`fnd = 0;`
		1222	`ans = -1;`
		1223	`if(c == '#') {`
		1224	`c = getchar(ts);`
		1225	`v = 0;`
		1226	`if(c == 'X' \|\| c == 'x')`
		1227	`for(c = getchar(ts); c < 256; c = getchar(ts))`
		1228	`if(c >= '0' && c <= '9')`
		1229	`v = v*16+c-'0';`
		1230	`else if(c >= 'A' && c<= 'F')`
		1231	`v = v*16+c-'A'+10;`
		1232	`else if(c >= 'a' && c <= 'f')`
		1233	`v = v*16+c-'a'+10;`
		1234	`else`
		1235	`break;`
		1236	`else`
		1237	`while(c >= 0) {`
		1238	`if(!(c < 256 && isdigit(c)))`
		1239	`break;`
		1240	`v = v*10 + c - 48;`
		1241	`c = getchar(ts);`
		1242	`}`
		1243	`if(c >= 0) {`
		1244	`if(!(c == ';' \|\| c == '\n' \|\| c == '\r'))`
		1245	`ungetchar(ts, c);`
		1246	`c = v;`
		1247	`if(c == 160)`
		1248	`c = 160;`
		1249	`if(c >= Winstart && c <= Winend) {`
		1250	`c = winchars[c - Winstart];`
		1251	`}`
		1252	`ans = c;`
		1253	`fnd = 1;`
		1254	`}`
		1255	`}`
		1256	`else if(c < 256 && isalpha(c)) {`
		1257	`buf[0] = c;`
		1258	`k = 1;`
		1259	`while(1) {`
		1260	`c = getchar(ts);`
		1261	`if(c < 0)`
		1262	`break;`
		1263	`if(c < 256 && (isalpha(c) \|\| isdigit(c))) {`
		1264	`if(k < nelem(buf)-1)`
		1265	`buf[k++] = c;`
		1266	`}`
		1267	`else {`
		1268	`if(!(c == ';' \|\| c == '\n' \|\| c == '\r'))`
		1269	`ungetchar(ts, c);`
		1270	`break;`
		1271	`}`
		1272	`}`
		1273	`if(c >= 256 \|\| c != '=' && !(isalpha(c) \|\| isdigit(c)))`
		1274	`fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);`
		1275	`}`
		1276	`if(!fnd) {`
		1277	`backup(ts, savei);`
		1278	`ans = '&';`
		1279	`}`
		1280	`return ans;`
		1281	`}`
		1282
		1283	`// Get next char, obeying ts.chset.`
		1284	`// Returns -1 if no complete character left before current end of data.`
		1285	`static int`
		1286	`getchar(TokenSource* ts)`
		1287	`{`
		1288	`uchar* buf;`
		1289	`int c;`
		1290	`int n;`
		1291	`int ok;`
		1292	`Rune r;`
		1293
		1294	`if(ts->i >= ts->edata)`
		1295	`return -1;`
		1296	`buf = ts->data;`
		1297	`c = buf[ts->i];`
		1298	`switch(ts->chset) {`
		1299	`case ISO_8859_1:`
		1300	`if(c >= Winstart && c <= Winend)`
		1301	`c = winchars[c - Winstart];`
		1302	`ts->i++;`
		1303	`break;`
		1304	`case US_Ascii:`
		1305	`if(c > 127) {`
		1306	`if(warn)`
		1307	`fprint(2, "non-ascii char (%x) when US-ASCII specified\n", c);`
		1308	`}`
		1309	`ts->i++;`
		1310	`break;`
		1311	`case UTF_8:`
		1312	`ok = fullrune((char*)(buf+ts->i), ts->edata-ts->i);`
		1313	`n = chartorune(&r, (char*)(buf+ts->i));`
		1314	`if(ok) {`
		1315	`if(warn && c == 0x80)`
		1316	`fprint(2, "warning: invalid utf-8 sequence (starts with %x)\n", ts->data[ts->i]);`
		1317	`ts->i += n;`
		1318	`c = r;`
		1319	`}`
		1320	`else {`
		1321	`// not enough bytes in buf to complete utf-8 char`
		1322	`ts->i = ts->edata; // mark "all used"`
		1323	`c = -1;`
		1324	`}`
		1325	`break;`
		1326	`case Unicode:`
		1327	`if(ts->i < ts->edata - 1) {`
		1328	`//standards say most-significant byte first`
		1329	`c = (c << 8)\|(buf[ts->i + 1]);`
		1330	`ts->i += 2;`
		1331	`}`
		1332	`else {`
		1333	`ts->i = ts->edata; // mark "all used"`
		1334	`c = -1;`
		1335	`}`
		1336	`break;`
		1337	`default:`
		1338	`return -1;`
		1339	`}`
		1340	`return c;`
		1341	`}`
		1342
		1343	`// Assuming c was the last character returned by getchar, set`
		1344	`// things up so that next getchar will get that same character`
		1345	`// followed by the current 'next character', etc.`
		1346	`static void`
		1347	`ungetchar(TokenSource* ts, int c)`
		1348	`{`
		1349	`int n;`
		1350	`Rune r;`
		1351	`char a[UTFmax];`
		1352
		1353	`n = 1;`
		1354	`switch(ts->chset) {`
		1355	`case UTF_8:`
		1356	`if(c >= 128) {`
		1357	`r = c;`
		1358	`n = runetochar(a, &r);`
		1359	`}`
		1360	`break;`
		1361	`case Unicode:`
		1362	`n = 2;`
		1363	`break;`
		1364	`}`
		1365	`ts->i -= n;`
		1366	`}`
		1367
		1368	`// Restore ts so that it is at the state where the index was savei.`
		1369	`static void`
		1370	`backup(TokenSource* ts, int savei)`
		1371	`{`
		1372	`if(dbglex)`
		1373	`fprint(2, "lex: backup; i=%d, savei=%d\n", ts->i, savei);`
		1374	`ts->i = savei;`
		1375	`}`
		1376
		1377
		1378	`// Look for value associated with attribute attid in token t.`
		1379	`// If there is one, return 1 and put the value in *pans,`
		1380	`// else return 0.`
		1381	`// If xfer is true, transfer ownership of the string to the caller`
		1382	`// (nil it out here); otherwise, caller must duplicate the answer`
		1383	`// if it needs to save it.`
		1384	`// OK to have pans==0, in which case this is just looking`
		1385	`// to see if token is present.`
		1386	`int`
		1387	`_tokaval(Token* t, int attid, Rune** pans, int xfer)`
		1388	`{`
		1389	`Attr* attr;`
		1390
		1391	`attr = t->attr;`
		1392	`while(attr != nil) {`
		1393	`if(attr->attid == attid) {`
		1394	`if(pans != nil)`
		1395	`*pans = attr->value;`
		1396	`if(xfer)`
		1397	`attr->value = nil;`
		1398	`return 1;`
		1399	`}`
		1400	`attr = attr->next;`
		1401	`}`
		1402	`if(pans != nil)`
		1403	`*pans = nil;`
		1404	`return 0;`
		1405	`}`
		1406
		1407	`static int`
		1408	`Tconv(Fmt *f)`
		1409	`{`
		1410	`Token* t;`
		1411	`int i;`
		1412	`int tag;`
		1413	`char* srbra;`
		1414	`Rune* aname;`
		1415	`Rune* tname;`
		1416	`Attr* a;`
		1417	`char buf[BIGBUFSIZE];`
		1418
		1419	`t = va_arg(f->args, Token*);`
		1420	`if(t == nil)`
		1421	`sprint(buf, "<null>");`
		1422	`else {`
		1423	`i = 0;`
		1424	`if(dbglex > 1)`
		1425	`i = snprint(buf, sizeof(buf), "[%d]", t->starti);`
		1426	`tag = t->tag;`
		1427	`if(tag == Data) {`
		1428	`i += snprint(buf+i, sizeof(buf)-i-1, "'%S'", t->text);`
		1429	`}`
		1430	`else {`
		1431	`srbra = "";`
		1432	`if(tag >= RBRA) {`
		1433	`tag -= RBRA;`
		1434	`srbra = "/";`
		1435	`}`
		1436	`tname = tagnames[tag];`
		1437	`if(tag == Notfound)`
		1438	`tname = L"?";`
		1439	`i += snprint(buf+i, sizeof(buf)-i-1, "<%s%S", srbra, tname);`
		1440	`for(a = t->attr; a != nil; a = a->next) {`
		1441	`aname = attrnames[a->attid];`
		1442	`i += snprint(buf+i, sizeof(buf)-i-1, " %S", aname);`
		1443	`if(a->value != nil)`
		1444	`i += snprint(buf+i, sizeof(buf)-i-1, "=%S", a->value);`
		1445	`}`
		1446	`i += snprint(buf+i, sizeof(buf)-i-1, ">");`
		1447	`}`
		1448	`buf[i] = 0;`
		1449	`}`
		1450	`return fmtstrcpy(f, buf);`
		1451	`}`
		1452
		1453	`// Attrs own their constituent strings, but build may eventually`
		1454	`// transfer some values to its items and nil them out in the Attr.`
		1455	`static Attr*`
		1456	`newattr(int attid, Rune* value, Attr* link)`
		1457	`{`
		1458	`Attr* ans;`
		1459
		1460	`ans = (Attr*)emalloc(sizeof(Attr));`
		1461	`ans->attid = attid;`
		1462	`ans->value = value;`
		1463	`ans->next = link;`
		1464	`return ans;`
		1465	`}`
		1466
		1467	`// Free list of Attrs linked through next field`
		1468	`static void`
		1469	`freeattrs(Attr* ahead)`
		1470	`{`
		1471	`Attr* a;`
		1472	`Attr* nexta;`
		1473
		1474	`a = ahead;`
		1475	`while(a != nil) {`
		1476	`nexta = a->next;`
		1477	`free(a->value);`
		1478	`free(a);`
		1479	`a = nexta;`
		1480	`}`
		1481	`}`
		1482
		1483	`// Free array of Tokens.`
		1484	`// Allocated space might have room for more than n tokens,`
		1485	`// but only n of them are initialized.`
		1486	`// If caller has transferred ownership of constitutent strings`
		1487	`// or attributes, it must have nil'd out the pointers in the Tokens.`
		1488	`void`
		1489	`_freetokens(Token* tarray, int n)`
		1490	`{`
		1491	`int i;`
		1492	`Token* t;`
		1493
		1494	`if(tarray == nil)`
		1495	`return;`
		1496	`for(i = 0; i < n; i++) {`
		1497	`t = &tarray[i];`
		1498	`free(t->text);`
		1499	`freeattrs(t->attr);`
		1500	`}`
		1501	`free(tarray);`
		1502	`}`

Subversion Repositories planix.SVN

(root)/os/branches/feature_unix/sys/src/libhtml/lex.c – Rev 60