1
2
3
4
5 """This program takes files named *.q in the current directory, and converts them to HTML.
6 To do that, it looks for a python script called 'headfoot.py' or '../headfoot.py' or ... .
7 This script needs to define one variable ( server ) and two functions (header and footer).
8
9 The .q files have the following format::
10
11 TITLE title string
12 OTHER_HEADER_KEYWORD header info
13
14 P
15 Text is 3>5 ?
16 A ref="foo.html" No!
17 More text inside the link.
18 IMG ref="foo.gif"
19 UL
20 LI List item
21 LI Another List item
22
23
24 The header info is separated from the body with a blank line.
25 HTML tags are always the first thing on a line.
26 Line continuations and enclosures are indicated with indentation.
27 Tags automatically close when the indentation gets smaller.
28
29 The PRE tag is the only exception.
30 You have to close it with a line beginning '/PRE'.
31 No indenting is needed inside a PRE /PRE pair.
32
33 The user needs to define one thing:
34 the address of the web server.
35 This address is prepended to
36 all hyperlinks and image references that aren't absolute.
37
38 All the header information gets put into a dictionary, and is
39 passed into the header() and footer() functions.
40 That dictionary has 'filename' and '_server_root' entries added.
41
42 The user can also define three things:
43
44 DEFAULT_HEADER, a dictionary passed to the header() and footer() functions.
45 Useful things to put in DEFAULT_HEADER are:
46 - 'lang': 'en' # appears in the <html lang="en"> tag
47 - 'stylesheet': url becomes a link to a stylesheet: <link rel="stylesheet" type="text/css" href="url">
48 - Anything in the form M.x:y becomes a <meta name="x" content="y">
49 - M.description and M.keywords are useful.
50 - Anything in the form HTTP.x:y becomes a <meta http-equiv="x" content="y">
51
52 header(thd, hinfo) takes two arguments, a file descriptor
53 to which it should write the HTML, and a dictionary of header
54 information.
55
56 footer(thd, hinfo) takes the same arguments.
57
58 See _header() and _footer() below, for examples.
59 """
60
61 import os
62 import re
63 import time
64 import urlparse
65 import htmlentitydefs as ED
66 from gmisclib import die
67 from gmisclib import g_pipe
68
# When true, point tags are written XML-style with a trailing "/" (e.g. <BR />).
XML = 0

# Value in _anytag marking a "point" tag: one that never gets a closing tag.
POINTTAG = 1

# Every tag name recognised at the start of a line.  The value is POINTTAG for
# tags that take no closing tag and 0 for ordinary container tags.  PRE carries
# the marker 'SPECIAL' because PRE blocks are closed by an explicit /PRE line.
_anytag = {
    'P': 0, 'STYLE': 0,
    'UL': 0, 'OL': 0, 'DL': 0,
    'LI': 0, 'DT': 0, 'DD': 0,
    'PRE': 'SPECIAL',
    'TABLE': 0, 'TR': 0, 'TD': 0,
    'CAPTION': 0,
    'TBODY': 0, 'THEAD': 0, 'TFOOT': 0,
    'FORM': 0, 'INPUT': POINTTAG, 'SELECT': 0, 'OPTION': 0, 'TEXTAREA': 0,
    'A': 0, 'NOP': 0,
    'H1': 0, 'H2': 0, 'H3': 0, 'H4': 0,
    'EM': 0, 'STRONG': 0, 'CITE': 0, 'DFN': 0,
    'CODE': 0, 'SAMP': 0, 'KBD': 0, 'VAR': 0, 'ABBR': 0, 'ACRONYM': 0,
    'OBJECT': 0, 'DIV': 0, 'SPAN': 0,
    'FONT': 0,
    'IMG': POINTTAG, 'BR': POINTTAG, 'HR': POINTTAG,
}
88
# Keywords that, found at the very start of a line, suggest a .q input file.
_ck = re.compile(r'\b(H1|PRE|P|TITLE|IMG)\b', re.IGNORECASE)


def _checkfile(f):
    """Look at the beginning of a file to see if it is plausibly input for this program.

    f is the path of the file to inspect.  Returns 1 if any of its first ten
    lines begins with one of the keywords in _ck, otherwise 0.
    """
    with open(f, "r") as fd:
        # Read at most ten lines instead of loading the whole file.
        for _ in range(10):
            line = fd.readline()
            if not line:
                break
            if _ck.match(line):
                return 1
    return 0
98
99
100
def qfiles(dir):
    """Return the names of the *.q files in directory dir that look like input.

    A file qualifies when its (whitespace-stripped) name ends in '.q' and
    _checkfile() accepts its contents.  The returned names are bare file
    names, not paths.
    """
    o = []
    for t in os.listdir(dir):
        ts = t.strip()
        if not ts.endswith('.q'):
            continue
        # Join with dir so the check also works when dir is not the current directory.
        if not _checkfile(os.path.join(dir, ts)):
            continue
        o.append(ts)
    return o
114
115
116
117
# NOTE(review): the "def" line of this helper is missing from this listing; k and v
# are presumably its two parameters -- confirm against the full file.
# Formats one attribute as the text k=v.  v is wrapped in double quotes unless it
# already both starts and ends with a double quote.
119 if not v.startswith('"') or not v.endswith('"'):
120 v = '"%s"' % v
121 return "%s=%s" % (k, v)
122
123
# NOTE(review): LL is not referenced anywhere in the visible code; its meaning cannot
# be confirmed from here.
124 LL = 60
# Tab-stop width: a tab advances the measured indent to the next multiple of TABW.
125 TABW = 8
126
# NOTE(review): DEFAULT_TAG and DEFAULT_INDENT are not referenced in the visible code;
# presumably they apply to lines that carry no explicit tag -- confirm.
127 DEFAULT_TAG = 'P'
128 DEFAULT_INDENT = -1
129
# NOTE(review): the "def" line of this function is missing from this listing; s is
# presumably its only parameter -- confirm against the full file.
# Measures the leading whitespace of s in columns: a space counts one column and a
# tab advances to the next multiple of TABW.
# NOTE(review): the return type is inconsistent -- (None, '') when s is all whitespace
# or a CR/LF is reached while scanning, but a bare integer otherwise.  Confirm what
# the caller expects.
131 indent = 0
132 i = 0
133 if s.isspace():
134 return (None, '')
135
136 while i<len(s) and s[i].isspace():
137 if s[i] == ' ':
138 indent += 1
139 elif s[i] == '\t':
# Relies on "/" being integer division (true for ints under Python 2).
140 indent = TABW*((indent+TABW)/TABW)
141 elif s[i] == '\r' or s[i] == '\n':
142 return (None, '')
143 else:
# Any other whitespace character (e.g. form feed) is rejected.
144 raise ValueError, "Unrecognized whitespace: %d" % ord(s[i])
145 i += 1
146 return indent
147
148
# NOTE(review): the "def" line of this function is missing from this listing; l is
# presumably its parameter -- confirm against the full file.
# Strips surrounding whitespace from l; if the result starts with '%', that one
# character and any whitespace after it are removed as well.
150 l = l.strip()
151 if l.startswith('%'):
152 l = l[1:].lstrip()
153 return l
154
155
# NOTE(review): the "def" line of this function is missing from this listing; l is
# presumably its parameter -- confirm against the full file.
# Decides whether l looks like a tag line.  The first whitespace-delimited word must
# be a key of _anytag; if anything follows it, that remainder must contain '='.
157 a = l.split(None, 1)
158 if len(a) == 0:
# Blank line: not a tag line.
159 return False
160 elif len(a) == 1:
161 return _anytag.has_key(a[0])
162 else:
163 return _anytag.has_key(a[0]) and '=' in a[1]
164
165
# Matches one name=value pair at the start of a string.  The name is made of letters,
# digits, '_' and ':'; the value is either a double-quoted string (group 2 keeps the
# quotes) or a run of non-whitespace characters.
166 _eqtoken = re.compile("""\s*([a-zA-Z0-9_:]+)\s*=\s*(("[^"]*")|(\S+))\s*""")
168
# NOTE(review): the "def" line of this function is missing from this listing; l is
# presumably its parameter -- confirm against the full file.
# Splits l into (tag, args, txt): the first word is the tag, the name=value pairs
# that immediately follow are collected into the dict args, and whatever is left
# over is txt ('' when the line holds only the tag).
# NOTE(review): a[0] raises IndexError if l is blank; presumably callers never pass
# a blank line -- confirm.
169 a = l.split(None, 1)
170 tag = a[0]
171 args = {}
172 if len(a) > 1:
173 s = a[1]
174 while True:
175 m = _eqtoken.match(s)
176 if not m:
177 break
178 args[m.group(1)] = m.group(2)
# Drop the pair just consumed and try to match the next one.
179 s = s[m.end():]
180 txt = s
181 else:
182 txt = ''
183 die.info("TOKENIZED:%s %s %s" % (tag, args, txt))
184 return (tag, args, txt)
185
186
197
# NOTE(review): body of a method whose "def" line and enclosing class are missing from
# this listing.  Looks up k in self.args and returns defv when k is absent.
199 return self.args.get(k, defv)
200
201
202
203
214
215
def dequote(a):
    """Return a without its enclosing double quotes.

    Exactly one pair of quotes is removed, and only when a both starts and
    ends with a double quote and is at least two characters long; any other
    string is returned unchanged.
    """
    if len(a) > 1 and a[0] == '"' and a[-1] == '"':
        return a[1:-1]
    return a
220
223
224
def prepend(a, b):
    """Process a URL.  Pathnames are prepended with the server root.
    Complete URLs are untouched.

    a is the header-information dictionary; its '_abs_root' and '_cwd' entries
    form the base URL.  b is the reference as written in the input, optionally
    enclosed in double quotes.  Returns the joined URL enclosed in double quotes.

    Raises KeyError if a lacks '_abs_root' or '_cwd'.
    """
    # NOTE(review): urlq() is defined outside the visible code; presumably it
    # URL-quotes its argument -- confirm.
    base = urlq(a['_abs_root'] + '/' + a['_cwd'] + '/')
    return '"%s"' % urlparse.urljoin(base, urlq(dequote(b)))
232
233
234
def process_tag(d, od, hinfo, eol):
    """Write the opening HTML tag for one parsed tag line.

    d     -- parsed line; its tag, args (a dict, modified in place) and txt are used.
    od    -- output object; the tag is written with od.write().
    hinfo -- header-information dictionary, handed to prepend().
    eol   -- string written after the tag and its text.

    A 'ref' argument on IMG, OBJECT or A is converted with prepend() and stored
    under 'src', 'data' or 'href' respectively; 'ref' itself is removed.

    Returns (references, closing_tag): references lists the URLs produced from
    'ref' arguments, and closing_tag is the tag name to close later, or None
    for a point tag that takes no closing tag.
    """
    # Attribute that replaces 'ref' for each tag that can carry a reference.
    ref_attr = {'IMG': 'src', 'OBJECT': 'data', 'A': 'href'}

    references = []
    target = ref_attr.get(d.tag)
    # A missing 'ref' is tolerated for every tag, not only for A, so that e.g.
    # an IMG written with an explicit src does not fail with KeyError.
    if target is not None and 'ref' in d.args:
        d.args[target] = prepend(hinfo, d.args['ref'])
        references = [d.args[target]]
        del d.args['ref']

    args = format_dict(d.args, '_')

    is_point = (_anytag[d.tag] == POINTTAG)
    if XML and is_point:
        pointclose = ['/']      # XML-style self-closing tag
    else:
        pointclose = []

    # NOTE(review): d.txt is written without escape(); presumably it is already
    # safe to emit when it gets here -- confirm.
    od.write('<%s>%s%s' % (' '.join([d.tag] + args + pointclose), d.txt, eol))

    if is_point:
        return (references, None)
    return (references, d.tag)
264
try:
    _unichr = unichr        # Python 2
except NameError:
    _unichr = chr           # Python 3: chr() already returns text

# Maps each character that has a named HTML entity to its "&name;" escape sequence.
_escape = dict([(_unichr(q), '&%s;' % nm) for (q, nm) in ED.codepoint2name.items()])
# '~' is written as a non-breaking space; the markup-significant characters are
# listed explicitly so the table does not depend on the entity list for them.
_escape.update({'~': '&nbsp;', '>': '&gt;', '<': '&lt;', '&': '&amp;',
                '"': '&quot;'
                })

# Character class matching any single character that has an escape.  Each key is
# passed through re.escape() so no key can be misread as class syntax.
_epat = '[%s]' % (''.join([re.escape(c) for c in _escape.keys()]))


_ere = re.compile(_epat)


def _esc1(x):
    """Converts a character in a MatchObject to a &xx; (HTML) escape sequence"""
    mstring = x.group(0)
    if len(mstring) == 1:
        return _escape[mstring]
    return mstring


def escape(s):
    """Return s with every character that has an HTML escape replaced by that escape."""
    return _ere.sub(_esc1, s)
285
286
def get_logical_line(lines):
    """Remove and return the next line of lines that is not a comment.

    Lines are popped from the front of the list lines; those starting with
    '#' are discarded.  Returns None once the list is exhausted.
    """
    while lines:
        o = lines.pop(0)
        if not o.startswith('#'):
            return o
    return None
295
296
297
def process(lines, od, hinfo):
    """Convert the body of a .q file to HTML.

    lines -- list of input lines; it is consumed (emptied) as it is processed.
    od    -- output object; HTML is written with od.write().
    hinfo -- header-information dictionary, handed on to process_tag().

    Tags are closed automatically when the indentation of a later line is not
    larger than that of the line that opened them.  A line whose first word,
    at column 0, is PRE starts a block that is copied through verbatim up to a
    line beginning with /PRE.

    Returns the list of references reported by process_tag().
    Raises ValueError when a line cannot be parsed; the line number in the
    message counts from the first element of lines as passed in.
    """
    # Sentinel entry: its indent is below any real indent, so it is only
    # closed by the final loop.
    stack = [('BODY', -1000)]

    references = []
    eol = '\n'
    total = len(lines)
    while 1:
        l = get_logical_line(lines)
        if l is None:
            break

        # Require a word boundary after "pre" so that ordinary text such as
        # "Press ..." is not mistaken for the start of a PRE block.
        if l[:3].lower() == 'pre' and (len(l) == 3 or l[3].isspace()):
            od.write('<pre>' + l[len('pre'):])
            while len(lines) > 0:
                l = lines.pop(0).rstrip()
                if l.lower().startswith('/pre'):
                    break
                od.write(l + '\n')
            od.write("</pre>\n")
            # Start over with the next logical line; this also copes with the
            # input ending, or holding only comments, after the PRE block.
            continue

        # Number of lines consumed so far == position of l in the original list.
        n = total - len(lines)
        try:
            d = lineC(l)
        except ValueError as x:
            raise ValueError("Bad Parse, line %d: %s" % (n, x))

        if d is None:
            continue

        # Close every open tag whose indent is not smaller than this line's.
        while d.indent <= stack[-1][1]:
            cltag, oindent = stack.pop(-1)
            od.write("</%s>%s" % (cltag, eol))

        if d.tag is not None:
            some_refs, cltag = process_tag(d, od, hinfo, eol)
            if cltag is not None:
                stack.append((cltag, d.indent))
            references.extend(some_refs)
        elif d.txt is not None:
            od.write('%s%s' % (escape(d.txt), eol))

    # Close whatever is still open, including the BODY sentinel.
    while len(stack) > 0:
        od.write('</%s>%s' % (stack.pop(-1)[0], eol))

    return references
349
350
def swapend(a, e):
    """Return a with everything from its last '.' onward replaced by e.

    Raises ValueError if a contains no '.'.
    """
    return a[:a.rindex('.')] + e
354
355
def aget(dic, alist):
    """Format selected entries of dic as HTML attributes.

    For each name in alist, the first of name, name.lower() and name.upper()
    that is a key of dic contributes key="value", using the key as it was
    found.  Returns '' when nothing matched; otherwise the pairs joined by
    single spaces, with one leading space.
    """
    o = []
    for a in alist:
        for ac in (a, a.lower(), a.upper()):
            if ac in dic:
                o.append('%s="%s"' % (ac, dic[ac]))
                break
    if not o:
        return ''
    return " " + ' '.join(o)
366
367
def sw(s, prefix):
    """Return True if s starts with prefix as given, in lower case, or in upper case."""
    return s.startswith((prefix, prefix.lower(), prefix.upper()))
370
# NOTE(review): the body of dpre is missing from this listing.  The visible caller
# applies it to keys already accepted by sw(), so it presumably returns s with its
# first len(prefix) characters removed -- confirm against the full file.
371 -def dpre(s, prefix):
373
def agp(dic, prefix):
    """Format the entries of dic whose key starts with prefix as HTML attributes.

    A key qualifies when it is longer than prefix and sw() accepts it; the
    attribute name is the key with its first len(prefix) characters removed.
    Returns '' when nothing qualifies; otherwise the name="value" pairs joined
    by single spaces, with one leading space.
    """
    lp = len(prefix)
    o = ['%s="%s"' % (k[lp:], v)
         for (k, v) in dic.items()
         if len(k) > lp and sw(k, prefix)]
    if not o:
        return ''
    return " " + ' '.join(o)
385
386
def get(dic, key, dfl):
    """Look up key in dic, also trying its lower- and upper-case forms.

    Returns the value of the first of key, key.lower() and key.upper() that is
    a key of dic, or dfl when none of them is.
    """
    for k in (key, key.lower(), key.upper()):
        if k in dic:
            return dic[k]
    return dfl
392
393
def _header(thd, hinfo):
    """Default header writer: emit everything from the DOCTYPE to the <body> tag.

    thd   -- output object; HTML is written with thd.write().
    hinfo -- header-information dictionary.  Entries used:
             DOCTYPE        replaces the default HTML 4.01 Transitional doctype
             lang           becomes the lang attribute of <html>
             stylesheet     becomes a stylesheet <link>
             _mod_time      becomes a Last-Modified <meta http-equiv>
             M.x / HTTP.x   become <meta name="x"> / <meta http-equiv="x">
             TITLE          page title; hinfo['filename'] is the fallback
             B.x            become attributes of <body>

    Raises KeyError if hinfo has no 'filename' entry.
    """
    thd.write(get(hinfo, 'DOCTYPE',
                  '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
                  ) + '\n'
              )
    thd.write('<html%s>\n' % aget(hinfo, ['lang']))
    thd.write("<head>\n")
    sty = get(hinfo, 'stylesheet', '')
    if sty != '':
        thd.write('<link rel="stylesheet" type="text/css" href="%s">\n' % sty)
    if '_mod_time' in hinfo:
        thd.write('<meta http-equiv="Last-Modified" content="%s">\n'
                  % time.asctime(time.gmtime(hinfo['_mod_time'])))
    for (k, v) in hinfo.items():
        if sw(k, 'M.'):
            thd.write('<meta name="%s" content="%s">\n' % (dpre(k, 'M.'), v))
        if sw(k, 'HTTP.'):
            # Strip the same prefix that sw() just tested for.
            thd.write('<meta http-equiv="%s" content="%s">\n' % (dpre(k, 'HTTP.'), v))
    thd.write("<title>%s</title>\n" % get(hinfo, 'TITLE', hinfo['filename']))
    thd.write("</head>\n")
    thd.write("<body%s>\n" % agp(hinfo, 'B.'))
415
416
419
420
423
424
def readheader(fd, t):
    """Read the header section of a .q file.

    fd -- open file positioned at the start of the header; it is read with
          readline() up to and including the blank line that ends the header.
    t  -- name of the file, stored under 'filename'.

    Each header line is "KEYWORD value"; lines whose first character is '#'
    are skipped.  Returns a dictionary of the keywords plus 'filename'.

    Raises AssertionError for a header line that has a keyword but no value.
    """
    h = {'filename': t}
    while 1:
        l = fd.readline()
        if not l:
            break       # end of file (readline() returns an empty string)
        l = l.rstrip()
        if l == '':
            break       # blank line: end of the header
        if l[0] == '#':
            continue
        a = l.split(None, 1)
        if len(a) != 2:
            # Raised explicitly so the check is not stripped by python -O; the
            # exception type is the one the former assert statement produced.
            raise AssertionError("Bad header line in %s: %r" % (t, l))
        h[a[0].strip()] = a[1].strip()
    return h
440
441
# Index of the modification time in the tuple form of an os.stat() result.
MTIME = 8


def needcopy(a, b, force=None):
    """Do we need to reconstruct file b from file a?

    Returns 1 when b does not exist, when b is older than a, or when force is
    not None and b is older than the time given by force; otherwise 0.
    Times are modification times as found at index MTIME of os.stat().

    Raises AssertionError if a does not exist or cannot be read.
    """
    if not os.access(a, os.F_OK | os.R_OK):
        # Raised explicitly so the check is not stripped by python -O; the
        # exception type is the one the former assert statement produced.
        raise AssertionError("Cannot read source file: %s" % a)
    a_mtime = os.stat(a)[MTIME]
    if not os.access(b, os.F_OK):
        return 1
    b_mtime = os.stat(b)[MTIME]
    if force is not None and b_mtime < force:
        return 1
    if b_mtime < a_mtime:
        return 1
    return 0
459
460
# NOTE(review): the "def" line of this function is missing from this listing; url is
# presumably its parameter -- confirm against the full file.
462 """Given a URL, just return the file part,
463 not the target specifier inside the file."""
464
465 a = url.split('#')
466 if len(a) > 2:
# More than one '#': ambiguous, so warn and hand the URL back unchanged.
467 die.warn("URL has more than one '#': can't split into file and target.")
468 return url
# Everything before the (only) '#', or the whole URL when it has none.
469 return a[0]
470
471
def del_fin_sl(s):
    """Return s without its final '/', if it has one.  At most one slash is removed."""
    if s.endswith('/'):
        return s[:-1]
    return s
476
477
def go(env, force=None):
    """Convert every out-of-date .q file in the current directory to HTML.

    env   -- dictionary of settings.  Optional entries: 'header', 'footer' and
             'common' (functions replacing _header, _footer and _common),
             'DEFAULT_HEADER' (dictionary of default header information),
             'server', 'rel_root' and 'cwd'.
    force -- if not None, a time; HTML files older than it are rebuilt even
             when their .q source has not changed.

    Each file is written by piping the generated HTML through /usr/bin/tidy.
    Returns the set of references collected by process() from the files that
    were converted.
    """
    htmlheader = env.get('header', _header)
    htmlfooter = env.get('footer', _footer)
    htmlcommon = env.get('common', _common)
    default_header = env.get('DEFAULT_HEADER', {})
    refs = set()
    for t in qfiles('.'):
        th = swapend(t, '.html')
        if not needcopy(t, th, force):
            print("# No need to process %s" % t)
            continue
        die.info("# processing:%s -> %s\n" % (t, th))
        fd = open(t, "r")
        try:
            thd, thdo = g_pipe.popen2("/usr/bin/tidy",
                                      ['tidy', '-asxhtml', '-c', '-i', '-q', '-o', th])
            try:
                hinfo = default_header.copy()
                hinfo.update(readheader(fd, t))
                hinfo['_mod_time'] = os.fstat(fd.fileno()).st_mtime
                hinfo['_server_root'] = del_fin_sl(env.get('server', '.'))
                hinfo['_abs_root'] = del_fin_sl(env.get('server', '.'))
                hinfo['_rel_root'] = del_fin_sl(env.get('rel_root', '.'))
                # NOTE(review): without 'cwd' in env, hinfo gets no '_cwd'
                # entry; confirm that later URL handling copes with that.
                if 'cwd' in env:
                    hinfo['_cwd'] = env['cwd']
                    hinfo['_local_root'] = env['cwd']
                htmlheader(thd, hinfo)
                # readheader() has consumed the header; the rest is the body.
                lines = htmlcommon([x.rstrip() for x in fd.readlines()], hinfo)
                refs.update(process(lines, thd, hinfo))
                htmlfooter(thd, hinfo)
            finally:
                # Always close the pipe and the source, even if conversion fails.
                thd.close()
        finally:
            fd.close()

    return refs
512
513
514
# Name of the per-site configuration script searched for by easygo().
DEFF = 'headfoot.py'


def easygo():
    """Find and run the configuration script, then convert the current directory.

    DEFF is looked for in the current directory and then in up to nine
    successive parent directories.  The first readable one is executed with
    env as its namespace; it must define 'server'.  Its modification time is
    passed to go() as force.  If no script is found, go() is called with an
    empty env and force=None.

    Returns whatever go() returns.
    Raises AssertionError if the script does not define 'server'.
    """
    force = None
    env = {}
    cwd = os.getcwd().split('/') + ['.']
    for i in range(10):
        chkd = './' + ('../' * i)
        chk = chkd + DEFF
        print("checking %s" % chk)
        if os.access(chk, os.R_OK):
            env['rel_root'] = chkd[:-1]
            # Path from the directory holding the script down to the current one.
            env['cwd'] = '/'.join(cwd[-i-1:])
            # SECURITY NOTE: this runs whatever code the script contains, and the
            # script may come from any of the parent directories searched above.
            with open(chk) as fh:
                source = fh.read()
            # exec(compile(...)) behaves like execfile() and also exists on Python 3.
            exec(compile(source, chk, 'exec'), env)
            if 'server' not in env:
                # Raised explicitly so the check is not stripped by python -O; the
                # exception type is the one the former assert statement produced.
                raise AssertionError("%s must define 'server'" % chk)
            force = os.stat(chk)[MTIME]
            break
    return go(env, force)
534
535
536
537
538
539
if __name__ == '__main__':
    # Convert the current directory, then list the references that were collected.
    o = easygo()
    print("# Files to upload:")
    # Sorted so that the listing is the same from run to run.
    for t in sorted(o):
        print(t)
545