1 | #!/usr/bin/env python
|
---|
2 | # -*- coding: utf-8 -*-
|
---|
3 |
|
---|
4 | # $Id: extract_onlinehelp_links.py 10982 2010-09-15 11:02:25Z pstorz $
|
---|
5 |
|
---|
6 | import re
|
---|
7 | import pprint
|
---|
8 | from httplib import *
|
---|
9 |
|
---|
# Root URL of the Bacula 5.0.x manual.  Page names and link anchors are
# appended to it to build absolute URLs.
baseurl = 'http://www.bacula.org/5.0.x-manuals/en/main/main/'

# Maps a configuration type to the manual page (relative to ``baseurl``)
# that is downloaded and scanned for that type.
htmlpages = {
    'client': 'Client_File_daemon_Configur.html',
    'storagedaemon': 'Storage_Daemon_Configuratio.html',
    'autochanger': 'Autochanger_Resource.html',
    'director': 'Configuring_Director.html',
    'console': 'Console_Configuration.html',
    'monitor': 'Monitor_Configuration.html',
    'messages': 'Messages_Resource.html',
}
# Sample of the "Table of Child-Links" markup whose HREFs point into
# Configuring_Director.html.  No executable statement visible in this file
# reads it; it is only mentioned by commented-out code, so it appears to be
# kept as an offline fixture for trying out the regular expressions.
childtablelinks = '''
<!--Table of Child-Links-->
<A NAME="CHILD_LINKS"><STRONG>Subsections</STRONG></A>

<UL CLASS="ChildLinks">

<LI><A NAME="tex2html1449"
HREF="Configuring_Director.html#SECTION001810000000000000000">Director Resource Types</A>
<LI><A NAME="tex2html1450"
HREF="Configuring_Director.html#SECTION001820000000000000000">The Director Resource</A>
<LI><A NAME="tex2html1451"
HREF="Configuring_Director.html#SECTION001830000000000000000">The Job Resource</A>
<LI><A NAME="tex2html1452"
HREF="Configuring_Director.html#SECTION001840000000000000000">The JobDefs Resource</A>
<LI><A NAME="tex2html1453"
HREF="Configuring_Director.html#SECTION001850000000000000000">The Schedule Resource</A>
<LI><A NAME="tex2html1454"
HREF="Configuring_Director.html#SECTION001860000000000000000">Technical Notes on Schedules</A>
<LI><A NAME="tex2html1455"
HREF="Configuring_Director.html#SECTION001870000000000000000">The FileSet Resource</A>
<LI><A NAME="tex2html1456"
HREF="Configuring_Director.html#SECTION001880000000000000000">FileSet Examples</A>
<LI><A NAME="tex2html1457"
HREF="Configuring_Director.html#SECTION001890000000000000000">Backing up Raw Partitions</A>

<LI><A NAME="tex2html1458"
HREF="Configuring_Director.html#SECTION0018100000000000000000">Excluding Files and Directories</A>
<LI><A NAME="tex2html1459"
HREF="Configuring_Director.html#SECTION0018110000000000000000">Windows FileSets</A>
<UL>
<LI><A NAME="tex2html1460"
HREF="Configuring_Director.html#SECTION0018110010000000000000">A Windows Example FileSet</A>
<LI><A NAME="tex2html1461"
HREF="Configuring_Director.html#SECTION0018110020000000000000">Windows NTFS Naming Considerations</A>
</UL>
<BR>
<LI><A NAME="tex2html1462"
HREF="Configuring_Director.html#SECTION0018120000000000000000">Testing Your FileSet</A>
<LI><A NAME="tex2html1463"
HREF="Configuring_Director.html#SECTION0018130000000000000000">The Client Resource</A>
<LI><A NAME="tex2html1464"
HREF="Configuring_Director.html#SECTION0018140000000000000000">The Storage Resource</A>

<LI><A NAME="tex2html1465"
HREF="Configuring_Director.html#SECTION0018150000000000000000">The Pool Resource</A>
<UL>
<LI><A NAME="tex2html1466"
HREF="Configuring_Director.html#SECTION0018151000000000000000">The Scratch Pool</A>
</UL>
<BR>
<LI><A NAME="tex2html1467"
HREF="Configuring_Director.html#SECTION0018160000000000000000">The Catalog Resource</A>
<LI><A NAME="tex2html1468"
HREF="Configuring_Director.html#SECTION0018170000000000000000">The Messages Resource</A>
<LI><A NAME="tex2html1469"
HREF="Configuring_Director.html#SECTION0018180000000000000000">The Console Resource</A>
<LI><A NAME="tex2html1470"
HREF="Configuring_Director.html#SECTION0018190000000000000000">The Counter Resource</A>
<LI><A NAME="tex2html1471"
HREF="Configuring_Director.html#SECTION0018200000000000000000">Example Director Configuration File</A>

</UL>
<!--End of Table of Child-Links-->
'''
84 |
|
---|
85 |
|
---|
# Captures the complete child-links list of a manual page.  With DOTALL the
# greedy body runs up to the *last* line-initial </UL>, so nested sub-lists
# (which close with their own line-initial </UL>) remain inside the match.
RXP_TABLE_OF_CHILDLINKS = re.compile(
    '^<UL CLASS="ChildLinks">(.*)^</UL>',
    re.MULTILINE | re.DOTALL
)

# Matches a single link whose text ends in " Resource":
#   anchor -- the HREF value (page name plus #fragment)
#   title  -- the link text without the optional leading "The " and without
#             the trailing " Resource" (e.g. "Job" for "The Job Resource")
# Links with any other text (e.g. "FileSet Examples") are not matched.
RXP_HREF = re.compile(
    'HREF="(?P<anchor>[^"]+)">(The )?(?P<title>.*) Resource</A>'
)
93 |
|
---|
94 |
|
---|
95 |
|
---|
# Result: configuration type -> {lower-cased resource name -> absolute URL}.
htmlhelp = {}

# One connection is reused for every page; the finally clause guarantees it
# is closed even when a request or read raises.
connection = HTTPConnection('www.bacula.org')
try:
    for config, page in htmlpages.items():
        connection.request('GET', baseurl + page)
        reply = connection.getresponse()
        # Single-argument print(...) produces the same output as the
        # Python 2 print statement it replaces.
        print('%s %s' % (reply.status, reply.reason))

        # Always drain the body: the next request on this connection can
        # only be sent once the current response has been read completely.
        htmltext = reply.read()

        # Every configured type gets an entry, even if no link is found.
        htmlhelp[config] = {}

        if reply.status != 200:
            # Error or redirect body, not a manual page -- nothing to scan.
            continue

        for childlinks in RXP_TABLE_OF_CHILDLINKS.finditer(htmltext):
            for href in RXP_HREF.finditer(childlinks.group(0)):
                title = href.group('title')
                anchor = href.group('anchor')
                print('%s %s link: %s' % (config, title, anchor))
                htmlhelp[config][title.lower()] = baseurl + anchor
finally:
    connection.close()

pp = pprint.PrettyPrinter(indent=4)
pp.pprint(htmlhelp)