#!/usr/bin/env python
# -*- coding: utf-8 -*-

# $Id: extract_onlinehelp_links.py 10982 2010-09-15 11:02:25Z pstorz $

"""Collect links to the "... Resource" sections of the Bacula online manual.

For every manual page listed in ``htmlpages`` the script downloads the page,
looks at its "Table of Child-Links" block and records every entry whose link
text ends in " Resource".  The result is the nested dictionary ``htmlhelp``::

    htmlhelp[<config name>][<lower-cased resource title>] = <absolute URL>

which is pretty-printed to standard output once all pages were processed.

The code runs unchanged on Python 2 and Python 3.
"""

import pprint
import re

try:  # Python 3
    from http.client import HTTPConnection
    from urllib.parse import urlsplit
except ImportError:  # Python 2
    from httplib import HTTPConnection
    from urlparse import urlsplit

# Common prefix of all manual pages; also used to build the resulting links.
baseurl = 'http://www.bacula.org/5.0.x-manuals/en/main/main/'

# Configuration name -> manual page (relative to ``baseurl``) to scan.
htmlpages = {
    'client': 'Client_File_daemon_Configur.html',
    'storagedaemon': 'Storage_Daemon_Configuratio.html',
    'autochanger': 'Autochanger_Resource.html',
    'director': 'Configuring_Director.html',
    'console': 'Console_Configuration.html',
    'monitor': 'Monitor_Configuration.html',
    'messages': 'Messages_Resource.html',
}

# Sample of a child-link table as it appears in the manual pages.  It is not
# used by the live code path; it serves as an offline fixture for the regular
# expressions below, e.g. ``list(iter_resource_links(childtablelinks))``.
childtablelinks = '''
<!--Table of Child-Links-->
<A NAME="CHILD_LINKS"><STRONG>Subsections</STRONG></A>

<UL CLASS="ChildLinks">

<LI><A NAME="tex2html1449"
HREF="Configuring_Director.html#SECTION001810000000000000000">Director Resource Types</A>
<LI><A NAME="tex2html1450"
HREF="Configuring_Director.html#SECTION001820000000000000000">The Director Resource</A>
<LI><A NAME="tex2html1451"
HREF="Configuring_Director.html#SECTION001830000000000000000">The Job Resource</A>
<LI><A NAME="tex2html1452"
HREF="Configuring_Director.html#SECTION001840000000000000000">The JobDefs Resource</A>
<LI><A NAME="tex2html1453"
HREF="Configuring_Director.html#SECTION001850000000000000000">The Schedule Resource</A>
<LI><A NAME="tex2html1454"
HREF="Configuring_Director.html#SECTION001860000000000000000">Technical Notes on Schedules</A>
<LI><A NAME="tex2html1455"
HREF="Configuring_Director.html#SECTION001870000000000000000">The FileSet Resource</A>
<LI><A NAME="tex2html1456"
HREF="Configuring_Director.html#SECTION001880000000000000000">FileSet Examples</A>
<LI><A NAME="tex2html1457"
HREF="Configuring_Director.html#SECTION001890000000000000000">Backing up Raw Partitions</A>

<LI><A NAME="tex2html1458"
HREF="Configuring_Director.html#SECTION0018100000000000000000">Excluding Files and Directories</A>
<LI><A NAME="tex2html1459"
HREF="Configuring_Director.html#SECTION0018110000000000000000">Windows FileSets</A>
<UL>
<LI><A NAME="tex2html1460"
HREF="Configuring_Director.html#SECTION0018110010000000000000">A Windows Example FileSet</A>
<LI><A NAME="tex2html1461"
HREF="Configuring_Director.html#SECTION0018110020000000000000">Windows NTFS Naming Considerations</A>
</UL>
<BR>
<LI><A NAME="tex2html1462"
HREF="Configuring_Director.html#SECTION0018120000000000000000">Testing Your FileSet</A>
<LI><A NAME="tex2html1463"
HREF="Configuring_Director.html#SECTION0018130000000000000000">The Client Resource</A>
<LI><A NAME="tex2html1464"
HREF="Configuring_Director.html#SECTION0018140000000000000000">The Storage Resource</A>

<LI><A NAME="tex2html1465"
HREF="Configuring_Director.html#SECTION0018150000000000000000">The Pool Resource</A>
<UL>
<LI><A NAME="tex2html1466"
HREF="Configuring_Director.html#SECTION0018151000000000000000">The Scratch Pool</A>
</UL>
<BR>
<LI><A NAME="tex2html1467"
HREF="Configuring_Director.html#SECTION0018160000000000000000">The Catalog Resource</A>
<LI><A NAME="tex2html1468"
HREF="Configuring_Director.html#SECTION0018170000000000000000">The Messages Resource</A>
<LI><A NAME="tex2html1469"
HREF="Configuring_Director.html#SECTION0018180000000000000000">The Console Resource</A>
<LI><A NAME="tex2html1470"
HREF="Configuring_Director.html#SECTION0018190000000000000000">The Counter Resource</A>
<LI><A NAME="tex2html1471"
HREF="Configuring_Director.html#SECTION0018200000000000000000">Example Director Configuration File</A>

</UL>
<!--End of Table of Child-Links-->
'''

# Matches the whole child-link list.  The ``(.*)`` is intentionally greedy
# (with re.S it spans lines): the list may contain nested ``<UL>...</UL>``
# groups, so the match has to run up to the *last* ``</UL>`` that starts a
# line instead of stopping at the first nested one.
RXP_TABLE_OF_CHILDLINKS = re.compile(
    r'^<UL CLASS="ChildLinks">(.*)^</UL>', re.M | re.S)

# Matches one link whose text is "<title> Resource", with an optional leading
# "The ".  ``.`` does not match newlines here, so a match never crosses lines.
RXP_HREF = re.compile(
    r'HREF="(?P<anchor>[^"]+)">(The )?(?P<title>.*) Resource</A>')

# HTTP status code of a successful reply.
HTTP_OK = 200

# Result: config name -> {lower-cased resource title: absolute URL}.
# Filled by main().
htmlhelp = {}


def iter_resource_links(htmltext):
    """Yield ``(title, anchor)`` for every "... Resource" child link.

    Only links inside a ``<UL CLASS="ChildLinks">`` block are considered.

    Args:
        htmltext: HTML source of one manual page, as text.

    Yields:
        Tuples ``(title, anchor)`` in document order, where ``title`` is the
        link text without the optional leading "The " and without the
        trailing " Resource", and ``anchor`` is the raw ``HREF`` value.
    """
    for table in RXP_TABLE_OF_CHILDLINKS.finditer(htmltext):
        for href in RXP_HREF.finditer(table.group(0)):
            yield href.group('title'), href.group('anchor')


def fetch_page(connection, path):
    """Download one page and return its body as text.

    The status line of the reply is printed.  The body is always read
    completely so that the connection can be reused for the next request.

    Args:
        connection: an ``HTTPConnection`` to the manual's host.
        path: absolute path of the page on that host.

    Returns:
        The page source as ``str``; an empty string if the server did not
        answer with status 200, so that error pages are never parsed.
    """
    connection.request('GET', path)
    reply = connection.getresponse()
    print('%s %s' % (reply.status, reply.reason))
    body = reply.read()
    if not isinstance(body, str):
        # Python 3 hands out bytes.  latin-1 maps every byte value, so the
        # decode step itself can never fail.
        body = body.decode('latin-1')
    if reply.status != HTTP_OK:
        return ''
    return body


def main():
    """Fetch all pages in ``htmlpages``, fill ``htmlhelp`` and print it."""
    # Host and path are both derived from baseurl so that the host name is
    # defined in exactly one place.
    target = urlsplit(baseurl)
    connection = HTTPConnection(target.netloc)
    try:
        for config, page in htmlpages.items():
            htmltext = fetch_page(connection, target.path + page)
            htmlhelp[config] = {}
            for title, anchor in iter_resource_links(htmltext):
                print('%s %s link: %s' % (config, title, anchor))
                htmlhelp[config][title.lower()] = baseurl + anchor
    finally:
        connection.close()

    pprint.PrettyPrinter(indent=4).pprint(htmlhelp)


if __name__ == '__main__':
    main()