source: people/peter.buschman/backup_monitoring/dal/dbapi/paramstyles.py@ 976

Last change on this file since 976 was 976, checked in by peter, on Dec 6, 2011 at 10:19:33 AM

Raw checkin of current NetBackup / TSM parsing code.

File size: 13.9 KB
RevLine 
[976]1###############################################################################
2###
3### Python DBAPI 2.0 Paramstyle Conversions
4###
5### Peter L. Buschman <plb@iotk.com>
6###
7### 2004-08-30
8###
9###############################################################################
10
11import string
12import re
13
14
15##
16## Categorization of paramstyles into different categories. Different types of conversions
17## will have different optimal algorithms.
18##
19PARAMSTYLES = {
20 'all' : [ 'qmark', 'numeric', 'named', 'format', 'pyformat' ],
21 'sequence' : [ 'qmark', 'numeric', 'format' ],
22 'dict' : [ 'named', 'pyformat' ],
23 'token' : [ 'qmark', 'format' ],
24}
25
26
27##
28## Valid quote characters.
29##
30QUOTE_CHARS = [ '"', "'" ] # " or '
31
32
33##
34## Valid escape characters.
35##
36ESCAPE_CHARS = [ '\\' ]
37
38
39##
40## This dictionary is used to lookup the placeholder strings for paramstyles whose
41## placeholders are always the same. This is currently just qmark and format.
42##
43PLACEHOLDER_TOKENS = {
44 'qmark' : '?',
45 'format' : '%s',
46}
47
48
49##
50## This dictionary is used to look-up regular expressions for matching placeholders
51## in a query string for a given paramstyle.
52##
53PLACEHOLDER_EXPS = {
54 'qmark' : re.compile(r'(\?)'),
55 'numeric' : re.compile(r'(:\d+)'),
56 'named' : re.compile(r'(:\w+)'),
57 'format' : re.compile(r'(%s)'),
58 'pyformat' : re.compile(r'(%\(\w+\)s)'),
59}
60
61
62##
63## This dictionary is used to get the correct datastructure to return params in when converting
64## from one paramstyle to another.
65##
66PARAM_TYPES = {
67 'qmark' : lambda : [],
68 'numeric' : lambda : [],
69 'named' : lambda : {},
70 'format' : lambda : [],
71 'pyformat' : lambda : {},
72}
73
74
75##
76## Add a parameter to a sequence or dictionary of parameters.
77##
78def param_add( param_num, param_name, param, params ):
79 try:
80 params.append(param)
81 except AttributeError:
82 params[param_name] = param
83 return params
84
85
86##
87## This dictionary is used to lookup regular expressions for matching the parameter
88## name contained within the placeholder of a named paramstyle. This is currently
89## just named and pyformat.
90##
91PARAMNAME_EXPS = {
92 'named' : re.compile(':(\w+)'),
93 'pyformat' : re.compile('%\((\w+)\)s'),
94}
95
96
97##
98## Parameter name generators.
99##
100PARAMNAME_GENS = {
101 'qmark' : lambda param_num, placeholder : 'param%d' % (param_num),
102 'numeric' : lambda param_num, placeholder : 'param%d' % (param_num),
103 'named' : lambda param_num, placeholder : PARAMNAME_EXPS['named'].findall(placeholder)[0],
104 'format' : lambda param_num, placeholder : 'param%d' % (param_num),
105 'pyformat' : lambda param_num, placeholder : PARAMNAME_EXPS['pyformat'].findall(placeholder)[0],
106}
107
108
109##
110## Parameter value generators.
111##
112PARAMVALUE_GENS = {
113 'qmark' : lambda param_num, param_name, params : params[param_num - 1],
114 'numeric' : lambda param_num, param_name, params : params[param_num - 1],
115 'named' : lambda param_num, param_name, params : params[param_name],
116 'format' : lambda param_num, param_name, params : params[param_num - 1],
117 'pyformat' : lambda param_num, param_name, params : params[param_name],
118}
119
120
121##
122## This dictionary contains lambda functions that return the appropriate replacement
123## string given the parameter's sequence number.
124##
125PLACEHOLDER_SUBS = {
126 'qmark' : lambda param_num, param_name : '?',
127 'numeric' : lambda param_num, param_name : ':%d' % (param_num),
128 'named' : lambda param_num, param_name : ':%s' % (param_name),
129 'format' : lambda param_num, param_name : '%s',
130 'pyformat' : lambda param_num, param_name : '%%(%s)s' % (param_name),
131}
132
133
134##
135## The following conversion matrix was inspired by similar code in Wichert Akkerman's dhm
136## module at http://www.wiggy.net/code/python-dhm.
137##
138## The primary reason for listing conversion algorithms in a lookup table like this is that
139## it allows for individual conversions to be overridden as additional, possibly experimental,
140## algorithms are developed without breaking the functionality of the entire module.
141##
142CONVERSION_MATRIX = {
143 'qmark' : {
144 'qmark' : lambda query, params : (query, params),
145 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'numeric', query, params ),
146 'named' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'named', query, params ),
147 'format' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'format', query, params ),
148 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'pyformat', query, params ),
149 },
150 'numeric' : {
151 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'qmark', query, params ),
152 'numeric' : lambda query, params : (query, params),
153 'named' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'named', query, params ),
154 'format' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'format', query, params ),
155 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'pyformat', query, params ),
156 },
157 'named' : {
158 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'named', 'qmark', query, params ),
159 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'named', 'numeric', query, params ),
160 'named' : lambda query, params : (query, params),
161 'format' : lambda query, params : paramstyle_to_paramstyle( 'named', 'format', query, params ),
162 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'named', 'pyformat', query, params ),
163 },
164 'format' : {
165 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'format', 'qmark', query, params ),
166 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'format', 'numeric', query, params ),
167 'named' : lambda query, params : paramstyle_to_paramstyle( 'format', 'named', query, params ),
168 'format' : lambda query, params : (query, params),
169 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'format', 'pyformat', query, params ),
170 },
171 'pyformat' : {
172 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'qmark', query, params ),
173 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'numeric', query, params ),
174 'named' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'named', query, params ),
175 'format' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'format', query, params ),
176 'pyformat' : lambda query, params : (query, params),
177 },
178}
179
180##
181## Return True if the character at pos in string is escaped.
182##
183def escaped( string, pos ):
184 escape_chars = ESCAPE_CHARS
185 count = 0
186 if pos > 0:
187 pos -= 1
188 if string[pos] in escape_chars:
189 escape_char = string[pos]
190 while string[pos] == escape_char and pos >= 0:
191 count += 1
192 pos -= 1
193 if count % 2 == 1:
194 return True
195 else:
196 return False
197
198##
199## Return True if the string is quoted.
200##
201def quoted( string ):
202 if string[0] in QUOTE_CHARS and string[-1] == string[0]:
203 return True
204 else:
205 return False
206
207##
208##
209##
210class SegmentizeError(Exception):
211 """
212 Error associated with string segmentization.
213 """
214 pass
215
216
217##
218## Parse a string into quoted and non-quoted segments. We do this so that it is easy to tell
219## which segments of a string to look for placeholders in and which to ignore.
220##
221def segmentize( string ):
222 """
223 Split a string into quoted and non-quoted segments.
224 """
225 quote_chars = QUOTE_CHARS
226 segments = []
227 current_segment = ''
228 previous_char = None
229 quote_char = None
230 quoted = False
231 pos = 0
232 for char in string:
233 if quoted:
234 if char == quote_char and not escaped( string, pos ):
235 current_segment += char
236 segments.append(current_segment)
237 current_segment = ''
238 previous_char = char
239 quoted = False
240 else:
241 current_segment += char
242 previous_char = char
243 elif not quoted:
244 if char in quote_chars and not escaped( string, pos ):
245 if current_segment != '':
246 segments.append(current_segment)
247 current_segment = ''
248 quoted = True
249 quote_char = char
250 current_segment += char
251 previous_char = char
252 else:
253 current_segment += char
254 previous_char = char
255 pos += 1
256 if current_segment != '':
257 segments.append(current_segment)
258 if quoted:
259 raise SegmentizeError, 'Unmatched quotes in string'
260
261 return segments
262
263
264##
265## Universal paramstyle converter. This is the initial conversion algorithm supplied with PyDAL.
266## It is intended to complete, but may not offer optimal performance in all cases.
267##
268## --PLB 2004-09-08
269##
270def paramstyle_to_paramstyle( from_paramstyle, to_paramstyle, query, params ):
271 placeholder_exp = PLACEHOLDER_EXPS[from_paramstyle]
272 placeholder_sub = PLACEHOLDER_SUBS[to_paramstyle]
273 paramname_gen = PARAMNAME_GENS[from_paramstyle]
274 paramvalue_gen = PARAMVALUE_GENS[from_paramstyle]
275 new_query = ''
276 segments = segmentize(query)
277 new_params = PARAM_TYPES[to_paramstyle]()
278 param_num = 0
279 for segment in segments:
280 #
281 # If the segment is a quoted string, do not check for placeholders.
282 #
283 if quoted(segment):
284 new_query += segment
285 else:
286 #
287 # ...otherwise, check for any placeholder matches.
288 #
289 pos = 0
290 match = placeholder_exp.search(segment, pos)
291 if match != None:
292 #
293 # If there are placeholders...
294 #
295 while match != None:
296 new_query += segment[pos:match.start()]
297 placeholder = segment[match.start():match.end()]
298 #
299 # Ignore the placeholder if it is escaped...
300 #
301 if escaped( segment, match.start() ):
302 new_query += placeholder
303 else:
304 #
305 # ...otherwise replace it.
306 #
307 param_num += 1
308 param_name = paramname_gen( param_num, placeholder )
309 param_value = paramvalue_gen( param_num, param_name, params )
310 new_placeholder = placeholder_sub( param_num, param_name )
311 new_query += new_placeholder
312 new_params = param_add( param_num, param_name, param_value, new_params )
313 pos = match.end()
314 match = placeholder_exp.search(segment, pos)
315 #
316 # Tack on the end of the string segment when there are no more matches.
317 #
318 if pos < len(segment):
319 new_query += segment[pos:]
320 #
321 # If there were no placeholders, just add the segment to our query.
322 #
323 else:
324 new_query += segment
325
326 return new_query, new_params
327
328
329##
330## Convert from any paramstyle to any other paramstyle.
331##
332def convert( from_paramstyle, to_paramstyle, query, params ):
333
334 try:
335 convert_function = CONVERSION_MATRIX[from_paramstyle][to_paramstyle]
336 except KeyError:
337 raise NotImplementedError, 'Unsupported paramstyle conversion: %s to %s' % (from_paramstyle, to_paramstyle)
338
339 new_query, new_params = convert_function(query, params)
340
341 return new_query, new_params
342
343##
344## Unit Tests
345##
346## Need to move these to the python unit testing framework...
347##
348if __name__ == '__main__':
349 sequence_params = ['a', 'b', 'c', 'd']
350 dict_params = {
351 'foo' : 'a',
352 'bar' : 'b',
353 'baz' : 'c',
354 'quux' : 'd',
355 }
356 tests = {
357 'qmark' : [ 'SELECT * FROM ? WHERE ? > ? OR ? IS NOT NULL', sequence_params ],
358 'numeric' : [ 'SELECT * FROM :1 WHERE :2 > :3 OR :4 IS NOT NULL', sequence_params ],
359 'named' : [ 'SELECT * FROM :foo WHERE :bar > :baz OR :quux IS NOT NULL', dict_params ],
360 'format' : [ 'SELECT * FROM %s WHERE %s > %s OR %s IS NOT NULL', sequence_params ],
361 'pyformat' : [ 'SELECT * FROM %(foo)s WHERE %(bar)s > %(baz)s OR %(quux)s IS NOT NULL', dict_params ],
362 }
363 indent = 4
364 width = 16
365 print ''
366 print '[ PARAMSTYLE TRANSLATIONS ]'
367 print ''
368 for from_paramstyle in PARAMSTYLES['all']:
369 query = tests[from_paramstyle][0]
370 params = tests[from_paramstyle][1]
371 print ''
372 print '%s[ %s ]' % (' ' * indent, from_paramstyle.upper())
373 print ''
374 label = 'query'
375 print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), query)
376 label = 'paramstyle'
377 print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), from_paramstyle)
378 print ''
379 for to_paramstyle in PARAMSTYLES['all']:
380 converted_query, converted_params = convert(from_paramstyle, to_paramstyle, query, params)
381 label = '%s_query' % (to_paramstyle)
382 print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_query)
383 label = '%s_params' % (to_paramstyle)
384 print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_params)
385 print ''
386
Note: See TracBrowser for help on using the repository browser.