Thursday, December 11, 2008

SQL embedder

Well, have you ever been tired of making a nice little SQL script and then having to put all those damn "'s and +'s?

Well, here's a thing for you - an AWK script to parse the SQL script into those pesky things.

And here's how to use it - simple operation will just copy the string, preserving the indentation and turn it into Java-style code, like this:

"select * " +
"from " +
     "employees " ;

You need to feed a file through it though, so it's called like this:

cat select.sql | ./sql_embedder.awk

And here's a big request - it preserves the indentation in the generated code, as well as in the actual SQL script after evaluation, if prefixes the whole thing with comment tokens (for some reason) and replaces all dollar signs with the word hello and all percent signs with the word world...

cat select.sql | ./sql_embedder.awk -v indent=true -v prefix="//\t" -v replace='\$->hello,%->world'

Want to know more, read the code comments, and read the code if you want to.

And it is code:
1  #!/usr/bin/awk -f 
2  
3  # SQL Embedder
4  
5  # Wraps SQL statements so that they become string literals
6  # in various languages. You sometimes have to do that, and
7  # it's probably the most boring part of low-level database
8  # development.
9  
10 # Parameters
11 #   language - select the output language, takes values
12 #               'java', 'php'; Java is the default
13 #   prefix - include this string before each output line
14 #   indent - include indentation in the output strings 
15 #               (i.e. output "\tselect *\n"), takes values
16 #               'true'/'false' or 'yes'/'no'
17 #   replace - if the specified token is found, replace it 
18 #               with the specified string, it takes the
19 #               following syntax:
20 #                   "p0->s0,...,pn->sn"
21 #                   p0-pn are placeholders and s0-sn are
22 #                   substitutions. Placeholders cannot 
23 #                   contain any whitespaces and they are
24 #                   treated as POSIX regular expressions.
25 #               (i.e. to insert the string 'joe' for '$':
26 #                   "$->joe" 
27 #                it's really easier than it looks)
28 #   rule_separator - modify the rule separator of the 
29 #               'replace' clause; comma by default
30 #   implication - modify the rule implication of the 
31 #               'replace' clause; '->' by default
32 #
33 # Author
34 #   Konrad Siek
35 
36 # Do all those tedious pre-op things
37 BEGIN {
38 
39     # Select language and define tokens
40     if (language == "java" || language == "") {
41         # Java (acts as default settings)
42         STRING_TOKEN = "\""
43         CONCATENATION_TOKEN = "+"
44         INSTRUCTION_TERMINATOR = ";"
45     } else if (language == "php") {
46         # PHP
47         STRING_TOKEN = "\""
48         CONCATENATION_TOKEN = "."
49         INSTRUCTION_TERMINATOR = ";"
50     } else {
51         print "Unsupported language: " language
52         exit
53     }
54 
55     # Rename the variables
56     OUTPUT_INDENTATION = (indent ~ /(true|yes)/)
57     PREFIX = prefix
58 
59     # Initiate rule separator
60     if (rule_separator == "") {
61         RULE_SEPARATOR = ","
62     } else {
63         RULE_SEPARATOR = rule_separator
64     }
65     
66     # Initiate separators within rules
67     if (implication == "") {
68         IMPLICATION = "->"
69     } else {
70         IMPLICATION = implication
71     }
72 
73     # Initialize replacement table
74     if (length(replace) > 0) {
75         split(replace, rules, RULE_SEPARATOR)
76         for (in rules) {
77             split(rules[r], a, IMPLICATION)
78             key = a[1]
79             value = a[2]
80             REPLACEMENTS[key] = value
81         }                 
82     }
83 }
84 
85 # Print terminator
86 END {
87     print INSTRUCTION_TERMINATOR
88 }
89 
90 # Ignore empty lines
91 /^[ \t]*$/ {
92     next
93 }
94 
95 # Concatenate previous line to this one
96 NR > 1 {
97     printf("%s\n", CONCATENATION_TOKEN)
98     indentation = ""
99     if (OUTPUT_INDENTATION) {
100        code_indentation = ""
101    }
102}
103
104# Mimic original indentation
105/^[ \t]+/ {
106    line_length = length($0)
107    for(= 1; i < line_length; i++) {
108        char = substr($0, i, i)
109        if (char ~ /^[ \t]*$/) {
110            indentation = indentation char
111            if (OUTPUT_INDENTATION) {
112                if(indentation == "\t") {
113                    code_indentation = code_indentation "\\t"
114                } else {
115                    code_indentation = code_indentation char
116                }
117            }
118        } else {
119            break;
120        }
121    }
122}
123
124# Print a line of the statement word-by-word
125{
126    printf("%s", PREFIX)
127    printf("%s", indentation)
128    printf("%s", STRING_TOKEN)
129    if (OUTPUT_INDENTATION) {
130        printf("%s", code_indentation)    
131    }        
132    for (= 1; i <= NF; i++) {
133        word = $i
134        for (in REPLACEMENTS) {
135            s = REPLACEMENTS[p]
136            gsub(p, s, word)
137        }
138        printf("%s ", word)
139    }
140    if (OUTPUT_INDENTATION) {
141        printf("\\n")
142    }
143    printf("%s ", STRING_TOKEN)    
144}


The code is also available at GitHub as awk/sql_embedder.awk.

No comments: