Monday, August 25, 2008

Substitute

Another Ocaml script (although, I suppose it isn't a script, because I compile it). I really like to write in Ocaml, because it's a different kind of thought process.

This one is just a little tool that I sometimes like to use, when I have to go through a file and change the name of some array or similar. Granted, this can be done automatically through many text editors, but sometimes it's a hassle to even open a file, especially if it's biggish.

Besides, I found this bugger on my desktop today, in a slightly different form, and couldn't figure out what I had wanted it to do (which is embarrassing, because it wasn't that hard). When I did figure out what it does, I decided to complicate it a bit and use some of that fantastic functional paradigm that Ocaml offers (for example lines 81 and 84, the anonymous functions inside handle_argument, and others).

I used a trick here that I often use when doing functional programming. It's in the handle_arguments function (line 94). Inside this function I create a function with the same name. I can do this because, unless I use the rec keyword in the declaration, the function's name is not yet bound within its own body. The inner function is recursive (it has the rec keyword), so that it can traverse a list and pass on the results, whilst the outer function calls it with the proper arguments. Thanks to this, I don't have to remember to call handle_arguments with an additional parameter (an empty list for the results) from the outside, so the code is just that little bit cleaner.

Technical hints:
To compile the code just go:
ocamlc -o substitute str.cma substitute.ml

As you can see, it needs the additional Str module.

To run the program go:
echo '$0 $1!' | ./substitute '$0->Hello' '$1->world'


Oh yeah, and if you need a different format for the command line arguments, you can change the value of delimiter (line 28).

Warning: The substitution may fail somewhere, so if you try doing something important with this piece of software, better have a backup copy of whatever you're using it with.

The program:
1  (**
2   * Substitute
3   * Substitutes things for other things in files.
4   *
5   * Compiling:
6   *  ocamlc -o substitute str.cma substitute.ml
7   * Example usage:
8   *  echo '$0 $1!' | ./substitute '$0->Hello' '$1->world'
9   *  #=> Hello world!
10  * Potential issues:
11  *  May substitute more than you'd like, so keep a 
12  *  backup.
13  * Parameters:
14  *  Each parameter is in the form of a placeholder,
15  *  the delimiter '->' and a substitution
16  *  (best to look at the example).
17  * Requires:
18  *  Ocaml (http://caml.inria.fr/)
19  * Author:
20  *  Konrad Siek
21  *)
22 
23 (**
24  * Delimiter for program arguments.
25  * Each argument is split at this text: the part before it is
26  * the placeholder, the part after it is the substitution.
27  *)
28 let delimiter = "->";;
(**
 * Applies every transformation to every element of a list.
 * Each element of contents is passed through the functions in
 * order, from first to last, so a later function sees the output
 * of the earlier ones.
 * @param functions - list of transformations to apply
 * @param contents - list of strings to traverse
 * @return a list of transformed strings, in the original order
 *)
let substitute functions contents =
    (* Thread one element through the whole chain of functions. *)
    let apply content =
        List.fold_left
            (fun text transform -> transform text)
            content functions
    in
    (* rev_map followed by rev visits the elements front to back
       and stays linear, unlike repeated appends with @. *)
    List.rev (List.rev_map apply contents)
;;
(**
 * Outputs the contents of a list to standard output,
 * one element per line, in list order.
 * @param contents - a list of strings
 *)
let print_contents contents =
    List.iter print_endline contents
;;
(**
 * Converts a program argument into a translation function.
 * The argument is split at the delimiter: the first piece is the
 * placeholder and everything after it is the substitution (any
 * further delimiters stay part of the substitution). Both pieces
 * are used literally: the placeholder is not a regular expression
 * and backslashes in the substitution are not expanded.
 * An argument that does not yield both pieces is reported on
 * standard error and converted into a function that returns its
 * input unchanged.
 * @param argument - a string such as "$0->Hello"
 * @return a function replacing every occurrence of the
 *  placeholder in its input with the substitution
 *)
let handle_argument argument =
    let regex = Str.regexp_string delimiter in
    match Str.split regex argument with
    | placeholder :: first :: rest ->
        let from = Str.regexp_string placeholder in
        (* Glue the pieces after the first delimiter back together. *)
        let into = String.concat delimiter (first :: rest) in
        (* global_substitute inserts the text verbatim, whereas
           global_replace would interpret \0..\9 and \\ inside it. *)
        fun input ->
            Str.global_substitute from (fun _ -> into) input
    | _ ->
        prerr_string
            ("Illegal argument: '" ^ argument ^ "'");
        prerr_newline ();
        fun input -> input
;;
(**
 * Converts a list of program arguments into a
 * list of translation functions, keeping their order.
 * @param arguments - list of argument strings
 * @return list of functions, one per argument
 *)
let handle_arguments arguments =
    (* The inner function deliberately reuses the outer name: since
       the outer binding has no rec, the name is free to rebind here.
       Results are collected in reverse and flipped once at the end,
       which avoids appending to the end of the list at every step. *)
    let rec handle_arguments arguments results =
        match arguments with
        | head::tail ->
            handle_arguments tail
                (handle_argument head :: results)
        | [] -> List.rev results
    in
    handle_arguments arguments []
;;
(**
 * Grab input from standard input - read until
 * an end of stream occurs.
 * Only End_of_file stops the loop; any other exception raised
 * while reading is passed on to the caller.
 * @param unit
 * @return list of lines, in the order they were read
 *)
let read_input () =
    let lines = ref [] in
    (* Lines are collected in reverse and flipped once at the end. *)
    (try
        while true do
            lines := input_line stdin :: !lines
        done
    with End_of_file -> ());
    List.rev !lines
;;
122
(* Program entry point: turn the command line into translation
   functions, run them over standard input, print the outcome. *)
let () =
    (* The first element of the argument vector is the program name. *)
    let arguments = List.tl (Array.to_list Sys.argv) in
    let functions = handle_arguments arguments in
    let contents = read_input () in
    print_contents (substitute functions contents)
;;

Sunday, August 24, 2008

GPL comment generator

I have the problem of having to insert those comments into tons of files, especially when I need to document a new project from scratch - inserting the little comment marks by hand is never fun. Technically, if I kept some older project I could skip this part and copy the comment from that earlier project, but I can never find it, or simply cannot be buggered.

Therefore, this is a script which generates GPL comments, that are easy to insert into all sorts of code. Just run the AWK script with a template file and Björn Stronginthearm's your uncle.

Also, I really like AWK, so I can't resist making a script in it when there's the opportunity.

The AWK script works like this: first it creates a massive array holding commenting styles for a mass of languages (lines 52 through 120, in the BEGIN block); then the template is read in, and all known variables are substituted with the values passed to the script as parameters (lines 145 to 153, the main rule); finally, the file to which we attach is read in and printed out (lines 130 to 140, the END block). As an afterthought, I added an attempt to recognize the language by file extension (line 45). There are some other bits in there, but they are not extremely interesting.

Example of use:

Supposing the template is saved as template.tpl and the file we need to append to is hello_world.c, the bash command would be something like this:
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v language="C" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c"


... or, if you want to rely on autodetection of language (by extension):
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c"


... and to append the generated comment to a file you can just go:
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v language="C" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c" > /tmp/hello_world.c && mv /tmp/hello_world.c hello_world.c


The awk script itself:
1  #!/usr/bin/awk -f
2  #
3  # Creates a GPL comment (in proper comment characters
4  # for a given language), including the name of the 
5  # program, which the file is supposed to be a part of
6  # in the proper place.
7  #
8  # Supported languages include:
9  #   - XML, HTML,
10 #   - C, C++, Java, Javascript,
11 #   - Bash, Python, Ruby, AWK,
12 #   - ML, Ocaml,
13 #   - TeX, LaTeX,
14 #   - SQL.
15 # Parameters:
16 #   program - name of the main program
17 #   language - language to generate comment for
18 #   name - author of the program
19 #   date - copyright date
20 #   attach - file to append to comment
21 # Standard input - the template of the comment, where:
22 #   $name is replaced with the value of name
23 #   $date is replaced with the value of date
24 #   $program is replaced with program name
25 # Author:
26 #   Konrad Siek
27 
# Add style definition for language to global styles.
# For every index of `languages` and every index of `style`, copies
# style[part] into the global array styles[language, part].
# The extra parameters l and s only serve as local loop variables;
# callers pass the first two arguments.
function add_style(languages, style,    l, s) {
    for (l in languages) {
        for (s in style) {
            styles[l, s] = style[s];
        }
    }
}
36 
# Builds the table of comment styles, picks the language and
# prints the opening line of the comment.
BEGIN {
    # Names of the three parts of a comment style; they index the
    # *_style arrays and the second dimension of styles.
    begin="begin";
    line="line";
    end="end";
    SUBSEP="~";

    # Try autodetecting type by extension: the text after the last
    # dot in the name of the attached file becomes the language.
    if (language == "" && attach != "") {
        pieces = split(attach, arr, /\./);
        if (pieces > 1) {
            language = arr[pieces];
        }
    }

    # Define C-style comment languages
    c_style[begin]="/* ";
    c_style[line]=" * ";
    c_style[end]=" */";

    c_languages["c"];
    c_languages["h"];
    c_languages["c++"];
    c_languages["cpp"];
    c_languages["java"];
    c_languages["javascript"];
    c_languages["js"];

    add_style(c_languages, c_style);

    # Define Unix-style comment languages
    unix_style[begin]="# ";
    unix_style[line]="# ";
    unix_style[end]="# ";

    # The short names are file extensions, so that autodetection
    # also works for e.g. script.py or run.sh.
    unix_languages["bash"];
    unix_languages["sh"];
    unix_languages["python"];
    unix_languages["py"];
    unix_languages["ruby"];
    unix_languages["rb"];
    unix_languages["awk"];

    add_style(unix_languages, unix_style);

    # Define ML-style comment languages
    ml_style[begin]="(* ";
    ml_style[line]=" * ";
    ml_style[end]=" *)";

    ml_languages["ml"];
    ml_languages["mli"];
    ml_languages["ocaml"];
    ml_languages["caml"];

    add_style(ml_languages, ml_style);

    # Define HTML-style comment languages
    # The line prefix uses a single dash: a double hyphen is not
    # allowed inside an XML comment.
    html_style[begin]="<!-- ";
    html_style[line]="   - ";
    html_style[end]="  --> ";

    html_languages["html"];
    html_languages["htm"];
    html_languages["xml"];
    html_languages["svg"];

    add_style(html_languages, html_style);

    # Define TeX-style comment languages
    tex_style[begin]="% ";
    tex_style[line]="% ";
    tex_style[end]="% ";

    tex_languages["tex"];
    tex_languages["latex"];

    add_style(tex_languages, tex_style);

    # Define SQL-style comment languages
    sql_style[begin]="-- ";
    sql_style[line]="-- ";
    sql_style[end]="-- ";

    sql_languages["sql"];

    add_style(sql_languages, sql_style);

    # Select language
    language=tolower(language);

    # Without a known style the text would be printed with no
    # comment marks at all, so say so on standard error.
    if (!((language, begin) in styles)) {
        print "Unknown language '"language"': no comment marks will be added." > "/dev/stderr";
    }

    # Print first line
    print styles[language, begin];
}
125
# Closes the comment and, if requested, prints the attached file
# after it.
END {
    # Add final comment
    print styles[language, end];

    # Attach file if needed
    if (attach != "") {
        # Read file; getline yields 1 for every line read, 0 at the
        # end of the file and -1 when the file cannot be read.
        while ((r = (getline < attach)) > 0) {
            print $0;
        }
        # Report error.
        if (r == -1) {
            print "Can't read '"attach"'." > "/dev/stderr";
        }
        close(attach);
    }
}
142    
# Runs for every line of the template read from standard input:
# fills in the variables and prints the line behind the comment
# prefix of the selected language.
{
    # Read template from standard input
    input = $0;

    # Apply substitution to template. Regex literals are used so
    # that the dollar sign is matched literally in every awk; in a
    # string such as "\$name" the handling of the backslash differs
    # between implementations.
    gsub(/\$name/, name, input);
    gsub(/\$date/, date, input);
    gsub(/\$program/, program, input);

    # Apply comments and print to output
    print styles[language, line]""input;
}


The accompanying template file:
Copyright $date $name.

This file is part of $program.

$program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

$program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with $program. If not, see <http://www.gnu.org/licenses/>.


The code is also available at GitHub as awk/gpl_comment.awk
and awk/gpl_comment.tpl.

Monday, August 11, 2008

Translate Properties

This is a tool for translating Java properties files, where there are sources in many languages. I needed to translate some of these properties at work, so I knocked up this script to help me out. And it was pretty useful.

I will not go into the details of the code - you can check it out yourself if you're interested. It's pretty clean as code goes, although not so well commented as per usual.

Also, beware, it's GPL-ed! The full text of the license is available at the GNU site.

Anyway, the sources:
 
1 #!/usr/bin/python
2 #
3 # Copyright 2008 Konrad Siek
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18
19 import os.path;
20 import getopt;
21 import sys;
22
23 sources = []; # paths of the source files, filled in by addSource
24 empty = ["", "*", None]; # values that absent() treats as "no value"
25 startAt = 1; # first line of the target that parse() processes
26 output = sys.stdout; # where printout() writes the results
27 sort = False; # when True, save() sorts the results first
28 ignore = False; # when True, parse() skips keys with no values
29 quiet = False; # when True, warnings about bad options are suppressed
30
def addSource (source):
    """
    Registers a source properties file.

    A path that does not exist is not registered; unless quiet mode
    is on, a message about it is written to stderr.

    @param source: path to the source file.
    """
    if os.path.exists(source):
        sources.append(source)
    elif not quiet:
        sys.stderr.write("Source: '" + source +
            "' does not exist. Skipping...\n")
37
def setLine (line):
    """
    Sets the line of the target file at which translation starts.

    A value that is not an integer, or is not greater than 0, is
    ignored (with a message on stderr unless quiet mode is on).

    @param line: the line number, as a string.
    @return: the new starting line, or None if the value was ignored.
    """
    global startAt
    try:
        number = int(line)
    except ValueError:
        if not quiet:
            sys.stderr.write("Line value: " + line +
                " is not a number. Ignoring...\n")
        return
    if number <= 0:
        if not quiet:
            sys.stderr.write("Line value: " + line +
                " must be greater than 0. Ignoring...\n")
        return
    startAt = number
    return startAt
54
def setOutput (path):
    """
    Sets the file, which will be used throughout the program to
    write results. If this is not done, the module will write to
    sys.stdout instead.

    The file is opened for writing, so existing contents are lost.

    @param path: is the path to the file.
    """
    global output
    output = open(path, 'w')
65
def setSort():
    """
    Turns on sorting of the results before they are written.
    """
    global sort
    sort = True
69
def setQuiet():
    """
    Turns on quiet mode.
    """
    global quiet
    quiet = True
73
def setIgnore():
    """
    Turns on skipping of properties that have no value in the
    target or in any source.
    """
    global ignore
    ignore = True
77
def printUsage ():
    """
    Print usage information for the script and exit with status 0.
    """

    sys.stdout.write("Usage:\n")
    sys.stdout.write("\t"+ sys.argv[0]
        +" -s source [-s source ... ] "
        +"[-l line ] [-o path ] [-i ] [-a ] [-q ] target\n")
    sys.stdout.write("\t"+ sys.argv[0] +" -h\n")
    # -s names an input; the output file is chosen with -o.
    sys.stdout.write("\t-s source\t\t"
        +"Select a source file to read values from.\n")
    sys.stdout.write("\t-l line\t\t"
        +"Select a line in the target to start from.\n")
    sys.stdout.write("\t-o path\t\t"
        +"Output results to file, instead of stdout.\n")
    sys.stdout.write("\t-i \t\t\t"
        +"Ignore if all sources have no values "
        +"(or value of '*').\n")
    sys.stdout.write("\t-a \t\t\t"
        +"Sort results.\n")
    sys.stdout.write("\t-q \t\t\t"
        +"Quiet mode.\n")
    sys.stdout.write("\t-h \t\t\t"
        +"Show this message.\n")
    sys.stdout.write("")
    sys.exit(0)
105
def fileToList(path):
    """
    Reads a properties file into a list of entries.

    Lines starting with '#' and lines containing only whitespace
    are skipped, but still counted. Every other line is split at
    the first '='; a line without '=' gets an empty value. A value
    that itself contains '=' is kept, with a warning on stderr.

    @param path: path to the properties file.
    @return: a list of (line number, key, value) tuples, with line
    numbers counted from 1.
    """
    handle = open(path, 'r')
    try:
        lines = handle.readlines()
    finally:
        # Close the file even if reading fails.
        handle.close()
    entries = []
    for index, line in enumerate(lines):
        counter = index + 1
        if line.startswith("#") or len(line.strip()) == 0:
            continue
        line = line.rstrip("\n")
        key, separator, value = line.partition("=")
        if value.find("=") >= 0:
            sys.stderr.write("Warning: value of line "
                +str(counter) + " in " + path)
            sys.stderr.write(" contains an equals sign '=' ("
                +line + ")\n")
        entries.append((counter, key, value))
    return entries
124
def loadSources():
    """
    Reads every registered source file.

    @return: a dictionary mapping each path in sources to the list
    returned for it by fileToList.
    """
    lists = {}
    for source in sources:
        lists[source] = fileToList(source)
    return lists
130
def popByKey(target, list):
    """
    Removes the first entry with the given key from a list.

    @param target: the key to look for.
    @param list: a list of (line, key, value) tuples; it is modified
    in place when a matching entry is found.
    @return: the removed (line, key, value) tuple, or
    (None, None, None) if no entry has that key.
    """
    for index, (line, key, value) in enumerate(list):
        if key == target:
            # Returning right away makes deleting mid-loop safe.
            del list[index]
            return line, key, value
    return None, None, None
138
def dialog(property, sources):
    """
    Shows a property and its known values, followed by a prompt.

    @param property: the key being translated.
    @param sources: a list of (file, line, value) tuples, printed
    one per row; a missing line or value is printed as 'None'.
    """
    sys.stdout.write(property+"\n")
    for source, line, value in sources:
        sys.stdout.write("\t"+source+"\t("+str(line)+"): "
            +str(value)+"\n")
    sys.stdout.write("$("+property+"):")
145
def save(results):
    """
    Writes out the results, sorted first if sorting is turned on.

    @param results: a list of (key, value) tuples; it is sorted in
    place when sorting is on.
    """
    if sort:
        results.sort()

    printout(results)
151
def control(property, value, results):
    """
    Reads the user's answer for one property from stdin and acts
    on it.

    '=stop' saves the results gathered so far and exits, '=skip'
    leaves the property out, an empty answer keeps the given value
    and any other text becomes the new value.

    @param property: the key being translated.
    @param value: the value used when the answer is empty.
    @param results: the list of (key, value) tuples to append to.
    """
    line = sys.stdin.readline().strip()
    if line == "=stop":
        save(results)
        # sys.exit works even where the interactive exit() helper
        # is not installed.
        sys.exit(0)
    elif line == "=skip":
        pass
    elif line == "":
        results.append((property, value))
    else:
        results.append((property, line))
164
def printout(list):
    """
    Writes the entries to the selected output as 'key=value' lines.

    @param list: a list of (key, value) tuples.
    """
    global output
    for key, value in list:
        output.write(key+"="+value+"\n")
169
def outOfScope(target, list):
    """
    Tells whether a key lies before the starting line.

    @param target: the key to look for.
    @param list: a list of (line, key, value) tuples.
    @return: True if the first entry with that key has a line number
    lower than startAt; False otherwise, also when the key is not in
    the list.
    """
    for line, key, value in list:
        if key == target:
            return (line < startAt)
    return False
176
def absent(occurances):
    """
    Tells whether a property has no usable value anywhere.

    @param occurances: a list of (file, line, value) tuples.
    @return: True if every value, stripped of surrounding
    whitespace, is one of the values listed in empty.
    """
    for source, line, value in occurances:
        # The value is None when a file does not have the key at
        # all, and None cannot be stripped.
        if value is not None:
            value = value.strip()
        if value not in empty:
            return False
    return True
182
def parse (target):
    """
    Runs the interactive translation of one target file.

    First every property of the target (from line startAt on) is
    shown with the values found for it in the sources; then the
    properties left over in the sources are shown. Each answer is
    handled by control, and the results are saved at the end.

    @param target: path to the properties file being translated.
    """
    sources = loadSources()
    workspace = fileToList(target)
    results = []

    # Translate the lines, which were in the target
    for line, key, value in workspace:
        if line < startAt:
            continue
        occurances = [(target, line, value)]
        for s in sources:
            # Popping removes the key from the source, so that the
            # second pass only sees keys that were not handled here.
            l, k, v = popByKey(key, sources[s])
            occurances.append((s, l, v))
        if ignore and absent(occurances):
            continue
        dialog(key, occurances)
        control(key, value, results)

    # Translate the lines, which were in the sources, but not the target
    for source in sources:
        for line, key, value in sources[source]:
            if not outOfScope(key, workspace):
                # This value comes from the source file, so it is
                # shown under the source's name.
                occurances = [(source, line, value)]
                for s in sources:
                    if source != s:
                        l, k, v = popByKey(key, sources[s])
                        occurances.append((s, l, v))
                dialog(key, occurances)
                control(key, value, results)

    save(results)
216
def resolveOptions (optionHandling, argumentHandler):
    """
    Handles all the options and parameters for the script with the
    provided functions.

    @param optionHandling: a dictionary, translating an option string
    to a function. Depending on whether the function is parameterless
    or has one parameter the option string will be just a letter or a
    letter ending in a colon.

    @param argumentHandler: a function used to handle all the arguments
    - it takes one parameter.
    """

    string = "".join(["%s" % (i) for i in optionHandling.keys()])
    options, arguments = getopt.getopt(sys.argv[1:], string)

    # Handle options.
    for key, value in options:
        name = key[1:]
        # Decide by how the option was declared, not by the value:
        # an option that takes a value may be given an empty one.
        if name + ":" in optionHandling:
            optionHandling[name + ":"](value)
        else:
            optionHandling[name]()

    # Handle arguments.
    for argument in arguments:
        argumentHandler(argument)
245
if __name__ == "__main__":
    # Option strings as understood by getopt, mapped to their
    # handlers; a trailing colon marks an option that takes a value.
    options = {"s:": addSource,
        "h": printUsage,
        "l:": setLine,
        "o:": setOutput,
        "a": setSort,
        "q": setQuiet,
        "i": setIgnore}

    # Every remaining command line argument is treated as a target.
    resolveOptions(options, parse)


The code is also available at GitHub as python/translate_properties.py.