Sunday, August 24, 2008

GPL comment generator

I have the problem of having to insert those comments into tons of files, especially when I need to document a new project from scratch - inserting the little comment marks by hand is never fun. Technically, if I kept some older project, I could skip this part and copy the comment from that earlier project, but I can never find it, or simply cannot be buggered.

Therefore, this is a script which generates GPL comments, that are easy to insert into all sorts of code. Just run the AWK script with a template file and Björn Stronginthearm's your uncle.

Also, I really like AWK, so I can't resist making a script in it when there's the opportunity.

The AWK script works like this: first it creates a massive array holding commenting styles for a mass of languages (lines 52 through 120); then the template is read in, and all known variables are substituted with the values passed to the script as parameters (lines 145 to 153); finally, the file to which we attach is read in and printed out (lines 130 to 140). As an afterthought, I added an attempt to recognize the language by extension (line 45). There are some other bits in there, but they are not extremely interesting.

Example of use:

Supposing the template is saved as template.tpl and the file we need to append to is hello_world.c, the bash command would be something like this:
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v language="C" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c"


... or, if you want to rely on autodetection of language (by extension):
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c"


... and to append the generated comment to a file you can just go:
cat template.tpl | ./gpl_comment.awk -v program="Hello World" -v language="C" -v name="Konrad Siek" -v date="2008" -v attach="hello_world.c" > /tmp/hello_world.c && mv /tmp/hello_world.c hello_world.c


The awk script itself:
1  #!/usr/bin/awk -f
2  #
3  # Creates a GPL comment (in proper comment characters
4  # for a given language), including the name of the 
5  # program, which the file is supposed to be a part of
6  # in the proper place.
7  #
8  # Supported languages include:
9  #   - XML, HTML,
10 #   - C, C++, Java, Javascript,
11 #   - Bash, Python, Ruby, AWK,
12 #   - ML, Ocaml,
13 #   - TeX, LaTeX,
14 #   - SQL.
15 # Parameters:
16 #   program - name of the main program (the one this file is part of)
17 #   language - language to generate comment for
18 #   name - author of the program
19 #   date - copyright date
20 #   attach - file to append to comment
21 # Standard input - the template of the comment, where:
22 #   $name is replaced with the value of name
23 #   $date is replaced with the value of date
24 #   $program is replaced with program name
25 # Author:
26 #   Konrad Siek
27 
28 # Copy each marker of `style` into the global styles[language, marker] table, once per key of `languages`.
29 function add_style(languages, style,    l, s) {   # l, s: locals (awk idiom: extra parameters)
30     for (l in languages) {       # l: a language name (only the keys are used)
31         for (s in style) {       # s: a marker kind, i.e. a key of `style`
32             styles[l,s]=style[s];
33         }
34     }
35 }
36 
37 BEGIN {
38     # Keys naming the three markers of a comment style, plus the subscript separator
39     begin="begin";
40     line="line";
41     end="end";
42     SUBSEP="~";
43 
44     # Try autodetecting type by extension (only when no language was passed in)
45     if (language == "" && attach != "") {
46         split(attach, arr, /\./);
47         for (i=2; i in arr; i++) {   # walk to the last dot-separated piece
48             language=arr[i];
49         }
50     }
51 
52     # Define C-style comment languages
53     c_style[begin]="/* ";
54     c_style[line]=" * ";
55     c_style[end]=" */";
56 
57     c_languages["c"];
58     c_languages["c++"];
59     c_languages["cpp"];
60     c_languages["java"];
61     c_languages["javascript"];
62     c_languages["js"];
63 
64     add_style(c_languages, c_style);
65 
66     # Define Unix-style comment languages (with their usual file extensions)
67     unix_style[begin]="# ";
68     unix_style[line]="# ";
69     unix_style[end]="# ";
70 
71     unix_languages["bash"];   unix_languages["sh"];
72     unix_languages["python"]; unix_languages["py"];
73     unix_languages["ruby"];   unix_languages["rb"];
74     unix_languages["awk"];
75 
76     add_style(unix_languages, unix_style);
77 
78     # Define ML-style comment languages
79     ml_style[begin]="(* ";
80     ml_style[line]=" * ";
81     ml_style[end]=" *)";
82 
83     ml_languages["ml"];
84     ml_languages["ocaml"];
85     ml_languages["caml"];
86 
87     add_style(ml_languages, ml_style);
88 
89     # Define HTML-style comment languages
90     html_style[begin]="<!-- ";
91     html_style[line]="  -- ";
92     html_style[end]="  --> ";
93 
94     html_languages["html"];
95     html_languages["xml"];
96     html_languages["svg"];
97 
98     add_style(html_languages, html_style);
99 
100    # Define TeX-style comment languages
101    tex_style[begin]="% ";
102    tex_style[line]="% ";
103    tex_style[end]="% ";
104
105    tex_languages["tex"];
106    tex_languages["latex"];
107
108    add_style(tex_languages, tex_style);
109
110    # Define SQL-style comment languages
111    sql_style[begin]="-- ";
112    sql_style[line]="-- ";
113    sql_style[end]="-- ";
114
115    sql_languages["sql"];
116
117    add_style(sql_languages, sql_style);
118
119    # Select language (table keys are lower case, so normalize the name)
120    language=tolower(language);
121
122    # Print first line (an unknown language yields an empty marker)
123    print styles[language, begin];
124}
125
126END {
127    # Add final comment marker, closing the generated comment
128    print styles[language, end];
129
130    # Attach file if needed: its lines are copied verbatim after the comment
131    if (attach != "") {
132        # Read file; getline yields 1 per line, 0 at end of file, -1 on error
133        while ((r=getline < attach) > 0) {
134            print $0;
135        }
136        # Report error: r still holds the last getline status.
137        if (r == -1) {
138            print "Can't read '"attach"'." > "/dev/stderr";
139        }
140    }
141}
142    
143{
144    # Read template from standard input (this rule runs once per line)
145    input = $0;
146    # Apply substitution to template. Regex constants are used because "\$"
147    # in a string constant is undefined in POSIX awk (gawk drops the backslash).
148    gsub(/\$name/, name, input);
149    gsub(/\$date/, date, input);
150    gsub(/\$program/, program, input);
151
152    # Apply comments and print to output
153    print styles[language, line]""input;
154}


The accompanying template file:
Copyright $date $name.

This file is part of $program.

$program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

$program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with $program. If not, see <http://www.gnu.org/licenses/>.


The code is also available at GitHub as awk/gpl_comment.awk
and awk/gpl_comment.tpl.

No comments: