xref: /freebsd/contrib/one-true-awk/testdir/chem.awk (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1BEGIN {
2	macros = "/usr/bwk/chem/chem.macros"	# CHANGE ME!!!!!
3	macros = "/dev/null" # since originals are lost
4
5	pi = 3.141592654
6	deg = 57.29578
7	setparams(1.0)
8	set(dc, "up 0 right 90 down 180 left 270 ne 45 se 135 sw 225 nw 315")
9	set(dc, "0 n 30 ne 45 ne 60 ne 90 e 120 se 135 se 150 se 180 s")
10	set(dc, "300 nw 315 nw 330 nw 270 w 210 sw 225 sw 240 sw")
11}
12function init() {
13	printf ".PS\n"
14	if (firsttime++ == 0) {
15		printf "copy \"%s\"\n", macros
16		printf "\ttextht = %g; textwid = .1; cwid = %g\n", textht, cwid
17		printf "\tlineht = %g; linewid = %g\n", lineht, linewid
18	}
19	printf "Last: 0,0\n"
20	RING = "R"; MOL = "M"; BOND = "B"; OTHER = "O"	# manifests
21	last = OTHER
22	dir = 90
23}
24function setparams(scale) {
25	lineht = scale * 0.2
26	linewid = scale * 0.2
27	textht = scale * 0.16
28	db = scale * 0.2		# bond length
29	cwid = scale * 0.12		# character width
30	cr = scale * 0.08		# rad of invis circles at ring vertices
31	crh = scale * 0.16		# ht of invis ellipse at ring vertices
32	crw = scale * 0.12		# wid
33	dav = scale * 0.015		# vertical shift up for atoms in atom macro
34	dew = scale * 0.02		# east-west shift for left of/right of
35	ringside = scale * 0.3		# side of all rings
36	dbrack = scale * 0.1		# length of bottom of bracket
37}
38
39	{ lineno++ }
40
41/^(\.cstart)|(begin chem)/	{ init(); inchem = 1; next }
42/^(\.cend)|(end)/		{ inchem = 0; print ".PE"; next }
43
44/^\./		{ print; next }		# troff
45
46inchem == 0	{ print; next }		# everything else
47
48$1 == "pic"	{ shiftfields(1); print; next }	# pic pass-thru
49$1 ~ /^#/	{ next }	# comment
50
51$1 == "textht"	{ textht = $NF; next }
52$1 == "cwid"	{ cwid = $NF; next }
53$1 == "db"	{ db = $NF; next }
54$1 == "size"	{ if ($NF <= 4) size = $NF; else size = $NF/10
55		  setparams(size); next }
56
57	{ print "\n#", $0 }	# debugging, etc.
58	{ lastname = "" }
59
60$1 ~ /^[A-Z].*:$/ {	# label;  falls thru after shifting left
61	lastname = substr($1, 1, length($1)-1)
62	print $1
63	shiftfields(1)
64}
65
66$1 ~ /^\"/	{ print "Last: ", $0; last = OTHER; next }
67
68$1 ~ /bond/	{ bond($1); next }
69$1 ~ /^(double|triple|front|back)$/ && $2 == "bond" {
70		   $1 = $1 $2; shiftfields(2); bond($1); next }
71
72$1 == "aromatic" { temp = $1; $1 = $2; $2 = temp }
73$1 ~ /ring|benz/ { ring($1); next }
74
75$1 == "methyl"	{ $1 = "CH3" }	# left here as an example
76
77$1 ~ /^[A-Z]/	{ molecule(); next }
78
79$1 == "left"	{ left[++stack] = fields(2, NF); printf("Last: [\n"); next }
80
81$1 == "right"	{ bracket(); stack--; next }
82
83$1 == "label"	{ label(); next }
84
85/./	{ print "Last: ", $0; last = OTHER }
86
87END	{ if (firsttime == 0) error("did you forget .cstart and .cend?")
88	  if (inchem) printf ".PE\n"
89}
90
91function bond(type,	i, goes, from) {
92	goes = ""
93	for (i = 2; i <= NF; i++)
94		if ($i == ";") {
95			goes = $(i+1)
96			NF = i - 1
97			break
98		}
99	leng = db
100	from = ""
101	for (cf = 2; cf <= NF; ) {
102		if ($cf ~ /(\+|-)?[0-9]+|up|down|right|left|ne|se|nw|sw/)
103			dir = cvtdir(dir)
104		else if ($cf ~ /^leng/) {
105			leng = $(cf+1)
106			cf += 2
107		} else if ($cf == "to") {
108			leng = 0
109			from = fields(cf, NF)
110			break
111		} else if ($cf == "from") {
112			from = dofrom()
113			break
114		} else if ($cf ~ /^#/) {
115			cf = NF+1
116			break;
117		} else {
118			from = fields(cf, NF)
119			break
120		}
121	}
122	if (from ~ /( to )|^to/)	# said "from ... to ...", so zap length
123		leng = 0
124	else if (from == "")	# no from given at all
125		from = "from Last." leave(last, dir) " " fields(cf, NF)
126	printf "Last: %s(%g, %g, %s)\n", type, leng, dir, from
127	last = BOND
128	if (lastname != "")
129		labsave(lastname, last, dir)
130	if (goes) {
131		$0 = goes
132		molecule()
133	}
134}
135
136function dofrom(	n, s) {
137	cf++	# skip "from"
138	n = $cf
139	if (n in labtype)	# "from Thing" => "from Thing.V.s"
140		return "from " n "." leave(labtype[n], dir)
141	if (n ~ /^\.[A-Z]/)	# "from .V" => "from Last.V.s"
142		return "from Last" n "." corner(dir)
143	if (n ~ /^[A-Z][^.]*\.[A-Z][^.]*$/)	# "from X.V" => "from X.V.s"
144		return "from " n "." corner(dir)
145	return fields(cf-1, NF)
146}
147
148function bracket(	t) {
149	printf("]\n")
150	if ($2 == ")")
151		t = "spline"
152	else
153		t = "line"
154	printf("%s from last [].sw+(%g,0) to last [].sw to last [].nw to last [].nw+(%g,0)\n",
155		t, dbrack, dbrack)
156	printf("%s from last [].se-(%g,0) to last [].se to last [].ne to last [].ne-(%g,0)\n",
157		t, dbrack, dbrack)
158	if ($3 == "sub")
159		printf("\" %s\" ljust at last [].se\n", fields(4,NF))
160}
161
162function molecule(	n, type) {
163	n = $1
164	if (n == "BP") {
165		$1 = "\"\" ht 0 wid 0"
166		type = OTHER
167	} else {
168		$1 = atom(n)
169		type = MOL
170	}
171	gsub(/[^A-Za-z0-9]/, "", n)	# for stuff like C(OH3): zap non-alnum
172	if ($2 == "")
173		printf "Last: %s: %s with .%s at Last.%s\n", \
174			n, $0, leave(type,dir+180), leave(last,dir)
175	else if ($2 == "below")
176		printf("Last: %s: %s with .n at %s.s\n", n, $1, $3)
177	else if ($2 == "above")
178		printf("Last: %s: %s with .s at %s.n\n", n, $1, $3)
179	else if ($2 == "left" && $3 == "of")
180		printf("Last: %s: %s with .e at %s.w+(%g,0)\n", n, $1, $4, dew)
181	else if ($2 == "right" && $3 == "of")
182		printf("Last: %s: %s with .w at %s.e-(%g,0)\n", n, $1, $4, dew)
183	else
184		printf "Last: %s: %s\n", n, $0
185	last = type
186	if (lastname != "")
187		labsave(lastname, last, dir)
188	labsave(n, last, dir)
189}
190
191function label(	i, v) {
192	if (substr(labtype[$2], 1, 1) != RING)
193		error(sprintf("%s is not a ring", $2))
194	else {
195		v = substr(labtype[$2], 2, 1)
196		for (i = 1; i <= v; i++)
197			printf("\"\\s-3%d\\s0\" at 0.%d<%s.C,%s.V%d>\n", i, v+2, $2, $2, i)
198	}
199}
200
201function ring(type,	typeint, pt, verts, i) {
202	pt = 0	# points up by default
203	if (type ~ /[1-8]$/)
204		verts = substr(type, length(type), 1)
205	else if (type ~ /flat/)
206		verts = 5
207	else
208		verts = 6
209	fused = other = ""
210	for (i = 1; i <= verts; i++)
211		put[i] = dbl[i] = ""
212	nput = aromatic = withat = 0
213	for (cf = 2; cf <= NF; ) {
214		if ($cf == "pointing")
215			pt = cvtdir(0)
216		else if ($cf == "double" || $cf == "triple")
217			dblring(verts)
218		else if ($cf ~ /arom/) {
219			aromatic++
220			cf++	# handled later
221		} else if ($cf == "put") {
222			putring(verts)
223			nput++
224		} else if ($cf ~ /^#/) {
225			cf = NF+1
226			break;
227		} else {
228			if ($cf == "with" || $cf == "at")
229				withat = 1
230			other = other " " $cf
231			cf++
232		}
233	}
234	typeint = RING verts pt		# RING | verts | dir
235	if (withat == 0)
236		fused = joinring(typeint, dir, last)
237	printf "Last: [\n"
238	makering(type, pt, verts)
239	printf "] %s %s\n", fused, other
240	last = typeint
241	if (lastname != "")
242		labsave(lastname, last, dir)
243}
244
245function makering(type, pt, v,       i, a, r) {
246	if (type ~ /flat/)
247		v = 6
248    # vertices
249	r = ringside / (2 * sin(pi/v))
250	printf "\tC: 0,0\n"
251	for (i = 0; i <= v+1; i++) {
252		a = ((i-1) / v * 360 + pt) / deg
253		printf "\tV%d: (%g,%g)\n", i, r * sin(a), r * cos(a)
254	}
255	if (type ~ /flat/) {
256		printf "\tV4: V5; V5: V6\n"
257		v = 5
258	}
259    # sides
260	if (nput > 0) {	# hetero ...
261		for (i = 1; i <= v; i++) {
262			c1 = c2 = 0
263			if (put[i] != "") {
264				printf("\tV%d: ellipse invis ht %g wid %g at V%d\n",
265					i, crh, crw, i)
266				printf("\t%s at V%d\n", put[i], i)
267				c1 = cr
268			}
269			j = i+1
270			if (j > v)
271				j = 1
272			if (put[j] != "")
273				c2 = cr
274			printf "\tline from V%d to V%d chop %g chop %g\n", i, j, c1, c2
275			if (dbl[i] != "") {	# should check i<j
276				if (type ~ /flat/ && i == 3) {
277					rat = 0.75; fix = 5
278				} else {
279					rat = 0.85; fix = 1.5
280				}
281				if (put[i] == "")
282					c1 = 0
283				else
284					c1 = cr/fix
285				if (put[j] == "")
286					c2 = 0
287				else
288					c2 = cr/fix
289				printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
290					rat, i, rat, j, c1, c2
291				if (dbl[i] == "triple")
292					printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
293						2-rat, i, 2-rat, j, c1, c2
294			}
295		}
296	} else {	# regular
297		for (i = 1; i <= v; i++) {
298			j = i+1
299			if (j > v)
300				j = 1
301			printf "\tline from V%d to V%d\n", i, j
302			if (dbl[i] != "") {	# should check i<j
303				if (type ~ /flat/ && i == 3) {
304					rat = 0.75
305				} else
306					rat = 0.85
307				printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
308					rat, i, rat, j
309				if (dbl[i] == "triple")
310					printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
311						2-rat, i, 2-rat, j
312			}
313		}
314	}
315	# punt on triple temporarily
316    # circle
317	if (type ~ /benz/ || aromatic > 0) {
318		if (type ~ /flat/)
319			r *= .4
320		else
321			r *= .5
322		printf "\tcircle rad %g at 0,0\n", r
323	}
324}
325
326function putring(v) {	# collect "put Mol at n"
327	cf++
328	mol = $(cf++)
329	if ($cf == "at")
330		cf++
331	if ($cf >= 1 && $cf <= v) {
332		m = mol
333		gsub(/[^A-Za-z0-9]/, "", m)
334		put[$cf] = m ":" atom(mol)
335	}
336	cf++
337}
338
339function joinring(type, dir, last) {	# join a ring to something
340	if (substr(last, 1, 1) == RING) {	# ring to ring
341		if (substr(type, 3) == substr(last, 3))	# fails if not 6-sided
342			return "with .V6 at Last.V2"
343	}
344	# if all else fails
345	return sprintf("with .%s at Last.%s", \
346		leave(type,dir+180), leave(last,dir))
347}
348
349function leave(last, d,		c, c1) {	# return vertex of last in dir d
350	if (last == BOND)
351		return "end"
352	d = reduce(d)
353	if (substr(last, 1, 1) == RING)
354		return ringleave(last, d)
355	if (last == MOL) {
356		if (d == 0 || d == 180)
357			c = "C"
358		else if (d > 0 && d < 180)
359			c = "R"
360		else
361			c = "L"
362		if (d in dc)
363			c1 = dc[d]
364		else
365			c1 = corner(d)
366		return sprintf("%s.%s", c, c1)
367	}
368	if (last == OTHER)
369		return corner(d)
370	return "c"
371}
372
373function ringleave(last, d,	rd, verts) {	# return vertex of ring in dir d
374	verts = substr(last, 2, 1)
375	rd = substr(last, 3)
376	return sprintf("V%d.%s", int(reduce(d-rd)/(360/verts)) + 1, corner(d))
377}
378
379function corner(dir) {
380	return dc[reduce(45 * int((dir+22.5)/45))]
381}
382
383function labsave(name, type, dir) {
384	labtype[name] = type
385	labdir[name] = dir
386}
387
388function dblring(v,	d, v1, v2) {	# should canonicalize to i,i+1 mod v
389	d = $cf
390	for (cf++; $cf ~ /^[1-9]/; cf++) {
391		v1 = substr($cf,1,1)
392		v2 = substr($cf,3,1)
393		if (v2 == v1+1 || v1 == v && v2 == 1)	# e.g., 2,3 or 5,1
394			dbl[v1] = d
395		else if (v1 == v2+1 || v2 == v && v1 == 1)	# e.g., 3,2 or 1,5
396			dbl[v2] = d
397		else
398			error(sprintf("weird %s bond in\n\t%s", d, $0))
399	}
400}
401
402function cvtdir(d) {	# maps "[pointing] somewhere" to degrees
403	if ($cf == "pointing")
404		cf++
405	if ($cf ~ /^[+\-]?[0-9]+/)
406		return reduce($(cf++))
407	else if ($cf ~ /left|right|up|down|ne|nw|se|sw/)
408		return reduce(dc[$(cf++)])
409	else {
410		cf++
411		return d
412	}
413}
414
415function reduce(d) {	# reduces d to 0 <= d < 360
416	while (d >= 360)
417		d -= 360
418	while (d < 0)
419		d += 360
420	return d
421}
422
423function atom(s,    c, i, n, nsub, cloc, nsubc) { # convert CH3 to atom(...)
424	if (s == "\"\"")
425		return s
426	n = length(s)
427	nsub = nsubc = 0
428	cloc = index(s, "C")
429	if (cloc == 0)
430		cloc = 1
431	for (i = 1; i <= n; i++)
432		if (substr(s, i, 1) !~ /[A-Z]/) {
433			nsub++
434			if (i < cloc)
435				nsubc++
436		}
437	gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s)
438	if (s ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
439		gsub(/\./, "\\v#-.3m#.\\v#.3m#", s)
440	return sprintf("atom(\"%s\", %g, %g, %g, %g, %g, %g)",
441		s, (n-nsub/2)*cwid, textht, (cloc-nsubc/2-0.5)*cwid, crh, crw, dav)
442}
443
444function in_line(	i, n, s, s1, os) {
445	s = $0
446	os = ""
447	while ((n = match(s, /!?[A-Z][A-Za-z]*(([0-9]+\.[0-9]+)|([0-9]+))/)) > 0) {
448		os = os substr(s, 1, n-1)	# prefix
449		s1 = substr(s, n, RLENGTH)	# molecule
450		if (substr(s1, 1, 1) == "!") {	# !mol => leave alone
451			s1 = substr(s1, 2)
452		} else {
453			gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s1)
454			if (s1 ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
455				gsub(/\./, "\\v#-.3m#.\\v#.3m#", s1)
456		}
457		os = os s1
458		s = substr(s, n + RLENGTH)	# tail
459	}
460	os = os s
461	print os
462	return
463}
464
465function shiftfields(n,		i) {	# move $n+1..$NF to $n..$NF-1, zap $NF
466	for (i = n; i < NF; i++)
467		$i = $(i+1)
468	$NF = ""
469	NF--
470}
471
472function fields(n1, n2,		i, s) {
473	if (n1 > n2)
474		return ""
475	s = ""
476	for (i = n1; i <= n2; i++) {
477		if ($i ~ /^#/)
478			break;
479		s = s $i " "
480	}
481	return s
482}
483
484function set(a, s,     i, n, q) {
485	n = split(s, q)
486	for (i = 1; i <= n; i += 2)
487		a[q[i]] = q[i+1]
488}
489
490function error(s) {
491	printf "chem\007: error on line %d: %s\n", lineno, s | "cat 1>&2"
492}
493