/* <xyz2pdb.c> 26jan05
**
** Simple conversion of FAH ".xyz" format file to ".pdb"
**
** Copyright (C) 2004-2005 Richard P. Howell IV.  This is free
** software; you can distribute it and/or modify it under the terms of
** the GNU General Public License.  There is no warranty whatsoever.
**
** To compile:
**  cc -o xyz2pdb xyz2pdb.c -lm
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <math.h>

#define MAXBONDS	8		/* Maximum bonds on one atom */
#define MAXATOMS	100000		/* Maximum atoms in molecule */

/* Element numbers, when they are indexed */

#define E_Q			0	/* Unknown */
#define E_H			1	/* Hydrogen */
#define E_C			2	/* Carbon */
#define E_N			3	/* Nitrogen */
#define E_O			4	/* Oxygen */
#define E_P			5	/* Phosphorus */
#define E_S			6	/* Sulfur */

#define ETYPES		"?HCNOPS"	/* Known elements (in table order) */

typedef unsigned int u32;

struct bnd
{	int		toward;		/* Atom bonded to */
	float		len;		/* Bond length (/2) */
};

struct atm
{	short		element;	/* Element */
	short		flags;		/* Flags */
	int		n;		/* Atom number (renumbered) */
	float		x;		/* X coordinate */
	float		y;		/* Y coordinate */
	float		z;		/* Z coordinate */
	u32		bpat;		/* Bond pattern */
	struct bnd	achc;		/* Alpha carbon peptide bond toward carbonyl */
	struct bnd	acha;		/* Alpha carbon peptide bond toward imino */
	struct bnd	bond[MAXBONDS];	/* Bond descriptions */
};

/* Atom flag values */

#define AF_H	0x0001			/* Hydrogen */
#define AF_H2O	0x0002			/* Water */
#define AF_UREA	0x0004			/* Urea */
#define AF_CA	0x0008			/* Alpha carbon */
#define AF_BB	0x0010			/* Backbone */
#define AF_CN	0x0020			/* Carbon in carbonyl of peptide bond or amide */
#define AF_NNH2	0x0040			/* Nitrogen in amino */
#define AF_DL	0x4000			/* Atom is in display list */

struct adist
{	float		r;
	struct atm *pa;
};

/* Atomic radii, bond lengths, and similar things are in Angstroms.
** The bonding radii of the atoms are, to some extent, a fudge.
*/

struct atmprop				/* Atom properties (in ETYPES order) */
{	float		b_radius;	/* Bonding radius (max) of this element type */
} aprops[] =
 {	{ 1.00, },	/* ? */
	{ 0.37, },	/* H */
	{ 0.77, },	/* C */
	{ 0.77, },	/* N */
	{ 0.74, },	/* O */
	{ 1.11, },	/* P */
	{ 1.06, }	/* S */
 };

#define MAX_BOND_LEN	2.4
#define CH_BOND_LEN	1.095
#define CH_BOND_ID	21

/* Current option state */

#define OF_BO	0x0001	/* Display backbone only */
#define OF_IH	0x0002	/* Don't display hydrogen atoms */
#define OF_IW	0x0004	/* Don't display water molecules */
#define OF_IU	0x0008	/* Don't display urea molecules */
#define OF_BI	0x0010	/* Derive bond info from XYZ location only */

int oflags;		/* Selections */

/* Global variables */

int atoms;		/* Total number of atoms in "atom" table */
struct atm *atom;	/* Pointer to allocated storage for all XYZ file data */
struct adist *patom;	/* Pointer to auxiliary pointer table for sorting */
float scale;		/* Scale factor for normalizing molecule size */
char pname[32];		/* Name of protein */
char *argv0;		/* Name of program */
char *ifile;		/* Name of XYZ file */
char *ofile;		/* Name of PDB file */
FILE *ifp;		/* Input file pointer */
FILE *ofp;		/* Output file pointer */

int debug;
int nh2o;
int nurea;

char wbuf[200];
char errmsg[200];
char ebuf[8];

int readxyz(void);
void writepdb(void);

int main(int argc, char **argv)
{
	int i;
	char *p;
	int *pd;
	char zc;

	argv0 = argv[0];
	oflags = OF_BI;
	atom = NULL;

	ifile = NULL;
	ofile = NULL;
	ifp = stdin;
	ofp = stdout;
	debug = 0;

	/* Process the command-line flags */

	while (--argc > 0)
	{	p = *++argv;
		if (*p++ != '-')
			goto us;
		if (*p == '-')
			++p;
		if (strcmp(p, "o") == 0)
		{	ofile = *++argv;
			goto ca;
		}
		if (strcmp(p, "xyzfile") == 0)
		{	ifile = *++argv;
ca:			if (--argc <= 0)
			{	fprintf(stderr, "\n\
%s: Missing argument after \"-%s\"", argv0, p);
				goto us;
			}
		}
		else if (strcmp(p, "debug") == 0)
		{	pd = &debug;
			if	(	(--argc <= 0)
				||	(sscanf(*++argv,"%d%c", pd, &zc) != 1)
				)
			{	fprintf(stderr, "\n\
%s: Bad or missing numeric argument after \"-%s\"", argv0, p);
				goto us;
			}
		}
		else if (strcmp(p, "backbone") == 0)
			oflags |= OF_BO;
		else if (strcmp(p, "noh") == 0)
			oflags |= OF_IH;
		else if (strcmp(p, "noh2o") == 0)
			oflags |= OF_IW;
		else if (strcmp(p, "nourea") == 0)
			oflags |= OF_IU;
		else if (strcmp(p, "usebond") == 0)
			oflags &= ~OF_BI;
		else		/* Including "-help" */
		{
us:			fprintf(stderr, "\n\
Usage: %s [<flags> with arguments, as follows]\n\n\
 -o <PDB file name>        default is <stdout>\n\
 -xyzfile <XYZ file name>  default is <stdin>\n\
 -debug <level>            0 to 9 [0]\n\
 -backbone                 output backbone only\n\
 -noh                      don't output hydrogen atoms\n\
 -noh2o                    don't output water molecules\n\
 -nourea                   don't output urea molecules\n", argv0);
			fprintf(stderr, "\
 -usebond                  derive bond info from input file only\n\
 -help                     print this information\n\n");
			exit(1);
		}
	}

	/* Process and do sanity checks on the command-line data */

	if ((i = debug) > 9)			/* Set up debug level first (!) */
		i = 9;
	if (i < 0)
		i = 0;
	if (i != debug)
		fprintf(stderr, "Debug level %d out of range, setting to %d\n", debug, i);
	debug = i;

	/* Open input (XYZ) file */

	if ((ifile != NULL) && ((ifp = fopen(ifile, "r")) == NULL))
	{	fprintf(stderr, "Can't open XYZ file \"%.160s\"\n", ifile);
		return (1);
	}

	/* Read in the XYZ file data */

	if ((i = readxyz()) != 0)		/* Read in the XYZ file data */
		fprintf(stderr, "Error reading XYZ file: %s\n", errmsg);
	fclose(ifp);
	if (i != 0)
		return (1);

	/* Write out the PDB file */

	writepdb();
	return (0);
}

/* Calculate bond length */

void bondparm(struct atm *p1, struct atm *p2, struct bnd *pb)
{
float x, y, z;

	x = p2->x - p1->x;
	y = p2->y - p1->y;
	z = p2->z - p1->z;
	pb->len = sqrt(x * x + y * y + z * z) / 2.0;
}

/* Comparison function for sorting */

int cmpf(struct adist *pa, struct adist *pb)
{
	if (pa->r < pb->r) return (-1);
	if (pa->r > pb->r) return (1);
	return (0);
}

/* Calculate scaling from XYZ data only */

float biscale(void)
{
	int i;
	struct atm *p0, *p1;
	float min, best, lim;
	float d, f, g;
	struct adist *q0, *q1, *qb, *qe;

	qe = patom;
	for (i = atoms + 1; --i > 0; )
	{	p0 = &atom[i];
		qe->pa = p0;
		qe->r = p0->z;		/* Sort on Z position */
		++qe;
	}
	qsort(patom, atoms, sizeof(struct adist), (int (*)(const void *, const void *)) &cmpf);

	min = best = lim = 1e10;
	qb = patom;
	for (q0 = qb; q0 < qe; ++q0)
	{	if ((p0 = q0->pa)->element != E_C)
			continue;
		while (qb->r < q0->r - lim) ++qb;
		for (q1 = qb; q1 < qe; ++q1)
		{	if (q1->r > q0->r + lim)
				break;
			p1 = q1->pa;
			if ((i = p1->element) == E_H)
				f = 1.000;
			else if ((i == E_C) && (q1 > q0))
				f = 0.708;
			else if (i == E_N)
				f = 0.734;
			else if (i == E_O)
				f = 0.758;
			else continue;
			if ((d = (p0->x - p1->x) * f) < 0) d = -d;
			if (d > lim) continue;
			g = d * d;
			if ((d = (p0->y - p1->y) * f) < 0) d = -d;
			if (d > lim) continue;
			g += d * d;
			d = (p0->z - p1->z) * f;
			g += d * d;
			if ((g = sqrt(g)) > lim) continue;
			if (g == 0.0) continue;		/* Yeah, some files are that bad */
			if (g > best) best = g;
			else if (g < min)
			{	lim = g * 1.3;
				if (best > lim)
					best = min;
				min = g;
			}
			else continue;
			if (debug >= 3)
				fprintf(stderr, "min = %g, best = %g, (%d to %d)\n", min, best, p0 - atom, p1 - atom);
		}
	}
	if (best > lim)
		best = min;
	return (best);
}

/* Construct bonds from scaled XYZ positions */

void bibond(float s)
{
	int n0, n1;
	int b0, b1;
	struct atm *p0, *p1;
	struct adist *q0, *q1;
	float d, g, r0, r, mbl;

	s *= 1.28;			/* Margin seems to be from 1.13 to 1.45 */	/***** RECHECK THIS *****/
	mbl = MAX_BOND_LEN * s;
	for (q0 = patom; q0 < &patom[atoms]; ++q0)
	{	r0 = aprops[(p0 = q0->pa)->element].b_radius;
		n0 = p0 - atom;
		for (b0 = 0; b0 < MAXBONDS; ++b0)
			if (p0->bond[b0].toward == 0)
				break;
		for (q1 = q0; ++q1 < &patom[atoms]; )
		{	p1 = q1->pa;
			if ((d = p0->z - p1->z) < 0) d = -d;
			if (d > mbl) break;
			r = (r0 + aprops[p1->element].b_radius) * s;
			if (d > r) continue;
			g = d * d;
			if ((d = p0->x - p1->x) < 0) d = -d;
			if (d > r) continue;
			g += d * d;
			if ((d = p0->y - p1->y) < 0) d = -d;
			if (d > r) continue;
			g += d * d;
			if (g == 0.0) continue;
			if ((g = sqrt(g)) > r) continue;
			n1 = p1 - atom;
			if (g < r * 0.5)
			{	if (debug >= 2)
					fprintf(stderr, "Atoms %d and %d too close for bond (distance %g, limit %g)\n",
							n0, n1, g, r * 0.5);
				continue;
			}
			if (b0 >= MAXBONDS)
			{	if (debug >= 2)
					fprintf(stderr, "Too many bonds to atom %d\n", n0);
				break;
			}
			for (b1 = 0; b1 < MAXBONDS; ++b1)
				if (p1->bond[b1].toward == 0)
					break;
			if (b1 >= MAXBONDS)
			{	if (debug >= 2)
					fprintf(stderr, "Too many bonds to atom %d\n", n1);
				continue;
			}
			p0->bond[b0].toward = n1;
			++b0;
			p1->bond[b1].toward = n0;
		}
	}
}

/* Clean up bond data */

void xyzclean(void)
{
int i, j;
int n0, n1;
int b0, b1;
struct atm *p0, *p1;

	/* Step 1: identify XYZ file type (first "bond" pointer is atom ID with Genome and Tinker) */

	i = atoms;
	for (n0 = 1; n0 <= atoms; ++n0)
	{	p0 = &atom[n0];
		if (((n1 = p0->bond[0].toward) <= 0) || (n1 > atoms))
			continue;
		p1 = &atom[n1];
		for (b1 = MAXBONDS; --b1 >= 0; )
			if (p1->bond[b1].toward == n0)
			{	--i;
				break;
			}
	}
	j = (i * 5 > atoms) ? 1 : 0;	/* Which bond pointer is really the first one? */

	/* Step 2: clean up bad bond pointers */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	p0 = &atom[n0];
		for (b0 = j; b0 < MAXBONDS; ++b0)
		{	if ((n1 = p0->bond[b0].toward) <= 0)
				continue;
			if (n1 == n0)
			{	p0->bond[b0].toward = 0;	/* Atom bonded to itself */
				if (debug >= 2)
					fprintf(stderr, "Atom %d bonded to itself (bond %d)\n", n0, b0 - j + 1);
				continue;
			}
			if (n1 > atoms)
			{	p0->bond[b0].toward = 0;	/* Bond pointer out of range */
				if (debug >= 2)
					fprintf(stderr, "Bond %d on atom %d goes to atom %d but there are only %d atoms total\n",
							b0 - j + 1, n0, n1, atoms);
				continue;
			}
			p1 = &atom[n1];
			i = 0;
			for (b1 = j; b1 < MAXBONDS; ++b1)
				if	(	(p1->bond[b1].toward == n0)
					&&	(++i > 1)
					)
				{	p1->bond[b1].toward = 0;	/* Redundant */
					if (debug >= 2)
						fprintf(stderr, "Redundant bond from atom %d toward %d\n", n1, n0);
				}
			if (i == 0)
			{	p0->bond[b0].toward = 0;	/* Not reciprocal */
				if (debug >= 2)
					fprintf(stderr, "Hanging bond from atom %d toward %d\n", n0, n1);
			}
		}
	}

	/* Step 3: compress the remaining bond pointers to start at index 0 */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	p0 = &atom[n0];
		i = -1;
		for (b0 = j; b0 < MAXBONDS; ++b0)
			if ((n1 = p0->bond[b0].toward) > 0)
				p0->bond[++i].toward = n1;
		while (++i < MAXBONDS)
			p0->bond[i].toward = 0;
	}
}

/* Find bond patterns, peptide bonds, alpha carbon chain, water, urea, etc */

void bondpat(void)
{
	int i, j;
	int n0, n1, n2;
	int b0, b1;
	struct atm *p0, *p1, *p2;
	float pbscale;
	int pbnum;

	/* Step 1: calculate "bpat" words for all atoms */

	for (n0 = 1; n0 <= atoms; ++n0)
		for (b0 = MAXBONDS; --b0 >= 0; )
			if ((n1 = atom[n0].bond[b0].toward) > 0)
				atom[n0].bpat += (1 << (atom[n1].element * 3));

	/* Declare a few interesting patterns we will look for */

#define BP_CNO		((1 << (E_C * 3)) + (1 << (E_N * 3)) + (1 << (E_O * 3)))
#define BP_CO		((1 << (E_C * 3)) + (1 << (E_O * 3)))
#define BP_CO2		((1 << (E_C * 3)) + (2 << (E_O * 3)))
#define BP_H2		(2 << (E_H * 3))
#define BP_HMASK	(7 << (E_H * 3))
#define BP_O		(1 << (E_O * 3))
#define BP_N		(1 << (E_N * 3))
#define BP_N2O		((2 << (E_N * 3)) + (1 << (E_O * 3)))
#define BP_C		(1 << (E_C * 3))

	/* Step 2A: find all carbon - carbonyl - imino sequences */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	j = (p0 = &atom[n0])->element;
		if (j == E_H)
			p0->flags |= AF_H;
		if (j != E_C)
			continue;
		n2 = 0;
		p2 = NULL;							/* (just to make the compiler happy) */
		for (b0 = MAXBONDS; --b0 >= 0; )
			if ((n1 = p0->bond[b0].toward) > 0)
			{	if ((j = (p1 = &atom[n1])->element) == E_N)
					n2 = n1;
				else if (j == E_C)
					p2 = p1;
			}
		if ((i = p0->bpat) == BP_CNO)
		{	p0->flags |= AF_CN;					/* Either peptide or amide */
			p2->achc.toward = n2;					/* Temporary pointer from (probably alpha) C to N */
		}
		else if (((i == BP_CO) || (i == BP_CO2)) && (p2->achc.toward <= 0))
			p2->achc.toward = -1;					/* At least eligible to be alpha C */
	}

	/* Step 2B: find imino - alpha-carbon sequences and peptide bonds */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	p0 = &atom[n0];
		if ((n1 = p0->achc.toward) <= 0)				/* Find our temporary pointer */
			continue;
		p0->achc.toward = -1;
		p1 = &atom[n1];
		for (b1 = MAXBONDS; --b1 >= 0; )
		{	if	(	((n2 = p1->bond[b1].toward) <= 0)
				||	((p2 = &atom[n2])->achc.toward == 0)
				) continue;
			p0->achc.toward = n2;					/* Peptide bond, carbonyl side */
			p2->acha.toward = n0;					/* Peptide bond, imino side */
		}
	}

	/* Step 2C: calculate angles and flush isolated peptide bonds */

	pbscale = 0.0;
	pbnum = 0;
	for (n0 = 1; n0 <= atoms; ++n0)
	{	p0 = &atom[n0];
		if ((n2 = p0->achc.toward) <= 0)
			goto dc;
		p2 = &atom[n2];
		if	(	(p0->acha.toward == 0)
			&&	(p2->achc.toward == 0)
			)
		{	p2->acha.toward = 0;					/* Isolated peptide bond, not in backbone */
dc:			p0->achc.toward = 0;
			continue;
		}
		atom[n0].flags |= AF_CA;					/* Mark both as alpha carbon */
		atom[n2].flags |= AF_CA;
		bondparm(p0, p2, &p0->achc);					/* Calculate parameters */
		bondparm(p2, p0, &p2->acha);
		if (debug >= 3)
		{	for (b0 = MAXBONDS; --b0 >= 0; )
				if	(	((n1 = p0->bond[b0].toward) > 0)
					||	(atom[n1].element == E_N)
					)
				{	fprintf(stderr, "Alpha C1 #%d, C2 #%d\n", n0, n2);
					goto nb;
				}
			fprintf(stderr, "No amino alpha C1 #%d, alpha C2 #%d\n", n0, n2);
		}
nb:		for (b0 = MAXBONDS; --b0 >= 0; )				/* Mark atoms in backbone */
		{	if	(	((n1 = p2->bond[b0].toward) > 0)
				&&	((p1 = &atom[n1])->element == E_N)
				)						/* p1 is peptide N */
			{	p0 = NULL;
				for (b1 = MAXBONDS; --b1 >= 0; )
				{	if ((n1 = p1->bond[b1].toward) <= 0)
						continue;
					atom[n1].flags |= AF_BB;		/* Mark if bonded to peptide N */
					if ((atom[n1].flags & AF_CN) != 0)	/* Already identified as carbonyl or amide */
						p0 = &atom[n1];			/* p0 is peptide C */
				}
				if (p0 != NULL)
				{	pbscale += sqrt((p1->x - p0->x) * (p1->x - p0->x) +
							(p1->y - p0->y) * (p1->y - p0->y) + (p1->z - p0->z) * (p1->z - p0->z));
					pbnum++;
					for (b1 = MAXBONDS; --b1 >= 0; )
						if ((n1 = p0->bond[b1].toward) > 0)
							atom[n1].flags |= AF_BB;	/* Mark if bonded to peptide C */
				}
			}
		}
	}
	if ((debug >= 2) && (pbnum > 0))
	{	pbscale /= (float) pbnum * scale;
		fprintf(stderr, "Average C-N in peptide bond: %g (%.2f scale)\n", pbscale, pbscale / 1.33);
	}

	/* Step 3: identify water molecules */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	if	(	((p0 = &atom[n0])->bpat != BP_H2)
			||	(p0->element != E_O)
			)
nw:			continue;
		for (b0 = MAXBONDS; --b0 >= 0; )
			if	(	((n1 = p0->bond[b0].toward) > 0)
				&&	(atom[n1].bpat != BP_O)
				) goto nw;
		p0->flags |= AF_H2O;				/* Mark the oxygen */
		for (b0 = MAXBONDS; --b0 >= 0; )
		{	if ((n1 = p0->bond[b0].toward) > 0)
				atom[n1].flags |= AF_H2O;	/* Mark the hydrogens */
		}
		++nh2o;
	}

	/* Step 4A: mark all amino nitrogens */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	if	(	((p0 = &atom[n0])->element != E_N)
			||	((p0->bpat & BP_HMASK) != BP_H2)
			)
na:			continue;
		for (b0 = MAXBONDS; --b0 >= 0; )
			if	(	((n1 = p0->bond[b0].toward) > 0)
				&&	(atom[n1].element == E_H)
				&&	(atom[n1].bpat != BP_N)
				) goto na;
		p0->flags |= AF_NNH2;				/* Mark it */
	}

	/* Step 4B: identify urea molecules */

	for (n0 = 1; n0 <= atoms; ++n0)
	{	if	(	((p0 = &atom[n0])->bpat != BP_N2O)
			||	(p0->element != E_C)
			)
nu:			continue;
		for (b0 = MAXBONDS; --b0 >= 0; )
			if	(	((n1 = p0->bond[b0].toward) > 0)
				&&	(	(	(atom[n1].element == E_O)
						&&	(atom[n1].bpat != BP_C)
						)
					||	(	(atom[n1].element == E_N)
						&&	((atom[n1].flags & AF_NNH2) == 0)
						)
					)
				) goto nu;
		for (b0 = MAXBONDS; --b0 >= 0; )
		{	if ((n1 = p0->bond[b0].toward) <= 0)
				continue;
			(p1 = &atom[n1])->flags |= AF_UREA;	/* Mark nitrogens and oxygen */
			for (b1 = MAXBONDS; --b1 >= 0; )
				if ((n2 = p1->bond[b1].toward) > 0)
					atom[n2].flags |= AF_UREA;	/* Mark everything else */
		}
		++nurea;
	}
}

/* Read in the input XYZ file data */

int readxyz(void)
{
	int i, j, k, n;
	char *p, *q;
	struct atm *pa;
	int na;
	int zi, zn, zz;
	float xa, xb, ya, yb, za, zb;
	float chdst;
	float d, g;

	p = ETYPES;
	nh2o = 0;
	nurea = 0;
	na = 0;
	if (fgets(wbuf, sizeof(wbuf), ifp) != NULL)
	{	pname[0] = '\0';
		pname[30] = '\0';
		sscanf(wbuf, "%d %30c", &na, pname);
		for (i = 0; i < 30; ++i)
			if (((j = pname[i]) == '\012') || (j == '\015'))
			{	pname[i] = '\0';
				break;
			}
	}
	if (pname[0] == '\0')	/* Genome */
		strcpy(pname, "Genome");

	n = 0;
	atoms = 0;
	++na;								/* Tinker reports one too few atoms */
	if (na >= MAXATOMS)
	{	sprintf(errmsg, "Molecule too large.  Max = %d atoms", MAXATOMS);
		n = 2;
		goto cf;
	}
	if (atom != NULL)
		free(atom);
	i = sizeof(struct atm) * (na + 3);
	j = i + sizeof(struct adist) * (na + 3);
	if ((atom = malloc(j)) == NULL)
	{	sprintf(errmsg, "Can't assign memory.  Need %d bytes.", j);
		n = 3;
		goto cf;
	}
	memset(atom, 0, j);					/* Zeroing the array is important */
	patom = (struct adist *) ((char *) atom + i);
	pa = &atom[1];						/* Atom 0 isn't used */

	while (fgets(wbuf, sizeof(wbuf), ifp) != NULL)
	{	if (sscanf(wbuf, "%d%7s%f%f%f%n", &zi, ebuf, &pa->x, &pa->y, &pa->z, &zn) < 5)
			continue;
		if ((atoms = pa - atom) != zi)
		{	if (atoms >= na) break;		/* Don't complain if last atom */
			sprintf(errmsg, "Sequence error in XYZ file expecting atom number %d", atoms);
			n = 4;
			break;
		}
		pa->element = ((q = strchr(p, ebuf[0])) == NULL) ? 0 : (q - p);
		if ((oflags & OF_BI) == 0)
		{	zi = 0;
			for (i = 0; ; ++i)
			{	zn += zi;
				if (sscanf(&wbuf[zn], "%d%n", &zz, &zi) != 1)
					break;
				if (i >= MAXBONDS)			/* Too much garbage */
				{	sprintf(errmsg, "Too many fields in XYZ file at atom number %d", pa - atom);
					n = 5;
					goto cf;
				}
				pa->bond[i].toward = zz;
			}
		}
		if (atoms >= na) break;			/* End of atom list, according the the header line */
		++pa;
	}
cf:	if (n != 0)
		return (n);

	if (atoms == 0)
	{	sprintf(errmsg, "No atoms in molecule");
		return (6);
	}

	if ((oflags & OF_BI) == 0)
		xyzclean();			/* Clean up bond data */

	g = 0.0;			/* Calculate scaling */
	chdst = xa = ya = za = 1e10;
	xb = yb = zb = -1e10;
	for (i = atoms + 1; --i > 0; )
	{	pa = &atom[i];
		if (xa > pa->x) xa = pa->x;
		if (xb < pa->x) xb = pa->x;
		if (ya > pa->y) ya = pa->y;
		if (yb < pa->y) yb = pa->y;
		if (za > pa->z) za = pa->z;
		if (zb < pa->z) zb = pa->z;
		if ((oflags & OF_BI) != 0)
			continue;
		for (j = MAXBONDS; --j >= 0; )
		{	if ((k = pa->bond[j].toward) <= 0)
				continue;
			if (pa->element * 10 + atom[k].element != CH_BOND_ID)
				continue;				/* Just look at lengths of C-H bonds */
			bondparm(pa, &atom[k], &pa->bond[j]);	/* Temporarily calculate len */
			g += pa->bond[j].len;
			d = g * 1.2 / (float) ++n;
			if (d > 1.44 * chdst)
				g = chdst * (float) n;
			if	(	(chdst > d)
				||	(	(pa->bond[j].len > chdst)
					&&	(pa->bond[j].len < d)
					)
				) chdst = pa->bond[j].len;		/* Best guess for "typical" C-H bond */
		}
	}
	if ((oflags & OF_BI) == 0)
		chdst *= 2;
	if (chdst > 1e5)					/* Either no C-H bonds, or we've been told to calculate it all */
		chdst = biscale();
	if (chdst > 1e5)					/* Not a reasonable value, set to default */
		chdst = CH_BOND_LEN;

	chdst *= 1.0177;					/* This seems to give the best match in peptide bonds */

	xa = (xb + xa) / 2;
	xb -= xa;
	ya = (yb + ya) / 2;
	yb -= ya;
	za = (zb + za) / 2;
	zb -= za;
	if (xb < yb) xb = yb;
	if (xb < zb) xb = zb;
	xb = 1 / xb;
	scale = xb * chdst / CH_BOND_LEN;

	for (i = atoms + 1; --i > 0; )
	{	pa = &atom[i];
		pa->x = (pa->x - xa) * xb;		/* Adjust to fit in box (-1, -1, -1) to (+1, +1, +1) */
		pa->y = (pa->y - ya) * xb;
		pa->z = (pa->z - za) * xb;
	}
	if ((oflags & OF_BI) != 0)
		bibond(scale);					/* Scaling seems to work from 1.13 to 1.45 */
	for (i = atoms + 1; --i > 0; )
	{	pa = &atom[i];
		for (j = MAXBONDS; --j >= 0; )
		{	if ((k = pa->bond[j].toward) <= 0)
				continue;
			bondparm(pa, &atom[k], &pa->bond[j]);	/* Calculate len */
			d = pa->bond[j].len * 2.0 / scale;
			g = (aprops[pa->element].b_radius + aprops[atom[k].element].b_radius) * 1.46;
			if (d > g)
			{	pa->bond[j].toward = 0;		/* Something is wrong, bond is too long */
				if ((debug >= 2) && (i < k))
					fprintf(stderr, "Bond from atom %d to atom %d too long (length %g, limit %g)\n", i, k, d, g);
			}
			if	(	(debug >= 4)
				&&	(pa->element * 10 + atom[k].element == CH_BOND_ID)
				) fprintf(stderr, "C-H scaled bond length %g (%d to %d)\n", d, i, k);
		}
	}

	bondpat();		/* Find peptide bonds and set up the alpha carbon chain */

	if (debug >= 1)
	{	fprintf(stderr, "Atom size scale %g, molecule scale %g\npname = \"%s\", atoms = %d\n",
				scale / xb, scale, pname, atoms);
		if (nh2o != 0)
			fprintf(stderr, "Number of water molecules = %d\n", nh2o);
		if (nurea != 0)
			fprintf(stderr, "Number of urea molecules = %d\n", nurea);
	}
	if (debug >= 5)
		for (i = 1; i <= atoms; ++i)
		{	pa = &atom[i];
			fprintf(stderr, " Atom %03d, element %d (%c): (%g, %g, %g), bonds:", i,
					pa->element, p[pa->element], pa->x, pa->y, pa->z);
			for (na = 0; ; na = k)
			{	k = atoms + 1;
				for (j = 0; j < MAXBONDS; ++j)
					if	(	((n = pa->bond[j].toward) > na)
						&&	(n < k)
						) k = n;
				if (k > atoms) break;
				fprintf(stderr, " %d", k);
			}
			if ((pa->flags & AF_H2O) != 0)
				fprintf(stderr, " H2O");
			fprintf(stderr, "\n");
		}
	return (0);
}

/* Write the PDB output file */

void writepdb(void)
{
	int i, j, k, n;
	int na, c;
	struct atm *pa;
	struct adist *pd, *pe;
	time_t t;

	/* Select the atoms to be included in the output */

	pe = patom;
	for (i = atoms + 1; --i > 0; )
	{	pa = &atom[i];
		pa->flags &= ~AF_DL;
		if	(	(	((oflags & OF_BO) != 0)			/* Display backbone only */
				&&	((pa->flags & AF_BB) == 0)		/* This isn't backbone */
				)
			||	(	((oflags & OF_IH) != 0)			/* Don't display hydrogen */
				&&	((pa->flags & AF_H) != 0)		/* This is hydrogen */
				)
			||	(	((oflags & OF_IW) != 0)			/* Don't display water */
				&&	((pa->flags & AF_H2O) != 0)		/* This is water */
				)
			||	(	((oflags & OF_IU) != 0)			/* Don't display urea */
				&&	((pa->flags & AF_UREA) != 0)	/* This is urea */
				)
			) continue;				/* Skip this atom */
		pe->pa = pa;
		pa->flags |= AF_DL;
		++pe;
	}
	if (debug >= 2)
		fprintf(stderr, "Atoms translated: %d\n", pe - patom);

	/* Open the output file and write its header */

	if ((ofile != NULL) && ((ofp = fopen(ofile, "w")) == NULL))
	{	fprintf(stderr, "Can't open PDF file \"%.160s\"\n", ofile);
		return;
	}
	fprintf(ofp, "COMPND    %s\n", pname);
	time(&t);
	if (ifile == NULL)
		fprintf(ofp, "AUTHOR    Written by %s on %s", argv0, ctime(&t));
	else
	{	fprintf(ofp, "AUTHOR    Translated by %s on %s", argv0, ctime(&t));
		fprintf(ofp, "AUTHOR     Source file: %s\n", ifile);
	}

	/* Process and output each atom in selected list */

	na = 0;
	for (pd = patom; pd < pe; ++pd)
	{	pa = pd->pa;
		pa->n = ++na;		/* Renumber atoms to sequence only the selected ones */
		c = ETYPES[pa->element];
		fprintf(ofp, "HETATM%5d  %c           2    %8.3f%8.3f%8.3f  1.00  0.00           %c\n",
				na, c, pa->x / scale, pa->y / scale, pa->z / scale, c);
	}

	/* Reprocess each atom to output bond information */

	for (pd = patom; pd < pe; ++pd)
	{	pa = pd->pa;
		fprintf(ofp, "CONECT%8d", pa->n);
		for (i = 0; ; i = k)
		{	k = atoms + 1;
			for (j = 0; j < MAXBONDS; ++j)
				if	(	((n = pa->bond[j].toward) > i)
					&&	(n < k)
					&&	((atom[n].flags & AF_DL) != 0)
					) k = n;
			if (k > atoms) break;
			fprintf(ofp, "%5d", atom[k].n);
		}
		fprintf(ofp, "\n");
	}

	/* Finish the output and close the file */

	fprintf(ofp, "MASTER        0    0    0    0    0    0    0    0%5d    0%5d    0\n",
			na, na);
	if (ofile != NULL)
		fclose(ofp);
}