--- exec.h.orig	2009-03-08 11:10:36.000000000 +1100
+++ exec.h	2009-03-07 18:45:48.000000000 +1100
@@ -139,7 +139,7 @@
 	exec_makecmds_fcn es_makecmds;	/* function to setup vmcmds */
 	union {				/* probe function */
 		int (*elf_probe_func)(struct lwp *,
-			struct exec_package *, void *, char *, vaddr_t *);
+			struct exec_package *, u_long, void *, char *, vaddr_t *);
 		int (*ecoff_probe_func)(struct lwp *, struct exec_package *);
 		int (*mach_probe_func)(const char **);
 	} u;
--- exec_elf32.c.orig	2009-03-07 18:14:30.000000000 +1100
+++ exec_elf32.c	2009-03-08 08:21:27.000000000 +1100
@@ -90,6 +90,13 @@
 
 #include <sys/pax.h>
 
+/* Exec tracing is compiled in only for kernels built with DEBUG_ELF. */
+#ifdef DEBUG_ELF
+#define DPRINTF(x)	printf x
+#else
+#define DPRINTF(x)	/* nothing */
+#endif
+
 extern const struct emul emul_netbsd;
 
 #define elf_check_header	ELFNAME(check_header)
@@ -102,12 +104,13 @@
 
 int	elf_load_file(struct lwp *, struct exec_package *, char *,
 	    struct exec_vmcmd_set *, u_long *, struct elf_args *, Elf_Addr *);
-void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
+void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *, u_long,
 	    const Elf_Phdr *, Elf_Addr *, u_long *, int *, int);
 
-int	netbsd_elf_signature(struct lwp *, struct exec_package *, Elf_Ehdr *);
-int	netbsd_elf_probe(struct lwp *, struct exec_package *, void *, char *,
-	    vaddr_t *);
+int	netbsd_elf_signature(struct lwp *, struct exec_package *,
+	    u_long, Elf_Ehdr *);
+int	netbsd_elf_probe(struct lwp *, struct exec_package *, u_long, void *,
+	    char *, vaddr_t *);
 
 /* round up and down to page boundaries. */
 #define	ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
@@ -310,11 +313,14 @@
  */
 void
 elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
-    const Elf_Phdr *ph, Elf_Addr *addr, u_long *size, int *prot, int flags)
+    u_long file_offset, const Elf_Phdr *ph, Elf_Addr *addr,
+    u_long *size, int *prot, int flags)
 {
 	u_long msize, psize, rm, rf;
 	long diff, offset;
 
+	DPRINTF(("loading psection\n"));
+
 	/*
 	 * If the user specified an address, then we load there.
 	 */
@@ -343,7 +349,7 @@
 	 * Adjust everything so it all starts on a page boundary.
 	 */
 	*addr -= diff;
-	offset = ph->p_offset - diff;
+	offset = file_offset + ph->p_offset - diff;
 	*size = ph->p_filesz + diff;
 	msize = ph->p_memsz + diff;
 
@@ -395,9 +401,9 @@
  * so it might be used externally.
  */
 int
-elf_load_file(struct lwp *l, struct exec_package *epp, char *path,
-    struct exec_vmcmd_set *vcset, u_long *entryoff, struct elf_args *ap,
-    Elf_Addr *last)
+elf_load_file(struct lwp *l, struct exec_package *epp,
+    char *path, struct exec_vmcmd_set *vcset, u_long *entryoff,
+    struct elf_args *ap, Elf_Addr *last)
 {
 	int error, i;
 	struct vnode *vp;
@@ -411,6 +417,8 @@
 	Elf_Addr addr = *last;
 	struct proc *p;
 
+	DPRINTF(("loading interpreter %s\n", path));
+
 	p = l->l_proc;
 
 	/*
@@ -466,9 +474,11 @@
 
 	VOP_UNLOCK(vp, 0);
 
+	DPRINTF(("loading interpreter header\n"));
 	if ((error = exec_read_from(l, vp, 0, &eh, sizeof(eh))) != 0)
 		goto bad;
 
+	DPRINTF(("checking interpreter header\n"));
 	if ((error = elf_check_header(&eh, ET_DYN)) != 0)
 		goto bad;
 
@@ -480,6 +490,7 @@
 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
 	ph = kmem_alloc(phsize, KM_SLEEP);
 
+	DPRINTF(("loading interpreter psections\n"));
 	if ((error = exec_read_from(l, vp, eh.e_phoff, ph, phsize)) != 0)
 		goto bad;
 
@@ -578,7 +589,7 @@
 				flags = VMCMD_RELATIVE;
 			}
 			last_ph = ph0;
-			elf_load_psection(vcset, vp, &ph[i], &addr,
+			elf_load_psection(vcset, vp, 0, &ph[i], &addr,
 			    &size, &prot, flags);
 			/*
 			 * If entry is within this psection then this
@@ -638,32 +649,107 @@
 int
 exec_elf_makecmds(struct lwp *l, struct exec_package *epp)
 {
+	/* Leading fields of a big-endian multi-architecture ("fat") header. */
+	struct fat_header {
+		uint32_t magic;
+		uint32_t nfat_arch;
+	} *fat = epp->ep_hdr;
 	Elf_Ehdr *eh = epp->ep_hdr;
 	Elf_Phdr *ph, *pp;
 	Elf_Addr phdr = 0, pos = 0;
 	int error, i, nload;
 	char *interp = NULL;
 	u_long phsize;
+	u_long file_offset = 0;	/* start of the ELF image within the file */
 	struct proc *p;
 	bool is_dyn;
 
 	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
 		return ENOEXEC;
 
+	/*
+	 * A file that begins with the fat magic holds a table of
+	 * per-architecture entries.  Use the first entry whose cputype we
+	 * support and re-read the ELF header from that entry's offset;
+	 * every later read of the file is biased by file_offset.
+	 *
+	 * Failures just return: the pre-existing error returns of this
+	 * function do not release epp->ep_vp, so vrele() here would drop
+	 * a reference this function never took.
+	 */
+	DPRINTF(("ELF: %s, fat->magic=0x%x\n", epp->ep_name,
+	    be32toh(fat->magic)));
+	if (be32toh(fat->magic) == 0xcafebabe) {
+		/* One table entry; all fields are big-endian. */
+		struct arch {
+			uint32_t cputype;
+			uint32_t cpusubtype;
+			uint32_t offset;
+			uint32_t size;
+			uint32_t align;
+		} arch;
+		/*
+		 * Zero-terminated list of accepted cputype values.
+		 * NOTE(review): 7 is presumably the Mach-O x86 cputype;
+		 * confirm against the fat-binary format.
+		 */
+		static const uint32_t supported_cpus[] = { 7, 0 };
+		/* Sanity cap: nfat_arch is untrusted file content. */
+		enum { FAT_MAX_ARCH = 32 };
+		const uint32_t narch = be32toh(fat->nfat_arch);
+		const uint32_t *sc;
+		uint32_t n;
+		bool found = false;
+
+		DPRINTF(("found fat header: narch=%u\n", narch));
+		if (narch > FAT_MAX_ARCH)
+			return ENOEXEC;
+		for (n = 0; n < narch && !found; n++) {
+			error = exec_read_from(l, epp->ep_vp,
+			    sizeof(*fat) + sizeof(arch) * n, &arch,
+			    sizeof(arch));
+			if (error)
+				return ENOEXEC;
+			DPRINTF(("read arch entry\n"));
+			for (sc = supported_cpus; *sc != 0; sc++) {
+				if (*sc == be32toh(arch.cputype)) {
+					found = true;
+					break;
+				}
+			}
+		}
+		if (!found)
+			return ENOEXEC;
+		DPRINTF(("using arch %u\n", be32toh(arch.cputype)));
+		DPRINTF(("using offset %u\n", be32toh(arch.offset)));
+		file_offset = be32toh(arch.offset);
+		error = exec_read_from(l, epp->ep_vp,
+		    file_offset, eh, sizeof(Elf_Ehdr));
+		if (error) {
+			DPRINTF(("failed to read real header\n"));
+			return ENOEXEC;
+		}
+	}
+
 	is_dyn = elf_check_header(eh, ET_DYN) == 0;
 	/*
 	 * XXX allow for executing shared objects. It seems silly
 	 * but other ELF-based systems allow it as well.
 	 */
-	if (elf_check_header(eh, ET_EXEC) != 0 && !is_dyn)
+	if (elf_check_header(eh, ET_EXEC) != 0 && !is_dyn) {
+		DPRINTF(("invalid ELF header\n"));
 		return ENOEXEC;
+	}
 
-	if (eh->e_phnum > MAXPHNUM || eh->e_phnum == 0)
+	if (eh->e_phnum > MAXPHNUM || eh->e_phnum == 0) {
+		DPRINTF(("wrong number of sections\n"));
 		return ENOEXEC;
+	}
 
+	/* Mark the vnode as text only after the header checks above passed. */
 	error = vn_marktext(epp->ep_vp);
 	if (error)
 		return error;
 
 	/*
 	 * Allocate space to hold all the program headers, and read them
@@ -673,13 +738,14 @@
 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
 	ph = kmem_alloc(phsize, KM_SLEEP);
 
-	if ((error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize)) !=
-	    0)
+	if ((error = exec_read_from(l, epp->ep_vp, file_offset + eh->e_phoff,
+	    ph, phsize)) != 0)
 		goto bad;
 
 	epp->ep_taddr = epp->ep_tsize = ELFDEFNNAME(NO_ADDR);
 	epp->ep_daddr = epp->ep_dsize = ELFDEFNNAME(NO_ADDR);
 
+	DPRINTF(("number of sections: %d\n", eh->e_phnum));
 	for (i = 0; i < eh->e_phnum; i++) {
 		pp = &ph[i];
 		if (pp->p_type == PT_INTERP) {
@@ -690,7 +756,8 @@
 			interp = PNBUF_GET();
 			interp[0] = '\0';
 			if ((error = exec_read_from(l, epp->ep_vp,
-			    pp->p_offset, interp, pp->p_filesz)) != 0)
+			    file_offset + pp->p_offset, interp,
+			    pp->p_filesz)) != 0)
 				goto bad;
 			break;
 		}
@@ -708,10 +775,12 @@
 	if (epp->ep_esch->u.elf_probe_func) {
 		vaddr_t startp = (vaddr_t)pos;
 
-		error = (*epp->ep_esch->u.elf_probe_func)(l, epp, eh, interp,
-							  &startp);
-		if (error)
+		error = (*epp->ep_esch->u.elf_probe_func)(l, epp, file_offset,
+				eh, interp, &startp);
+		if (error) {
+			DPRINTF(("probe func failed\n"));
 			goto bad;
+		}
 		pos = (Elf_Addr)startp;
 	}
 
@@ -745,7 +814,8 @@
 				goto bad;
 			}
 			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
-			    &ph[i], &addr, &size, &prot, VMCMD_FIXED);
+			    file_offset, &ph[i], &addr, &size, &prot,
+			    VMCMD_FIXED);
 
 			/*
 			 * Decide whether it's text or data by looking
@@ -797,6 +867,7 @@
 
 		MALLOC(ap, struct elf_args *, sizeof(struct elf_args),
 		    M_TEMP, M_WAITOK);
+		DPRINTF(("loading interpreter\n"));
 		if ((error = elf_load_file(l, epp, interp,
 		    &epp->ep_vmcmds, &interp_offset, ap, &pos)) != 0) {
 			FREE(ap, M_TEMP);
@@ -834,7 +905,7 @@
 
 int
 netbsd_elf_signature(struct lwp *l, struct exec_package *epp,
-    Elf_Ehdr *eh)
+    u_long file_offset, Elf_Ehdr *eh)
 {
 	size_t i;
 	Elf_Phdr *ph;
@@ -849,7 +920,8 @@
 
 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
 	ph = kmem_alloc(phsize, KM_SLEEP);
-	error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize);
+	error = exec_read_from(l, epp->ep_vp, file_offset + eh->e_phoff,
+	    ph, phsize);
 	if (error)
 		goto out;
 
@@ -863,8 +935,8 @@
 			continue;
 
 		np = kmem_alloc(ephp->p_filesz, KM_SLEEP);
-		error = exec_read_from(l, epp->ep_vp, ephp->p_offset, np,
-		    ephp->p_filesz);
+		error = exec_read_from(l, epp->ep_vp,
+		    file_offset + ephp->p_offset, np, ephp->p_filesz);
 		if (error)
 			goto next;
 
@@ -906,12 +978,14 @@
 }
 
 int
-netbsd_elf_probe(struct lwp *l, struct exec_package *epp, void *eh, char *itp,
-    vaddr_t *pos)
+netbsd_elf_probe(struct lwp *l, struct exec_package *epp, u_long file_offset,
+    void *eh, char *itp, vaddr_t *pos)
 {
 	int error;
 
-	if ((error = netbsd_elf_signature(l, epp, eh)) != 0)
+	DPRINTF(("elf probe\n"));
+
+	if ((error = netbsd_elf_signature(l, epp, file_offset, eh)) != 0)
 		return error;
 #ifdef ELF_INTERP_NON_RELOCATABLE
 	*pos = ELF_LINK_ADDR;
--- exec_conf.c.orig	2009-03-07 19:01:05.000000000 +1100
+++ exec_conf.c	2009-03-07 19:04:15.000000000 +1100
@@ -82,7 +82,7 @@
 #define	ELF64NAME2(x,y)	CONCAT(x,CONCAT(_elf64_,y))
 #ifdef EXEC_ELF32
 int ELF32NAME2(netbsd,probe)(struct lwp *, struct exec_package *,
-    void *, char *, vaddr_t *);
+    u_long, void *, char *, vaddr_t *);
 #endif
 #ifdef EXEC_ELF64
 int ELF64NAME2(netbsd,probe)(struct lwp *, struct exec_package *,