#!/usr/bin/env python # -*- coding: utf8 -*- #---------------------------------------------------------------------------- # Created By : vloegler # Last Modification Date: 2022/03/03 # --------------------------------------------------------------------------- ''' This script runs Genome Wide Association Study using FaST-LMM. It performs association and permutation test to output significant SNPs. It takes as input : -g --genotype: the genotype matrix (Plink format) -p --phenotype: the phenotypes, in format : Strain Cond1 Cond2 Cond3 StrainXXX 0.02 0.98 0.14 StrainXXX 0.12 0.52 0.65 -c --covariance: covariance matrix (optional) -o --output: prefix of output files -n --nbPermutations: number of permutations for the permutation test Output will be: prefix_first_assoc.txt : FaST-LMM association results prefix_condition_threshold.txt: Value of the threshold prefix_condition_signif_snps: FaST-LMM results for SNPs above the threshold ''' # --------------------------------------------------------------------------- import numpy as np import pandas as pd import time from sys import argv from fastlmm.association import single_snp from pysnptools.snpreader import Bed, SnpData from pysnptools.util.mapreduce1.runner import LocalMultiProc import logging import argparse # --------------------------------------------------------------------------- logging.basicConfig(level=logging.ERROR) # Number of proc = 3 runner=LocalMultiProc(3) # ============= # Get arguments # ============= # Initiate the parser parser = argparse.ArgumentParser() parser.add_argument("-g", "--genotype", help="genotype matrix in Plink format", required=True) parser.add_argument("-p", "--phenotype", help="phenotypes data", required=True) parser.add_argument("-o", "--output", help="prefix of the output files", required=True) parser.add_argument("-c", "--covariance", help="covariance matrix (optional)", type=str, default="") parser.add_argument("-n", "--nbPermutations", help="number of permutations for the permutation test (defult = 100)", type=int, default=100) # Read arguments from the command line args = parser.parse_args() # variant matrix in PLINK format bed_fn = args.genotype # Phenotypes in format # strain Cond1 Cond2 Cond3 # StrainXXX 0.02 0.98 0.14 # StrainXXX 0.12 0.52 0.65 pheno_fn = args.phenotype # Covariance matrix in format # Strain Strain Covariant1 Covariant2 ... covar_fn = args.covariance # Prefix of output files outdir = args.output # Number of permutations nperm = args.nbPermutations # ===================== # Run First Association # ===================== # Read phenotypes df=pd.read_csv(pheno_fn,sep="\t",index_col=0) # Get phenotype file header (condition names) headers = np.array(df.columns) # Get phenotype values values = df.values # Get list of strains strainslist=np.c_[np.array(df.index)] strainslist_2el=np.append(strainslist,strainslist,axis=1).astype('