import sys, math, glob
import numpy as np
from scipy.interpolate import interp1d
#from scipy.stats import pearsonr
from itertools import combinations,izip
from collections import defaultdict

# ---------------------------------------------------------------------------
# Load the tab-separated features table named by the first CLI argument.
#
# The first line is the header.  For the header and for every data row the
# first two columns and the last column are discarded ([2:-1]); the remaining
# data cells are parsed as floats.  Rows containing the text 'undefined' are
# skipped entirely.
#
# While reading, the smallest 'pmin' and largest 'pmax' values are tracked.
# ---------------------------------------------------------------------------
var_names = []
observations = []
features_txt = open(sys.argv[1])
# Opened here, consumed by the amplitude passes further down the script.
amps_out = open(sys.argv[2])

start = True

# NOTE(review): these seeds also act as bounds -- pmax can never end up below
# 0.0 and pmin can never end up above 10000.0.  Confirm that is intended.
pmax = 0.0
pmin = 10000.0

# Column positions of 'pmax' / 'pmin'; resolved once, on the first data row,
# instead of searching var_names again for every row.
pmax_col = None
pmin_col = None

# The `with` guarantees the features file is closed once it has been read.
with features_txt:
    for l in features_txt:
        if start:
            start = False
            var_names = l.strip().split('\t')[2:-1]
            continue
        if 'undefined' in l:
            continue
        row = [float(x) for x in l.split('\t')[2:-1]]
        observations.append(row)
        if pmax_col is None:
            # Raises ValueError if the header lacks either column.
            pmax_col = var_names.index('pmax')
            pmin_col = var_names.index('pmin')
        cur_pmax = row[pmax_col]
        cur_pmin = row[pmin_col]
        if cur_pmax >= pmax:
            pmax = cur_pmax
        if cur_pmin <= pmin:
            pmin = cur_pmin
    

#observations = observations[1:]
#print '\t'.join(var_names)
#print '\n'.join(['\t'.join([str(y) for y in x]) for x in observations])
#exit(0)



# ---------------------------------------------------------------------------
# Shared state for the second half of the script, plus a first pass over the
# amplitude file to collect global statistics of the log-amplitudes.
# ---------------------------------------------------------------------------
start = True          # header flag for the second pass over the amplitude file
indices = []
new_obs = []
d = defaultdict(list)
others = {}
all_amps = []

# First pass: natural log of the second tab-separated column of every line
# except the first 100 (indices 0..99), which are skipped.
# NOTE(review): presumably the header plus a warm-up period -- confirm.
# The `with` closes this handle before the file is reopened below; previously
# the handle was simply dropped when amps_out was rebound.
with amps_out:
    for i, l in enumerate(amps_out):
        if i < 100:
            continue
        all_amps.append(math.log(float(l.strip().split('\t')[1])))

arr = np.array(all_amps)
mmax = np.max(arr)
mmin = np.min(arr)
# Linear map from [mmin, mmax] onto [0, 1] for log-amplitudes.
m = interp1d([mmin, mmax], [0, 1])

# Linear map from [pmin, pmax] onto [0, 1].
p_interp = interp1d([pmin, pmax], [0, 1])
amps_mean = np.mean(arr)
amps_std = np.std(arr)

# Fresh handle for the second pass, which starts again from the first line.
amps_out = open(sys.argv[2])


# ---------------------------------------------------------------------------
# Second pass over the amplitude file: attach each sample to every observation
# whose [obs[0], obs[1]] interval contains the sample's time stamp.
#
# NOTE(review): samples are grouped under obs[0], so observations that share
# the same first column also share one sample list -- confirm that start
# values are unique.
# ---------------------------------------------------------------------------
with amps_out:  # close the file once the pass is complete
    for l in amps_out:
        if start:
            # The first line is the header; remember its column names.
            start = False
            indices = l.split()
            continue
        time = float(l.split()[2])

        # The sample value does not depend on the observation, so compute it
        # once per line rather than once per (line, observation) pair.
        # Alternatives that were tried:
        #   float(l.split('\t')[3])              -- zscore of log amp
        #   math.log(float(l.split('\t')[1]))    -- raw log amp
        #   float(m(float(l.split()[1])))        -- linear interpolation 0..1
        try:
            # convert amps to z-scores of the log-amplitude
            val = (math.log(float(l.split('\t')[1])) - amps_mean) / amps_std
        except (ValueError, IndexError, ArithmeticError):
            # Missing column, unparsable number, or log() domain error:
            # keep going with a fallback value, as before.
            sys.stderr.write('ERROR getting MA\n')
            val = 1.0

        for obs in observations:
            if obs[0] <= time <= obs[1]:
                d[obs[0]].append(val)
         

# ---------------------------------------------------------------------------
# Extend every observation with scaled pmin/pmax, their difference, and
# summary statistics of the amplitude samples collected for it, then write
# the resulting table to stdout as tab-separated text.
# ---------------------------------------------------------------------------
var_names.extend(['s_pmin', 's_pmax', 'prange',
                  'MAmean', 'MAmin', 'MAmax', 'MAstd'])

for obs in observations:
    # Map pmin / pmax onto [0, 1] with the interpolator built earlier.
    s_lo = p_interp(obs[var_names.index('pmin')])
    s_hi = p_interp(obs[var_names.index('pmax')])

    window = np.array(d[obs[0]])
    if window.size == 0:
        # No amplitude sample was collected for this observation: drop it.
        continue

    extras = [
        s_lo,
        s_hi,
        s_hi - s_lo,
        np.mean(window),
        np.min(window),
        np.max(window),
        np.std(window),
    ]
    new_obs.append(obs + extras)

# Header line followed by one line per retained observation.
sys.stdout.write('\t'.join(var_names) + '\n')
for record in new_obs:
    cells = [str(value) for value in record]
    sys.stdout.write('\t'.join(cells) + '\n')
