#! /usr/bin/env python

# This is parsemarks version 1.5

import sys, os.path, getopt, re

# Usage/help text.  printusage() prints it right after "usage: <scriptname> ",
# so the first line below is the option synopsis and the remainder is the
# long description.
# NOTE(review): the option string handed to parseargs() also accepts
# "-t <int>", which restricts parsing to the marker with that CLASSGROUPID;
# that option is not described in this text.
__usage = """[-m marker...] [-l] [-c classmark...] [-q query] dataset

Extract the marks from the marker file associated with dataset and print
them in a useful format.

With -m, the specified mark is extracted, otherwise all of them are. More
than one mark may be specified, by listing them in quotes, like this:
	-m 'm1 m2'

With -l, the marks are labeled in the output. Use this for debugging, but
don't include it when you make the final output, which will be input to
addMarker -p.

When -c is used, the marks are assumed to identify the class of a trial.
Usually you will have one of these marks per trial (probably right at the
start). In this case, only those marks with the given class are printed.

When -q is used, the query is treated as a boolean expression; only marks
for which the expression is true are printed. The variables trial, t, name,
and n may be used in the expression, where n is the event number, and the
other variables have the expected meaning. In addition, if any of trial, t,
or name are specified with an array index, it refers to a different event,
rather than the current one. For example, name[n+1] is the name of the next
event, and t[n] is the same as just t.

One common construction is the following:
    parsemarks -m 'stim resp' \\
	-q "name == 'resp' and name[n-1] == 'stim' and t < t[n-1] + .5" $ds
which prints resp marks that are less than .5 sec after the stim. To make
this a little simpler, the "inwindow()" function is available:
    parsemarks -m 'stim resp' -q "inwindow(-.5, 0, 'stim')" $ds

In detail, inwindow(t0, t1, mark) is true when 'mark' is in the window
[t0, t1] relative to the current event. However, 'mark' itself is never
considered, even if it is in the window. This means the example above
is equivalent to:
    parsemarks -m 'stim resp' \\
	-q "name == 'resp' and inwindow(-.5, 0, 'stim')" $ds
Remember that the window is relative to 'resp' here, not 'stim'.

As a final example, to recode marks from an optical sensor, use:
    parsemarks -m 'bar cond' -q "inwindow(-.1, 0, 'cond')" $ds
"""

# Name this script was invoked as, with any directory part stripped.  It is
# the prefix of every error and usage message.
__scriptname = os.path.basename(sys.argv[0])

def printerror(s):
	"""Write the message s to stderr as "<scriptname>: <s>" plus a newline.

	s is formatted with %s, so it may be a string or any other object
	(an exception instance, for example).
	"""
	line = "%s: %s\n" % (__scriptname, s)
	sys.stderr.write(line)

def printusage():
	"""Write "usage: <scriptname> <usage text>" plus a newline to stderr."""
	text = "usage: %s %s\n" % (__scriptname, __usage)
	sys.stderr.write(text)

def parseargs(opt, argv = None):
	"""Parse command-line options with getopt.

	opt is a getopt short-option string (for example "m:lq:").  argv is
	the list of arguments to parse; when it is omitted, sys.argv[1:] is
	used.

	Returns the (optlist, args) pair produced by getopt.getopt().  If an
	option is unknown or is missing its argument, the getopt error and
	the usage text are written to stderr and the script exits with
	status 1.
	"""
	if argv is None:
		argv = sys.argv[1:]
	try:
		return getopt.getopt(argv, opt)
	except getopt.GetoptError:
		# Only getopt's own parse errors are a usage problem; anything
		# else should surface as a real traceback.  sys.exc_info() is
		# used instead of "except X, msg" / "except X as msg" so this
		# parses on every Python version.
		printerror(sys.exc_info()[1])
		printusage()
		sys.exit(1)

# Parse the command line.  Besides the options described in the usage text,
# -t <int> restricts parsing to the marker with that CLASSGROUPID.

optlist, args = parseargs("m:t:lc:q:")

marker = []     # mark names given with -m; empty means "all marks"
classid = None  # CLASSGROUPID given with -t, or None to accept any
labelit = 0     # set by -l: print the mark name on each output line
classmark = []  # mark names given with -c
query = None    # expression given with -q
tcdict = {}     # indexed by trial yielding the name of a mark

for opt, arg in optlist:
	if opt == '-m':
		marker = arg.split()
	elif opt == '-t':
		# A non-numeric id is a usage error; report it like one
		# instead of dying with a ValueError traceback.
		try:
			classid = int(arg)
		except ValueError:
			printerror("-t requires an integer, not %s" % repr(arg))
			printusage()
			sys.exit(1)
	elif opt == '-l':
		labelit = 1
	elif opt == '-q':
		query = arg
	elif opt == '-c':
		classmark = arg.split()

# Exactly one positional argument, the dataset directory, is required.

if len(args) != 1:
	printusage()
	sys.exit(1)

dsname = args[0]
filename = dsname + '/MarkerFile.mrk'

# Make sure the marker file can be opened before handing its name to sed,
# so that a missing or unreadable dataset produces a clear message.

try:
	f = open(filename)
except IOError:
	# Only I/O failures are expected here.  A bare "except:" would
	# also swallow KeyboardInterrupt and SystemExit.
	printerror("error opening %s" % filename)
	sys.exit(1)
f.close()

marks = []      # (trial, t, name) tuples, appended by get_samples()

# This processes the samples for a marker.

def get_samples(f, name, num):
	"""Consume the num sample lines of the marker called name from f.

	f is the line iterator over the pre-parsed marker file, positioned
	at the first sample line.  Each sample line starts with a trial
	number followed by a time.

	What happens to the samples depends on the marker name:
	  - if name is one of the -c class marks, tcdict[trial] is set to
	    name for every sample (this takes precedence over -m);
	  - otherwise, if no -m list was given or name is in it, the tuple
	    (trial, t, name) is appended to marks;
	  - otherwise the lines are read and discarded without parsing.
	"""
	is_class = name in classmark
	wanted = is_class or not marker or name in marker
	for _ in xrange(num):
		row = f.next()
		if not wanted:
			# just skip them
			continue
		fields = row.split()
		trial = int(fields[0])
		t = float(fields[1])
		if is_class:
			tcdict[trial] = name
		else:
			marks.append((trial, t, name))

# Pre-parse the marker file with this sed script.
# It folds the line after a : up onto the same line.
#
# Script, command by command:
#   /:/{ N; s/\n/ / }   for every line containing a colon, append the next
#                       input line and replace the newline between the two
#                       with a space, so "KEY:" and its value share a line
#   /^$/d               drop empty lines

sedscript = r"""/:/{
N
s/\n/ /
}
/^$/d
"""

from subprocess import Popen, PIPE

# "-f -" makes sed read its script from standard input; the marker file is
# passed by name.  The script is written and stdin closed before any output
# is read.
# NOTE(review): in the code shown the child is never waited on and its exit
# status is not checked; p is later rebound to a compiled regex.

sedcmd = ['sed', '-f', '-', filename]
p = Popen(sedcmd, stdin = PIPE, stdout = PIPE)
p.stdin.write(sedscript)
p.stdin.close()
f = p.stdout    # folded marker file, consumed line by line below

# Look at each line of the folded marker file with a three-state scanner:
#   START  waiting for a CLASSGROUPID line (one that matches -t, if given)
#   MARK   waiting for the NAME line of that marker
#   NUM    waiting for its NUMBER OF SAMPLES line, after which the samples
#          themselves are handed to get_samples()

START, MARK, NUM = 1, 2, 3
state = START

for line in f:
	fields = line.split(':')
	key = fields[0]
	if state == START:
		if key == 'CLASSGROUPID' and (classid is None or int(fields[1]) == classid):
			state = MARK
	elif state == MARK:
		if key == 'NAME':
			name = fields[1].strip()
			state = NUM
	elif state == NUM:
		if key == 'NUMBER OF SAMPLES':
			num = int(fields[1])
			# Skip the line that follows the sample count; the
			# samples start on the line after it.
			f.next()
			get_samples(f, name, num)
			state = START

# If -c was used, keep only the marks whose trial was labeled (in tcdict)
# with one of the given class marks.

if classmark:
	def inclass(m, c = classmark):
		"""True if the trial of mark m carries one of the -c class marks."""
		label = tcdict.get(m[0])
		return label in classmark
	marks = [m for m in marks if inclass(m)]

# Sort the remaining marks by trial and time.

def cmp(m1, m2):
	"""Compare two (trial, t, name) marks by trial, then by time.

	Returns the integer trial difference when the trials differ,
	otherwise -1, 0 or 1 according to the times.  The name is ignored.
	Note that this definition hides Python 2's builtin cmp().
	"""
	if m1[0] == m2[0]:              # same trial: order by time
		return (m1[1] > m2[1]) - (m1[1] < m2[1])
	return int(m1[0] - m2[0])

# Order by (trial, time).  This yields the same ordering as sorting with
# cmp(), which compares the trial first and the time second.
marks.sort(key = lambda m: (m[0], m[1]))

# Objects of this class hold one event.  We'll make an array of them
# so that queries can reference the entire thing.

class Event:
	"""One mark, with its fields available as attributes.

	Built from a (trial, t, name) sequence; the attributes trial, t and
	name hold those three values unchanged.
	"""
	def __init__(self, mark):
		self.trial, self.t, self.name = mark

# Pair every mark with its position in the sorted list (that position is
# the event number n used by queries), and build the parallel list of
# Event objects.

nmarks = list(enumerate(marks))
ev = [Event(m) for m in marks]

if not ev:
	printerror("no marks found!")
	sys.exit(1)

# Index the start of each trial for faster searching: trial_start[k] is the
# position in ev of the first event of trial k.  The events are sorted by
# trial, so the last one carries the largest trial number.  Entries for
# trials that have no events keep their initial 0.

trial_start = [0] * (ev[-1].trial + 1)
current = -1
for position, event in enumerate(ev):
	if event.trial != current:
		current = event.trial
		trial_start[current] = position

# Raised by doidx() when a query indexes outside the event list.  This
# replaces the string exception 'evidx': string exceptions stopped working
# in Python 2.6 (the raise itself fails with a TypeError), which turned every
# out-of-range reference such as name[n-1] on the first event into a
# spurious "query failed" warning.

class EventIndexError(IndexError):
	pass

# This is used to index the event list.

def doidx(x):
	"""Return x if it is a valid index into ev, else raise EventIndexError.

	Negative values are rejected as well, so that name[n-1] on the first
	event does not wrap around to the last event.
	"""
	if x < 0 or x >= len(ev):
		raise EventIndexError(x)
	return x

# Helper function to determine if a time window around the current
# event contains the specified mark.  The times are with respect to
# the current event!  We don't cross trial boundaries, either.

def inwindow(t0, t1, mark):
	"""Tell whether an event named mark lies in a window around event n.

	t0 and t1 are offsets added to the time of the current event ev[n];
	the window is [t + t0, t + t1] and only events of the same trial are
	examined.  Returns 1 if such an event exists, else 0.  Always returns
	0 when the current event is itself named mark.  If t1 <= t0 an error
	is printed and the script exits with status 1.
	"""
	global n        # set by the query filter before each evaluation

	# Make sure the arguments are sane.

	if t1 <= t0:
		printerror("inwindow() times out of order")
		sys.exit(1)

	# Ignore instances of the specified mark.

	if ev[n].name == mark:
		return 0

	# Now get the range, relative to the current mark.

	now = ev[n].t
	start = now + t0
	end = now + t1
	trial = ev[n].trial

	# Search for the mark in the given time window of this trial.  The
	# events of a trial are sorted by time, so the scan can stop at the
	# first one past the end of the window.

	i = trial_start[trial]
	while i < len(ev) and ev[i].t <= end and ev[i].trial == trial:
		if ev[i].t >= start and ev[i].name == mark:
			return 1
		i += 1
	return 0

# If a query was given, treat it as a python boolean expression and use it
# to filter the marks.

if query:
	# Mangle the query slightly: if the user said "name[idx]" then
	# rewrite it as "ev[doidx(idx)].name", for name, trial, and t.
	# The leading \b keeps the rewrite from firing inside a longer
	# identifier that merely ends in one of those names.

	p = re.compile(r"\b(?P<var>name|trial|t)\[(?P<idx>[^\]]+)\]")
	def rpl(m):
		return "ev[doidx(%s)].%s" % (m.group('idx'), m.group('var'))
	q = p.sub(rpl, query)

	# Now filter the marks with the mangled query.

	def fn(x, expr = q):
		"""Evaluate the query for one numbered mark x = (n, (trial, t, name)).

		Returns the value of the expression, or 0 when it refers to
		an event outside the list or fails to evaluate.
		"""
		# n is published as a global so that inwindow() and the
		# rewritten index expressions can see the current event.
		global n
		(n, (trial, t, name)) = x
		try:
			# The expression is the user's own -q argument; it is
			# evaluated as arbitrary Python.
			res = eval(expr)
		except EventIndexError:
			# e.g. name[n-1] on the first event: not a match.
			return 0
		except (KeyboardInterrupt, SystemExit):
			# Let ^C and inwindow()'s sys.exit() end the script
			# rather than being reported as a failed query.
			raise
		except Exception:
			printerror('warning: query failed on %s' % repr(x))
			return 0
		return res
	nmarks = [x for x in nmarks if fn(x)]

# Print out whatever is left, one mark per line: the trial and the time
# separated by a space, preceded by the mark name when -l was given.

for (n, (trial, t, name)) in nmarks:
	columns = [trial, t]
	if labelit:
		columns.insert(0, name)
	sys.stdout.write(" ".join([str(c) for c in columns]) + "\n")