from constraint_grammar import *

from collections import defaultdict
from dataclasses import dataclass, field
import struct
from typing import DefaultDict, Dict, List, Optional

@dataclass
class Reading:
	'''A single reading: a lemma together with a list of tags.

	parse_binary_window() builds these from a binary window. A reading
	that is flagged as a subreading is attached to the reading parsed
	just before it (via `subreading`) rather than being listed on the
	cohort directly.'''
	# Lemma string (taken from the window's tag table by the parser).
	lemma: str = ''
	# Tag strings, in the order they were stored.
	tags: List[str] = field(default_factory=list)
	# Next reading in the subreading chain, or None if there is none.
	subreading: Optional['Reading'] = None
	# True when the reading carries the "deleted" flag (flag bit 2).
	deleted: bool = False

@dataclass
class Cohort:
	'''One cohort of a window, as filled in by parse_binary_window().'''
	# Lemma and tags stored on the cohort itself rather than on any of
	# its readings (presumably the wordform and static tags -- confirm).
	static: Reading = field(default_factory=Reading)
	# Top-level readings; subreadings hang off Reading.subreading.
	readings: List[Reading] = field(default_factory=list)
	# Dependency id of this cohort (unsigned 32-bit in the binary form).
	dep_self: int = 0
	# Dependency id of the parent; None when the binary form stores
	# the sentinel 0xffffffff.
	dep_parent: Optional[int] = None
	# Relation name -> list of target ids, in the order they were read.
	relations: DefaultDict[str, List[int]] = field(
		default_factory=lambda: defaultdict(list))
	# Free text stored with the cohort.
	text: str = ''
	# Second free-text field stored with the cohort
	# (presumably the word-bound blank -- confirm).
	wblank: str = ''

@dataclass
class Window:
	'''A parsed window: a sequence of cohorts plus window-level data.'''
	# Cohorts in stream order.
	cohorts: List[Cohort] = field(default_factory=list)
	# Variables to set: name -> value, or name -> None when the entry
	# carries no value (var mode 2 in the binary form).
	set_vars: Dict[str, Optional[str]] = field(default_factory=dict)
	# Names of variables to remove (var mode 3 in the binary form).
	rem_vars: List[str] = field(default_factory=list)
	# First of the two window-level text fields.
	text: str = ''
	# Second window-level text field, stored right after `text`.
	text_post: str = ''
	# Set from bit 1 of the window flags.
	dep_has_spanned: bool = False

@dataclass
class Packet:
	'''One item of a binary stream, as produced by parse_binary_stream().

	Only the field matching `type` is meaningful; the others keep
	their defaults.'''
	# Packet kind: 'window', 'command' or 'text'.
	type: str = ''
	# The parsed window when type == 'window'.
	window: Optional[Window] = None
	# Command name when type == 'command'
	# ('FLUSH', 'EXIT', 'IGNORE' or 'RESUME').
	command: str = ''
	# Decoded text when type == 'text'.
	text: str = ''

def parse_binary_window(buf):
	'''Decode a single window from the bytestring `buf` and return it
	as a Window() object. `buf` must not include the length header that
	precedes the window in a stream. For most applications you probably
	want parse_binary_stream() instead.

	All integers are little-endian. Strings are stored as a u16 byte
	length followed by that many bytes of UTF-8. Lemmas and tags are
	stored as u16 indexes into a string table at the start of the window.
	Raises struct.error if `buf` ends early and IndexError if an index
	points outside the string table.'''

	offset = 0

	def unpack(fmt):
		# Generic reader: decode `fmt` at the cursor and advance past it.
		nonlocal offset
		layout = '<' + fmt
		values = struct.unpack_from(layout, buf, offset)
		offset += struct.calcsize(layout)
		return values

	# These two get called often enough that pre-compiling them
	# gives us a ~25% speedup over calling unpack()
	u16 = struct.Struct('<H')
	u32 = struct.Struct('<I')

	def next_u16():
		nonlocal offset
		(value,) = u16.unpack_from(buf, offset)
		offset += 2
		return value

	def next_u32():
		nonlocal offset
		(value,) = u32.unpack_from(buf, offset)
		offset += 4
		return value

	def next_str():
		size = next_u16()
		if not size:
			return ''
		(raw,) = unpack(f'{size}s')
		return raw.decode('utf-8')

	window = Window()
	# Bit 1 of the window flags is the only one interpreted here.
	window.dep_has_spanned = bool(next_u16() & 1)

	# String table that every later lemma/tag index refers to.
	n_tags = next_u16()
	tags = [next_str() for _ in range(n_tags)]

	def next_tags():
		# A u16 count followed by that many u16 string-table indexes.
		count = next_u16()
		if not count:
			return []
		return [tags[index] for index in unpack(f'{count}H')]

	n_vars = next_u16()
	for _ in range(n_vars):
		mode = unpack('B')[0]
		name_idx = next_u16()
		# The value index is always present, even for modes that ignore it.
		value_idx = next_u16()
		if mode in (1, 2):
			window.set_vars[tags[name_idx]] = (
				tags[value_idx] if mode == 1 else None)
		elif mode == 3:
			window.rem_vars.append(tags[name_idx])

	window.text = next_str()
	window.text_post = next_str()

	n_cohorts = next_u16()
	for _ in range(n_cohorts):
		next_u16()  # cohort flags: must be consumed, but are not used
		cohort = Cohort(
			static=Reading(lemma=tags[next_u16()], tags=next_tags()))
		cohort.dep_self = next_u32()
		parent = next_u32()
		# 0xffffffff is the sentinel for "no parent".
		cohort.dep_parent = None if parent == 0xffffffff else parent

		n_relations = next_u16()
		for _ in range(n_relations):
			rel_name = tags[next_u16()]
			cohort.relations[rel_name].append(next_u32())

		cohort.text = next_str()
		cohort.wblank = next_str()

		n_readings = next_u16()
		previous = None
		for _ in range(n_readings):
			flags = next_u16()
			reading = Reading(
				lemma=tags[next_u16()],
				tags=next_tags(),
				deleted=bool(flags & 2))
			# Flag bit 1 marks a subreading of the reading just before
			# it; if there is no such reading it is kept as a normal one.
			if previous is None or not flags & 1:
				cohort.readings.append(reading)
			else:
				previous.subreading = reading
			previous = reading

		window.cohorts.append(cohort)
	return window

def parse_binary_stream(fin, windows_only=False):
	'''Given a binary file `fin`, yield a series of Packet() objects.
	If `windows_only` is True, packets containing commands or text
	are skipped and Window() objects are yielded instead.

	Raises ValueError if the stream header is missing, truncated or
	invalid. Because this is a generator, the error surfaces when the
	first item is requested, not when the function is called.

	The stream is an 8-byte header (b'CGBF' plus a u32 version, which
	must be 1) followed by packets, each starting with a one-byte type:
	1 = window (u32 length + window data), 2 = command (one byte),
	3 = text (u16 length + UTF-8 bytes). Integers are little-endian.
	Packets with any other type byte are skipped, as are commands with
	an unknown code. Reading stops quietly at end of file or at a
	truncated packet.'''

	header = fin.read(8)
	# A short read would otherwise surface as struct.error rather than
	# the documented ValueError.
	if len(header) != 8:
		raise ValueError('Binary format header not found!')
	label, version = struct.unpack('<4sI', header)
	if label != b'CGBF':
		raise ValueError('Binary format header not found!')
	if version != 1:
		raise ValueError('Unknown binary format version!')

	command_names = {1: 'FLUSH', 2: 'EXIT', 3: 'IGNORE', 4: 'RESUME'}

	while True:
		ptype = fin.read(1)
		if len(ptype) != 1:
			break
		kind = ptype[0]
		if kind == 1:
			spec = fin.read(4)
			if len(spec) != 4:
				break
			block_len = struct.unpack('<I', spec)[0]
			block = fin.read(block_len)
			if len(block) != block_len:
				break
			window = parse_binary_window(block)
			if windows_only:
				yield window
			else:
				yield Packet(type='window', window=window)
		elif kind == 2:
			cmd = fin.read(1)
			if len(cmd) != 1:
				break
			name = command_names.get(cmd[0])
			if windows_only or name is None:
				continue
			yield Packet(type='command', command=name)
		elif kind == 3:
			lbuf = fin.read(2)
			if len(lbuf) != 2:
				break
			# The length prefix is two bytes, so it has to be decoded as
			# u16; '<I' needs four bytes and failed on every text packet.
			# NOTE(review): assumes the two-byte read is the correct half
			# of the old mismatch -- confirm against the format writer.
			ln = struct.unpack('<H', lbuf)[0]
			raw = fin.read(ln)
			# Treat a truncated text packet like a truncated window.
			if len(raw) != ln:
				break
			if not windows_only:
				yield Packet(type='text', text=raw.decode('utf-8'))
		# Any other packet type is skipped.