from constraint_grammar import *
from collections import defaultdict
from dataclasses import dataclass, field
import struct
from typing import DefaultDict, Dict, List, Optional


# Plain data record for one reading: a lemma string and a list of tag strings.
# All fields have defaults, so Reading() yields an empty record.
@dataclass
class Reading:
    lemma: str = ''
    # default_factory gives every instance its own list (no shared default).
    tags: List[str] = field(default_factory=list)
    # Optional nested Reading; None when there is no further reading chained.
    subreading: Optional['Reading'] = None
    # NOTE(review): presumably flags a reading that was removed rather than
    # kept as active -- confirm against the code that fills this in.
    deleted: bool = False


# Plain data record for one cohort: a `static` Reading, a list of further
# Readings, two integer dependency fields, named relations and two strings.
@dataclass
class Cohort:
    # A single Reading stored separately from the `readings` list.
    static: Reading = field(default_factory=Reading)
    readings: List[Reading] = field(default_factory=list)
    # NOTE(review): dep_self / dep_parent look like dependency-tree ids
    # (own id and parent id, None when there is no parent) -- confirm.
    dep_self: int = 0
    dep_parent: Optional[int] = None
    # Maps a relation name to a list of ints. Being a defaultdict(list),
    # looking up a missing name creates and returns a fresh empty list.
    relations: DefaultDict[str, List[int]] = field(
        default_factory=lambda: defaultdict(list))
    text: str = ''
    # NOTE(review): presumably a word-bound blank attached to the cohort --
    # confirm against the stream format.
    wblank: str = ''


# Plain data record for one window: an ordered list of Cohorts plus
# variable bookkeeping and surrounding text.
@dataclass
class Window:
    cohorts: List[Cohort] = field(default_factory=list)
    # Variable name -> value, where the value may be None.
    set_vars: Dict[str, Optional[str]] = field(default_factory=dict)
    # List of variable names.
    # NOTE(review): presumably the variables to unset -- confirm.
    rem_vars: List[str] = field(default_factory=list)
    text: str = ''
    text_post: str = ''
    dep_has_spanned: bool = False


# Plain data record for one packet: a `type` string together with an
# optional Window, a command string and a text string.
# NOTE(review): presumably only the field matching `type` is populated
# for a given packet -- confirm against the code that builds Packets.
@dataclass
class Packet:
    type: str = ''
    window: Optional[Window] = None
    command: str = ''
    text: str = ''


def parse_binary_window(buf):
    '''Given a bytestring `buf` containing a single window
    (not including the length header), parse and return a
    Window() object. For most applications you probbaly want
    parse_binary_stream() instead.'''

    pos = 0

    def read_pat(pat):
        nonlocal pos, buf
        ret = struct.unpack_from('<'+pat, buf, pos)
        pos += struct.calcsize('<'+pat)
        return ret

    # These two get called often enough that pre-compiling them
    # gives us a ~25% speedup over calling read_pat()
    u16_reader = struct.Struct('