Memelang is an AI-optimized query language that significantly reduces token count and model size for LLM text-to-SQL. The code below is designed to be copied and pasted into your LLM.
arXiv Paper · GitHub Repo · Patent Spec
Copy all code
# info@memelang.net | (c)2026 HOLTWORK LLC | Patented
# MEMELANG is a terse query DSL IR for LLM text-to-SQL
# Axial grammar: Axis2 -> Axis1 -> Axis0 -> Cell
# Whitespaces are syntactic and trigger "new Cell"
# Never space between operator/comparator/comma/flag and values
MEMELANG_VER = 11.04  # version of the Memelang spec implemented below
# EBNF-style sketch of a single cell's grammar; appears unused by the
# parser below (presumably prompt material for the LLM) — TODO confirm
basic_syntax = '[table WS] [column WS] [":$" var][":" ("min"|"max"|"cnt"|"sum"|"avg"|"last"|"grp")] [":" ("asc"|"des")] ["<=>" "\"" string "\""] [("="|"!="|">"|"<"|">="|"<="|"~"|"!~") (string|int|float|("$" var)|"@"|"_")] ";"'
# Worked schema + query examples (%mode=tab declarations, then %mode=qry
# queries with """natural-language""" comments). Not referenced by the
# code below; presumably few-shot prompt material — TODO confirm.
examples = '''
%mode=tab;
roles id :int>0; rating :DESC="Decimal 0-5 star rating of performance";:dec>0.0;<=5; actor :DESC="Actor's full name";:str; movie :DESC="Movie's full name";:str; character :DESC="Character's full name";:str;;
actors id :int>0; name :DESC="Actor's full name";:str; age :DESC="Actor's age in years";:int>=0;<200;;
movies id :int>0; description :DESC="Brief description of movie plot";:str; year :DESC="Year of production AD";:int>1800;<2100; genre scifi,drama,comedy,documentary;:str; title :DESC="Full movie title";:str;;
actors name _; roles actor @;;
movies title _; roles movie @;;
roles id :gct=1;;
roles movie :grp; actor :grp; character :gct=1;;
actors id :gct=1;;
movies id :gct=1;;
%mode=qry;
""" All movies """
movies _ _;;
""" Every role """
roles _ _;;
""" Titles and descriptions for movies """
movies title _; description _;;
""" Actor name and ages """
actors name _; age _;;
""" Actors age 41 years or older """
actors age >=41; _;;
""" Role 567 and 8901 """
roles id 567,8901; _;;
""" Films with dystopian society narratives sim>.33 """
movies description <=>"dystopian"<0.33; _;;
""" Movies titled with Star released in 1977 or 1980 """
movies title ~"Star"; year 1977,1980; _;;
""" Actors named like Ana aged 20 to 35 inclusive """
actors name ~"Ana"; age >=20;<=35; _;;
""" Roles rated below 1.5 for movies before 1980 """
movies year <1980; title _; roles movie @; rating <1.5; _;;
""" Roles sort rating descending, movie descending """
roles rating :des; movie :des;;
""" All movies before 1970 ordered by year ascending """
movies year :asc<1970; _;;
""" Average performer rating at least 4.2 """
roles rating :avg>=4.2; actor :grp;;
""" Minimum role rating by actor, low to high """
roles rating :min:asc; actor :grp;;
""" Roles in movies mentioning robot rated 3+ """
movies description <=>"robot"<=$sim; title _; roles movie @; rating >=3;;
""" Costars seen with Bruce Willis or Uma Thurman """
roles actor :$a~"Bruce Willis","Uma Thurman"; movie _;@ @ @; actor !$a;;
""" War stories before 1980: top 12 movies by minimum role rating """
movies year <1980; description <=>"war"<=$sim; title :grp; roles movie @; rating :min:des;%beg=0;%lim=12;;
""" Roles for movies Hero or House of Flying Daggers where actor name includes Li, actor A-Z """
movies title "Hero","House of Flying Daggers"; roles movie @; actor :asc~"Li";;
""" Titles containing Here about robots between 1900 and 2000 """
movies title ~"Hero"; description <=>"robot"; year >=1900; <=2000;;
%tab=movies; #%val; %col=title; ~"Hero"; %col=description; <=>"robot"; %col=year; >=1900; <=2000;;
%tab=movies; #title #description #year; ~"Hero" <=>"robot" >=1900; <=2000;;
%tab=movies; #title; ~"Hero"; #description; <=>"robot"; #year; >=1900; <=2000;;
#%tab #%val; movies :#title~"Hero"; :#description<=>"robot"; :#year>=1900; <=2000;;
'''
import re, sys, json
from typing import Optional, Union, List, Iterator, Pattern, Any
Err = SyntaxError  # all parse/build failures below raise SyntaxError

### SYNTAX ###
# Token table for one Cell; alternatives are tried in this order, first
# match wins (e.g. 'BIND' :$x must precede 'FLAG' :x).
CELL_PATTERN = (
    ('QUO', r'"(?:[^"\\\n\r]|\\.)*"'),                  # double-quoted string with \-escapes
    ('EMB', r'\[(?:-?\d+(?:\.\d+)?)(?:\s*,\s*-?\d+(?:\.\d+)?)*\]'),  # numeric vector literal [1,2.5,...]
    ('MOD', r'<->|<=>|<#>'),                            # vector-distance operators (rendered with ::VECTOR casts)
    ('CMP', r'>=|<=|!~|!=|=|>|<|~|!'),                  # comparators; bare '!' canonicalized via CANON
    ('BIND', r':\$\w+'),                                # variable-binding flag, e.g. :$a
    ('FLAG', r':[a-zA-Z]+'),                            # keyword flag, e.g. :min :asc :grp
    ('VAR', r'\$\w+'),                                  # variable reference, e.g. $a
    ('REL', r'@\d?|\^'),                                # relative cell reference (resolved by Axis2.rect)
    ('WLD', r'_'),                                      # wildcard value
    ('EVAR', r'%[a-zA-Z0-9_]+'),                        # environment variable, e.g. %mode %tab %lim
    ('SLOT', r'#%?[a-zA-Z0-9_]+'),                      # slot header, e.g. #title or #%tab
    ('ASSN', r':#[a-zA-Z0-9_]+'),                       # column-assignment flag, e.g. :#title
    ('TIM', r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'),    # ISO-8601 timestamp literal
    ('DEC', r'-?\d*\.\d+'),                             # decimal literal
    ('INT', r'-?\d+'),                                  # integer literal
    ('ALN', r'[A-Za-z][A-Za-z0-9_]*'),                  # bare identifier/word
    ('OR', r','),                                       # OR-join between right-hand values
    ('WS', r'\s+'),                                     # whitespace (skipped by Cell)
    ('MISMATCH', r'.'),                                 # anything else -> E_TOK
)
CANON = {'!':'!='}  # shorthand -> canonical comparator spelling
CELL_REGEX=re.compile("|".join(f"(?P<{k}>{p})" for k, p in CELL_PATTERN))
PAD_MODES = {'qry','tab'}  # modes whose Axis0 rows get left-padded to slot width
FLAG_KINDS = {'FLAG','BIND','EVAR','ASSN'}      # token kinds parsed into Cell.flag
LIT_KINDS = {'TIM','DEC','INT','ALN','QUO','EMB'}  # concrete literal tokens
VAR_KINDS = {'VAR','WLD','REL','EVAR','SLOT'}      # placeholder/reference tokens
DAT_KINDS = LIT_KINDS | VAR_KINDS               # anything legal as a value term
# Relative-reference offset paths, applied against the current
# [axis2, axis1, axis0] coordinate by Axis2.coordrel. Entries are
# right-aligned (missing leading entries pad with '+0'); 'end' means the
# last element of that axis.
RELCOORD = {
    '@0': ['-1','-1'],
    '@1': ['-1','-2'],
    '@2': ['-1','-3'],
    '@3': ['-1','-4'],
    '@4': ['-1','-5'],
    '@' : ['-1','+0'],
    '^' : ['-1','end','+0'],
}
# Atomic token
class Tok:
    """Smallest lexical unit of a Memelang cell.

    Attributes:
        kind:  token class name from CELL_PATTERN (e.g. 'QUO', 'INT', 'CMP').
        src:   raw matched source text (str(tok)).
        canon: canonical spelling; CANON rewrites shorthand ('!' -> '!='),
               and repr(tok) / equality use this form.
        dat:   parsed Python value for QUO/EMB/DEC/INT tokens, else raw text.
    """
    def __init__(self, kind: str, src: str, canon: Optional[str] = None):
        self.kind = kind
        self.src = src
        canon = src if canon is None else canon
        self.canon = CANON.get(canon) or canon
        # QUO/EMB are valid JSON; DEC/INT parse to numbers; others stay raw text
        parser = {'QUO': json.loads, 'EMB': json.loads, 'DEC': float, 'INT': int}.get(kind)
        self.dat = parser(src) if parser else src
    def __str__(self) -> str: return self.src
    def __repr__(self) -> str: return self.canon
    def __eq__(self, other) -> bool: return repr(self) == repr(other)
    # FIX: hash on the same key __eq__ compares (canon via repr), not src.
    # Previously tokens that compared equal (e.g. '!' vs its canon '!=')
    # could hash differently, violating the hash/eq contract required for
    # correct behavior in sets and as dict keys.
    def __hash__(self) -> int: return hash(repr(self))
    def __bool__(self) -> bool: return bool(self.src)
TOK_NULL = Tok('NULL', '')

# Sequence of tokens optionally joined by an operator token
class Seq(list[Tok]):
    opr: Tok = TOK_NULL  # joining operator (e.g. OR comma, MOD), NULL when absent

    def __init__(self, *items):
        super().__init__(items)
        self.opr = TOK_NULL

    def __str__(self):
        # Render source form; empty tokens are skipped except HOLD placeholders
        rendered = []
        for tok in self:
            text = str(tok)
            if text or tok.kind == 'HOLD':
                rendered.append(text)
        return self.opr.src.join(rendered)

    def __repr__(self):
        # Canonical form keeps every token, including empty ones
        return self.opr.src.join(repr(tok) for tok in self)
# Predicate expression
class Cell:
    """One whitespace-delimited predicate of an Axis0 row:
    [flags][MOD left-term][comparator][value[,value...]].
    """
    flag: Seq   # FLAG/BIND/EVAR/ASSN tokens (e.g. ':min', ':$a', '%tab', ':#col')
    left: Seq   # optional MOD-prefixed term (e.g. <=>"robot"); empty otherwise
    comp: Tok   # comparator token; defaults to '='
    right: Seq  # OR-joined right-hand values; defaults to wildcard '_'
    padded = False  # set True on cells synthesized by Axis2.rect left-padding
    def __init__(self, src: str):
        """Tokenize src with CELL_REGEX and parse it into flag/left/comp/right.

        Raises:
            Err: on unknown characters (E_TOK), truncated input (E_EOF),
                 missing value terms (E_TERM_DAT/E_DAT), a trailing OR comma
                 (E_OR_TRAIL), or leftover tokens (E_EXPR_TRAIL).
        """
        self.left = Seq()
        self.flag = Seq()
        self.comp = Tok('EQL', '', '=')
        self.right = Seq(Tok('WLD', '', '_'))
        toks = []
        for m in CELL_REGEX.finditer(src):
            kind = m.lastgroup
            text = m.group()
            if kind == 'WS': continue
            if kind == 'MISMATCH': raise Err(f'E_TOK {text!r}')
            toks.append(Tok(kind, text))
        i, n = 0, len(toks)
        # tiny cursor helpers over the token list
        def peek(): return toks[i].kind if i < n else ''
        def take():
            nonlocal i
            if i >= n: raise Err('E_EOF')
            t = toks[i]
            i += 1
            return t
        # FLAGS
        while peek() in FLAG_KINDS:
            self.flag.append(take())
        # LEFT (prefix MOD)
        if peek() == 'MOD':
            self.left.opr = take()
            # HOLD marks where the column expression is spliced in later
            self.left.append(Tok('HOLD', ''))
            t = take()
            if not t.kind in DAT_KINDS: raise Err('E_TERM_DAT')
            self.left.append(t)
        # COMPARATOR
        if peek() == 'CMP':
            self.comp = take()
            if not peek() in DAT_KINDS: raise Err('E_DAT')
        # RIGHT (values, OR-joined)
        if peek() in DAT_KINDS:
            self.right.clear()  # discard the default wildcard
            while peek() in DAT_KINDS:
                self.right.append(take())
                if peek() == 'OR':
                    self.right.opr = take()
                    if not peek() in DAT_KINDS: raise Err('E_OR_TRAIL')
        if i != n: raise Err(f'E_EXPR_TRAIL {toks[i:]}')
    # PLACEHOLDER: OVERWRITE WITH YOUR EMBEDDING FUNCTION
    def vectorize(self, tok: Tok) -> Tok:
        """Stub embedder: passes EMB through, maps QUO/ALN to a fixed vector."""
        if tok.kind == 'EMB': return tok
        if tok.kind not in {'QUO', 'ALN'}: raise Err('E_EMBED')
        return Tok('EMB', json.dumps([0.1, 0.2]))
    @property
    def single(self) -> Tok:
        """The sole right-hand token when comp is '=' with one value, else TOK_NULL."""
        return self.right[0] if self.comp.canon == '=' and len(self.right) == 1 else TOK_NULL
    @property
    def literal(self) -> Tok:
        """Like single, but only concrete literal tokens qualify."""
        tok = self.single
        return tok if tok.kind in LIT_KINDS else TOK_NULL
    def find(self, kind: str) -> Tok:
        """Return the first flag of the given kind, or TOK_NULL."""
        return next((flag for flag in self.flag if flag.kind == kind), TOK_NULL)
    def bind(self, tok: Tok):
        """Append a flag token unless an equal one (by canon) is present."""
        if tok not in self.flag: self.flag.append(tok)
    def __str__(self) -> str: return f"{self.flag}{self.left}{self.comp}{self.right}"
    def __repr__(self) -> str: return f"{self.flag!r}{self.left!r}{self.comp!r}{self.right!r}"
    def __bool__(self) -> bool: return bool(self.flag or self.left or self.right)
### GRAMMAR ###
class Axis(list):
    """Base grammar node: a list of sub-axis nodes produced by splitting
    source text on a class-level separator, while keeping quoted strings
    intact and dropping triple-quoted comments."""
    src: str = ''
    sep: Optional[str] = None     # SEPERATOR TOKEN
    sepreg: Optional[str] = None  # SEPERATOR REG EXP
    sepstr: Optional[str] = None  # SEPERATOR OUT
    empt: bool = False            # ALLOW EMPTY SUB-AXES?
    sub = None                    # SUB-AXIS NAME (class called on each segment)
    def __init__(self, src: str):
        """Validate class config, default the regex/output separators, parse."""
        if self.sep is None: raise Err('E_AXIS_SEP')
        if not self.sepreg: self.sepreg = re.escape(self.sep)
        if not self.sepstr: self.sepstr = self.sep + ' '
        self.src= src
        self.parse(src.strip())
    @property
    def regex(self) -> Pattern[str]:
        # COMM: """...""" comment (dropped); EXPQ: quoted string kept whole;
        # SEP: this axis's separator; EXPM: run of chars containing neither a
        # quote nor the separator's first char; EXPS: single-char fallback.
        return re.compile(rf'''
(?P<COMM>"""(?:(?!""")[^\n\r\\]|\\.)*""")|
(?P<EXPQ>"(?:[^"\\\n\r]|\\.)*")|
(?P<SEP>{self.sepreg})|
(?P<EXPM>[^"{re.escape(self.sep[0])}]+)|
(?P<EXPS>.)
''', re.VERBOSE)
    def parse(self, src: str):
        """Split src on SEP and append one self.sub(segment) per segment."""
        exprs: List[str] = []
        for m in self.regex.finditer(src):
            if m.lastgroup == 'SEP':
                # flush the buffered segment; empty segments only if empt
                if exprs or self.empt:
                    self.append(self.sub("".join(exprs)))
                    exprs.clear()
            elif m.lastgroup != 'COMM': exprs.append(m.group())
        # debuffer expression
        if exprs: self.append(self.sub("".join(exprs)))
    def pull(self, coords):
        """Descend through nested axes by a coordinate path, e.g. [2, 0, 1]."""
        value = self
        for coord in coords: value = value[coord]
        return value
    def __str__(self) -> str:
        items = [str(t) for t in self]
        return self.sepstr.join([s for s in items if (s or self.empt)])
# "Table column value" semantic sequence of Cell predicates
class Axis0(Axis):
sep = ' '
sepstr = ' '
sepreg = r'\s+'
sub = Cell
# AND-joined sequence of Axis0
class Axis1(Axis):
    sep = ';'   # single semicolon separates AND-ed statements
    sub = Axis0
# OR-joined sequence of Axis1
class Axis2(Axis):
    sep = ';;'  # double semicolon separates top-level statements
    sub = Axis1
    @staticmethod
    def coordrel(coords, rel):
        """Resolve a RELCOORD offset path against absolute coords.

        rel is left-padded with '+0' to len(coords). Each entry is either
        'end' (last element, index -1) or a signed offset added to the
        matching coordinate.

        Raises:
            ValueError: E_REL_BIND when an offset lands before index 0.
        """
        rel = ["+0"] * max(0, len(coords) - len(rel)) + [str(op) for op in rel]
        out = []
        for coord, op in zip(coords, rel):
            if op == "end": v=-1
            elif (v:=(coord + int(op)))<0: raise ValueError('E_REL_BIND')
            out.append(v)
        return out
    # Rectangularize
    # Left-pad Axis0
    # Replace relatives `@` with coordinate vars `$x_y_z`
    # Assign slots
    def rect(self):
        """Normalize the grid in place before SQL generation: record
        %key=val environment rows, capture #slot header rows, left-pad
        short Axis0 rows with '@' cells, bind every cell to its slot, and
        rewrite REL tokens into inlined literals or shared $vars."""
        env = {'mode':'qry'}
        # default slot layout: table, column, value
        slots = [Tok('EVAR','','%tab'), Tok('EVAR','','%col'), Tok('EVAR','','%val')]
        for idx2, axis1 in enumerate(self):
            idx = [idx2, None, None]
            for idx1, axis0 in enumerate(axis1):
                if not axis0: continue
                idx[1:] = [idx1, None]
                # %KEY=VAL %KEY=VAL
                if bool(axis0[0].find('EVAR')):
                    # whole row must be EVAR assignments; they update env
                    for cell in axis0:
                        if not bool(cell.find('EVAR')): raise Err('E_AXIS_MET')
                        env[cell.find('EVAR').canon[1:]] = cell.single.dat
                    continue
                #SLOT #SLOT #SLOT
                if axis0[0].single.kind=='SLOT':
                    # slot header row redefines the column layout for later rows
                    slots=[]
                    for cell in axis0:
                        if cell.single.kind!='SLOT': raise Err('E_AXIS_SLOT')
                        # '#%tab' style slots become EVARs; '#col' become column ASSNs
                        if cell.single.canon[1]=='%': slots.append(Tok('EVAR', '', cell.single.canon[1:]))
                        else: slots.append(Tok('ASSN','',':'+cell.single.canon))
                    continue
                if env['mode'] not in PAD_MODES: continue
                # Pad Axis0: prepend relative '@' cells up to the slot width
                axis0len=len(slots)
                if len(axis0) > axis0len: raise Err('E_AXIS0_LONG')
                for _ in range(axis0len - len(axis0)):
                    cell = axis0.sub('@')
                    cell.padded=True
                    axis0.insert(0, cell)
                for idx0, cell in enumerate(axis0):
                    idx[2] = idx0
                    # Assign slot
                    cell.bind(slots[idx0])
                    # Replace relative tokens with coordinate vars
                    for seq in (cell.left, cell.right):
                        for n, tok in enumerate(seq):
                            if tok.kind!='REL': continue
                            coords=self.coordrel(idx, RELCOORD[tok.canon])
                            src=self.pull(coords)
                            # if the referenced cell holds a plain literal, inline it
                            if src.literal.kind != 'NULL':
                                seq[n] = src.literal
                                continue
                            # otherwise share a $var named after the coordinate
                            # ('-1' from 'end' renders as 'E')
                            name = '$'+'_'.join(map(str, coords)).replace('-1','E')
                            seq[n] = Tok('VAR', '', name)
                            src.bind(Tok('BIND', '', ':'+name))
### PG SQL ###
PH = '%s'
Param = List[Union[int, float, str, list]]

# SQL fragment paired with its ordered positional parameters
class SQL:
    def __init__(self, sql: str = '', param: Optional[Param] = None):
        self.sql = sql
        self.param = list(param) if param is not None else []

    def sql_value(self) -> "SQL":
        # A plain fragment renders itself; CellSQL overrides this
        return self

    def __str__(self) -> str:
        # Debug rendering: substitute each parameter (JSON-encoded) for
        # one placeholder, left to right
        rendered = self.sql
        for value in self.param:
            rendered = rendered.replace(PH, json.dumps(value), 1)
        return rendered

    def __repr__(self) -> str:
        return str((self.sql, self.param))

    @staticmethod
    def uniq(terms: "SQL") -> list["SQL"]:
        """Drop None entries and duplicates (keyed on sql text + params),
        preserving first-seen order."""
        seen = set()
        kept = []
        for term in terms:
            if term is None:
                continue
            fingerprint = (term.sql, tuple(repr(p) for p in term.param))
            if fingerprint not in seen:
                seen.add(fingerprint)
                kept.append(term)
        return kept
class CellSQL(Cell):
    """Cell augmented with PostgreSQL rendering state: the base column
    expression plus aggregate / group / sort behavior derived from flags."""
    flag2agg = {':cnt':'COUNT', ':sum':'SUM', ':avg':'AVG', ':min':'MIN', ':max':'MAX', ':last':'MAX'}
    cmp2sql = {'~':' ILIKE ', '!~':' NOT ILIKE '}
    mod2sql = {}
    def __init__(self, src: str):
        super().__init__(src)
        # base: rendered "alias.column"; alias: optional AS name (set by caller)
        self.base = self.alias = ''
        self.param = []
        flags = {t.canon for t in self.flag if t.kind == 'FLAG'}
        # first matching aggregate flag wins (flag2agg insertion order)
        self.agg = next((sql for flag, sql in self.flag2agg.items() if flag in flags), '')
        self.grouped = ':grp' in flags
        self.sort = 'ASC' if ':asc' in flags else 'DESC' if ':des' in flags else ''
    def deref(self, bind: dict[str, SQL], with_agg: bool = True) -> Iterator[SQL]:
        """Yield one SQL term per right-hand value; $vars resolve via bind.

        Raises:
            Err: E_VAR_BIND for an unbound $var.
        """
        for t in self.right:
            if t.kind == 'VAR':
                key = t.canon[1:]
                if key not in bind: raise Err(f'E_VAR_BIND {key}')
                ref = bind[key]
            else:
                # plain literal becomes placeholder + parameter
                yield SQL(PH, [t.dat])
                continue
            if isinstance(ref, CellSQL): yield ref.sql_value(with_agg=with_agg)
            else: yield ref.sql_value()
    @property
    def sql_groupby(self) -> Optional[SQL]:
        """GROUP BY term for :grp cells; combining :grp with an aggregate errors."""
        if not self.grouped: return None
        if self.agg: raise Err('E_GRP_AGG')
        return SQL(self.base, self.param)
    def sql_value(self, grouped: bool = False, alias: bool = False, order: bool = False, with_agg: bool = True) -> SQL:
        """Render this cell as a SQL value expression.

        Applies the vector MOD operator (if any), then an aggregate
        (non-grouped cells in a grouped query default to MAX), then an
        optional AS alias and ASC/DESC suffix.
        """
        sql, param = self.base, list(self.param)
        if self.left.opr.kind == 'MOD':
            # e.g. (t1.description<=>%s::VECTOR), param is the embedded vector
            sql = f'({sql}{self.left.opr.canon}{PH}::VECTOR)'
            param.append(self.vectorize(self.left[1]).canon)
        agg = self.agg or ('MAX' if grouped and not self.grouped else '') if with_agg else ''
        if agg: sql = f'{agg}({sql})'
        if alias and self.alias: sql = f'{sql} AS {self.alias}'
        if order and self.sort: sql = f'{sql} {self.sort}'
        return SQL(sql, param)
    def sql_clause(self, bind: dict[str, SQL]) -> Optional[tuple[str, SQL]]:
        """Build a ('where'|'having', SQL) filter term, or None for wildcards.

        ~ / !~ render as ILIKE with CONCAT('%', v, '%') containment; multi-value
        OR lists use =ANY(ARRAY[...]) / !=ALL(ARRAY[...]); range comparators
        accept exactly one value (E_COMP_OR otherwise). Aggregated cells filter
        in HAVING, plain cells in WHERE.
        """
        if not self.right or self.single.canon == '_': return None
        left = self.sql_value()
        rights = list(self.deref(bind, with_agg=bool(self.agg)))
        comp = self.comp.canon
        sqlcomp = self.cmp2sql.get(self.comp.canon) or self.comp.canon
        if comp in {'>', '<', '>=', '<='} and len(rights) != 1: raise Err('E_COMP_OR')
        items, params = [], []
        for right in rights:
            items.append(f"CONCAT('%', {right.sql}, '%')" if comp in {'~', '!~'} else right.sql)
            params.extend(right.param)
        if len(items) == 1: beg, end = '', ''
        elif comp in {'=', '~'}: beg, end = 'ANY(ARRAY[', '])'
        elif comp in {'!=', '!~'}: beg, end = 'ALL(ARRAY[', '])'
        else: raise Err('E_COMP_OR2')
        return ('having' if self.agg else 'where'), SQL(f"{left.sql}{sqlcomp}{beg}{','.join(items)}{end}", left.param + params)
class Grid(Axis2):
    """Top-level Memelang program: compiles each Axis1 into one SELECT."""
    def select(self) -> List[SQL]:
        """Compile every Axis1 statement into a parameterized PostgreSQL
        SELECT, returning one SQL object per statement (an empty SQL when
        the statement named no table)."""
        self.rect()
        out = []
        # env persists across statements; lim/beg are reset per statement
        env = {'mode':'qry', 'sim':0.5,'tab':'','taba':'','cola':''}
        for axis1 in self:
            env['lim'], env['beg'] = 0, 0
            # $var -> SQL resolution map, seeded from the environment
            bind = {k: SQL(PH, [v]) for k, v in env.items()}
            tab_cnt = 0
            qry = {'select':[], 'from':[], 'fromall':[], 'groupby':[], 'where':[], 'having':[], 'orderby':[]}
            grouped = False
            allselected = False
            for axis0 in axis1:
                if env['mode']!='qry': continue
                if axis0.src == '_':
                    # bare '_' row: select every column of every joined table
                    allselected = True
                    continue
                for idx0, cell in enumerate(axis0):
                    #print(repr(cell))
                    single = cell.single.dat
                    # padding cells and slot headers generate no SQL
                    if cell.padded or cell.single.kind=='SLOT': continue
                    # EVAR for TAB/COL
                    evarval = cell.find('EVAR').canon
                    if evarval=='%val': pass  # %val slot: fall through to value handling
                    elif evarval:
                        env[evarval[1:]] = cell.single.dat
                        bind[evarval[1:]]=SQL(PH, [cell.single.dat])
                        if evarval=='%tab':
                            # validate table identifier, register aliased FROM entry
                            if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_$]{0,62}', single): raise Err('E_TAB_NAME')
                            tab_cnt += 1
                            env['tab']=single
                            env['taba']=f"t{tab_cnt}"
                            qry['from'].append(SQL(f"{env['tab']} AS {env['taba']}"))
                            qry['fromall'].append(env['taba'])
                        elif evarval=='%col':
                            if single == '_': allselected = True
                            elif not re.fullmatch(r'[A-Za-z_]+[A-Za-z0-9_$]{0,62}', single): raise Err('E_COL_NAME')
                            # NOTE(review): source indentation was lost in transit;
                            # this assignment may belong under an else. As written,
                            # '%col=_' also sets cola to '_'. TODO confirm intent.
                            env['cola'] = single
                        continue
                    # SLOT for COL
                    assnval = cell.find('ASSN').canon
                    if assnval: env['cola'] = assnval[2:]  # ':#name' -> 'name'
                    if not env['taba']: raise Err('E_TAB_REQ')
                    # re-parse the canonical cell text as a SQL-aware cell
                    valcell = CellSQL(repr(cell))
                    valcell.base = f"{env['taba']}.{env['cola']}"
                    qry['select'].append(valcell)
                    if valcell.grouped:
                        grouped = True
                        qry['groupby'].append(valcell)
                    if valcell.sort: qry['orderby'].append(valcell)
                    clause = valcell.sql_clause(bind)
                    if clause:
                        key, term = clause
                        qry[key].append(term)
                    # :$name flags publish this cell for later $name references
                    for flag in valcell.flag:
                        if flag.kind != 'BIND': continue
                        if flag.canon[2:] in env: raise Err('E_ENV_BIND')
                        bind[flag.canon[2:]] = valcell
            if not qry['from']:
                # no table was named: emit an empty SQL placeholder
                out.append(SQL())
                continue
            # Assemble clauses in SQL order; uniq drops None/duplicate terms
            parts = (
                ('SELECT', ', ', [SQL(f"{a}.*") for a in qry['fromall']] if allselected else SQL.uniq(t.sql_value(grouped, True) for t in qry['select'])),
                ('FROM', ', ', qry['from']),
                ('WHERE', ' AND ', qry['where']),
                ('GROUP BY', ', ', SQL.uniq(t.sql_groupby for t in qry['groupby'])),
                ('HAVING', ' AND ', qry['having']),
                ('ORDER BY', ', ', SQL.uniq(t.sql_value(grouped, False, True) for t in qry['orderby'])),
            )
            sql, param = [], []
            for keyword, sep, terms in parts:
                if not terms: continue
                sql.append(f"{keyword} " + sep.join(t.sql for t in terms))
                for t in terms: param.extend(t.param)
            if env['lim']: sql.append(f"LIMIT {int(env['lim'])}")
            if env['beg']: sql.append(f"OFFSET {int(env['beg'])}")
            out.append(SQL(' '.join(sql), param))
        return out
©2026 HOLTWORK LLC. US Patent 12,475,098. This software is free to use for development, testing, and educational purposes. Commercial deployment, redistribution, or production use requires a separate license. Contact info@memelang.net.