Memelang is an AI-optimized query language that significantly reduces token count and model size for LLM RAG. The code below is designed to be copy-and-pasted into your LLM.
arXiv Paper · GitHub Repo · Patent Spec
Copy all code
'''
info@memelang.net | (c)2026 HOLTWORK LLC | Patents Pending
This script parses MEMELANG, a terse query DSL with axial grammar
Grid(Axis2) -> Axis1 -> Axis0 -> Cell
Whitespaces are syntatic and trigger "new Cell"
Never space between operator/comparator/comma/flag and values
'''
MEMELANG_VER = 9.50
syntax = '[table WS] [column WS] ["<=>" "\"" string "\""] [":" "$" var][":" ("min"|"max"|"cnt"|"sum"|"avg"|"last"|"grp")][":" ("asc"|"des")] [("="|"!="|">"|"<"|">="|"<="|"~"|"!~") (string|int|float|("$" var)|"@"|"_")] ";"'
examples = '''
%tab roles id :TYP=INT;>0;rating :DESC="Decimal 0-5 star rating of performance";:TYP=DEC;>0;<=5;actor :DESC="Actor's full name";:TYP=STR;movie :DESC="Movie's full name";:TYP=STR;character :DESC="Character's full name";:TYP=STR;;
%tab actors id :TYP=INT;>0;name :DESC="Actor's full name";:TYP=STR;age :DESC="Actor's age in years";:TYP=INT;>=0;<200;;
%tab movies id :TYP=INT;>0;description :DESC="Brief description of movie plot";:TYP=STR;year :DESC="Year of production AD";:TYP=INT;>1800;<2100;genre scifi,drama,comedy,documentary;:TYP=STR;title :DESC="Full movie title";:TYP=STR;;
%for actors name _;roles actor @;;
%for movies title _;roles movie @;;
%uni roles id;;
%uni roles movie;roles actor;roles character;;
%uni actors id;;
%uni movies id;;
""" All movies """
movies _ _;;
""" Every film """
movies _ _;;
""" Roles """
roles _ _;;
""" Titles and descriptions for movies """
movies title _;description _;;
""" Actor name and ages """
actors name _;age _;;
""" Actors age 41 years or older """
actors age >=41;_;;
""" Role 567 and 9766324436 """
roles id 567,9766324436;_;;
""" Films with dystopian society narratives sim>.33 """
movies description <=>"dystopian"<0.33;_;;
""" Movies titled with Star released in 1977 or 1980 """
movies title ~"Star";year 1977,1980;_;;
""" Actors named like Ana aged 20 to 35 inclusive """
actors name ~"Ana";age >=20;<=35;_;;
""" Roles rated below 1.5 for movies before 1980 """
movies year <1980;title _;roles movie @;rating <1.5;_;;
""" Roles sort rating descending, movie descending """
roles rating :des;movie :des;;
""" All movies before 1970 ordered by year ascending """
movies year :asc<1970;_;;
""" Average performer rating at least 4.2 """
roles rating :avg>=4.2;actor :grp;;
""" Minimum role rating by actor, low to high """
roles rating :min:asc;actor :grp;;
""" Roles in movies mentioning robot rated 3+ """
movies description <=>"robot"<=$sim;title _;roles movie @;rating >=3;;
""" Costars seen with Bruce Willis and Uma Thurman """
roles actor :$a~"Bruce Willis","Uma Thurman";movie _;@ @ @;actor !$a;;
""" War stories before 1980: top 12 movies by minimum role rating """
movies year <1980;description <=>"war"<=$sim;title :grp;roles movie @;rating :min:des;%m lim 12;;
""" Roles for movies Hero or House of Flying Daggers where actor name includes Li, actor A-Z """
movies title "Hero","House of Flying Daggers";roles movie @;actor :asc~"Li";;
'''
import re, sys, json
from typing import Optional, Union, List, Tuple, Iterator
Err = SyntaxError
### SYNTAX ###
CELL_PATTERN = (
('DAT_QUO', r'"(?:[^"\\\n\r]|\\.)*"'),
('DAT_EMB', r'\[(?:-?\d+(?:\.\d+)?)(?:\s*,\s*-?\d+(?:\.\d+)?)*\]'),
('DAT_MET', r'\%\w+'),
('MOD_L2', r'<->'),
('MOD_COS', r'<=>'),
('MOD_IP', r'<#>'),
('CMP_GE', r'>='),
('CMP_LE', r'<='),
('CMP_DSIM', r'!~'),
('CMP_NOT', r'!=?'), # a!=b or a!b
('CMP_EQL', r'='),
('CMP_GT', r'>'),
('CMP_LT', r'<'),
('CMP_SIM', r'~'),
('BIND', r':\$\w+'),
('FLAG', r':[a-zA-Z]+'),
('DAT_VAR', r'\$\w+'),
('DAT_WLD', r'_'),
('DAT_MS', r'\^'),
('DAT_AT', r'@'),
('DAT_TS', r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'),
('DAT_YMD', r'\d{4}-\d{2}-\d{2}'),
('DAT_DEC', r'-?\d*\.\d+'),
('DAT_INT', r'-?\d+'),
('DAT_ID', r'[A-Za-z][A-Za-z0-9_]*'),
('OR', r','),
('WS', r'\s+'),
('MISMATCH', r'.'),
)
CELL_REGEX=re.compile("|".join(f"(?P<{k}>{p})" for k, p in CELL_PATTERN))
# Atomic token
class Tok:
def __init__(self, kind: str, lex: str):
self.kind = kind
self.lex = lex
if kind == 'DAT_QUO': self.dat = json.loads(lex)
elif kind == 'DAT_EMB': self.dat = json.loads(lex)
elif kind == 'DAT_DEC': self.dat = float(lex)
elif kind == 'DAT_INT': self.dat = int(lex)
else: self.dat = lex
def __str__(self): return self.lex
def __repr__(self): return self.lex
TOK_NULL = Tok('NULL', '')
TOK_EQL_ELIDE = Tok('CMP_EQL', '') # default, elided '='
# Sequence of tokens
class Seq(list[Tok]):
opr: Tok = TOK_NULL
def __init__(self, *items):
super().__init__(items)
self.opr = TOK_NULL
def __str__(self): return self.opr.lex.join(map(str, self))
# Predicate expression
class Cell:
left: Seq
flag: Seq
comp: Tok
right: Seq
def __init__(self, src: str):
self.left = Seq()
self.flag = Seq()
self.comp = TOK_EQL_ELIDE
self.right = Seq()
toks = []
for m in CELL_REGEX.finditer(src):
kind = m.lastgroup
text = m.group()
if kind == 'WS': continue
if kind == 'MISMATCH': raise Err(f'E_TOK {text!r}')
toks.append(Tok(kind, text))
i, n = 0, len(toks)
def peek(): return toks[i].kind if i < n else ''
def take():
nonlocal i
if i >= n: raise Err('E_EOF')
t = toks[i]
i += 1
return t
# LEFT (prefix MOD)
if peek().startswith('MOD'):
self.left.opr = take()
self.left.append(TOK_NULL)
t = take()
if not t.kind.startswith('DAT'): raise Err('E_TERM_DAT')
self.left.append(t)
# FLAGS
while peek() in {'FLAG','BIND'}:
self.flag.append(take())
# COMPARATOR
if peek().startswith('CMP'):
self.comp = take()
if not peek().startswith('DAT'): raise Err('E_CMP_DAT')
# RIGHT (values, OR-joined)
while peek().startswith('DAT'):
self.right.append(take())
if peek() == 'OR':
self.right.opr = take()
if not peek().startswith('DAT'): raise Err('E_OR_TRAIL')
if i != n: raise Err(f'E_EXPR_TRAIL {toks[i:]}')
# PLACEHOLDER: OVERWRITE WITH YOUR EMBEDDING FUNCTION
def vectorize(self, tok: Tok) -> Tok:
if tok.kind == 'DAT_EMB': return tok
if tok.kind not in {'DAT_QUO', 'DAT_ID'}: raise Err('E_EMBED')
return Tok('DAT_EMB', json.dumps([0.1, 0.2]))
@property
def single(self) -> Tok:
return (
self.right[0]
if self.comp.kind == 'CMP_EQL' and len(self.right) == 1
else TOK_NULL
)
def __str__(self) -> str: return f"{self.left}{self.flag}{self.comp.lex}{self.right}"
def __repr__(self) -> str: return str(self)
### GRAMMAR ###
class Axis(list):
sep: str = None # SEPERATOR TOKEN
sepreg: str = None # SEPERATOR REG EXP
sepstr: str = None # SEPERATOR OUT
minlen: int = 0 # FIXED LENGTH (0=No)
empt: bool = False # ALLOW EMPTY SUB-AXES?
sub = None # SUB-AXIS NAME
def __init__(self, src: str):
if self.sep is None: raise Err('E_AXIS_SEP')
if not self.sepreg: self.sepreg = re.escape(self.sep)
if not self.sepstr: self.sepstr = self.sep + ' '
self.parse(src.strip())
@property
def pattern(self) -> List[Tuple[str, str]]:
return [
("COMM", r'"""(?:[^"\\\n\r]|\\.)*"""'),
("EXPQ", r'"(?:[^"\\\n\r]|\\.)*"'),
("SEP", self.sepreg),
("EXPM", rf"[^\"{re.escape(self.sep[0])}]+"),
("EXPS", r".")
]
def parse(self, src: str):
regex=re.compile("|".join(f"(?P<{k}>{p})" for k, p in self.pattern))
exprs: List[str] = []
for m in regex.finditer(src):
if m.lastgroup in {"EXPQ", "EXPM", "EXPS"}: exprs.append(m.group())
elif m.lastgroup == "SEP" and (exprs or self.empt):
self.append(self.sub("".join(exprs)))
exprs.clear()
if exprs: self.append(self.sub("".join(exprs)))
if self.minlen and not src.startswith('%'): self[:0] = [self.sub('') for _ in range(self.minlen-len(self))]
def __str__(self) -> str:
items = [str(t) for t in self]
return self.sepstr.join([s for s in items if (s or self.empt)])
# "Table column value" semantic sequence of Cell predicates
class Axis0(Axis):
sep = ' '
sepstr = ' '
sepreg = r'\s+'
minlen = 3
sub = Cell
# AND-joined sequence of Axis0
class Axis1(Axis):
sep = ';'
sub = Axis0
# OR-joined sequence of Axis1
class Axis2(Axis):
sep = ';;'
sub = Axis1
### PG SQL ###
PH = '%s'
class SQL:
lex: str
param: List[Union[int, float, str, list]]
def __init__(self, lex: str = '', param: Optional[List[Union[int, float, str, list]]] = None):
self.lex = lex
self.param = [] if param is None else param
def __str__(self) -> str:
lex = self.lex
for p in self.param: lex = lex.replace(PH, json.dumps(p), 1)
return lex
def __repr__(self) -> str: return str(self)
class Alias(str):
pass
class Grid(Axis2):
def select(self) -> List[SQL]:
sql: List[SQL] = []
TAB, COL, VAL = 0, 1, 2
flag2sql = {':cnt':'COUNT',':sum': 'SUM', ':avg': 'AVG', ':min': 'MIN', ':max': 'MAX', ':last': 'MAX'}
cmp2sql = {'EQL':'=','NOT':'!=','GT':'>','GE':'>=','LT':'<','LE':'<=','SIM':'ILIKE','DSIM':'NOT ILIKE'}
mod2sql = {'MOD_COS': '<=>','MOD_L2': '<->','MOD_IP': '<#>'}
def deref(cell: Cell) -> Iterator[SQL]:
for t in cell.right:
if t.kind == 'DAT_VAR': key=t.lex[1:]
elif t.kind == 'DAT_AT': key='@'
else: key=None
if key:
if key not in bind: raise Err(f'E_VAR_BIND {key}')
val = bind[key]
else: val = t.dat
yield SQL(val) if isinstance(val, Alias) else SQL(PH, [val])
for axis1 in self:
bind = {'lim':0,'beg':0,'sim':0.5}
mem = [{'val':None,'alias':None,'cnt':-1} for _ in range(3)]
query = {'select':[],'from':[],'groupby':[],'where':[],'having':[],'orderby':[]}
GROUPED = False
ALLSELECTED = False
MODE = '%q'
for axis0 in axis1:
if not axis0: continue
axis0str = [str(cell.single.dat) for cell in axis0]
if axis0str == ['','','_']:
ALLSELECTED=True
continue
# META (INTENTIONALLY PERSISTS)
if axis0[0].single.kind=='DAT_MET':
MODE = axis0[TAB].single.lex
if axis0[TAB].single.lex=='%m': bind[axis0str[COL]]=axis0[VAL].single.dat
if MODE!='%q': continue
# TABLE
if axis0str[TAB]=='':
if mem[TAB]['alias'] is None: raise Err('E_TAB_REQ')
else:
tkind = axis0[TAB].single.kind
if tkind == 'NULL': raise Err('E_TAB_NON')
# @ means self-join
elif tkind == 'DAT_AT':
if mem[TAB]['val'] is None: raise Err('E_TAB_SAME')
mem[TAB]['cnt']+=1
mem[TAB]['alias']=Alias(f"t{mem[TAB]['cnt']}")
# named table
else:
if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_$]{0,62}', axis0str[TAB]): raise Err('E_TAB_CHR')
mem[TAB]['cnt']+=1
mem[TAB]['alias']=Alias(f"t{mem[TAB]['cnt']}")
mem[TAB]['val']=axis0str[TAB]
query['from'].append(SQL(f"{mem[TAB]['val']} AS {mem[TAB]['alias']}"))
# COLUMN
# select all
if axis0str[COL] == '_':
ALLSELECTED=True
continue
# update column alias with new table alias on self-join
elif axis0str[COL] in ('','@'):
if mem[COL]['val'] is None: raise Err('E_COL_NON')
mem[COL]['alias']=Alias(f"{mem[TAB]['alias']}.{mem[COL]['val']}")
# named column
else:
mem[COL]['cnt']+=1
if axis0[COL].single.kind != 'NULL':
if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_$]{0,62}', axis0str[COL]): raise Err('E_COL_CHR')
mem[COL]['alias']=Alias(f"{mem[TAB]['alias']}.{axis0str[COL]}")
mem[COL]['val']=axis0str[COL]
# VALUE
mem[VAL]['alias']=mem[COL]['alias']
mem[VAL]['val']=mem[COL]['alias']
# MOD
if axis0[VAL].left.opr.kind in mod2sql:
v = axis0[VAL].vectorize(axis0[VAL].left[1])
mem[VAL]['alias'] = Alias(f"({mem[VAL]['alias']}{mod2sql[axis0[VAL].left.opr.kind]}'{v.lex}'::VECTOR)")
flags = [str(f) for f in axis0[VAL].flag]
agged = False
# aggregate
for flag,agg in flag2sql.items():
if flag in flags:
if agged: raise Err('E_AGG_AGG')
mem[VAL]['alias']=Alias(f"{agg}({mem[VAL]['alias']})")
agged=True
# group by
if ':grp' in flags:
if agged: raise Err('E_GRP_AGG')
GROUPED = True
query['groupby'].append(SQL(mem[COL]['alias']))
# sort
if ':asc' in flags: query['orderby'].append(SQL(mem[VAL]['alias']))
elif ':des' in flags: query['orderby'].append(SQL(mem[VAL]['alias']+' DESC'))
# select
sel=SQL(mem[VAL]['alias'])
if not query['select'] or query['select'][-1].lex!=sel.lex:
query['select'].append(sel)
# where/having
if axis0[VAL].right and axis0[VAL].single.lex!='_':
rights = list(deref(axis0[VAL]))
wparams = [p for r in rights for p in r.param]
commas = ','.join([r.lex for r in rights])
hw = 'having' if agged else 'where'
compkind = axis0[VAL].comp.kind[4:]
if len(rights)==1 and compkind not in {'SIM', 'DSIM'}: query[hw].append(SQL(f"{mem[VAL]['alias']} {cmp2sql[compkind]} {commas}", wparams))
else:
if compkind=='EQL': query[hw].append(SQL(f"{mem[VAL]['alias']} IN ({commas})", wparams))
elif compkind=='NOT': query[hw].append(SQL(f"{mem[VAL]['alias']} NOT IN ({commas})", wparams))
elif compkind=='SIM': query[hw].append(SQL('('+" OR ".join([f"{mem[VAL]['alias']} ILIKE CONCAT('%', %s, '%')" for _ in rights])+')',wparams))
elif compkind=='DSIM': query[hw].append(SQL('('+" AND ".join([f"{mem[VAL]['alias']} NOT ILIKE CONCAT('%', %s, '%')" for _ in rights])+')',wparams))
else: raise Err('E_COMP_OR')
# bind
bind['@']=Alias(mem[VAL]['alias'])
for flag in axis0[VAL].flag:
if flag.kind=='BIND': bind[flag.lex[2:]]=Alias(mem[VAL]['alias'])
if ALLSELECTED:
query['select']=[]
for f in query['from']:
m = re.search(r"\bAS\s+(t\d+)\b", f.lex)
query['select'].append(SQL(f"{m.group(1)}.*"))
elif GROUPED:
groupstrs=[s.lex for s in query['groupby']]
for s in query['select']:
if '(' not in s.lex[1:] and s.lex not in groupstrs: s.lex=f"MAX({s.lex})"
if not query['from']:
sql.append(SQL(''))
continue
SQLPARTS=[
['SELECT', ', ', 'select'],
['FROM', ', ', 'from'],
['WHERE', ' AND ', 'where'],
['GROUP BY', ', ', 'groupby'],
['HAVING', ' AND ', 'having'],
['ORDER BY', ', ', 'orderby'],
]
sqlstr,params='',[]
for keyword, sep, ikey in SQLPARTS:
if not query[ikey]: continue
sqlstr+=' '+keyword+' '+sep.join([s.lex for s in query[ikey]])
for s in query[ikey]: params.extend(s.param)
if bind['lim']: sqlstr += f" LIMIT {int(bind['lim'])}"
if bind['beg']: sqlstr += f" OFFSET {int(bind['beg'])}"
sql.append(SQL(sqlstr[1:], params))
return sql
©2026 HOLTWORK LLC. US Patent 12,475,098. This software is free to use for development, testing, and educational purposes. Commercial deployment, redistribution, or production use requires a separate license. Contact info@memelang.net.