Skip to content

Programming by Design

If you're not prepared to be wrong, you'll never come up with anything original. – Sir Ken Robinson

  • About
  • Java-PbD
  • C-PbD
  • ASM-PbD
  • Algorithms
  • Other

Data Transformation with Regex

Posted on February 1, 2024 (updated February 2, 2024) by William Jojo
Uncategorized

Overview

Sometimes, we have data in one form that needs to be put into another, either as code or as storage in a database. Many editors, like Sublime Text, have Find/Replace tools for complex data transformations. IntelliJ has a similar facility within its editor (Edit/Find/Replace). This brief tutorial explains some regexes (regular expressions) used to achieve quick transformations of tabular data, such as the table below, whose columns are the token value in hexadecimal, the same value in decimal, and the keyword.


80 	128 	END
81 	129 	FOR
82 	130 	NEXT
83 	131 	DATA
84 	132 	INPUT#
85 	133 	INPUT
86 	134 	DIM
87 	135 	READ
88 	136 	LET
89 	137 	GOTO
8A 	138 	RUN
8B 	139 	IF
8C 	140 	RESTORE
8D 	141 	GOSUB
8E 	142 	RETURN
8F 	143 	REM
90 	144 	STOP
91 	145 	ON
92 	146 	WAIT
93 	147 	LOAD
94 	148 	SAVE
95 	149 	VERIFY
96 	150 	DEF
97 	151 	POKE
98 	152 	PRINT#
99 	153 	PRINT
9A 	154 	CONT
9B 	155 	LIST
9C 	156 	CLR
9D 	157 	CMD
9E 	158 	SYS
9F 	159 	OPEN
A0 	160 	CLOSE
A1 	161 	GET
A2 	162 	NEW
A3 	163 	TAB(
A4 	164 	TO
A5 	165 	FN
A6 	166 	SPC(
A7 	167 	THEN
A8 	168 	NOT
A9 	169 	STEP
AA 	170 	+
AB 	171 	-
AC 	172 	*
AD 	173 	/
AE 	174 	^
AF 	175 	AND
B0 	176 	OR
B1 	177 	>
B2 	178 	=
B3 	179 	<
B4 	180 	SGN
B5 	181 	INT
B6 	182 	ABS
B7 	183 	USR
B8 	184 	FRE
B9 	185 	POS
BA 	186 	SQR
BB 	187 	RND
BC 	188 	LOG
BD 	189 	EXP
BE 	190 	COS
BF 	191 	SIN
C0 	192 	TAN
C1 	193 	ATN
C2 	194 	PEEK
C3 	195 	LEN
C4 	196 	STR$
C5 	197 	VAL
C6 	198 	ASC
C7 	199 	CHR$
C8 	200 	LEFT$
C9 	201 	RIGHT$
CA 	202 	MID$
CB 	203 	GO

Using

^([0-9A-Z]+)\s+([0-9]+)\s+(\S+)

for find and

{0x$1,"$3"},

for replace we can get to

    static Object[][] Tokens = {
        {0x80,"END"},
        {0x81,"FOR"},
        {0x82,"NEXT"},
        {0x83,"DATA"},
        {0x84,"INPUT#"},
        {0x85,"INPUT"},
        {0x86,"DIM"},
        {0x87,"READ"},
        {0x88,"LET"},
        {0x89,"GOTO"},
        {0x8a,"RUN"},
        {0x8b,"IF"},
        {0x8c,"RESTORE"},
        {0x8d,"GOSUB"},
        {0x8e,"RETURN"},
        {0x8f,"REM"},
        {0x90,"STOP"},
        {0x91,"ON"},
        {0x92,"WAIT"},
        {0x93,"LOAD"},
        {0x94,"SAVE"},
        {0x95,"VERIFY"},
        {0x96,"DEF"},
        {0x97,"POKE"},
        {0x98,"PRINT#"},
        {0x99,"PRINT"},
        {0x9a,"CONT"},
        {0x9b,"LIST"},
        {0x9c,"CLR"},
        {0x9d,"CMD"},
        {0x9e,"SYS"},
        {0x9f,"OPEN"},
        {0xa0,"CLOSE"},
        {0xa1,"GET"},
        {0xa2,"NEW"},
        {0xa3,"TAB("},
        {0xa4,"TO"},
        {0xa5,"FN"},
        {0xa6,"SPC("},
        {0xa7,"THEN"},
        {0xa8,"NOT"},
        {0xa9,"STEP"},
        {0xaa,"+"},
        {0xab,"-"},
        {0xac,"*"},
        {0xad,"/"},
        {0xae,"↑"},
        {0xaf,"AND"},
        {0xb0,"OR"},
        {0xb1,">"},
        {0xb2,"="},
        {0xb3,"<"},
        {0xb4,"SGN"},
        {0xb5,"INT"},
        {0xb6,"ABS"},
        {0xb7,"USR"},
        {0xb7,"FRE"},
        {0xb9,"POS"},
        {0xba,"SQR"},
        {0xbb,"RND"},
        {0xbc,"LOG"},
        {0xbd,"EXP"},
        {0xbe,"COS"},
        {0xbf,"SIN"},
        {0xc0,"TAN"},
        {0xc1,"ATN"},
        {0xc2,"PEEK"},
        {0xc3,"LEN"},
        {0xc4,"STR$"},
        {0xc5,"VAL"},
        {0xc6,"ASC"},
        {0xc7,"CHR$"},
        {0xc8,"LEFT$"},
        {0xc9,"RIGHT$"},
        {0xca,"MID$"},
        {0xcb,"GO"},

Using

^([0-9A-Z]+)\s+([0-9]+)\s+(\S+)

for find and

keywords.put("$3", (byte)0x$1);

for replace we can get to

// Registers every keyword from the source table in `keywords`, keyed by the
// keyword text, with the table's hexadecimal token value cast to byte.
// All values are above 0x7f, so each (byte) cast yields a negative Java byte;
// mask with `& 0xff` when the unsigned token value is needed again.
keywords.put("END",(byte)0x80);
keywords.put("FOR",(byte)0x81);
keywords.put("NEXT",(byte)0x82);
keywords.put("DATA",(byte)0x83);
keywords.put("INPUT#",(byte)0x84);
keywords.put("INPUT",(byte)0x85);
keywords.put("DIM",(byte)0x86);
keywords.put("READ",(byte)0x87);
keywords.put("LET",(byte)0x88);
keywords.put("GOTO",(byte)0x89);
keywords.put("RUN",(byte)0x8a);
keywords.put("IF",(byte)0x8b);
keywords.put("RESTORE",(byte)0x8c);
keywords.put("GOSUB",(byte)0x8d);
keywords.put("RETURN",(byte)0x8e);
keywords.put("REM",(byte)0x8f);
keywords.put("STOP",(byte)0x90);
keywords.put("ON",(byte)0x91);
keywords.put("WAIT",(byte)0x92);
keywords.put("LOAD",(byte)0x93);
keywords.put("SAVE",(byte)0x94);
keywords.put("VERIFY",(byte)0x95);
keywords.put("DEF",(byte)0x96);
keywords.put("POKE",(byte)0x97);
keywords.put("PRINT#",(byte)0x98);
keywords.put("PRINT",(byte)0x99);
keywords.put("CONT",(byte)0x9a);
keywords.put("LIST",(byte)0x9b);
keywords.put("CLR",(byte)0x9c);
keywords.put("CMD",(byte)0x9d);
keywords.put("SYS",(byte)0x9e);
keywords.put("OPEN",(byte)0x9f);
keywords.put("CLOSE",(byte)0xa0);
keywords.put("GET",(byte)0xa1);
keywords.put("NEW",(byte)0xa2);
keywords.put("TAB(",(byte)0xa3);
keywords.put("TO",(byte)0xa4);
keywords.put("FN",(byte)0xa5);
keywords.put("SPC(",(byte)0xa6);
keywords.put("THEN",(byte)0xa7);
keywords.put("NOT",(byte)0xa8);
keywords.put("STEP",(byte)0xa9);
keywords.put("+",(byte)0xaa);
keywords.put("-",(byte)0xab);
keywords.put("*",(byte)0xac);
keywords.put("/",(byte)0xad);
// NOTE(review): the source table lists this keyword as "^"; confirm "↑" is intended.
keywords.put("↑",(byte)0xae);
keywords.put("AND",(byte)0xaf);
keywords.put("OR",(byte)0xb0);
keywords.put(">",(byte)0xb1);
keywords.put("=",(byte)0xb2);
keywords.put("<",(byte)0xb3);
keywords.put("SGN",(byte)0xb4);
keywords.put("INT",(byte)0xb5);
keywords.put("ABS",(byte)0xb6);
keywords.put("USR",(byte)0xb7);
keywords.put("FRE",(byte)0xb8);
keywords.put("POS",(byte)0xb9);
keywords.put("SQR",(byte)0xba);
keywords.put("RND",(byte)0xbb);
keywords.put("LOG",(byte)0xbc);
keywords.put("EXP",(byte)0xbd);
keywords.put("COS",(byte)0xbe);
keywords.put("SIN",(byte)0xbf);
keywords.put("TAN",(byte)0xc0);
keywords.put("ATN",(byte)0xc1);
keywords.put("PEEK",(byte)0xc2);
keywords.put("LEN",(byte)0xc3);
keywords.put("STR$",(byte)0xc4);
keywords.put("VAL",(byte)0xc5);
keywords.put("ASC",(byte)0xc6);
keywords.put("CHR$",(byte)0xc7);
keywords.put("LEFT$",(byte)0xc8);
keywords.put("RIGHT$",(byte)0xc9);
keywords.put("MID$",(byte)0xca);
keywords.put("GO",(byte)0xcb);

Post navigation

❮ Previous Post: Machine Code – Assembly Language
Next Post: Converting Strings ❯

Creative Commons License
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.

Copyright © 2018 – 2025 Programming by Design.