1 //          Copyright Yazan Dabain 2014.
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module dwarf.debugline;
7 
8 // this implementation follows the DWARF v3 documentation
9 
10 import std.exception;
11 import std.range;
12 import std.conv : to;
13 import dwarf.meta;
14 import dwarf.elf;
15 import std.stdio;
16 import std.format;
17 
// Raw, byte-packed image of the fixed part of a line program header in the
// 32-bit DWARF format. DebugLine overlays this struct directly on the section
// bytes, so field order, field types and the align(1) packing must not change.
// NOTE(review): because of maximumOperationsPerInstruction the layout matches
// DWARF version 4 headers; earlier versions presumably lack that byte - confirm.
align(1) struct LineProgramHeader32L {
  align(1):
	// length of the unit in bytes, not counting this field itself
	uint unitLength;
	// DWARF version number of this unit
	ushort dwarfVersion;
	// number of bytes between the end of this field and the first opcode of the program
	uint headerLength;
	// factor applied to every address advance of the state machine
	ubyte minimumInstructionLength;
  ubyte maximumOperationsPerInstruction; // dwarf 4
	// initial value of the state machine's isStatement register
	bool defaultIsStatement;
	// smallest line increment a special opcode can encode
	byte lineBase;
	// number of distinct line increments a special opcode can encode
	ubyte lineRange;
	// value of the first special opcode; smaller opcodes are standard opcodes
	ubyte opcodeBase;
}
30 
// Raw, byte-packed image of the fixed part of a line program header in the
// 64-bit DWARF format. DebugLine overlays this struct directly on the section
// bytes, so field order, field types and the align(1) packing must not change.
// NOTE(review): because of maximumOperationsPerInstruction the layout matches
// DWARF version 4 headers; earlier versions presumably lack that byte - confirm.
align(1) struct LineProgramHeader64L {
  align(1):
	// leading 32-bit word; DebugLine selects this layout when it equals 0xffff_ffff
	uint unitLength_;
	// length of the unit in bytes, not counting unitLength_ and this field
	ulong unitLength;
	// DWARF version number of this unit
	ushort dwarfVersion;
	// number of bytes between the end of this field and the first opcode of the program
	ulong headerLength;
	// factor applied to every address advance of the state machine
	ubyte minimumInstructionLength;
  ubyte maximumOperationsPerInstruction; // dwarf 4
	// initial value of the state machine's isStatement register
	bool defaultIsStatement;
	// smallest line increment a special opcode can encode
	byte lineBase;
	// number of distinct line increments a special opcode can encode
	ubyte lineRange;
	// value of the first special opcode; smaller opcodes are standard opcodes
	ubyte opcodeBase;
}
44 
/// Parser for DWARF line number information (the contents of a `.debug_line`
/// section).
///
/// The input is a sequence of line number programs. Each one is decoded into a
/// `LineProgram` that holds its header, its directory and file tables and the
/// address rows produced by running the program through the line number state
/// machine. Header layouts of DWARF versions 2 to 4 are understood.
struct DebugLine {
	private Appender!(LineProgram[]) m_lps;

	// an initial length of 0xffff_ffff marks a unit stored in the 64-bit DWARF format
	private enum uint DWARF_64BIT_FLAG = 0xffff_ffff;

	// first DWARF version whose header contains maximum_operations_per_instruction
	private enum ushort MAX_OPS_FIELD_SINCE = 4;

	/// Decodes every line number program contained in `lineProgramContents`.
	///
	/// Params:
	///   lineProgramContents = raw bytes holding one or more line number programs
	///
	/// Throws: ELFException when a header is truncated, an address operand has
	///         an unsupported size or an unknown standard opcode is met.
	this(ubyte[] lineProgramContents) {
		while (!lineProgramContents.empty) {
			if (lineProgramContents.length < uint.sizeof)
				throw new ELFException("truncated line program header");

			LineProgram lp;
			size_t headerSize; // bytes taken by the fixed header fields of this unit

			// detect dwarf 32bit or 64bit
			immutable initialLength = * cast(uint*) lineProgramContents.ptr;
			if (initialLength == DWARF_64BIT_FLAG) {
				lp.m_header = new LineProgramHeader64(readHeader!LineProgramHeader64L(lineProgramContents, headerSize));
			} else {
				lp.m_header = new LineProgramHeader32(readHeader!LineProgramHeader32L(lineProgramContents, headerSize));
			}

			// standard_opcode_lengths: one byte per standard opcode, i.e. opcodeBase - 1 entries
			// (an opcodeBase of 0 is treated as "no standard opcodes" instead of underflowing)
			immutable size_t opcodeCount = lp.m_header.opcodeBase > 0 ? lp.m_header.opcodeBase - 1 : 0;
			immutable tablesStart = headerSize + opcodeCount;
			lp.m_standardOpcodeLengths = lineProgramContents[headerSize .. tablesStart].dup;

			auto pathData = lineProgramContents[tablesStart .. $];

			// include directories: zero terminated strings, the list ends with an empty string
			while (pathData[0] != 0) {
				lp.m_dirs ~= (cast(char*) pathData.ptr).to!string();
				pathData = pathData[lp.m_dirs[$ - 1].length + 1 .. $];
			}

			pathData.popFront();

			// file entries: name, directory index, modification time, length; same terminator
			while (pathData[0] != 0) {
				string file = (cast(char*) pathData.ptr).to!string();
				pathData = pathData[file.length + 1 .. $];

				auto dirIndex = pathData.readULEB128();
				pathData.readULEB128(); // last modification time, unused
				pathData.readULEB128(); // file length, unused

				lp.m_files ~= FileInfo(file, dirIndex.to!size_t);
			}

			// headerLength counts from the end of the headerLength field,
			// unitLength counts from the end of the unitLength field
			immutable is32 = lp.m_header.bits == 32;
			immutable size_t startOffset = is32 ? LineProgramHeader32L.minimumInstructionLength.offsetof : LineProgramHeader64L.minimumInstructionLength.offsetof;
			immutable size_t endOffset   = is32 ? LineProgramHeader32L.unitLength.sizeof : LineProgramHeader64L.unitLength.offsetof + LineProgramHeader64L.unitLength.sizeof;

			immutable programStart = (startOffset + lp.m_header.headerLength()).to!size_t;
			immutable unitEnd = (endOffset + lp.m_header.unitLength()).to!size_t;

			buildMachine(lp, lineProgramContents[programStart .. unitEnd]);
			m_lps.put(lp);

			lineProgramContents = lineProgramContents[unitEnd .. $];
		}
	}

	// Copies the fixed header fields found at the front of `contents` into a raw
	// header struct of type H and reports in `headerSize` how many bytes they occupy.
	// Headers older than DWARF 4 have no maximum_operations_per_instruction byte, so
	// for those the single-byte fields are re-read from their real positions.
	private static H readHeader(H)(ubyte[] contents, out size_t headerSize) {
		if (contents.length < H.sizeof)
			throw new ELFException("truncated line program header");

		H raw = * cast(H*) contents.ptr;
		headerSize = H.sizeof;

		if (raw.dwarfVersion < MAX_OPS_FIELD_SINCE) {
			const fields = contents[H.minimumInstructionLength.offsetof .. H.sizeof];
			raw.minimumInstructionLength = fields[0];
			raw.maximumOperationsPerInstruction = 1;
			raw.defaultIsStatement = fields[1] != 0;
			raw.lineBase = cast(byte) fields[2];
			raw.lineRange = fields[3];
			raw.opcodeBase = fields[4];
			headerSize = H.sizeof - 1;
		}

		return raw;
	}

	// Reads the operand of the setAddress extended opcode, an unencoded target
	// address that is `size` bytes wide.
	private static ulong readAddress(ref ubyte[] program, ulong size) {
		switch (size) {
			case 8: return program.read!ulong();
			case 4: return program.read!uint();
			case 2: return program.read!ushort();
			case 1: return program.read!ubyte();
			default: throw new ELFException("unsupported address size " ~ size.to!string);
		}
	}

	// Runs `program` through the line number state machine and appends every
	// row it emits to lp.m_addresses.
	private void buildMachine(ref LineProgram lp, ubyte[] program) {
		Machine m;
		m.isStatement = lp.m_header.defaultIsStatement();
		while (!program.empty) {
			ubyte opcode = program.read!ubyte();

			if (opcode < lp.m_header.opcodeBase) {

				switch (opcode) with (StandardOpcode) {
					case extendedOp:
						// len covers the sub-opcode byte plus its operands
						ulong len = program.readULEB128();
						ubyte eopcode = program.read!ubyte();

						switch (eopcode) with (ExtendedOpcode) {
							case endSequence:
								m.isEndSequence = true;
								// trace("endSequence ", "0x%x".format(m.address));
								lp.m_addresses ~= m.toAddressInfo();
								m = Machine.init;
								m.isStatement = lp.m_header.defaultIsStatement;
								break;

							case setAddress:
								// the address takes whatever is left of the instruction after the sub-opcode
								// trace("setAddress ", "0x%x".format(address));
								m.address = readAddress(program, len - 1);
								break;

							case defineFile:
								auto file = (cast(char*) program.ptr).to!string();
								program = program[file.length + 1 .. $];
								auto dirIndex = program.readULEB128();
								program.readULEB128(); // modification time, unused
								program.readULEB128(); // file size, unused
								trace("defineFile ", dirIndex);
								// the entry extends the file table so later setFile indices resolve
								lp.m_files ~= FileInfo(file, dirIndex.to!size_t);
								break;

							default:
								// unknown opcode: skip its operands
								// trace("unknown extended opcode ", eopcode);
								program = program[(len - 1).to!size_t .. $];
								break;

						}

						break;

					case copy:
						// trace("copy");
						lp.m_addresses ~= m.toAddressInfo();
						m.isBasicBlock = false;
						m.isPrologueEnd = false;
						m.isEpilogueBegin = false;
						break;

					case advancePC:
						ulong op = readULEB128(program);
						// trace("advancePC ", op * lp.m_header.minimumInstructionLength);
						m.address += op * lp.m_header.minimumInstructionLength;
						break;

					case advanceLine:
						long ad = readSLEB128(program);
						// trace("advanceLine ", ad);
						m.line += ad;
						break;

					case setFile:
						uint index = readULEB128(program).to!uint();
						// trace("setFile to ", index);
						m.fileIndex = index;
						break;

					case setColumn:
						uint col = readULEB128(program).to!uint();
						// trace("setColumn ", col);
						m.column = col;
						break;

					case negateStatement:
						// trace("negateStatement");
						m.isStatement = !m.isStatement;
						break;

					case setBasicBlock:
						// trace("setBasicBlock");
						m.isBasicBlock = true;
						break;

					case constAddPC:
						// advances the address like special opcode 255 would, without emitting a row
						// trace("constAddPC ", (255 - lp.m_header.opcodeBase) / lp.m_header.lineRange * lp.m_header.minimumInstructionLength);
						m.address += (255 - lp.m_header.opcodeBase) / lp.m_header.lineRange * lp.m_header.minimumInstructionLength;
						break;

					case fixedAdvancePC:
						// the operand is a plain 2-byte value, not LEB128 encoded
						ushort add = program.read!ushort();
						// trace("fixedAdvancePC ", add);
						m.address += add;
						break;

					case setPrologueEnd:
						m.isPrologueEnd = true;
						// trace("setPrologueEnd");
						break;

					case setEpilogueBegin:
						m.isEpilogueBegin = true;
						// trace("setEpilogueBegin");
						break;

					case setISA:
						m.isa = readULEB128(program).to!uint();
						// trace("setISA ", m.isa);
						break;

					default:
						throw new ELFException("unimplemented/invalid opcode " ~ opcode.to!string);
				}

			} else {
				// special opcode: advances address and line in one byte, then emits a row
				opcode -= lp.m_header.opcodeBase;
				auto ainc = (opcode / lp.m_header.lineRange) * lp.m_header.minimumInstructionLength;
				m.address += ainc;
				auto linc = lp.m_header.lineBase + (opcode % lp.m_header.lineRange);
				m.line += linc;

				// trace("special ", ainc, " ", linc);
				lp.m_addresses ~= m.toAddressInfo();

				// a special opcode clears the same per-row flags as the copy opcode
				m.isBasicBlock = false;
				m.isPrologueEnd = false;
				m.isEpilogueBegin = false;
			}
		}
	}

	/// Returns: the decoded line programs, in the order they appear in the input.
	const(LineProgram)[] programs() const {
		return m_lps.data;
	}
}
249 
// Common, format independent view of a line program header. The concrete
// subclasses in this module wrap either the 32-bit or the 64-bit raw struct
// and widen its fields to the types declared here.
abstract class LineProgramHeader {
	@property:
	// Each @ReadFrom("x") names the raw struct field that backs the property.
	// NOTE(review): the overrides are presumably generated from these attributes
	// by generateVirtualReads (mixed into the subclasses) - confirm in dwarf.meta.
	@ReadFrom("unitLength") ulong unitLength();
	@ReadFrom("dwarfVersion") ushort dwarfVersion();
	@ReadFrom("headerLength") ulong headerLength();
	@ReadFrom("minimumInstructionLength") ubyte minimumInstructionLength();
	@ReadFrom("defaultIsStatement") bool defaultIsStatement();
	@ReadFrom("lineBase") byte lineBase();
	@ReadFrom("lineRange") ubyte lineRange();
	@ReadFrom("opcodeBase") ubyte opcodeBase();

	// size in bytes of the raw header struct wrapped by the subclass
	size_t datasize();
	// 32 or 64, depending on the DWARF format of the unit
	ubyte bits();
}
264 
// Header of a line program stored in the 32-bit DWARF format.
final class LineProgramHeader32 : LineProgramHeader {
	// copy of the raw header bytes
	private LineProgramHeader32L m_data;

	this(LineProgramHeader32L lph) {
		m_data = lph;
	}

	// NOTE(review): presumably expands to the overrides of the @ReadFrom
	// properties, each reading the named field of m_data - confirm in dwarf.meta
	mixin(generateVirtualReads!(LineProgramHeader, "m_data"));

	// size in bytes of the raw 32-bit header struct
	override @property size_t datasize() {
		return typeof(m_data).sizeof;
	}

	// this class always describes the 32-bit format
	override @property ubyte bits() {
		return 32;
	}
}
281 
// Header of a line program stored in the 64-bit DWARF format.
final class LineProgramHeader64 : LineProgramHeader {
	// copy of the raw header bytes
	private LineProgramHeader64L m_data;

	this(LineProgramHeader64L lph) {
		m_data = lph;
	}

	// NOTE(review): presumably expands to the overrides of the @ReadFrom
	// properties, each reading the named field of m_data - confirm in dwarf.meta
	mixin(generateVirtualReads!(LineProgramHeader, "m_data"));

	// size in bytes of the raw 64-bit header struct
	override @property size_t datasize() {
		return typeof(m_data).sizeof;
	}

	// this class always describes the 64-bit format
	override @property ubyte bits() {
		return 64;
	}
}
298 
// Takes T.sizeof bytes off the front of `buffer` and returns them
// reinterpreted as a T in host byte order. Slicing fails with a range
// error when the buffer holds fewer than T.sizeof bytes.
private T read(T)(ref ubyte[] buffer) {
	auto raw = buffer[0 .. T.sizeof];
	buffer = buffer[T.sizeof .. $];
	return *cast(T*) raw.ptr;
}
304 
// Decodes one unsigned LEB128 value from the front of `buffer` and removes
// the consumed bytes from it.
//
// Each byte contributes its low 7 bits, least significant group first; a
// clear high bit marks the last byte. Groups that would land beyond bit 63
// are consumed but ignored. Indexing fails with a range error if the buffer
// ends before the terminating byte.
private ulong readULEB128(ref ubyte[] buffer) {
	ulong val = 0;
	uint shift = 0;

	while (true) {
		immutable ubyte b = buffer[0];
		buffer = buffer[1 .. $];

		// widen before shifting: an int-sized shift would lose every group from bit 32 upwards
		if (shift < 64) val |= cast(ulong)(b & 0x7f) << shift;
		if ((b & 0x80) == 0) break;
		shift += 7;
	}

	return val;
}

unittest {
	ubyte[] data = [0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
	assert(readULEB128(data) == 624_485);
	assert(data[] == [0xDE, 0xAD, 0xBE, 0xEF]);

	// values wider than 32 bits must survive decoding
	ubyte[] wide = [0x80, 0x80, 0x80, 0x80, 0x10];
	assert(readULEB128(wide) == 0x1_0000_0000UL);
	assert(wide.length == 0);
}
327 
// Decodes one signed LEB128 value from the front of `buffer` and removes the
// consumed bytes from it.
//
// Each byte contributes its low 7 bits, least significant group first; a
// clear high bit marks the last byte, and bit 0x40 of that byte is the sign.
// Groups that would land beyond bit 63 are consumed but ignored. Indexing
// fails with a range error if the buffer ends before the terminating byte.
private long readSLEB128(ref ubyte[] buffer) {
	enum uint bitWidth = long.sizeof * 8;
	long val = 0;
	uint shift = 0;
	ubyte b;

	do {
		b = buffer[0];
		buffer = buffer[1 .. $];

		// widen before shifting: an int-sized shift would lose every group from bit 32 upwards
		if (shift < bitWidth) val |= cast(long)(b & 0x7f) << shift;
		shift += 7;
	} while ((b & 0x80) != 0);

	// sign extend from the last decoded group; 1L keeps the mask 64 bits wide
	if (shift < bitWidth && (b & 0x40) != 0) val |= -(1L << shift);
	return val;
}
346 
// Standard opcodes handled by the line number state machine in
// DebugLine.buildMachine. Opcode 0 introduces an extended opcode.
private enum StandardOpcode : ubyte {
	extendedOp       = 0x00,
	copy             = 0x01,
	advancePC        = 0x02,
	advanceLine      = 0x03,
	setFile          = 0x04,
	setColumn        = 0x05,
	negateStatement  = 0x06,
	setBasicBlock    = 0x07,
	constAddPC       = 0x08,
	fixedAdvancePC   = 0x09,
	setPrologueEnd   = 0x0a,
	setEpilogueBegin = 0x0b,
	setISA           = 0x0c,
}
362 
// Sub-opcodes that follow the extendedOp standard opcode.
// The spelling of setDescriminator is kept as is, since the member name is
// part of the enum's interface.
private enum ExtendedOpcode : ubyte {
	endSequence      = 0x01,
	setAddress       = 0x02,
	defineFile       = 0x03,
	setDescriminator = 0x04,
	lo_user          = 0x80,
	hi_user          = 0xff,
}
371 
// Registers of the line number state machine. Machine.init is the state at
// the start of every sequence: fileIndex and line start at 1, everything
// else relies on D's default initialisation (0 / false). isStatement is
// overwritten by the caller with the header's defaultIsStatement.
private struct Machine {
	ulong address;
	uint operationIndex;
	uint fileIndex = 1;
	uint line = 1;
	uint column;
	bool isStatement;
	bool isBasicBlock;
	bool isEndSequence;
	bool isPrologueEnd;
	bool isEpilogueBegin;
	uint isa;
	uint discriminator;
}
386 
// Snapshots the current state machine registers as one row of the line table.
// The isa and discriminator registers are not part of a row.
auto toAddressInfo(ref Machine m) {
	return AddressInfo(
		m.line,
		m.column,
		m.fileIndex,
		m.address,
		m.operationIndex,
		m.isStatement,
		m.isBasicBlock,
		m.isEndSequence,
		m.isPrologueEnd,
		m.isEpilogueBegin,
	);
}
390 
// One decoded line number program: its header, its directory and file tables
// and the address rows produced by running it.
struct LineProgram {
	private {
		LineProgramHeader m_header;

		ubyte[] m_standardOpcodeLengths;
		FileInfo[] m_files;
		string[] m_dirs;

		Appender!(AddressInfo[]) m_addresses;
	}

	// All rows emitted by the program, in emission order.
	const(AddressInfo)[] addressInfo() const {
		return m_addresses.data;
	}

	// Directory index recorded for the file with the given 1-based index.
	size_t dirIndex(ulong fileIndex) const {
		return m_files[fileIndex - 1].dirIndex;
	}

	// Path of the file with the given 1-based index, joined with its
	// directory unless the directory index is 0.
	string fileFromIndex(ulong fileIndex) const {
		return pathOf(m_files[fileIndex - 1]);
	}

	// Paths of every file in the file table, in table order.
	string[] allFiles() const {
		string[] paths;
		foreach (entry; m_files) {
			paths ~= pathOf(entry);
		}
		return paths;
	}

	// A directory index of 0 leaves the file name untouched; any other index
	// selects the (1-based) entry of the directory table to prepend.
	private string pathOf(const FileInfo entry) const {
		import std.path : buildPath;

		if (entry.dirIndex == 0) {
			return entry.file;
		}
		return buildPath(m_dirs[entry.dirIndex - 1], entry.file);
	}
}
431 
// One entry of a line program's file table. Constructed positionally as
// FileInfo(file, dirIndex), so the field order must stay as is.
struct FileInfo {
	// file name as stored in the line program
	string file;
	// 1-based index into the directory table; 0 means the name is used without a directory
	size_t dirIndex;
}
436 
// One row of the line table: a snapshot of the state machine registers taken
// whenever the program emits a row. Built positionally by toAddressInfo, so
// the field order must stay as is.
struct AddressInfo {
	// source line
	uint line;
  // source column
  uint column;
	// 1-based index into the file table of the owning LineProgram
	uint fileIndex;
	// machine address the row applies to
	ulong address;
	// copy of the state machine's operationIndex register
	uint operationIndex;
	// flags copied from the state machine when the row was emitted
	bool isStatement;
	bool isBasicBlock;
	// set on the row emitted by the endSequence extended opcode
	bool isEndSequence;
	bool isPrologueEnd;
	bool isEpilogueBegin;
}