by Gu Wei
2021年10月
Architecture Lab 一共有三个部分。第一部分让你手写Y86-64汇编代码,并利用提供的汇编器yas将.ys汇编成目标文件.yo。第二部分让你给顺序执行的处理器(SEQ)增加iaddq指令。第三部分建立在前两者之上:我们既要手写Y86-64汇编、从算法上提高程序性能,还要修改流水线处理器,使它执行这个程序更快。
在开始之前,建议直接把Makefile中的GUIMODE、TKLIBS、TKINC三个全部注释掉,不使用GUI模式只用TTY模式。本人一开始也想要用GUI,于是下载了tcl、tk等工具,但是在编译的时候,发现tcl8.6会报错:error: ‘Tcl_Interp’ has no member named ‘result’。查了一下,是tcl8.6相比tcl8.5把result成员去掉了,使得编译不成功。准备下载tcl8.5,可是ubuntu20.04的包管理器似乎已经不提供tcl8.5了,只能去网上下载tcl8.5的deb文件再安装。但是嫌麻烦就不做了。
在sim/misc文件夹下,将三个C程序翻译成Y86-64汇编程序(*.ys)。利用提供的yas程序将.ys变成.yo目标文件(.yo这里是ASCII格式的,虽然严格意义上应该是二进制文件,但是这样才有可读性),再通过yis程序来模拟机器代码的运行,会输出内存和寄存器的变化。三个C程序分别是链表求和、递归链表求和以及拷贝内存块,其中handout要求的sample list以及copy block分别为:
x1# Sample linked list
2.align 8
3ele1:
4 .quad 0x00a
5 .quad ele2
6ele2:
7 .quad 0x0b0
8 .quad ele3
9ele3:
10 .quad 0xc00
11 .quad 0
12
13.align 8
14# Source block
15src:
16 .quad 0x00a
17 .quad 0x0b0
18 .quad 0xc00
19# Destination block
20dest:
21 .quad 0x111
22 .quad 0x222
23 .quad 0x333
链表求和。C程序如下:
xxxxxxxxxx
161/* linked list element */
2typedef struct ELE {
3 long val;
4 struct ELE *next;
5} *list_ptr;
6
7/* sum_list - Sum the elements of a linked list */
8long sum_list(list_ptr ls)
9{
10 long val = 0;
11 while (ls) {
12 val += ls->val;
13 ls = ls->next;
14 }
15 return val;
16}
本人手写的汇编如下:
xxxxxxxxxx
421# sum.ys
2# Y86-64 for sum_list
3 .pos 0
4 irmovq stack, %rsp
5 call main
6 halt
7
8
9 .align 8
10 ele1:
11 .quad 0x00a
12 .quad ele2
13 ele2:
14 .quad 0x0b0
15 .quad ele3
16 ele3:
17 .quad 0xc00
18 .quad 0
19
20
21main:
22 irmovq ele1,%rdi
23 call sum_list
24 ret
25
26
27sum_list:
28 irmovq $0x0,%rax
29 jmp test
30loop:
31 mrmovq 0x0(%rdi),%r8
32 addq %r8,%rax
33 mrmovq 0x8(%rdi),%rdi
34test:
35 andq %rdi,%rdi
36 jne loop
37 ret
38
39
40 .pos 0x200
41stack:
42
利用yas生成.yo程序:
xxxxxxxxxx
11❯ ./yas sum.ys
sum.yo如下:
xxxxxxxxxx
411 | # Y86-64 for sum_list
20x000: | .pos 0
30x000: 30f40002000000000000 | irmovq stack, %rsp
40x00a: 804800000000000000 | call main
50x013: 00 | halt
6 |
7 |
80x018: | .align 8
90x018: | ele1:
100x018: 0a00000000000000 | .quad 0x00a
110x020: 2800000000000000 | .quad ele2
120x028: | ele2:
130x028: b000000000000000 | .quad 0x0b0
140x030: 3800000000000000 | .quad ele3
150x038: | ele3:
160x038: 000c000000000000 | .quad 0xc00
170x040: 0000000000000000 | .quad 0
18 |
19 |
200x048: | main:
210x048: 30f71800000000000000 | irmovq ele1,%rdi
220x052: 805c00000000000000 | call sum_list
230x05b: 90 | ret
24 |
25 |
260x05c: | sum_list:
270x05c: 30f00000000000000000 | irmovq $0x0,%rax
280x066: 708500000000000000 | jmp test
290x06f: | loop:
300x06f: 50870000000000000000 | mrmovq 0x0(%rdi),%r8
310x079: 6080 | addq %r8,%rax
320x07b: 50770800000000000000 | mrmovq 0x8(%rdi),%rdi
330x085: | test:
340x085: 6277 | andq %rdi,%rdi
350x087: 746f00000000000000 | jne loop
360x090: 90 | ret
37 |
38 |
390x200: | .pos 0x200
400x200: | stack:
41
利用yis查看结果:
xxxxxxxxxx
101❯ ./yis sum.yo
2Stopped in 26 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
3Changes to registers:
4%rax: 0x0000000000000000 0x0000000000000cba
5%rsp: 0x0000000000000000 0x0000000000000200
6%r8: 0x0000000000000000 0x0000000000000c00
7
8Changes to memory:
90x01f0: 0x0000000000000000 0x000000000000005b
100x01f8: 0x0000000000000000 0x0000000000000013
可以看出返回值%rax为0xcba,没毛病!
递归版链表求和。c程序如下:
xxxxxxxxxx
171/* linked list element */
2typedef struct ELE {
3 long val;
4 struct ELE *next;
5} *list_ptr;
6
7/* rsum_list - Recursive version of sum_list */
8long rsum_list(list_ptr ls)
9{
10 if (!ls)
11 return 0;
12 else {
13 long val = ls->val;
14 long rest = rsum_list(ls->next);
15 return val + rest;
16 }
17}
本人手写的汇编如下:
xxxxxxxxxx
431# rsum.ys
2# Y86-64 for rsum_list
3 .pos 0
4 irmovq stack, %rsp
5 call main
6 halt
7
8
9 .align 8
10 ele1:
11 .quad 0x00a
12 .quad ele2
13 ele2:
14 .quad 0x0b0
15 .quad ele3
16 ele3:
17 .quad 0xc00
18 .quad 0
19
20
21main:
22 irmovq ele1, %rdi
23 call rsum_list
24 ret
25
26
27rsum_list:
28 pushq %r12
29 irmovq $0x0, %rax
30 andq %rdi, %rdi
31 je end
32 mrmovq 0x0(%rdi), %r12
33 mrmovq 0x8(%rdi), %rdi
34 call rsum_list
35 addq %r12, %rax
36end:
37 popq %r12
38 ret
39
40
41 .pos 0x200
42stack:
43
测试有:
xxxxxxxxxx
161❯ ./yas rsum.ys
2❯ ./yis rsum.yo
3Stopped in 42 steps at PC = 0x13. Status 'HLT', CC Z=0 S=0 O=0
4Changes to registers:
5%rax: 0x0000000000000000 0x0000000000000cba
6%rsp: 0x0000000000000000 0x0000000000000200
7
8Changes to memory:
90x01b8: 0x0000000000000000 0x0000000000000c00
100x01c0: 0x0000000000000000 0x0000000000000090
110x01c8: 0x0000000000000000 0x00000000000000b0
120x01d0: 0x0000000000000000 0x0000000000000090
130x01d8: 0x0000000000000000 0x000000000000000a
140x01e0: 0x0000000000000000 0x0000000000000090
150x01f0: 0x0000000000000000 0x000000000000005b
160x01f8: 0x0000000000000000 0x0000000000000013
可以看出返回值%rax为0xcba,没毛病!
把src地址上的内存拷贝到dest地址上,返回拷贝值的异或。c代码如下:
xxxxxxxxxx
121/* copy_block - Copy src to dest and return xor checksum of src */
2long copy_block(long *src, long *dest, long len)
3{
4 long result = 0;
5 while (len > 0) {
6 long val = *src++;
7 *dest++ = val;
8 result ^= val;
9 len--;
10 }
11 return result;
12}
本人写的汇编如下:
xxxxxxxxxx
561# copy.ys
2# Y86-64 for copy_block
3 .pos 0
4 irmovq stack, %rsp
5 call main
6 halt
7
8
9 .align 8
10# Source block
11src:
12 .quad 0x00a
13 .quad 0x0b0
14 .quad 0xc00
15# Destination block
16dest:
17 .quad 0x111
18 .quad 0x222
19 .quad 0x333
20
21
22main:
23 irmovq src, %rdi
24 irmovq dest, %rsi
25 irmovq $0x3, %rdx
26 call copy_block
27 ret
28
29
30copy_block:
31 pushq %r12
32 pushq %r13
33 pushq %r14
34 irmovq $0x1, %r12
35 irmovq $0x8, %r13
36 irmovq $0x0, %rax
37 jmp test
38loop:
39 mrmovq 0x0(%rdi), %r14
40 addq %r13, %rdi
41 rmmovq %r14, 0x0(%rsi)
42 addq %r13, %rsi
43 xorq %r14, %rax
44 subq %r12, %rdx
45test:
46 andq %rdx, %rdx
47 jg loop
48 popq %r14
49 popq %r13
50 popq %r12
51 ret
52
53
54 .pos 0x100
55stack:
56
测试有:
xxxxxxxxxx
151❯ ./yas copy.ys
2❯ ./yis copy.yo
3Stopped in 45 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
4Changes to registers:
5%rax: 0x0000000000000000 0x0000000000000cba
6%rsp: 0x0000000000000000 0x0000000000000100
7%rsi: 0x0000000000000000 0x0000000000000048
8%rdi: 0x0000000000000000 0x0000000000000030
9
10Changes to memory:
110x0030: 0x0000000000000111 0x000000000000000a
120x0038: 0x0000000000000222 0x00000000000000b0
130x0040: 0x0000000000000333 0x0000000000000c00
140x00f0: 0x0000000000000000 0x000000000000006f
150x00f8: 0x0000000000000000 0x0000000000000013
可见返回值%rax没有问题,dest处的0x111、0x222、0x333也改成了0x00a、0x0b0、0xc00。没毛病!
在sim/seq文件夹中,修改seq-full.hcl文件,实现iaddq指令
在书中Practice Problem 4.3找到iaddq指令,可见它是把一个八字节的常数V加到rB之中。
参考Figure 4.18的OPq和irmovq可以依葫芦画瓢得到iaddq在六个阶段的操作:
| Stage | iaddq V, rB |
|---|---|
| Fetch | icode:ifun ← M1[PC]<br>rA:rB ← M1[PC+1]<br>valC ← M8[PC+2]<br>valP ← PC+10 |
| Decode | valB ← R[rB] |
| Execute | valE ← valB + valC<br>Set CC |
| Memory | |
| Write back | R[rB] ← valE |
| PC update | PC ← valP |
参考4.3.4节修改seq-full.hcl,增加iaddq有(只列出修改部分):
xxxxxxxxxx
541################ Fetch Stage ###################################
2
3bool instr_valid = icode in
4{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
5IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ }; # 加入IIADDQ
6
7# Does fetched instruction require a regid byte?
8bool need_regids =
9icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
10IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ }; # 加入IIADDQ
11
12# Does fetched instruction require a constant word?
13bool need_valC =
14icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ }; # 加入IIADDQ
15
16################ Decode Stage ###################################
17
18## What register should be used as the B source?
19word srcB = [
20icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB; # 加入IIADDQ
21icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
221 : RNONE; # Don't need register
23];
24
25## What register should be used as the E destination?
26word dstE = [
27icode in { IRRMOVQ } && Cnd : rB;
28icode in { IIRMOVQ, IOPQ, IIADDQ} : rB; # 加入IIADDQ
29icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
301 : RNONE; # Don't write any register
31];
32
33
34################ Execute Stage ###################################
35
36## Select input A to ALU
37word aluA = [
38icode in { IRRMOVQ, IOPQ } : valA;
39icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC; # 加入IIADDQ
40icode in { ICALL, IPUSHQ } : -8;
41icode in { IRET, IPOPQ } : 8;
42# Other instructions don't need ALU
43];
44
45## Select input B to ALU
46word aluB = [
47icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
48IPUSHQ, IRET, IPOPQ, IIADDQ } : valB; # 加入IIADDQ
49icode in { IRRMOVQ, IIRMOVQ } : 0;
50# Other instructions don't need ALU
51];
52
53## Should the condition codes be updated?
54bool set_cc = icode in { IOPQ, IIADDQ }; # 加入IIADDQ
make一下
xxxxxxxxxx
51❯ make VERSION=full # 懒的话直接修改makefile把version从std改成full就好了
2# Building the seq-full.hcl version of SEQ
3../misc/hcl2c -n seq-full.hcl <seq-full.hcl >seq-full.c
4gcc -Wall -O2 -I../misc -o ssim \
5 seq-full.c ssim.c ../misc/isa.c -lm
测试一个简单的Y86-64程序
xxxxxxxxxx
491❯ ./ssim -t ../y86-code/asumi.yo
2Y86-64 Processor: seq-full.hcl
3137 bytes of code read
4IF: Fetched irmovq at 0x0. ra=----, rb=%rsp, valC = 0x100
5IF: Fetched call at 0xa. ra=----, rb=----, valC = 0x38
6Wrote 0x13 to address 0xf8
7IF: Fetched irmovq at 0x38. ra=----, rb=%rdi, valC = 0x18
8IF: Fetched irmovq at 0x42. ra=----, rb=%rsi, valC = 0x4
9IF: Fetched call at 0x4c. ra=----, rb=----, valC = 0x56
10Wrote 0x55 to address 0xf0
11IF: Fetched xorq at 0x56. ra=%rax, rb=%rax, valC = 0x0
12IF: Fetched andq at 0x58. ra=%rsi, rb=%rsi, valC = 0x0
13IF: Fetched jmp at 0x5a. ra=----, rb=----, valC = 0x83
14IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
15IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
16IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
17IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
18IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
19IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
20IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
21IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
22IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
23IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
24IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
25IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
26IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
27IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
28IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
29IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
30IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
31IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
32IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
33IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
34IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
35IF: Fetched ret at 0x8c. ra=----, rb=----, valC = 0x0
36IF: Fetched ret at 0x55. ra=----, rb=----, valC = 0x0
37IF: Fetched halt at 0x13. ra=----, rb=----, valC = 0x0
3832 instructions executed
39Status = HLT
40Condition Codes: Z=1 S=0 O=0
41Changed Register State:
42%rax: 0x0000000000000000 0x0000abcdabcdabcd
43%rsp: 0x0000000000000000 0x0000000000000100
44%rdi: 0x0000000000000000 0x0000000000000038
45%r10: 0x0000000000000000 0x0000a000a000a000
46Changed Memory State:
470x00f0: 0x0000000000000000 0x0000000000000055
480x00f8: 0x0000000000000000 0x0000000000000013
49ISA Check Succeeds
用y86-code中的基准程序做回归测试(这些程序不使用iaddq)
xxxxxxxxxx
351❯ (cd ../y86-code; make testssim)
2../seq/ssim -t asum.yo > asum.seq
3../seq/ssim -t asumr.yo > asumr.seq
4../seq/ssim -t cjr.yo > cjr.seq
5../seq/ssim -t j-cc.yo > j-cc.seq
6../seq/ssim -t poptest.yo > poptest.seq
7../seq/ssim -t pushquestion.yo > pushquestion.seq
8../seq/ssim -t pushtest.yo > pushtest.seq
9../seq/ssim -t prog1.yo > prog1.seq
10../seq/ssim -t prog2.yo > prog2.seq
11../seq/ssim -t prog3.yo > prog3.seq
12../seq/ssim -t prog4.yo > prog4.seq
13../seq/ssim -t prog5.yo > prog5.seq
14../seq/ssim -t prog6.yo > prog6.seq
15../seq/ssim -t prog7.yo > prog7.seq
16../seq/ssim -t prog8.yo > prog8.seq
17../seq/ssim -t ret-hazard.yo > ret-hazard.seq
18grep "ISA Check" *.seq
19asum.seq:ISA Check Succeeds
20asumr.seq:ISA Check Succeeds
21cjr.seq:ISA Check Succeeds
22j-cc.seq:ISA Check Succeeds
23poptest.seq:ISA Check Succeeds
24prog1.seq:ISA Check Succeeds
25prog2.seq:ISA Check Succeeds
26prog3.seq:ISA Check Succeeds
27prog4.seq:ISA Check Succeeds
28prog5.seq:ISA Check Succeeds
29prog6.seq:ISA Check Succeeds
30prog7.seq:ISA Check Succeeds
31prog8.seq:ISA Check Succeeds
32pushquestion.seq:ISA Check Succeeds
33pushtest.seq:ISA Check Succeeds
34ret-hazard.seq:ISA Check Succeeds
35rm asum.seq asumr.seq cjr.seq j-cc.seq poptest.seq pushquestion.seq pushtest.seq prog1.seq prog2.seq prog3.seq prog4.seq prog5.seq prog6.seq prog7.seq prog8.seq ret-hazard.seq
测试除了iaddq以外的所有指令
xxxxxxxxxx
131❯ (cd ../ptest; make SIM=../seq/ssim)
2./optest.pl -s ../seq/ssim
3Simulating with ../seq/ssim
4 All 49 ISA Checks Succeed
5./jtest.pl -s ../seq/ssim
6Simulating with ../seq/ssim
7 All 64 ISA Checks Succeed
8./ctest.pl -s ../seq/ssim
9Simulating with ../seq/ssim
10 All 22 ISA Checks Succeed
11./htest.pl -s ../seq/ssim
12Simulating with ../seq/ssim
13 All 600 ISA Checks Succeed
测试所有指令
xxxxxxxxxx
131❯ (cd ../ptest; make SIM=../seq/ssim TFLAGS=-i)
2./optest.pl -s ../seq/ssim -i
3Simulating with ../seq/ssim
4 All 58 ISA Checks Succeed
5./jtest.pl -s ../seq/ssim -i
6Simulating with ../seq/ssim
7 All 96 ISA Checks Succeed
8./ctest.pl -s ../seq/ssim -i
9Simulating with ../seq/ssim
10 All 22 ISA Checks Succeed
11./htest.pl -s ../seq/ssim -i
12Simulating with ../seq/ssim
13 All 756 ISA Checks Succeed
全部succeed就好了!
在sim/pipe文件夹下,修改ncopy.ys和pipe-full.hcl两个文件,使得ncopy.ys跑得越快越好。handout里面写测试写的复杂极了,有点事无巨细的感觉。我就只用到以下几个:
IOPQ
- `(cd ../ptest; make SIM=../pipe/psim TFLAGS=-i)`:回归测试流水线模拟器,去掉TFLAGS就不测试iaddq了;
- `./correctness.pl`(用YIS测试);
- `./correctness.pl -p`(用simulator测试);
- `./benchmark.pl`(跑分,得到CPE);
- `./check-len.pl < ncopy.yo`(检查ncopy的代码长度)。

用这几个就好了。
ncopy的c源码如下:
xxxxxxxxxx
181/*
2 * ncopy - copy src to dst, returning number of positive ints
3 * contained in src array.
4 */
5word_t ncopy(word_t *src, word_t *dst, word_t len)
6{
7 word_t count = 0;
8 word_t val;
9
10 while (len > 0) {
11 val = *src++;
12 *dst++ = val;
13 if (val > 0)
14 count++;
15 len--;
16 }
17 return count;
18}
这里从略,和之前两个Part极其类似,CPE从15.18变为12.70。
观察ncopy.c可以看出程序具有数据相关。根据第五章的知识,我们可以采用循环展开突破延迟界限达到吞吐量界限——一种完全流水线下且利用所有功能单元的终极性能。抄了网上的代码如下:
xxxxxxxxxx
761# You can modify this portion
2 # Loop header
3 xorq %rax,%rax # count = 0;
4
5 iaddq $-5, %rdx
6 jg Loop6x6
7 iaddq $5, %rdx
8 jg Loop1
9 ret
10
11Loop1:
12 mrmovq (%rdi), %r8
13 rrmovq %rax, %r14
14 iaddq $1, %r14
15 andq %r8, %r8
16 cmovg %r14, %rax
17 rmmovq %r8, (%rsi)
18
19 iaddq $8, %rdi # src++
20 iaddq $8, %rsi # dst++
21 iaddq $-1, %rdx # len--
22
23 jg Loop1
24 ret
25
26Loop6x6:
27 mrmovq (%rdi), %r8
28 rrmovq %rax, %r14
29 iaddq $1, %r14
30 andq %r8, %r8
31 cmovg %r14, %rax
32 rmmovq %r8, (%rsi)
33
34 mrmovq 8(%rdi), %r8
35 rrmovq %rax, %r14
36 iaddq $1, %r14
37 andq %r8, %r8
38 cmovg %r14, %rax
39 rmmovq %r8, 8(%rsi)
40
41 mrmovq 16(%rdi), %r8
42 rrmovq %rax, %r14
43 iaddq $1, %r14
44 andq %r8, %r8
45 cmovg %r14, %rax
46 rmmovq %r8, 16(%rsi)
47
48 mrmovq 24(%rdi), %r8
49 rrmovq %rax, %r14
50 iaddq $1, %r14
51 andq %r8, %r8
52 cmovg %r14, %rax
53 rmmovq %r8, 24(%rsi)
54
55 mrmovq 32(%rdi), %r8
56 rrmovq %rax, %r14
57 iaddq $1, %r14
58 andq %r8, %r8
59 cmovg %r14, %rax
60 rmmovq %r8, 32(%rsi)
61
62 mrmovq 40(%rdi), %r8
63 rrmovq %rax, %r14
64 iaddq $1, %r14
65 andq %r8, %r8
66 cmovg %r14, %rax
67 rmmovq %r8, 40(%rsi)
68
69
70 iaddq $48, %rdi # src++
71 iaddq $48, %rsi # dst++
72 iaddq $-6, %rdx # len--
73
74 jg Loop6x6
75 iaddq $5, %rdx
76 jg Loop1
跑个分有CPE为8.63,得分37.4/60.0
从某种角度上来说,这个lab就这样差不多结束了。
本人于2021/10/24完成了Architecture Lab,耗时三天。难以评价这个lab,个人觉得自己水平还没有到去真正吸收这个lab的地步吧。回头想想当初读第四章的时候觉得,这章许多内容有点冗杂,同一个知识点讲述不集中。现在觉得作者还是有自己的初衷的,作者设计了Y86-64汇编语言和它的汇编工具,处理器模拟器又是由作者设计的、用来描述硬件的HCL语言所生成。所以第四章需要花费笔墨和读者讲述自己的Y86-64以及HCL,而对处理器的讨论却给冲淡了。有一种作者很用心,但是读者却难以收获什么的感觉……
现在主要还有三个lab没有做——cache lab、shell lab、malloc lab。performance lab被cmu替换成cache lab了,那我也不做;proxy lab用到的是第三部分的知识,主要就是套接字编程,当初也没看得很懂第三部分,网上虽然说这个lab体量很小,但是限于个人时间精力还是不做为妙。我想接下来要做这些: