perf patch related –每每看到xiao桑的代码都惊为天人

+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+struct kvm_event_key {
+	const char *name;
+	key_cmp_fun key;
+static int trace_vcpu = -1;
+#define GET_EVENT_KEY(func, field)					\
+static u64 get_event_ ##func(struct kvm_event *event, int vcpu)		\
+{									\
+	if (vcpu == -1)							\
+		return event->total.field;				\
+									\
+	if (vcpu >= event->max_vcpu)					\
+		return 0;						\
+									\
+	return event->vcpu[vcpu].field;					\
+#define COMPARE_EVENT_KEY(func, field)					\
+GET_EVENT_KEY(func, field)						\
+static int compare_kvm_event_ ## func(struct kvm_event *one,		\
+					struct kvm_event *two, int vcpu)\
+{									\
+	return get_event_ ##func(one, vcpu) >				\
+				get_event_ ##func(two, vcpu);		\
+GET_EVENT_KEY(time, time);
+COMPARE_EVENT_KEY(count, stats.n);
+COMPARE_EVENT_KEY(mean, stats.mean);
+#define DEF_SORT_NAME_KEY(name, compare_key)				\
+	{ #name, compare_kvm_event_ ## compare_key }
+static struct kvm_event_key keys[] = {
+	DEF_SORT_NAME_KEY(sample, count),
+	DEF_SORT_NAME_KEY(time, mean),
+	{ NULL, NULL }


C programming language calling procedure with assembly language description

Source code:

int g(int x)
return x + 3;

int f(int x)
return g(x);

int main(void)
return f(8) + 1;

The assembly language is generated by:
gcc –S –o main.s main.c -m32
Assembly Source Code











_main:                               ## @main
pushl %ebp                       ##  suppose ebp = esp = 2000
movl %esp, %ebp
subl $4, %esp
movl $20, (%esp)              ## set content in %esp to 20 for later use. (Parameter)
call f: push %eip; movl %(f), %eip
## suppose f is 23, then 23 stores in addresss 1992
## see the sketch below for more details
pushl %ebp                       ## save ebp and esp = esp – 4
movel %esp, %ebp           ## let the new ebp points to the new esp
subl $4, %esp
movl 8(%ebp), %eax         ## eax = (%ebp) + 8 = 20
movl %eax, (%esp)           ## (%esp) = 20
call g: push %eip, movl %g, %eip
to be contined by readers. 🙂

Source Code Analysis

Thanks for reading!

perf tool source code analysis: perf record

Perf tool in Linux Kernel is used to analyze various kinds of performance issues. More information could be accessed here, but this article goes with the typical calling procedure of the built in core function “cmd_record”. Source code of perf could be found in linux/tools/perf/perf.c. Now let’s begin with function main(). You could read this article for a quick review. Don’t panic!
Take perf record -a sleep 3 for example.
1. main()->run_argv()->handle_internal_command()->run_builtin()->
status = p->fn(argc, argv, prefix)->cmd_record()
build a new record struct rec with struct record *rec = &record;
/*record is initialized with the following code*/
static struct record record = {
    .opts = {
    .tool = {
Skip these data structures as you wish but don’t hesitate to look up for the variables later.
struct perf_evlist {
    struct list_head entries;
    struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];//1<<8
    struct fdarray     pollfd;
    struct thread_map *threads;
    struct cpu_map      *cpus;
    struct perf_evsel *selected;

Note: perf_evsel stands for one event, and perf_evlist stands for all events we selected, which perf uses to communicate with kernel.
2. rec->evlist = perf_evlist__new();
struct perf_evlist *evlist = zalloc(sizeof(*evlist));
perf_evlist__init(evlist, NULL, NULL);
2.1 init all 256 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]  to NULL
2.2 init struct list_head entries
2.3 perf_evlist__set_maps(evlist, NULL, NULL)
  • set evlist->cpu and evlist->threads to NULL
  • perf_evlist__propagate_maps
    1. struct perf_evsel *evsel;
    2. evlist__for_each(evlist, evsel) {
      set all evsel->cpu and evsel->thread in evlist to NULL;
      # __evlist__for_each(&(evlist)->entries, evsel)
      # list_for_each_entry(evsel, &(evlist)->entries, node)
      # struct list_head entries; entries belongs to struct evlist
      # struct list_head node; node belongs to struct evsel,
      # we use node to insert evsel into evlist->entries or in other list_heads
      # #define list_for_each_entry(evsel, list, node)
      # for (evsel = list_first_entry(list, typeof(*evsel), node);      
                   &evsel->node != (list);                    
                   evsel = list_next_entry;
      # evlist and evsel are connected through double linked list struct list_head entries in evlist and node in evsel. We’ll talk about it later.
  • fdarray__init(&evlist->pollfd, 64);
    # set fdarray.nr_autogrow to 64 and others to 0 or NULL;

3. perf_evlist__add_default(rec->evlist);
    struct perf_event_attr attr = {
        .type = PERF_TYPE_HARDWARE,
        .config = PERF_COUNT_HW_CPU_CYCLES,
add a new evsel named “cycles” to evlist
3.1 evsel = perf_evsel__new(&attr);
3.2 perf_evlist__add(evlist, evsel)

  • entry->evlist = evlist; # note: put evsel in one evlist for further useage
  • list_add_tail(&entry->node, &evlist->entries);
    # note: list_head is a double linked list which is quite commonly used in kernel.
4. target__parse_uid(&rec->
target->uid = UINT_MAX;
5. perf_evlist__create_maps(rec->evlist, &rec->
target = {pid = 0x0, tid = 0x0, cpu_list = 0x0, uid_str = 0x0, uid = 4294967295, system_wide = true, uses_mmap = true, default_per_cpu = true, per_thread = false}
5.1 evlist->threads = thread_map__new_str(target->pid, target->tid, target->uid)
  • thread_map__new_str(NULL, NULL, UINT_MAX)
  • thread_map__new_by_tid_str(NULL);
  • thread_map__new_dummy();
  • thread_map__alloc(1);
  • thread_map__realloc(NULL, __nr);

# allocate sizeof(struct thread_map) + sizeof(struct thread_map_data) for thread_map
5.2 cpu_map__new(target->cpu_list);

  • cpu_map__read_all_cpu_map()
    1. fp = open(/sys/devices/system/cpu/online, r)
    2. struct cpu_map *cpus = cpu_map__read(fp);
      # read online cpus and allocate a integer array for them. (tmp_cpus[0,1,2,3,…])
    3. cpu_map__trim_new(nr_cpus, tmp_cpus)
      struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)*nr_cpus);
      memcpy(cpus->map, tmp_cpus, sizeof(int)*nr_cpus);
      # copy the content of online cpus directly to cpus->map.

    Till now, target->cpu_list got online cpu information.
    Like: for (i = 0; i < nr_cpus; ++i) cpus->map[i] = i;

5.3 perf_evlist__propagate_maps(evlist, !!target->cpu_list)
ensure each evsel in evlist get the right cpu_list
5.4 record_opts__config(&rec->opts)
set recording frequency for perf
CORE FUNCTION __cmd_record(&record, argc, argv)
struct machine {
    char          *root_dir;
    struct dsos      dsos;
    struct map_groups kmaps;
    struct map      *vmlinux_maps[MAP__NR_TYPES];
    u64          kernel_start;
    symbol_filter_t      symbol_filter;
struct map_groups {
    struct maps     maps[MAP__NR_TYPES];
    struct machine     *machine;
atomic_t     refcnt;
1. perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool)
Will be called by perf record with write way.(perf report with read way)
perf_session__new(&rec->file, false, &rec->tool);
1.1 machines__init(&session->machines);
machine__init((struct machine)&machines->host, “”, HOST_KERNEL_ID);
  • map_groups__init((struct map_groups)&machine->kmaps, machine);
    maps__init((struct maps)&mg->maps[i]);
  • dsos__init(&machine->dsos)

1.2 ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
init ordered_events
1.3 perf_data_file__open(file)
by default open the output file as “” with create mode
1.4 perf_session__create_kernel_maps(session)
allocate memory for the initialized session->machines.machine

  • struct dso *kernel = machine__get_kernel(machine);
    • kernel = machine__findnew_kernel(machine, vmlinux_name, “[kernel]”, DSO_TYPE_KERNEL);
      machine__findnew_kernel(machine, “[kernel.kallsyms]”, “[kernel]”, 1);
      struct dso *dso = machine__findnew_dso(machine, “[kernel.kallsyms]”);
      dsos__findnew((struct dsos)&machine->dsos, “[kernel.kallsyms]”)
      find kernel with name “[kernel.kallsyms]”

      dso__get(__dsos__findnew(dsos, name));->
      __dsos__findnew(dsos, name)->
      __dsos__find(dsos, name, false);->

      __dso__find_by_longname(&dsos->root, name);->
      Make a dynamic shared object with “[kernel.kallsyms]” then insert it into &machine->dsos
      __dsos__addnew(dsos, name);->
      __dsos__add(dsos, dso);

    • dso__read_running_kernel_build_id(kernel, machine);
      Read build_id from /sys/kernel/notes which n_type=3 and n_namesz=3
      sysfs__read_build_id(path, dso->build_id, sizeof(dso->build_id)
      read_build_id(void *buf, buf_size,dso->build_id, sizeof(dso->build_id), false); // buf_size (stbuf.st_size of /sys/kernel/notes) typical value: 360B
      // File note is made of series of structure like
      struct {
           u32 n_namesz;
           u32 n_descsz;
           u32 n_type;
      } *nhdr;
      if (nhdr->n_type == NT_GNU_BUILD_ID &&
      nhdr->n_namesz == sizeof(“GNU”))
      In the first n_namesz stores a pointer points to the name of the very field. See the code here for the whole content of notes file.
      Copy the very desc to dso->build_id.
      Set dso->has_build_id = true.
  • machine__get_running_kernel_start(machine, &name);
    Figure out the start address of _text or _stext in /proc/kallsyms
    addr = kallsyms__get_function_start(filename, name); //filename = “/proc/kallsyms”
    kallsyms__parse(kallsyms_filename, &args, find_symbol_cb)
    Content of /proc/kallsyms filled with lines like 00000000 t fuse_async_req_send
    Read each line and find the very line c1000000 T _text | _stext and record the start address probably c1000000 to start address then return the hex value.(3238002688)
  • __machine__create_kernel_maps(machine, kernel) < 0)
    start = machine__get_running_kernel_start(machine, NULL);// do it again
    Create struct map *map for all MAP__NR_TYPES for passed in structure machine and allocate sizeof(struct map) + sizeof(struct kmap) for each of them.
    for type from 0 to MAP__NR_TYPES
    machine->vmlinux_maps[type] = map__new2(start, kernel, type);
    map__new2(start, kernel, type);->
    map__init(map, type, start, 0, 0, dso);
    void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64,
    pgoff, struct dso *dso)
    kmap = map__kmap(machine->vmlinux_maps[type]);

    struct kmap *kmap = (struct kmap *)(map + 1)
    Set the kmap structure points to machine->kmaps.
    kmap->kmaps = &machine->kmaps;
    MAP__NR_TYPES struct map vmlinux_maps
    |0 | 1|  2 | 3 | …. | MAP__NR_TYPES-1|
    stuct map *map; struct kmap *kmap;
    kmap->kmaps = machine->kmaps;
    map_groups__insert(&machine->kmaps, machine->vmlinux_maps[type]);
    __maps__insert(struct maps *maps, struct map *map)
    Insert new allocated map in machine->vmlinux[type] into kmaps in machine.

    struct machine {
        struct dsos      dsos;
        struct map_groups kmaps;
        struct map      *vmlinux_maps[MAP__NR_TYPES];
        u64          kernel_start;

    struct map {
    struct dso        *dso;
    struct map_groups    *groups;

    Now there is extra space of struct kmap after struct map in machine, and in witch kmaps(map_groups type) points to struct kmap of machine(type map_groups) itself. Another thing is
    1. the struct map_groups *group in the former struct map is initialized by NULL and then set to machine->kmaps;
    2. dso in struct map points to machine->dso
    3. struct map_groups *group in the later struct kmap points to struct map_groups in machine structure.
    machine {
    struct map vmlinux[MAP__NR_TYPES];
    //vmlinux[*]->dso = created dso;
    //(struct kmap *)(vmlinux[*]+1) -> kmaps= machine->kmaps;
    struct kmap {

        struct ref_reloc_sym    *ref_reloc_sym;
        struct map_groups    *kmaps;
    struct map_groups {
        struct maps     maps[MAP__NR_TYPES];
        struct machine     *machine;   // machine->kmaps -> machine = machine;
        atomic_t     refcnt;
  • machine__create_modules(machine);
    modules__parse(modules, machine, machine__create_module)
    Get start address and names for all modules.

    1. struct map *map = machine__findnew_module_map(machine, start, name);
      For each modules, find out whether OS have already has module inserted to machine->dsos, if not, a new dso will be created with passed in module name and then inserted into machine->dsos.
      struct map *map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,;
      if (map == NULL) //Can’t find map, so create one for this module
      struct dso *dso = machine__findnew_module_dso(machine, &m, filename);
      if (dso != NULL)
      Find out if there is existing dso for this module name, if not, create one. Module numbers linked to dso is counted by dso->refcnt.
      struct map *map = map__new2(start, dso, MAP__FUNCTION);
      map_groups__insert(&machine->kmaps, map);
    2. dso__kernel_module_get_build_id(map->dso, machine->root_dir);
      Still for each modules, read /sys/module/[MODULE_NAME]/notes/ just like we did in sysfs__read_build_id(path, dso->build_id, sizeof(dso->build_id) in Symbol-minimal.c
      sysfs__read_build_id(filename, dso->build_id, sizeof(dso->build_id)
    3. machine__set_modules_path(machine);
      Go to /lib/modules/$(KENERL_VERSION)/, check all files including sub-directory if there is modules end with “.ko”, if yes, set the dso’s long_name to the module’s absolute path.kmod_path__parse_name(&m, dent->d_name);
      m->kmod = !strncmp(ext, “.ko”, 3);
      m->name = strdup(name)
      map_groups__set_module_path(mg, path, &m);
      Try to find module in (struct dso *)mg->maps by name. If succeed, set dso’s long_name with module’s absolute path.
      long_name = strdup(path);
      dso__set_long_name(map->dso, long_name, true);
  • map_groups__fixup_end(&machine->kmaps);
    Set prev->end = cur->start;
    last->end = ~0ULL;

Get back to __cmd_record

2. record__init_features(rec);
3. perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, file->is_pipe,

  • Start  a new process using fork() for the suffix parameters e.g. sleep 3. evlist-> = fork();
    Child process(cp) tells parent process(pp) by:
    fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); // set go_pipe[0] to FD_CLOEXEC
    ret = read(go_pipe[0], &bf, 1)
    pp set cp go by
    if (read(child_ready_pipe[0], &bf, 1) == -1) {
        goto out_close_pipes;
    fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);// SET go_pipe[1] to FD_CLOEXEC
    Then cp runs execvp(argv[0], (char **)argv);

4. record__open(rec)

  • perf_evlist__config(rec.evlist, rec.opts);
    perf_do_probe_api(perf_probe_comm_exec, cpu, try[]);
    //const char *try[] = {“cycles:u”, “instructions:u”, “cpu-clock:u”, NULL};
    fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
    //sys_perf_event_open(&evsel->attr, -1, 0, -1, 8); just a probe here
    //This syscall creates a file descriptor that allows measuring performance //information. Each file descriptor corresponds to one event measured;
    //for more details please visit here.
  • Open each of the fd for each cpu on each threads for every evsel on evlist
    evlist__for_each(evlist, pos) {

    perf_evsel__open(pos, pos->cpus, pos->threads)
    __perf_evsel__open(evsel, cpus, threads);
    Set the very file descriptor to the evsel->fd->content[]
    for (cpu = 0; cpu < cpus->nr; cpu++) {
        for (thread = 0; thread < nthreads; thread++) {
            FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
    cpus->map[cpu], group_fd, flags);
    } // endof nthreads
    } // endof cpus->nr
    /*The pid and cpu arguments specifies which process and CPU to

    pid: 0 cpu: -1 measures the calling process for all cpu

    pid: 0 cpu: >= 0 measures the calling process for specified cpu

    pid: > 0 cpu: -1 measures the specified process for all cpu
    pid: > 0 cpu: >= 0 measures the specified process for specified cpu
    pid: -1 cpu: >= 0 measures the all process for specified cpu
    This requires CAP_SYS_ADMIN capability or a
    “/proc/sys/kernel/perf_event_paranoid value of less than 1.”
    pid: -1 cpu: -1 error.


    #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
    static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
    {return &xy->contents[x * xy->row_size + y * xy->entry_size];}
    #define FD(evsel, cpu, thread) (*(int *))xyarrary__entry(evsel->fd, cpu, thread){return evsel->fd->content[cpu * evsel->fd->row_size + thread * evsel->fd->entry_size]}
  • Created fds are stored in evsel->fd->contents[].
    Now perf got all fd with syscall in kernel space, we need to mmap them to userspace.
    perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
    perf_evlist__mmap_ex(evlist, 4294967295, false, 0, false)

    struct mmap_params mp = {
        .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),


    perf_evlist__mmap_per_cpu(evlist, &mp)

    for each online cpu: {
        int output = -1;
        for each thread:
    perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output))


    perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output)) ->
    evlist__for_each(evlist, evsel) {
    __perf_evlist__mmap(evlist, cpu, mp, *output)->
    //*output = FD(evsel, cpu, thread)
    evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, mp->prot,
    MAP_SHARED, *output, 0);
    Then memory could be accessed through evlist->mmap[cpu].base

    &mp->auxtrace_mp, evlist->mmap[cpu].base, *output)
    munmap(mm->base, mm->len)

Then we need to write and map information to file.
5. perf_session__write_header(session, rec->evlist, fd, false);
Write all f_attr of evsel in evlist down to fd

  1.  lseek(fd, sizeof(f_header), SEEK_SET);
  2. evlist__for_each(session->evlist, evsel) {
    evsel->id_offset = lseek(fd, 0, SEEK_CUR);
    //set offset for each evsel in evlist
    do_write(fd, evsel->id, evsel->ids * sizeof(u64))
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    // record all evsel’s id info into id_offset section.
  3. evlist__for_each(evlist, evsel) {
    f_attr = (struct perf_file_attr){
    .attr = evsel->attr,
    .ids  = {
    .offset = evsel->id_offset,
    .size   = evsel->ids * sizeof(u64),
    do_write(fd, &f_attr, sizeof(f_attr));
    // write f_attr above into fd
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    header->data_offset = lseek(fd, 0, SEEK_CUR);
    header->feat_offset = header->data_offset + header->data_size;
    // set offset for data and feat_offset
  4. do_write(fd, &f_header, sizeof(f_header));
    // write header of fd
    evlist__for_each(evlist, evsel) {
    f_attr = (struct perf_file_attr){
    .attr = evsel->attr,
    .ids  = {
    .offset = evsel->id_offset,
    .size   = evsel->ids * sizeof(u64),
    err = do_write(fd, &f_attr, sizeof(f_attr));
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    f_header = (struct perf_file_header){
    .magic       = PERF_MAGIC,
    .size       = sizeof(f_header),
    .attr_size = sizeof(f_attr),
    .attrs = {
    .offset = attr_offset,
    .size   = evlist->nr_entries * sizeof(f_attr),
    .data = {
    .offset = header->data_offset,
    .size    = header->data_size,
    /* event_types is ignored, store zeros */
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    attr_offset   data_offset  data_size
    attr_offset = lseek(fd, 0, SEEK_CUR);
    // set offset for attr in fd

6. perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,machine);
build a union perf_event *event;
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),”%s%s”,
mmap_name, kmap->ref_reloc_sym->name); //”[kernel.kallsyms]_text
event->mmap.header.size = (sizeof(event->mmap) –
(sizeof(event->mmap.filename) – size) + machine->id_hdr_size);
event->mmap.pgoff = kmap->ref_reloc_sym->addr;
event->mmap.start = map->start;
event->mmap.len   = map->end – event->mmap.start;
event->   = machine->pid;
Insert event to rec by calling process_synthesized_event();
record__write(rec, event, event->header.size);

7. perf_event__synthesize_modules(tool, process_synthesized_event,

struct mmap_event {
    struct perf_event_header header;
    u32 pid, tid;
    u64 start;
    u64 len;
    u64 pgoff;
    char filename[PATH_MAX];
union perf_event{
struct mmap_event        mmap;
union perf_event *event;

Create perf_events (as shown above) for all maps in machine->kmaps->maps[MAP__FUNCTION] respectively and fill in members like
mmap.header.type = PERF_RECORD_MMAP
MMAP |struct perf_event_header header| pid, tid| start | len | pgoff | filename |
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
// Length of long_name in dso
event->mmap.header.size =
sizeof(event->mmap) – (sizeof(event->mmap.filename) – size))
// Calculate the real length of mmap and write into header.size, step one.
memset(event->mmap.filename + size, 0, machine->id_hdr_size);
event->mmap.header.size += machine->id_hdr_size;
// Calculate the real length of mmap, step two.
memset(event->mmap.filename + size, 0, machine->id_hdr_size);
Writer machine->id_hdr_size to filename+size part for filename need to store long_name with the first size part.
MMAP | header| pid, tid | start | len | pgoff | filename |
filename |machine->dso->long_name| machine->id_hdr_size|
mmap.start, mmap.len,, mmap.filename. Then insert them all to “” file.
MMAP | header | pid, tid | start | len | pgoff | filename |

8. __machine__synthesize_threads
Since we called perf with -a option, perf_event__synthesize_threads will be called.
For all process in /proc/
__event__synthesize_thread(comm_event, mmap_event, fork_event, pid, (int full)1, process, tool, machine, mmap_data, proc_map_timeout);
For every task(_pid) in /proc/pid/task/, create an comm_event, fork_event, mmap_event for each of them (perf_event__prepare_comm)and get COMM, tigd and ppid for pid in /proc/pid/status by calling perf_event__prepare_comm(comm_event, _pid, machine, &tgid, &ppid).
PID = 3665
*tgid = 3665
*ppid = 2485

event-> = *tgid;(3665) // they share the same pid
event->comm.header.type = PERF_RECORD_COMM;

struct comm_event {
    struct perf_event_header {
    } header;
    u32 pid; 3665
    u32 tid; _pid in /proc/3665/task/
    char comm[16]; name+machine->id_hdr_size

Then insert fork_event, comm_event to
For the main thread, create an mmap_event and read /proc/pid/maps, each line printed will be recorded and then written into in terms of mmap_event.

perf_event__synthesize_mmap_events(tool, mmap_event, pid: 3665, tgid: 3665,
process, machine, mmap_data: false, proc_map_timeout: 500)
cat /proc/3665/maps
82a23000-82e75000 r-xp 00000000 08:01 134003    /lib/i386-linux-gnu/
sscanf(bf, “%llx-%llx %s %llx %x:%x %u %sn”,
&event->mmap2.start, &event->mmap2.len, prot,
&event->mmap2.pgoff, &event->mmap2.maj,
&ino, execname);
struct mmap2_event {
struct perf_event_header header;
u32 pid, tid; 3665, 3665
u64 start; 0x82a23000
u64 len; 452000 (event->mmap2.len -= event->mmap.start;)
u64 pgoff; 00000000
u32 maj; 08
u32 min; 01
u64 ino; 134003
u64 ino_generation;
u32 prot; 0x4 | 0x1
u32 flags; 0x02;
char filename[PATH_MAX]; “/lib/i386-linux-gnu/”
char execname[PATH_MAX] =  ” /lib/i386-linux-gnu/”
header.size = sizeof(event->mmap2)-sizeof(filename)+(strlen(filename)+1)+id_hdr_size

Goagent get “global name ‘SSLContext’ is not defined”

The thing is I updated my goagent from github with my ubuntu, however after that, every url begins with https through goagent is unaccessable.
Tracked log says: NameError: global name ‘SSLContext’ is not defined.
Check python installation package:
sudo apt-get install python-dev python-greenlet python-gevent python-vte python openssl python-crypto python-appindicator python-setuptools
Then: sudo easy_install -U gevent

Why is that?
gevent.ssl.SSLSocket.__init__()  tries to reference SSLContext which doesn’t exist in Python 2.7.8 yet, but it’s introduced in Python 2.7.9.
Bug fixed in the newest version of Python-gevent.
See here for more details.


Got my vps ddosed the other days, and so busy was my working days that I couldn’t spare my time to fix it. Now you see it just works fine, this article write down how I did.

On July 1st I received an email from my vps provider and they told me that I have over 10GB anonymous bit torrent data up/down load per day, hence they have to shut it down to avoid legal issues. Also, I was told that the hacker may left a backdoor even if I change my root password, so I could only reinstall my VPS system. The problem now is: how I could do to save my old home page. Luckily, they offered me an rescue mode, with which I can mount my old disk image, what’s worse is that I couldn’t use chroot. Terrible things just happened.

First of all, mount -t ext4 /media/temp /dev/xvdb and copy important files to my own host, including
wp-content/uploads                                                    //uploaded files
/etc/apache2/sites-available/      //my apache settings
/usr/share/wordpress/wp-config.php                       //wordpress settings
/etc/wordpress/*                                                          //wordpress settings
wp-content/uploads/2015_xx_xx_database.sql      //back up data base, most important

Second, install a new operating system, and install the following programs.
apt-get install apache2 php5  mysql-server mysql-common mysql-client
apt-get install libapache2-mod-auth-mysql php5-mysql php5-gd
Start apache service
service apache2 start
install phpmyadmin and wordpress
apt-get install phpmyadmin wordpress
Now I have wordpress in /usr/share/wordpress.
Upload settings to the corresponding positions.
scp root@dong:/etc/apache2/site-available/
It seems apache2 has made a limitation on its web config file that it must ends with .conf so rename to haodong.conf if you got “ does not exist
For /etc/apache2, which includes default website config containing /var/www/html, make my website work by a2dissite 000-default.conf since in apache2.conf there is

IncludeOptional sites-enabled/*.conf

scp wp-content/uploads root@dong:/usr/share/wordpress/uploads/
scp wp-config.php root@dong:/usr/share/wordpress/wp-config.php                      
scp etc/wordpress/* root@dong:/etc/wordpress

Add on Oct. 16 2015, it seems apache2 has changed its way of setting. Website folders are restrained to /var/www/html, so synchronize files in wordpress.

rsync -avP /usr/share/wordpress/  /var/www/html/

Make sure:
1. define(‘WPLANG’, ‘zh_CN’); exist in /etc/wordpress/wp-config.php
2. Enable write permition of htaccess since I need permenant link.
3. The location in /etc/apache2/sites-available/ is just the wordpress symbolic setting file.
4. Be careful on /etc/wordpress/config-xxx.php, which stored the name and password to mysql data file. Make sure you put them in a safe place, and if not, don’t hesitate to crash your head on a wall. :p
Technically, we finished reinstalling and enabling everthing, let’s try.
1. Open and input your mysql user and password.(Should be set when you install mysql, like root:123456), if you forget everything about mysql, you could reset DB_NAME, DB_USER, DB_PASSWORD by
mysql -u root -p
CREATE USER wordpressuser@localhost IDENTIFIED BY ‘password’;
GRANT ALL PRIVILEGES ON wordpress.* TO wordpressuser@localhost;
2. upload the sql file we set aside, remember wordpress only support upload file under 2MB, so we need to gzip it to a After that, open, set an administrator user:password pair, which will be stored in the wp-users table we just imported.
Should everything be done?
3. Open, oops, I forget my administrator user and password. How should I do? Edit the table wp-users table and set password field to “5d41402abc4b2a76b9719d911017c592” to reset my password to “hello”.
Login, and see how my new website going, but what I saw is an amount of messy code. Actually, the old mysql data file is stored in latin1, so before upload the data file, I need to set it to UTF8 in order to make Chinese characters looks well.
vi xxx_database.sql
gzip xxx.sql and upload the file again with UTF-8 Unicode (utf8)  and utf8_general_ci
Here we got beautiful Chinese words. But wait, from ssh, I still saw Chinese names abnormly showed in my screen.
vi /var/lib/locales/supported.d/local
zh_CN.UTF-8 UTF-8
en_US.UTF-8 UTF-8
zh_CN GB2312
vi /etc/environment
Then run locale-gen
Untill now, I could get access to all of my articles when I open my website. But there is one more thing. The media files could still not be touched. I could change uploads file to 777 but this may cause security issues, actually I don’t want to save my vps again. So what I do is:
Change uploads file to 777 temporarily and upload one picture, log on to my vps and see who owns wordpress as a user. And I got:
# ll wordpress/wp-conteng/uploads/xxx.jpg
OK, gotcha,
chown -R www-data:www-data /usr/share/wordpress
chmod 755 /usr/share/wordpress
Now everything works well.
Oh, there is still one more thing, I’d like to talk about is the security problem, I need to do something to avoid being hacked again.


I got my Windows 8.1 updated on Lenovo T440s Laptop the other days, and found my personal VPN unsuccessfully connected. What’s weired is my Android and IOS devices connected normally. So I reinstalled my PPTP and L2TP based on IP-SEC, and it turned out that the same issue with me. What’s going on?

First of all, I double checked that all different devices had been able to connect my VPN server, so it’s not about my VPS. Secondly, I updated win8 which may be quite the cause of abnormity. Thirdly, it verified that my Ubuntu client connected successfully.

So the anwer is clear, it has nothing to do with my VPS, but about the win8. Actually, (the following solution came from the internet) Microsoft has errors with the newest WAN MINI port drivers since Jan. 2013 which I found it  in a forum, and MS shouldn’t solve it! So the solutions are as follows:
1) Uninstall the drivers related to WAN Miniport (IP), WAN Miniport (IPv6) and WAN Miniport (Network Monitor).
2) Start -> Run ->  Regedit -> HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Class{4d36e972-e325-11ce-bfc1-08002be10318} -> Export
3) Edit the exported regist file and locate the above three subkey and deleted them all, double check you don’t delete the wrong part. Those subkeys has a “DriverDesc” which matches your broken miniports.
4) Go back to Device Manager, and now you are able to update the WAN MINI PORT drivers with errors. Right click the WAN MINI PORT IP (4 eg) -> Update Driver Software -> Browse My Computer -> Let me pick driver from a list -> uncheck “Show compatible hardware” and wait for the drivers listing generation. Then choose the first “Microsoft” drivers listed and pull the polling row to the top and then choose the first [BlueTooth Personal], and ignore warnings. Then the driver will become a fake blue tooth driver and you can delete it now.
5) Repeat step 4 for the remaining  WAN MINIport IPv6 and  WAN MINIport (Network Monitor)
6) Reboot, then all the right drivers will be installed automatically.




唠叨两句,线性地址和物理地址经过硬件转化,所以看到奇奇怪怪的物理地址也不用担心,这跟逻辑地址没什么直接联系,另外,读代码之前要先明白这段代码是做swapper_pg_dir初始化用的,带着目的读会提高效率. 回顾x86的内存寻址过程,大概是CR3寄存器配合PDT,然后找到PTE的某项后加Page Offset找到所要的内容。不过这里是临时页表的初始化,NO PAE,OK,继续!

/arch/x86/kernel/head_32.S 页目录初始化代码节选
/* Physical address */
#define pa(X) ((X) – __PAGE_OFFSET)
page_pde_offset = (__PAGE_OFFSET >> 20);
  • movl $pa(__brk_base), %edi
  • movl $pa(initial_page_table), %edx
  • movl $PTE_IDENT_ATTR, %eax
    x86/include/asm/pgtable_types.h(#define PTE_IDENT_ATTR 0x003) PTE的属性是0x003,即PRESENT+RW,另外还有 #define PDE_IDENT_ATTR   0x067      /* PRESENT+RW+USER+DIRTY+ACCESSED */  #define PGD_IDENT_ATTR   0x001      /* PRESENT (no other attributes) */


  • leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
    leal     S,D 结果:&S->D
    movel S,D 结果:S->D
    movel (S), D 结果:&S->D
    leal (S), D 结果:S->D
  • movl %ecx,(%edx) /* Store identity PDE entry */
  • movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
    /*再把ecx送到swapper_pg_dir的第768项,page_pde_offset为0xc00,swapper_pg_dir每项为4字节,因此为768项,回顾1024个页表项,前768为用户空间,后256为内核空间,这里叫kernel PDE entry是这个意思*/
  • addl $4,%edx
  • movl $1024, %ecx
    /*tag 10做的工作就是创建一个PDE entry并放入ecx,然后加两个属性标志位存在swapper_pg_dir里*/
  • stosl/*eax(0x003)的内容放入edi指向的物理地址(pg0),然后edi+4,*/
  • addl $0x1000,%eax
    /*eax: 0x1003, 0x2003, 0x3003 …, 0x3ff003*/
    /*提示:pg0页有4K,按照edi+4依次递增放入eax的值,换句话说,pg0的第零项是0x0003,第一项0x1003…, 第1023项,0x3ff003*/
  • loop 11b
  • /*
  • * End condition: we must map up to the end + MAPPING_BEYOND_END.
  • */
  • movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp

    • Enough space to fit pagetables for the low memory linear map */
    • #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
      #define PTRS_PER_PGD 1024
      #define PAGE_SHIFT 12
      PAGE_TABLE_SIZE(0X40000000>>12) <<12
    • PAGE_OFFSET 以外的地址包括0x100000000-0xc0000000=0x40000000,共0x40000页,算得配套页表大小 PAGE_TABLE_SIZE为0x40000/1024=0x100(256个) MAPPING_BEYOND_END为0x100>>PAGE_SHIFT = 0x100000,即256KB
  • cmpl %ebp,%eax
    /*第二轮 eax = 0x7ff03 …. */
  • jb 10b
  • addl $__PAGE_OFFSET, %edi
  • movl %edi, pa(_brk_end)
  • shrl $12, %eax
  • movl %eax, pa(max_pfn_mapped)
  • /* Do early initialization of the fixmap area */
  • movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
  • movl %eax,pa(initial_page_table+0xffc)
swapper_pg_dir第0项内容是pg0的物理地址+PDE_INDENT_ATTR,也就是0x567067, 表示该页表是用户页表,可读写,可访问..(PRESENT+RW+USER+DIRTY+ACCESSED)
(回想stosl,即 movl eax, edi; addl $4, edi)pg0的第0项表示第一页的物理地址,内容是0x003, 第二页物理地址是0x1003 … 下同. 它们对应物理地址前4MB.