C programming language calling procedure with assembly language description

Source code:
main.c

/* Returns its argument increased by 3. */
int g(int x)
{
    const int bump = 3;

    return bump + x;
}

/* Forwards x to g() and hands g's result back unchanged. */
int f(int x)
{
    int from_g = g(x);

    return from_g;
}

/* Program entry point: the exit status is f(8) plus one. */
int main(void)
{
    int from_f = f(8);

    return 1 + from_f;
}

The assembly language is generated by:
gcc -S -o main.s main.c -m32
Assembly Source Code

 

 

 

 

 

 

 

 

 

 

_main:                               ## @main
pushl %ebp                       ##  suppose ebp = esp = 2000
movl %esp, %ebp
subl $4, %esp
movl $20, (%esp)              ## store 20 at the address held in %esp; this is the parameter for f (note: main.c above passes 8, so gcc's actual output would use $8)
call f: push %eip; movl %(f), %eip
## suppose f is 23, then 23 is stored at address 1992
## see the sketch below for more details
_f:
pushl %ebp                       ## save ebp and esp = esp – 4
movl %esp, %ebp            ## let the new ebp point to the new esp
subl $4, %esp
movl 8(%ebp), %eax         ## eax = contents of memory at address %ebp + 8 = 20
movl %eax, (%esp)           ## (%esp) = 20
call g: push %eip, movl %g, %eip
_g:
to be continued by readers. 🙂

Source Code Analysis

Thanks for reading!

perf tool source code analysis: perf record

Perf tool in Linux Kernel is used to analyze various kinds of performance issues. More information could be accessed here, but this article goes with the typical calling procedure of the built in core function “cmd_record”. Source code of perf could be found in linux/tools/perf/perf.c. Now let’s begin with function main(). You could read this article for a quick review. Don’t panic!
Take perf record -a sleep 3 for example.
Initialization:
1. main()->run_argv()->handle_internal_command()->run_builtin()->
status = p->fn(argc, argv, prefix)->cmd_record()
build a new record struct rec with struct record *rec = &record;
/*record is initialized with the following code*/
static struct record record = {
    .opts = {
        …
    },
    .tool = {
        …
    },
};
Skip these data structures as you wish but don’t hesitate to look up for the variables later.
struct perf_evlist {
    struct list_head entries;
    struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];//1<<8
    struct fdarray     pollfd;
    struct thread_map *threads;
    struct cpu_map      *cpus;
    struct perf_evsel *selected;

};
Note: perf_evsel stands for one event, and perf_evlist stands for all events we selected, which perf uses to communicate with kernel.
2. rec->evlist = perf_evlist__new();
struct perf_evlist *evlist = zalloc(sizeof(*evlist));
perf_evlist__init(evlist, NULL, NULL);
2.1 init all 256 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]  to NULL
2.2 init struct list_head entries
2.3 perf_evlist__set_maps(evlist, NULL, NULL)
  • set evlist->cpu and evlist->threads to NULL
  • perf_evlist__propagate_maps
    1. struct perf_evsel *evsel;
    2. evlist__for_each(evlist, evsel) {
      set all evsel->cpu and evsel->thread in evlist to NULL;
      }
      # __evlist__for_each(&(evlist)->entries, evsel)
      # list_for_each_entry(evsel, &(evlist)->entries, node)
      # struct list_head entries; entries belongs to struct evlist
      # struct list_head node; node belongs to struct evsel,
      # we use node to insert evsel into evlist->entries or in other list_heads
      # #define list_for_each_entry(evsel, list, node)
      # for (evsel = list_first_entry(list, typeof(*evsel), node);      
                   &evsel->node != (list);                    
                   evsel = list_next_entry(evsel, node))
      # evlist and evsel are connected through double linked list struct list_head entries in evlist and node in evsel. We’ll talk about it later.
  • fdarray__init(&evlist->pollfd, 64);
    # set fdarray.nr_autogrow to 64 and others to 0 or NULL;

3. perf_evlist__add_default(rec->evlist);
    struct perf_event_attr attr = {
        .type = PERF_TYPE_HARDWARE,
        .config = PERF_COUNT_HW_CPU_CYCLES,
    };
add a new evsel named “cycles” to evlist
3.1 evsel = perf_evsel__new(&attr);
3.2 perf_evlist__add(evlist, evsel)

  • entry->evlist = evlist; # note: put evsel in one evlist for further usage
  • list_add_tail(&entry->node, &evlist->entries);
    # note: list_head is a double linked list which is quite commonly used in kernel.
4. target__parse_uid(&rec->opts.target)
target->uid = UINT_MAX;
5. perf_evlist__create_maps(rec->evlist, &rec->opts.target)
target = {pid = 0x0, tid = 0x0, cpu_list = 0x0, uid_str = 0x0, uid = 4294967295, system_wide = true, uses_mmap = true, default_per_cpu = true, per_thread = false}
5.1 evlist->threads = thread_map__new_str(target->pid, target->tid, target->uid)
  • thread_map__new_str(NULL, NULL, UINT_MAX)
  • thread_map__new_by_tid_str(NULL);
  • thread_map__new_dummy();
  • thread_map__alloc(1);
  • thread_map__realloc(NULL, __nr);

# allocate sizeof(struct thread_map) + sizeof(struct thread_map_data) for thread_map
5.2 cpu_map__new(target->cpu_list);

  • cpu_map__read_all_cpu_map()
    1. fp = open(/sys/devices/system/cpu/online, r)
    2. struct cpu_map *cpus = cpu_map__read(fp);
      # read online cpus and allocate an integer array for them. (tmp_cpus[0,1,2,3,…])
    3. cpu_map__trim_new(nr_cpus, tmp_cpus)
      struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)*nr_cpus);
      memcpy(cpus->map, tmp_cpus, sizeof(int)*nr_cpus);
      # copy the content of online cpus directly to cpus->map.

    Till now, target->cpu_list got online cpu information.
    Like: for (i = 0; i < nr_cpus; ++i) cpus->map[i] = i;

5.3 perf_evlist__propagate_maps(evlist, !!target->cpu_list)
ensure each evsel in evlist get the right cpu_list
5.4 record_opts__config(&rec->opts)
set recording frequency for perf
CORE FUNCTION __cmd_record(&record, argc, argv)
struct machine {
    char          *root_dir;
    struct dsos      dsos;
    struct map_groups kmaps;
    struct map      *vmlinux_maps[MAP__NR_TYPES];
    u64          kernel_start;
    symbol_filter_t      symbol_filter;
    …
};
struct map_groups {
    struct maps     maps[MAP__NR_TYPES];
    struct machine     *machine;
atomic_t     refcnt;
};
1. perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool)
perf record calls this to open the session for writing (perf report calls it to open the session for reading).
perf_session__new(&rec->file, false, &rec->tool);
1.1 machines__init(&session->machines);
machine__init((struct machine)&machines->host, “”, HOST_KERNEL_ID);
  • map_groups__init((struct map_groups)&machine->kmaps, machine);
    maps__init((struct maps)&mg->maps[i]);
  • dsos__init(&machine->dsos)

1.2 ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
init ordered_events
1.3 perf_data_file__open(file)
by default open the output file as “perf.data” with create mode
1.4 perf_session__create_kernel_maps(session)
machine__create_kernel_maps(&session->machines.host)
allocate memory for the initialized session->machines.machine

  • struct dso *kernel = machine__get_kernel(machine);
    • kernel = machine__findnew_kernel(machine, vmlinux_name, “[kernel]”, DSO_TYPE_KERNEL);
      machine__findnew_kernel(machine, “[kernel.kallsyms]”, “[kernel]”, 1);
      struct dso *dso = machine__findnew_dso(machine, “[kernel.kallsyms]”);
      dsos__findnew((struct dsos)&machine->dsos, “[kernel.kallsyms]”)
      find kernel with name “[kernel.kallsyms]”

      dso__get(__dsos__findnew(dsos, name));->
      __dsos__findnew(dsos, name)->
      __dsos__find(dsos, name, false);->

      __dso__find_by_longname(&dsos->root, name);->
      Make a dynamic shared object with “[kernel.kallsyms]” then insert it into &machine->dsos
      __dsos__addnew(dsos, name);->
      dso__new(name);->
      __dsos__add(dsos, dso);

    • dso__read_running_kernel_build_id(kernel, machine);
      Read build_id from the note in /sys/kernel/notes whose n_type=3 (NT_GNU_BUILD_ID) and n_namesz=4 (sizeof("GNU"), counting the terminating NUL)
      sysfs__read_build_id(path, dso->build_id, sizeof(dso->build_id)
      read_build_id(void *buf, buf_size,dso->build_id, sizeof(dso->build_id), false); // buf_size (stbuf.st_size of /sys/kernel/notes) typical value: 360B
      //Symbol-minimal.c
      // File note is made of series of structure like
      struct {
           u32 n_namesz;
           u32 n_descsz;
           u32 n_type;
      } *nhdr;
      if (nhdr->n_type == NT_GNU_BUILD_ID &&
      nhdr->n_namesz == sizeof(“GNU”))
      In the first n_namesz stores a pointer points to the name of the very field. See the code here for the whole content of notes file.
      Copy the very desc to dso->build_id.
      Set dso->has_build_id = true.
  • machine__get_running_kernel_start(machine, &name);
    Figure out the start address of _text or _stext in /proc/kallsyms
    addr = kallsyms__get_function_start(filename, name); //filename = “/proc/kallsyms”
    kallsyms__parse(kallsyms_filename, &args, find_symbol_cb)
    Content of /proc/kallsyms filled with lines like 00000000 t fuse_async_req_send
    Read each line and find the very line c1000000 T _text | _stext and record the start address probably c1000000 to start address then return the hex value.(3238002688)
  • __machine__create_kernel_maps(machine, kernel) < 0)
    start = machine__get_running_kernel_start(machine, NULL);// do it again
    Create struct map *map for all MAP__NR_TYPES for passed in structure machine and allocate sizeof(struct map) + sizeof(struct kmap) for each of them.
    for type from 0 to MAP__NR_TYPES
    machine->vmlinux_maps[type] = map__new2(start, kernel, type);
    map__new2(start, kernel, type);->
    map__init(map, type, start, 0, 0, dso);
    void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff,
    struct dso *dso)
    kmap = map__kmap(machine->vmlinux_maps[type]);

    struct kmap *kmap = (struct kmap *)(map + 1)
    Set the kmap structure points to machine->kmaps.
    kmap->kmaps = &machine->kmaps;
    MAP__NR_TYPES struct map vmlinux_maps
    |0 | 1|  2 | 3 | …. | MAP__NR_TYPES-1|
    struct map *map; struct kmap *kmap;
    kmap->kmaps = machine->kmaps;
    map_groups__insert(&machine->kmaps, machine->vmlinux_maps[type]);
    __maps__insert(struct maps *maps, struct map *map)
    Insert new allocated map in machine->vmlinux[type] into kmaps in machine.
    note:

    struct machine {
        struct dsos      dsos;
        struct map_groups kmaps;
        struct map      *vmlinux_maps[MAP__NR_TYPES];
        u64          kernel_start;

    };
    struct map {
    struct dso        *dso;
    struct map_groups    *groups;
    ….
    };


    Now there is extra space for a struct kmap after each struct map in machine, in which kmaps (map_groups type) points to the kmaps (type map_groups) of machine itself. Another thing is
    1. the struct map_groups *group in the former struct map is initialized by NULL and then set to machine->kmaps;
    2. dso in struct map points to machine->dso
    3. struct map_groups *group in the later struct kmap points to struct map_groups in machine structure.
    machine {
    struct map vmlinux[MAP__NR_TYPES];
    //vmlinux[*]->dso = created dso;
    //(struct kmap *)(vmlinux[*]+1) -> kmaps= machine->kmaps;
    }
    struct kmap {

        struct ref_reloc_sym    *ref_reloc_sym;
        struct map_groups    *kmaps;
    };
    struct map_groups {
        struct maps     maps[MAP__NR_TYPES];
        struct machine     *machine;   // machine->kmaps -> machine = machine;
        atomic_t     refcnt;
    };
  • machine__create_modules(machine);
    modules__parse(modules, machine, machine__create_module)
    Get start address and names for all modules.

    1. struct map *map = machine__findnew_module_map(machine, start, name);
      For each module, find out whether the module has already been inserted into machine->dsos; if not, a new dso is created with the passed-in module name and then inserted into machine->dsos.
      struct map *map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, m.name);
      if (map == NULL) //Can’t find map, so create one for this module
      struct dso *dso = machine__findnew_module_dso(machine, &m, filename);
      if (dso != NULL)
      Find out if there is existing dso for this module name, if not, create one. Module numbers linked to dso is counted by dso->refcnt.
      struct map *map = map__new2(start, dso, MAP__FUNCTION);
      map_groups__insert(&machine->kmaps, map);
    2. dso__kernel_module_get_build_id(map->dso, machine->root_dir);
      Still for each modules, read /sys/module/[MODULE_NAME]/notes/.note.gnu.build-id just like we did in sysfs__read_build_id(path, dso->build_id, sizeof(dso->build_id) in Symbol-minimal.c
      sysfs__read_build_id(filename, dso->build_id, sizeof(dso->build_id)
    3. machine__set_modules_path(machine);
      Go to /lib/modules/$(KERNEL_VERSION)/ and walk all files, including sub-directories; for every module file ending with “.ko”, set the dso’s long_name to the module’s absolute path. kmod_path__parse_name(&m, dent->d_name);
      m->kmod = !strncmp(ext, “.ko”, 3);
      m->name = strdup(name)
      map_groups__set_module_path(mg, path, &m);
      Try to find module in (struct dso *)mg->maps by name. If succeed, set dso’s long_name with module’s absolute path.
      long_name = strdup(path);
      dso__set_long_name(map->dso, long_name, true);
  • map_groups__fixup_end(&machine->kmaps);
    Set prev->end = cur->start;
    last->end = ~0ULL;

Get back to __cmd_record

2. record__init_features(rec);
3. perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, file->is_pipe,
workload_exec_failed_signal);

  • Start  a new process using fork() for the suffix parameters e.g. sleep 3. evlist->workload.pid = fork();
    Child process(cp) tells parent process(pp) by:
    close(child_ready_pipe[0]);
    close(go_pipe[1]);
    fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); // set go_pipe[0] to FD_CLOEXEC
    close(child_ready_pipe[1]);
    ret = read(go_pipe[0], &bf, 1)
    pp set cp go by
    close(child_ready_pipe[1]);
    close(go_pipe[0]);
    if (read(child_ready_pipe[0], &bf, 1) == -1) {
        goto out_close_pipes;
    };
    fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);// SET go_pipe[1] to FD_CLOEXEC
    Then cp runs execvp(argv[0], (char **)argv);

4. record__open(rec)

  • perf_evlist__config(rec.evlist, rec.opts);
    perf_can_comm_exec();
    perf_probe_api(perf_probe_comm_exec);
    perf_do_probe_api(perf_probe_comm_exec, cpu, try[]);
    //const char *try[] = {“cycles:u”, “instructions:u”, “cpu-clock:u”, NULL};
    perf_event_open_cloexec_flag
    fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
    //sys_perf_event_open(&evsel->attr, -1, 0, -1, 8); just a probe here
    //This syscall creates a file descriptor that allows measuring performance //information. Each file descriptor corresponds to one event measured;
    //for more details please visit here.
  • Open each of the fd for each cpu on each threads for every evsel on evlist
    evlist__for_each(evlist, pos) {

    perf_evsel__open(pos, pos->cpus, pos->threads)
    __perf_evsel__open(evsel, cpus, threads);
    Set the very file descriptor to the evsel->fd->content[]
    for (cpu = 0; cpu < cpus->nr; cpu++) {
        for (thread = 0; thread < nthreads; thread++) {
            FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
    pid,
    cpus->map[cpu], group_fd, flags);
    } // endof nthreads
    } // endof cpus->nr
    /*The pid and cpu arguments specifies which process and CPU to
    monitor.

    pid: 0 cpu: -1 measures the calling process for all cpu

    pid: 0 cpu: >= 0 measures the calling process for specified cpu

    pid: > 0 cpu: -1 measures the specified process for all cpu
    pid: > 0 cpu: >= 0 measures the specified process for specified cpu
    pid: -1 cpu: >= 0 measures the all process for specified cpu
    This requires CAP_SYS_ADMIN capability or a
    “/proc/sys/kernel/perf_event_paranoid value of less than 1.”
    pid: -1 cpu: -1 error.

    */

    #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
    static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
    {return &xy->contents[x * xy->row_size + y * xy->entry_size];}
    #define FD(evsel, cpu, thread) (*(int *))xyarrary__entry(evsel->fd, cpu, thread){return evsel->fd->content[cpu * evsel->fd->row_size + thread * evsel->fd->entry_size]}
  • Created fds are stored in evsel->fd->contents[].
    Now perf got all fd with syscall in kernel space, we need to mmap them to userspace.
    perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
    opts->auxtrace_mmap_pages,
    (opts->auxtrace_snapshot_mode<0))
    perf_evlist__mmap_ex(evlist, 4294967295, false, 0, false)

    struct mmap_params mp = {
        .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
    };

    perf_evlist__alloc_mmap(evlist)
    perf_evlist__alloc_pollfd(evlist)

    perf_evlist__mmap_per_cpu(evlist, &mp)

    for each online cpu: {
        int output = -1;
        for each thread:
    perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output))

    };

    perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output)) ->
    evlist__for_each(evlist, evsel) {
    __perf_evlist__mmap(evlist, cpu, mp, *output)->
    //*output = FD(evsel, cpu, thread)
    evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, mp->prot,
    MAP_SHARED, *output, 0);
    Then memory could be accessed through evlist->mmap[cpu].base

    auxtrace_mmap__mmap(&evlist->mmap[cpu].auxtrace_mmap,
    &mp->auxtrace_mp, evlist->mmap[cpu].base, *output)
    munmap(mm->base, mm->len)
    }

Then we need to write and map information to perf.data file.
5. perf_session__write_header(session, rec->evlist, fd, false);
Write all f_attr of evsel in evlist down to fd

  1.  lseek(fd, sizeof(f_header), SEEK_SET);
  2. evlist__for_each(session->evlist, evsel) {
    evsel->id_offset = lseek(fd, 0, SEEK_CUR);
    //set offset for each evsel in evlist
    do_write(fd, evsel->id, evsel->ids * sizeof(u64))
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    // record all evsel’s id info into id_offset section.
  3. evlist__for_each(evlist, evsel) {
    f_attr = (struct perf_file_attr){
    .attr = evsel->attr,
    .ids  = {
    .offset = evsel->id_offset,
    .size   = evsel->ids * sizeof(u64),
    }
    };
    do_write(fd, &f_attr, sizeof(f_attr));
    // write f_attr above into fd
    }
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    header->data_offset = lseek(fd, 0, SEEK_CUR);
    header->feat_offset = header->data_offset + header->data_size;
    // set offset for data and feat_offset
  4. do_write(fd, &f_header, sizeof(f_header));
    // write header of fd
    evlist__for_each(evlist, evsel) {
    f_attr = (struct perf_file_attr){
    .attr = evsel->attr,
    .ids  = {
    .offset = evsel->id_offset,
    .size   = evsel->ids * sizeof(u64),
    }
    };
    err = do_write(fd, &f_attr, sizeof(f_attr));
    }
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    f_header = (struct perf_file_header){
    .magic       = PERF_MAGIC,
    .size       = sizeof(f_header),
    .attr_size = sizeof(f_attr),
    .attrs = {
    .offset = attr_offset,
    .size   = evlist->nr_entries * sizeof(f_attr),
    },
    .data = {
    .offset = header->data_offset,
    .size    = header->data_size,
    },
    /* event_types is ignored, store zeros */
    };
    fd | f_header | id_offset * nr | f_attr * nr | data        |
    attr_offset   data_offset  data_size
    attr_offset = lseek(fd, 0, SEEK_CUR);
    // set offset for attr in fd

6. perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,machine);
build a union perf_event *event;
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),”%s%s”,
mmap_name, kmap->ref_reloc_sym->name); //”[kernel.kallsyms]_text
event->mmap.header.size = (sizeof(event->mmap) -
(sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
event->mmap.pgoff = kmap->ref_reloc_sym->addr;
event->mmap.start = map->start;
event->mmap.len   = map->end - event->mmap.start;
event->mmap.pid   = machine->pid;
Insert event to rec by calling process_synthesized_event();
record__write(rec, event, event->header.size);

7. perf_event__synthesize_modules(tool, process_synthesized_event,
machine)

struct mmap_event {
    struct perf_event_header header;
    u32 pid, tid;
    u64 start;
    u64 len;
    u64 pgoff;
    char filename[PATH_MAX];
};
union perf_event{
struct mmap_event        mmap;
}
union perf_event *event;

Create perf_events (as shown above) for all maps in machine->kmaps->maps[MAP__FUNCTION] respectively and fill in members like
mmap.header.type = PERF_RECORD_MMAP
MMAP |struct perf_event_header header| pid, tid| start | len | pgoff | filename |
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
// Length of long_name in dso
event->mmap.header.size =
(sizeof(event->mmap) - (sizeof(event->mmap.filename) - size));
// Calculate the real length of mmap and write into header.size, step one.
memset(event->mmap.filename + size, 0, machine->id_hdr_size);
event->mmap.header.size += machine->id_hdr_size;
// Calculate the real length of mmap, step two.
memset(event->mmap.filename + size, 0, machine->id_hdr_size);
Zero machine->id_hdr_size bytes starting at filename + size, because the first size bytes of filename are needed to store long_name.
MMAP | header| pid, tid | start | len | pgoff | filename |
header.size
filename |machine->dso->long_name| machine->id_hdr_size|
mmap.start, mmap.len, mmap.pid, mmap.filename. Then insert them all to “perf.data” file.
MMAP | header | pid, tid | start | len | pgoff | filename |

8. __machine__synthesize_threads
Since we called perf with -a option, perf_event__synthesize_threads will be called.
For all process in /proc/
__event__synthesize_thread(comm_event, mmap_event, fork_event, pid, (int full)1, process, tool, machine, mmap_data, proc_map_timeout);
For every task (_pid) in /proc/pid/task/, create a comm_event, fork_event and mmap_event for each of them (perf_event__prepare_comm) and get COMM, tgid and ppid for pid from /proc/pid/status by calling perf_event__prepare_comm(comm_event, _pid, machine, &tgid, &ppid).
PID = 3665
COMM_EVENT
*tgid = 3665
*ppid = 2485

event->comm.pid = *tgid;(3665) // they share the same pid
event->comm.header.type = PERF_RECORD_COMM;

struct comm_event {
    struct perf_event_header {
        PERF_RECORD_COMM;
        size
    } header;
    u32 pid; 3665
    u32 tid; _pid in /proc/3665/task/
    char comm[16]; name+machine->id_hdr_size
    },
};

Then insert fork_event, comm_event to perf.data.
For the main thread, create an mmap_event and read /proc/pid/maps, each line printed will be recorded and then written into perf.data in terms of mmap_event.

perf_event__synthesize_mmap_events(tool, mmap_event, pid: 3665, tgid: 3665,
process, machine, mmap_data: false, proc_map_timeout: 500)
cat /proc/3665/maps
82a23000-82e75000 r-xp 00000000 08:01 134003    /lib/i386-linux-gnu/libnss_files-2.19.so
sscanf(bf, “%llx-%llx %s %llx %x:%x %u %s\n”,
&event->mmap2.start, &event->mmap2.len, prot,
&event->mmap2.pgoff, &event->mmap2.maj,
&event->mmap2.min,
&ino, execname);
struct mmap2_event {
struct perf_event_header header;
u32 pid, tid; 3665, 3665
u64 start; 0x82a23000
u64 len; 0x452000 (event->mmap2.len -= event->mmap.start;)
u64 pgoff; 00000000
u32 maj; 08
u32 min; 01
u64 ino; 134003
u64 ino_generation;
u32 prot; 0x4 | 0x1
u32 flags; 0x02;
char filename[PATH_MAX]; “/lib/i386-linux-gnu/libnss_files-2.19.so”
};
char execname[PATH_MAX] =  ” /lib/i386-linux-gnu/libnss_files-2.19.so”
header.size = sizeof(event->mmap2)-sizeof(filename)+(strlen(filename)+1)+id_hdr_size

Goagent get “global name ‘SSLContext’ is not defined”

The thing is I updated my goagent from github with my ubuntu, however after that, every url begins with https through goagent is unaccessable.
Tracked log says: NameError: global name ‘SSLContext’ is not defined.
Check python installation package:
sudo apt-get install python-dev python-greenlet python-gevent python-vte python openssl python-crypto python-appindicator python-setuptools
Then: sudo easy_install -U gevent
Done!

Why is that?
gevent.ssl.SSLSocket.__init__()  tries to reference SSLContext which doesn’t exist in Python 2.7.8 yet, but it’s introduced in Python 2.7.9.
Bug fixed in the newest version of Python-gevent.
See here for more details.

HOW DID I FIX MY VPS AND RESCUE MY WORDPRESS DATA

Got my vps ddosed the other days, and so busy was my working days that I couldn’t spare my time to fix it. Now you see it just works fine, this article write down how I did.

On July 1st I received an email from my vps provider and they told me that I have over 10GB anonymous bit torrent data up/down load per day, hence they have to shut it down to avoid legal issues. Also, I was told that the hacker may left a backdoor even if I change my root password, so I could only reinstall my VPS system. The problem now is: how I could do to save my old home page. Luckily, they offered me an rescue mode, with which I can mount my old disk image, what’s worse is that I couldn’t use chroot. Terrible things just happened.

First of all, mount -t ext4 /dev/xvdb /media/temp and copy important files to my own host, including
wp-content/uploads                                                    //uploaded files
/etc/apache2/sites-available/www.haodong.org      //my apache settings
/usr/share/wordpress/wp-config.php                       //wordpress settings
/etc/wordpress/*                                                          //wordpress settings
wp-content/uploads/2015_xx_xx_database.sql      //back up data base, most important

Second, install a new operating system, and install the following programs.
apt-get install apache2 php5  mysql-server mysql-common mysql-client
apt-get install libapache2-mod-auth-mysql php5-mysql php5-gd
Start apache service
service apache2 start
install phpmyadmin and wordpress
apt-get install phpmyadmin wordpress
Now I have wordpress in /usr/share/wordpress.
Upload settings to the corresponding positions.
scp www.haodong.org root@dong:/etc/apache2/sites-available/
a2ensite www.haodong.org
It seems apache2 now requires its site config files to end with .conf, so rename www.haodong.org to haodong.conf if you get “www.haodong.org does not exist”.
For /etc/apache2, which includes default website config containing /var/www/html, make my website work by a2dissite 000-default.conf since in apache2.conf there is

IncludeOptional sites-enabled/*.conf

scp wp-content/uploads root@dong:/usr/share/wordpress/uploads/
scp wp-config.php root@dong:/usr/share/wordpress/wp-config.php                      
scp etc/wordpress/* root@dong:/etc/wordpress

Add on Oct. 16 2015, it seems apache2 has changed its way of setting. Website folders are restrained to /var/www/html, so synchronize files in wordpress.

rsync -avP /usr/share/wordpress/  /var/www/html/

Make sure:
1. define(‘WPLANG’, ‘zh_CN’); exist in /etc/wordpress/wp-config.php
2. Enable write permission on .htaccess since I need permanent links.
3. The location in /etc/apache2/sites-available/www.haodong.org is just the wordpress symbolic setting file.
4. Be careful on /etc/wordpress/config-xxx.php, which stored the name and password to mysql data file. Make sure you put them in a safe place, and if not, don’t hesitate to crash your head on a wall. :p
Technically, we finished reinstalling and enabling everything, let’s try.
1. Open http://haodong.org/phpadmin and input your mysql user and password.(Should be set when you install mysql, like root:123456), if you forget everything about mysql, you could reset DB_NAME, DB_USER, DB_PASSWORD by
mysql -u root -p
CREATE DATABASE wordpress;
CREATE USER wordpressuser@localhost IDENTIFIED BY ‘password’;
GRANT ALL PRIVILEGES ON wordpress.* TO wordpressuser@localhost;
FLUSH PRIVILEGES;
2. Upload the sql file we set aside. Remember wordpress only supports uploading files under 2MB, so we need to gzip it to xxx.sql.gz. After that, open http://haodong.org/wp-admin/install.php and set an administrator user:password pair, which will be stored in the wp_users table we just imported.
Should everything be done?
No.
3. Open www.haodong.org, oops, I forgot my administrator user and password. What should I do? Edit the wp_users table and set the password field to “5d41402abc4b2a76b9719d911017c592” (the MD5 hash of “hello”) to reset my password to “hello”.
Login, and see how my new website going, but what I saw is an amount of messy code. Actually, the old mysql data file is stored in latin1, so before upload the data file, I need to set it to UTF8 in order to make Chinese characters looks well.
vi xxx_database.sql
:%s/latin1/UTF8/g
:wq
gzip xxx.sql and upload the file again with UTF-8 Unicode (utf8)  and utf8_general_ci
Here we got beautiful Chinese words. But wait, from ssh, I still saw Chinese names abnormally shown on my screen.
vi /var/lib/locales/supported.d/local
add
zh_CN.UTF-8 UTF-8
en_US.UTF-8 UTF-8
zh_CN.GBK GBK
zh_CN GB2312
vi /etc/environment
add
LANG="zh_CN.UTF-8"
LANGUAGE="zh_CN:zh:en_US:en"
LC_CTYPE="zh_CN.UTF-8"
Then run locale-gen
Until now, I could get access to all of my articles when I open my website. But there is one more thing. The media files could still not be touched. I could change uploads file to 777 but this may cause security issues, actually I don’t want to save my vps again. So what I do is:
Change uploads file to 777 temporarily and upload one picture, log on to my vps and see who owns wordpress as a user. And I got:
# ll wordpress/wp-content/uploads/xxx.jpg
www-data:www-data
OK, gotcha,
chown -R www-data:www-data /usr/share/wordpress
chmod 755 /usr/share/wordpress
Now everything works well.
Oh, there is still one more thing, I’d like to talk about is the security problem, I need to do something to avoid being hacked again.

Win8 VPN 720 ERROR FIX

I got my Windows 8.1 updated on Lenovo T440s Laptop the other days, and found my personal VPN unsuccessfully connected. What’s weird is my Android and IOS devices connected normally. So I reinstalled my PPTP and L2TP based on IP-SEC, and it turned out that the same issue with me. What’s going on?

First of all, I double checked that all different devices had been able to connect my VPN server, so it’s not about my VPS. Secondly, I updated win8 which may be quite the cause of abnormity. Thirdly, it verified that my Ubuntu client connected successfully.

So the answer is clear, it has nothing to do with my VPS, but about the win8. Actually, (the following solution came from the internet) Microsoft has errors with the newest WAN MINI port drivers since Jan. 2013 which I found it  in a forum, and MS shouldn’t solve it! So the solutions are as follows:
1) Uninstall the drivers related to WAN Miniport (IP), WAN Miniport (IPv6) and WAN Miniport (Network Monitor).
2) Start -> Run ->  Regedit -> HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Class\{4d36e972-e325-11ce-bfc1-08002be10318} -> Export
3) Edit the exported registry file, locate the above three subkeys and delete them all; double check you don’t delete the wrong part. Those subkeys have a “DriverDesc” which matches your broken miniports.
4) Go back to Device Manager, and now you are able to update the WAN MINI PORT drivers with errors. Right click the WAN MINI PORT IP (4 eg) -> Update Driver Software -> Browse My Computer -> Let me pick driver from a list -> uncheck “Show compatible hardware” and wait for the drivers listing generation. Then choose the first “Microsoft” drivers listed and pull the polling row to the top and then choose the first [BlueTooth Personal], and ignore warnings. Then the driver will become a fake blue tooth driver and you can delete it now.
5) Repeat step 4 for the remaining  WAN MINIport IPv6 and  WAN MINIport (Network Monitor)
6) Reboot, then all the right drivers will be installed automatically.

Linux系统学习之内存初始化

接下来的一段时间准备系统学习一下Linux的源代码,能顺利回答下列问题,Linux的i386内存寻址部分就算过关了。(题目出自陈香兰老师授课讲稿)

在32位pc中,结合Linux2.6.26/arch/x86/kernel/head_32.S中228-251行相关代码,关于临时2级页表的初始化过程,假设pg0所在的物理地址是0x567000,回答下列问题(以下涉及到数值的地方,请用16进制表示):
填写在swapper_pg_dir中第0x0项的内容是什么,有什么含义?
若填写了swapper_pg_dir中第0x1项,则此内容是什么?
填写在pg0的第0x0项、第0x1项和第0x3FF项的内容是多少,有什么含义?
根据swapper_pg_dir的第0x0项和pg0的内容,这个临时页表所代表的地址空间中,0~4MB-1的空间被映射的物理地址空间范围是什么?
若内核地址空间从3G开始,那么填写在swapper_pg_dir中第0x300项和0x301项的内容是什么,与上述第0项和第1项有什么关系,有什么含义?

唠叨两句,线性地址和物理地址经过硬件转化,所以看到奇奇怪怪的物理地址也不用担心,这跟逻辑地址没什么直接联系,另外,读代码之前要先明白这段代码是做swapper_pg_dir初始化用的,带着目的读会提高效率. 回顾x86的内存寻址过程,大概是CR3寄存器配合PDT,然后找到PTE的某项后加Page Offset找到所要的内容。不过这里是临时页表的初始化,NO PAE,OK,继续!

/arch/x86/kernel/head_32.S 页目录初始化代码节选
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
page_pde_offset = (__PAGE_OFFSET >> 20);
/*__PAGE_OFFSET是0xc0000000,低于这个地址的被划分为用户空间,因此page_pde_offset是0xc00,即内核空间第一个页目录项(第768项,768×4字节=0xc00)在页目录中的字节偏移,而不是整个页目录的大小(页目录共1024项×4字节=0x1000),看不懂没关系,稍后会用到*/
  • movl $pa(__brk_base), %edi
    /*__brk_base由内核编译时指定,表示初始时堆的开始地址,注意kernel将所有的初始化好的页表都放在堆的开始处,这里所说的地址都是指运行时的虚拟地址VA,具体这里应该是pg0对应的物理地址,存入edi,也就是第一个page所在的地址,swapper_pg_dir的第一项*/
  • movl $pa(initial_page_table), %edx
    /*initial_page_table在paging_init()初始化,将swapper_pg_dir(存放页全局目录PGD的地址)送入edx*/
  • movl $PTE_IDENT_ATTR, %eax
    /*
    x86/include/asm/pgtable_types.h(#define PTE_IDENT_ATTR 0x003) PTE的属性是0x003,即PRESENT+RW,另外还有 #define PDE_IDENT_ATTR   0x067      /* PRESENT+RW+USER+DIRTY+ACCESSED */  #define PGD_IDENT_ATTR   0x001      /* PRESENT (no other attributes) */
    */

10:

  • leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
    补充说明:leal(加载有效地址)只计算源操作数的有效地址并存入右边的目的寄存器,不访问内存,
    leal     S,D 结果:&S->D
    movl S,D 结果:S->D
    movl (S), D 结果:*S->D(以S的值为地址的内存内容)
    leal (S), D 结果:S->D
    ecx=edi+PDE_IDENT_ATTR
    /*把edi的值即pg0所在物理地址加0x067(PDE的属性)放入ecx,构建一个页目录项*/
    /*第二次循环,edi寄存器指向pg0+1024×4,即pg1的物理地址不用再赋值,下同*/
  • movl %ecx,(%edx) /* Store identity PDE entry */
    /*然后把ecx(pg0及属性)送入swapper_pg_dir的第零项,注意edx存放的是swapper_pg_dir的物理地址,因此该行代码把pg0及属性写入swapper_pg_dir第零项*/
  • movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
    /*再把ecx送到swapper_pg_dir的第768项,page_pde_offset为0xc00,swapper_pg_dir每项为4字节,因此为768项,回顾1024个页目录项,前768为用户空间,后256为内核空间,这里叫kernel PDE entry是这个意思*/
    /*第二轮循环,edx已经自加了4,下同*/
  • addl $4,%edx
    /*本段代码循环执行,edx的增量为4,即swapper_pg_dir每项的大小,先往后看*/
  • movl $1024, %ecx
    /*准备初始化1024个表项,这个是和loop配合使用的,记得否?*/
    /*tag 10做的工作就是创建一个PDE entry并放入ecx,然后加两个属性标志位存在swapper_pg_dir里*/
11:
  • stosl/*eax(0x003)的内容放入edi指向的物理地址(pg0),然后edi+4,*/
  • addl $0x1000,%eax
    /*eax: 0x1003, 0x2003, 0x3003 …, 0x3ff003*/
    /*提示:pg0页有4K,按照edi+4依次递增放入eax的值,换句话说,pg0的第零项是0x0003,第一项0x1003…, 第1023项,0x3ff003*/
    /*第二轮循环,eax每轮循环增长4K*1024=4M,最终会超过ebp的值,对应swapper_pg_dir每个PDE,下同*/
  • loop 11b
    /*循环执行前两行代码,初始化了pg0的全部1024个页表项,每个4B,这个时候,swapper_pg_dir的第一项,pg0已经全部初始化完毕(真累),再补充说明下,这个pg0其实就对应了物理地址前4MB了。*/
  • /*
  • * End condition: we must map up to the end + MAPPING_BEYOND_END.
  • */
  • movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
    /*ebp = _end的物理地址+MAPPING_BEYOND_END+0x003*/
    /*需要映射到end+MAPPING_BEYOND_END,将内核最终地址保存到这里,当eax<ebp则重复初始化过程,ebp推算过程如下:

    • /* Enough space to fit pagetables for the low memory linear map */
    • #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
      #define PTRS_PER_PGD 1024
      #define PAGE_SHIFT 12
      PAGE_TABLE_SIZE(0X40000000>>12) <<12
    • MAPPING_BEYOND_END = \
      PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
    • PAGE_OFFSET 以外的地址包括0x100000000-0xc0000000=0x40000000,共0x40000页,算得配套页表大小 PAGE_TABLE_SIZE为0x40000/1024=0x100(256个) MAPPING_BEYOND_END为0x100<<PAGE_SHIFT = 0x100000,即1MB(256个页表×4KB)
  • cmpl %ebp,%eax
    /*eax=0x400003(最后写入pg0的表项是0x3ff003,之后eax又加了0x1000)*/
    /*第二轮 eax = 0x800003 …. */
  • jb 10b
    /*至此,执行完毕,swapper_pg_dir所指向的PDE已初始化完毕,*/
  • addl $__PAGE_OFFSET, %edi
  • movl %edi, pa(_brk_end)
  • shrl $12, %eax
  • movl %eax, pa(max_pfn_mapped)
  • /* Do early initialization of the fixmap area */
  • movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
  • movl %eax,pa(initial_page_table+0xffc)
至此,回到刚才的问题:
填写在swapper_pg_dir中第0x0项的内容是什么,有什么含义?
swapper_pg_dir第0项内容是pg0的物理地址+PDE_IDENT_ATTR,也就是0x567067, 表示该页表是用户页表,可读写,可访问..(PRESENT+RW+USER+DIRTY+ACCESSED)
若填写了swapper_pg_dir中第0x1项,则此内容是什么?
0x568067
填写在pg0的第0x0项、第0x1项和第0x3FF项的内容是多少,有什么含义?
(回想stosl,即 movl %eax, (%edi); addl $4, %edi)pg0的第0项表示第一页的物理地址,内容是0x003, 第1项对应第二页,内容是0x1003 … 下同,第0x3FF项内容是0x3ff003. 低12位的0x003是属性(PRESENT+RW),它们对应物理地址前4MB.
根据swapper_pg_dir的第0x0项和pg0的内容,这个临时页表所代表的地址空间中,0~4MB-1的空间被映射的物理地址空间范围是什么?
显然是物理地址0~4MB-1
若内核地址空间从3G开始,那么填写在swapper_pg_dir中第0x300项和0x301项的内容是什么,与上述第0项和第1项有什么关系,有什么含义?
第0x300项(768项)与0x301项(769项)分别和0x000项与0x001相同,分别为0x567067和0x568067,这是为了实模式到保护模式的平稳过渡,在启用分页后可以继续执行.
swapper_pg_dir包含的1024个页目录项,前768指向用户空间,后256指向内核空间