diff --git a/123.wl b/123.wl new file mode 100644 index 0000000000000000000000000000000000000000..4cb29ea38f70d7c61b2a3a25b02e3bdf44905402 --- /dev/null +++ b/123.wl @@ -0,0 +1,3 @@ +one +two +three diff --git a/123123.wl b/123123.wl new file mode 100644 index 0000000000000000000000000000000000000000..ee9bbd623e38b5db3a8e3db9a3e6a363c575a429 --- /dev/null +++ b/123123.wl @@ -0,0 +1,6 @@ +one +two +three +one +two +three diff --git a/README.md b/README.md index e708809a960d191d40bb83c08dcf5ec751930898..0b878aebb2d8add53ebfdcb2845df7f591fbfb9c 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,293 @@ -# Debugging +# TP4 - Debugging +(This is a copy of the text that appears in the _Software engineering_ +assignment.) -## Getting started +This lab session is about debugging programs. It illustrates the use +of a debugger to step through a program execution to understand the +cause of an unexpected behavior. It also illustrates the use of +memory-monitoring tools to locate and correct memory handling errors. -To make it easy for you to get started with GitLab, here's a list of recommended next steps. +## Prerequisite + +Fork the [Debugging](git@git.unistra.fr:alain/debugging.git) project +and clone your fork on the local machine. + +This lab session requires the `valgrind` tool. If you work on an Apple +machine with an Apple processor, you are out of luck (at the time of +writing). -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! 
+## Debugging Tool Usage -## Add your files +To run `gdb` on a program compiled with option `-g`: + + gdb <program> -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: +If `gdb` prints the message `No debugging symbols found in <program>`, +you need to recompile all source files with `-g`. -``` -cd existing_repo -git remote add origin https://git.unistra.fr/alain/debugging.git -git branch -M main -git push -uf origin main -``` +The main `gdb` commands are: -## Integrate with your tools +| Command | Meaning | +|----------------------------|------------------------------------------------| +| `run [<args>] [< <redir>]` | start the program | +| `quit` | exit `gdb` | +| `break <function>` | breakpoint at the start of `<function>` | +| | | +| `break <linenumber>` | ... at start of line `<linenumber>` | +| `next [<ntimes>]` | run until next source line in current function | +| `step [<ntimes>]` | ... entering (debuggable) function calls | +| `finish` | ... until end of current function | +| `continue` | ... until next breakpoint | +| `list` | show source code | +| `list <linenumber>` | ... around `<linenumber>` | +| `backtrace` | show current call stack | +| `up [<number>]` | move up the call stack 1 or `<number>` levels | +| `down [<number>]` | ... or down | +| `info locals` | print local (current function) variables | +| `info frame` | ... and a lot more about current function | +| `print <id>` | print value of variable `<id>` | +| `print <expr>` | ... or of expression `<expr>` | +| `print <arr>@<len>` | ... 
or `<len>` elements of array `<arr>` | +| | | + +To run `valgrind` and check memory accesses: + + valgrind --leak-check=full <program> [<args>] + +Having compiled the program with `-g` is not mandatory, but it helps a +lot to understand the output of `valgrind`. + +## Buggy Computation + +Binomial coefficients $`\dbinom{n}{k}`$ (also written $`C_n^k`$) are +frequently used in mathematical definitions and properties: they +appear in Pascal's triangle, $`\tbinom{n}{k}`$ is the coefficient of +$`x^k y^{n-k}`$ in the expansion of $`(x+y)^n`$, and they have a +myriad of other properties. + +The [`cnp.c`](cnp.c) program aims to compute the binomial coefficient, +using the following definition: + +$$ +\begin{aligned} + \dbinom{n}{k} &= \dfrac{n!}{k!(n-k)!} + %\\ & + = \dfrac{n\cdot(n-1)\cdots(n-k+1)}{1\cdot2\cdots k} + \\ & + = \dfrac{n-0}{1+0} \times \dfrac{n-1}{1+1} + \times\cdots\times \dfrac{n-(k-1)}{1+(k-1)} +\end{aligned} +$$ -- [ ] [Set up project integrations](https://git.unistra.fr/alain/debugging/-/settings/integrations) +The coefficient $`\tbinom{n}{k}`$ is always an integer. The last +definition makes it suitable for computation using only integers: when +performed from left to right, every product has an integer result. -## Collaborate with your team +The program `cnp.c` is a first attempt at implementing the computation +of a binomial coefficient. However, after compilation, a quick test +shows it gives the wrong answer, because $`\dbinom{6}{3} = +\dfrac{6\cdot5\cdot4}{1\cdot2\cdot3}`$ is equal to 20, not 12. 
-- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) + $ ./cnp 6 3 + 12 -## Test and Deploy +1. Use `gdb` to step through this execution and see what happens. Then + correct the program and verify that it gives the right answer. -Use the built-in continuous integration in GitLab. -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) +## Buggy Memory Accesses -*** +The [`bubble.c`](bubble.c) program implements a sorting algorithm +known as _bubble sort_. It sorts an array in _descending_ order +(largest values first). One sorting step (implemented in `bubble`) +traverses an initial portion of the array, exchanging any pair of +successive values which are not in the correct order. The first step +applies to the whole array, and brings the smallest value in the last +position (which is its final position). 
Bubble steps are then repeated +for an increasingly shorter initial portion, progressively bringing +values in their final position. After all steps, the array is sorted. -# Editing this README +(Remember that this is a terribly inefficient algorithm; we use it +here only for illustration.) -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thanks to [makeareadme.com](https://www.makeareadme.com/) for this template. +1. Run the program under `valgrind` to discover the two major errors. +2. Fix the _block definitely lost_ error appearing in the output of + `valgrind`. Rerun `valgrind` to check your correction. -## Suggestions for a good README +Running the program shows that something is wrong. For instance: -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. + $ ./bubble 10 10 + array, before sorting: + 6 2 9 8 7 4 7 10 4 9 + array, sorted : + 4113 10 9 9 8 7 7 6 4 4 + +3. Use `gdb` to find out where the ghost value comes from. Correct the + program accordingly. + + +## Memory Management Errors + +The [`rpn.c`](rpn.c) program implements a simple _reverse polish +notation_ calculator: the program arguments are the terms of the +expression which is then evaluated and the result is printed. + +A reverse polish notation (RPN) expression is a sequence of numbers +and operators. 
Evaluating an RPN expression uses a stack of numbers, and processes +the sequence from left to right: an incoming number is pushed on the +stack, and an incoming operator is applied to the two numbers on top +of the stack, which are then removed from the stack, before the result +of the operation is pushed on the stack. Here is an example, starting +with an empty stack growing to the right: -## Name -Choose a self-explaining name for your project. + input => stack + ---------------- + 9 |9| + 5 |9|5| + 2 |9|5|2| + - |9|3| + 2 |9|3|2| + + |9|5| + - |4| -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. +Our program accepts only two operators (`+` and `-`), and the numbers +can be any floating point value; any other kind of argument is +ignored. If an operator applies when the stack contains fewer than two +numbers, the program should stop with an error message. If the stack +contains more than one number when the end of input is reached, a +warning message should be printed. -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. +The current version of the program is full of bugs. We will fix +them progressively. -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. +### Compiler Warnings -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. 
However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. +1. The compiler produces warnings (and the executable is unusable). + Read them carefully and correct the program. -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. +### Logical Bugs -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. +If you now try the program with -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. + ./rpn 9 5 2 - 2 + - -## Contributing -State if you are open to contributions and what your requirements are for accepting them. +you notice that the result is wrong. -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. +2. Step through the program execution with `gdb` to find the error. + Correct the program and verify that it gives the correct answer. -You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. 
Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. +### Memory Leaks -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. +Using `valgrind` on this program shows several memory problems. -## License -For open source projects, say how it is licensed. +3. Make sure all allocated memory is freed before exiting the program. + If needed, write additional functions to release allocated memory. -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. +### Other Faulty Behavior + +The program does not work as expected when the stack is insufficiently +filled (e.g., `./rpn 1 +`), or when the stack holds more than one +value after all arguments are processed (e.g., `./rpn 1 2 3 +`). + +4. Correct these mistakes, properly de-allocating memory before + exiting, and using `valgrind` until no more errors occur. + + +## Memory Allocation Discipline + +The [`wordlist.c`](wordlist.c) program reads lines of input +(typically, single words), and inserts them into a list. The list is +ordered lexicographically (as per `strcmp()`), and shall not contain +duplicate entries. At the end of the program, the list is printed, one +word per line. The output should be similar to that of `sort -u` +(for input made of ASCII characters only). + +The list of words is implemented as a singly-linked list where every +cell points to an array of characters containing a NUL-terminated +string. This is illustrated in the following sketch, where `#` means +`NULL` (the null pointer) and `$` means `NUL` (the null character, +also noted `\0`). 
+ + +---+---+ +---+---+ +---+---+ + | | -+----------->| | -+----------->| | # | + +-|-+---+ +-|-+---+ +-|-+---+ + | | | + v v v + +-+-+-+-+---+ +-+-+-+-+-+-+---+ +-+-+-+-+---+ + |o|n|e|$|...| |t|h|r|e|e|$|...| |t|w|o|$|...| + +-+-+-+-+---+ +-+-+-+-+-+-+---+ +-+-+-+-+---+ + +### Memory Allocation + +The idea of the program is to use `getline()` to allocate memory for +the strings. However, running the program on input file `123.wl` shows +only one word: + + $ ./wordlist < 123.wl + three + +1. Run the program under `gdb` to examine the content of the list + after each insertion. In particular, examine the value and content + of the string pointed to by `linetext` before calls to + `list_insert_sorted()`. + +2. Read the manual of `getline()` (in a terminal, with `man getline`), + to understand when this function actually allocates memory. Then + modify the program to make sure every input line gets its own + allocation. Verify that the output is correct on the `123.wl` input + file. + +### Memory Leaks + +Run the program with input from `123.wl` under `valgrind`. This shows +a whole lot of errors that seem related to the allocation of the list. + +3. Un-comment the call to `list_free()` at the end of `main()`, and + verify that this reduces the list of memory leaks, but introduces + new errors (and the program actually crashes). + +4. Run the program under `gdb` and focus on function `list_free()`. + Step through the instructions in this function, and print the + content of the cell pointed to by the `list` parameter after each + instruction. Then correct the code of `list_free()`. + +5. At this point, `valgrind` should show a single remaining memory + leak (the `getdelim()` function is called by `getline()`). Correct + the program to eliminate this last memory leak; run `valgrind` to + confirm that all leaks have been eliminated. 
+ +### Allocation Strategy + +At this point, the allocation strategy is: + +- memory for the strings is allocated by `getline()` + +- memory for the list cells is allocated by `list_insert_sorted()` + +- memory for list and cells is freed by `list_free()` + +- memory allocated by the last call of `getline()` is freed explicitly + +Unfortunately, this logic is wrong, because some of the input lines +will not be kept in the list. Run the program under `valgrind` with +input file `123123.wl` to exhibit new memory leaks. (Note also that +blocks allocated by `getline()` are much larger than actually needed). + +Here is a new strategy: + +- all calls to `getline()` use the same block of allocated memory (as + was the case in the original version of the program) + +- memory for a list cell _and_ the string it holds is allocated by + `list_insert_sorted()` _only_ when the string is not already present + in the list + +- memory for cells and strings is freed by `list_free()` + +- the block used by `getline()` is freed explicitly at the end of the + program + +6. Implement this new strategy. Run `valgrind` to prove that no memory + is leaked with input from `123123.wl`. 
diff --git a/bubble.c b/bubble.c new file mode 100644 index 0000000000000000000000000000000000000000..6ed1d289effcc4e08b79e40bff6551aa72d95c08 --- /dev/null +++ b/bubble.c @@ -0,0 +1,67 @@ +/* bubble.c: program studied in the "Buggy Memory Accesses" section of README.md, which states that it contains errors to be found with valgrind and gdb. It is meant to sort an array of random ints in descending order. */ +#include <stdlib.h> +#include <stdio.h> +#include <time.h> + +/* array: a pointer to the int values together with the number of values */ +typedef struct { + int * values ; + size_t size ; +} array ; +/* fill: stores rand () % (m+1) in each of the t.size cells of t.values */ +void fill (array t, int m) +{ + size_t i = 0; + while (i < t.size) + t.values[i++] = rand () % (m+1); +} +/* bubble: one pass over the indices i = 0 .. k, k included. Whenever values[i+1] is greater than values[i] the two cells are exchanged, so that the larger value comes first. The pass therefore reads, and may write, values[k+1]. */ +void bubble (array t, size_t k) +{ + for (size_t i=0 ; i<=k ; i++) + if (t.values[i+1] > t.values[i]) + { // swap [i] and [i+1] + int tmp = t.values[i+1]; + t.values[i+1] = t.values[i]; + t.values[i] = tmp; + } +} +/* sort: calls bubble (t, i) for i going from t.size-1 down to 0 */ +void sort (array t) +{ + for (ssize_t i=t.size-1 ; i>=0 ; i--) + bubble (t, i); +} +/* display: prints every value followed by a tab, then a newline */ +void display (array t) +{ + for (size_t i=0 ; i<t.size ; i++) + printf ("%d\t", t.values[i]); + printf("\n"); +} +/* main: expects <nb elems> and <max val> (converted with atoi, not validated). Allocates the array with malloc (result not checked), fills it with random values after seeding rand with time (NULL), prints it, sorts it and prints it again. The array is not passed to free () before returning. */ +int main (int argc, char ** argv) +{ + if (argc < 3 ) + { + fprintf(stderr, "usage: %s <nb elems> <max val>\n", argv[0]); + return 1; + } + + array t; // array of ints with its size + + t.size = atoi (argv[1]); // parsing first argument + t.values = malloc (t.size*sizeof(int)); // allocation of the array + int m = atoi (argv[2]); // parsing second argument + srand (time (NULL)); // initialize the random number generator + fill (t, m); // randomly filling the array + + printf ("array, before sorting:\n"); + display (t); + + sort (t); + printf ("array, sorted :\n"); + display (t); + + return EXIT_SUCCESS; +} diff --git a/cnp.c b/cnp.c new file mode 100644 index 0000000000000000000000000000000000000000..aa5fefd3c749a2b8e6bee7de02e80dbd21900b1e --- /dev/null +++ b/cnp.c @@ -0,0 +1,40 @@ +/* cnp.c: program studied in the "Buggy Computation" section of README.md; it prints the value computed by cnp () for the two integers given as arguments. */ +#include <stdlib.h> +#include <stdio.h> + +/* + C(n,p) = C(n-1,p-1) + C(n-1,p) + = ... + = n! / (p!*(n-p)!) + = n*(n-1)*...*(n-p+1) / (1*2*...*p) + = n/1 * (n-1)/2 * ... 
* (n-p+1)/p + + C(6,3) = 6/1 * 5/2 * 4/3 = 6 * 5/2 * 4/3 = 15 * 4/3 = 20 +*/ +/* cnp: starts from r = 1 and, for i = 0 .. p-1, multiplies r by the unsigned integer quotient (n-i) / (i+1), then returns r. The quotient is evaluated (and truncated) before the multiplication takes place. */ +unsigned long cnp (unsigned long n, unsigned long p) +{ + unsigned long r = 1; + for (unsigned long i=0 ; i<p ; i++) + { + r *= (n-i) / (i+1); + } + return r; +} +/* main: expects exactly two arguments <n> and <p>, otherwise prints a usage message on stderr and returns EXIT_FAILURE. The arguments are converted with atol (not validated) and cnp (n, p) is printed followed by a newline. */ +int main (int argc, char * argv []) +{ + if (argc != 3) + { + fprintf (stderr, "usage: %s <n> <p>\n", argv[0]); + return EXIT_FAILURE; + } + unsigned long n = atol (argv[1]); + unsigned long p = atol (argv[2]); + + unsigned long r = cnp (n, p); + printf ("%lu\n", r); + + return EXIT_SUCCESS; +} + diff --git a/makefile b/makefile new file mode 100644 index 0000000000000000000000000000000000000000..b04866c76a47369ba4f94451345f3bfdc765b60a --- /dev/null +++ b/makefile @@ -0,0 +1,22 @@ + +CC=gcc +CFLAGS=-Wall -Wextra -g + +PROGS=cnp bubble rpn wordlist + +all: $(PROGS) + +# Here all rules would be like: +# +# someprog: someprog.c +# $(CC) $(CFLAGS) $< -o $@ +# +# It turns out "make" knows this: if it has to build "someprog" +# and finds "someprog.c", it will do the right thing. +# +# Therefore we don't need to write any rule by ourselves + + +# Useful/important when using "git": always "make clean" before commit/push +clean: + rm -f $(PROGS) diff --git a/rpn.c b/rpn.c new file mode 100644 index 0000000000000000000000000000000000000000..3276675fa41ea122e009f53e6afba00929f280d7 --- /dev/null +++ b/rpn.c @@ -0,0 +1,79 @@ + +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* stack cells */ +struct stack { + double num; + struct stack * prev; +}; +typedef struct stack stack_t; + +/* top: returns the value in the top element. 
The stack remains unaffected */ +double top (stack_t * s) +{ + return s->num; +} +/* push: a new element on top of the stack, returning the new top. The new cell is the local variable "t" of this function (no memory is allocated) and the value returned is the address of that local variable. */ +stack_t * push (double n, stack_t * p) +{ + stack_t t; + t.num = n; + t.prev = p; + return &t; +} +/* pop: forget the top element, return what's below. The cell "s" itself is neither modified nor released; "s" is dereferenced without a NULL check. */ +stack_t * pop (stack_t * s) +{ + return s->prev; +} + +/* isnum: true if "s" is the representation of a number, and if so + assigns its value to the floating point number pointed to by "v". The conversion is done by sscanf with "%lf"; only the conversion count is checked, so characters following the number are not examined. */ +bool isnum (const char * s, double * v) +{ + return (sscanf (s, "%lf", v) == 1); +} + +/* process arguments as a reverse polish notation expression: an argument accepted by isnum is pushed; "+" and "-" take v1 from the top of the stack, then v2 from the new top, and push v1 + v2 or v1 - v2; any other argument is reported on stderr and skipped. The value on top of the stack is printed at the end. "val" is handed to isnum as declared, without being made to point to a double first, and the stack is not checked for emptiness before top or pop. */ +int main (int argc, char * argv []) +{ + stack_t * stack = NULL; + for (int i=1 ; i<argc ; i++) + { + double * val; + if (isnum (argv[i], val)) + { + stack = push (*val, stack); + } + else if (strcmp (argv[i], "+") == 0) + { + double v1, v2, r; + v1 = top (stack); + stack = pop (stack); + v2 = top (stack); + stack = pop (stack); + r = v1 + v2; + stack = push (r, stack); + } + else if (strcmp (argv[i], "-") == 0) + { + double v1, v2, r; + v1 = top (stack); + stack = pop (stack); + v2 = top (stack); + stack = pop (stack); + r = v1 - v2; + stack = push (r, stack); + } + else + { + fprintf (stderr, "syntax error: '%s' (ignored)\n", argv[i]); + } + } + printf ("%lf\n", top (stack)); + + return EXIT_SUCCESS; +} diff --git a/wordlist.c b/wordlist.c new file mode 100644 index 0000000000000000000000000000000000000000..d3779ee95df95537dffbf249b88fb7ff115a85b4 --- /dev/null +++ b/wordlist.c @@ -0,0 +1,88 @@ +/* wordlist.c: program studied in the "Memory Allocation Discipline" section of README.md: reads lines from standard input into a sorted singly-linked list without duplicates and prints the list. */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* list cell: a pointer to the characters of a word and a pointer to the next cell (NULL at the end of the list) */ +struct list { + char * word; + struct list * next; +}; +typedef struct list list_t; + + +// This places the given "word" in a cell at the head of the list +// pointing its tail to the given "next" list +list_t * list_cons (char * word, list_t * next) +{ + list_t * l = malloc (sizeof (list_t)); + if (l == NULL) + { + perror ("list_cons"); + exit (EXIT_FAILURE); + } + 
l->word = word; + l->next = next; + return l; +} + +// This inserts the "word" into the "list", and may return a new list +// (when the word is first) or the updated "list" otherwise (the list is returned as is when strcmp finds the word already present). The "word" pointer itself is handed to list_cons: the characters are not copied. +list_t * list_insert_sorted (char * word, list_t * list) +{ + if (list == NULL) /* empty list */ + { + return list_cons (word, NULL); + } + else + { + int cmp = strcmp (word, list->word); + if (cmp == 0) /* word already here */ + { + return list; + } + else if (cmp < 0) /* "word" must appear before the head of "list" */ + { + return list_cons (word, list); + } + else /* too early for "word", go on traversing "list" */ + { + list->next = list_insert_sorted (word, list->next); + return list; + } + } +} + +// Deallocates all cells used by "list": when "list" is not NULL, calls free on list->word, then on the cell itself, and then recurses on list->next (read after the cell has been passed to free) +void list_free (list_t * list) +{ + if (list != NULL) + { + free (list->word); + free (list); + list_free (list->next); + } +} + +/* main: reads standard input line by line with getline, always passing the same "linetext" and "linesize" variables; overwrites the last character of each line with the NUL character and hands "linetext" to list_insert_sorted. The list is then printed one word per line. The call to list_free is commented out and "linetext" is not passed to free. */ +int main (void) +{ + char * linetext = NULL; + size_t linesize = 0; + list_t * wordlist = NULL; + + while (getline (&linetext, &linesize, stdin) != -1) + { + size_t z = strlen (linetext) - 1; + linetext[z] = '\0'; /* crush '\n' left by getline */ + wordlist = list_insert_sorted (linetext, wordlist); + } + + for (list_t * l=wordlist ; l!=NULL ; l=l->next) + printf ("%s\n", l->word); + + // list_free (wordlist); + + return EXIT_SUCCESS; +} + 