Question:
The exercise asks me to read information in a file, being them, cpf, name, email and age of several people, save in a struct, sort in ascending order by age, if ages equal by cpf, and print in another file with the same format as received, all information from a person is separated by a comma and from one person to another by a different line. And I already know the maximum size of the information, but I don't know the number of lines. I did it using static memory but it must create an array of structs using memory and dynamics and I'm not able to do that.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef struct{
char CPF[12];
char nome[41];
char email[31];
int idade;
}Dado;
int main()
{
FILE *arq, *arqout;
char ch;
int num=0,i,j,aux;
arq = fopen("read.txt","r");
while( fscanf(arq,"%c", &ch)!= EOF )
if(ch == '\n')
num++;
rewind(arq);
Dado pimpolho[num+1];
for (i=0;i<num;i++)
{
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].CPF[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].CPF[j] = '\0';
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].nome[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].nome[j] = '\0';
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].email[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].email[j] = '\0';
fscanf(arq, "%d", &pimpolho[i].idade);
fscanf(arq,"%c", &ch);
}
for (i = num - 1; i > 0; i--)
for (j = 0; j < i; j++)
if (pimpolho[j].idade > pimpolho[j+1].idade)
{
pimpolho[num] = pimpolho[j];
pimpolho[j] = pimpolho[j+1];
pimpolho[j+1] = pimpolho[num];
}
for (i = num - 1; i > 0; i--)
for (j = 0; j < i; j++)
if (pimpolho[j].idade == pimpolho[j+1].idade)
{
aux = strncmp(pimpolho[j].CPF,pimpolho[j+1].CPF,11);
if (aux>0)
{
pimpolho[num] = pimpolho[j];
pimpolho[j] = pimpolho[j+1];
pimpolho[j+1] = pimpolho[num];
}
}
arqout = fopen("write.txt","w");
for (i=0;i<num;i++)
{
fprintf(arqout,"%s,%s,%s,%d\n",pimpolho[i].CPF,pimpolho[i].nome,pimpolho[i].email,pimpolho[i].idade);
}
fclose(arqout);
fclose(arq);
return 0;
}
Is it possible to change this with some changes?
Answer:
Dynamic allocation with vector
Dynamic allocation is as simple as calling malloc
directly:
Dado *pimpolho = malloc(sizeof(Dado) * (num+1));
The rest you have works because when you do pimpolho[i]
it is equivalent to doing *(pimpolho + i)
. It's simply a matter of whether you use array syntax or pointer syntax, with array syntax being simpler and so you should use it when possible.
Note : As I said in the comment, for the example that has this does not bring any advantage, on the contrary, it makes the allocation more complicated, as well as it can make some parts of the code more complicated (in this case not) and forces you to worry free up memory with free
when you no longer need it. In this case, as you are going to use the vector until the end of the program, it is not worth freeing the memory because it will be freed at the end of the program, but in other cases you have to do it under pain of having memory leaks.
So don't do this in your programs unless it has a concrete purpose and advantages.
refactoring
I don't want to miss out on some important refactorings you can do that aren't complicated. Avoid repetition of logic as much as possible, as this causes much more problems than it seems. Looking at the reading of the fields it has:
for (i=0;i<num;i++)
{
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].CPF[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].CPF[j] = '\0';
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].nome[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].nome[j] = '\0';
j=0;
fscanf(arq,"%c", &ch);
while( ch != ',')
{
pimpolho[i].email[j] = ch;
fscanf(arq,"%c", &ch);
j++;
}
pimpolho[i].email[j] = '\0';
fscanf(arq, "%d", &pimpolho[i].idade);
fscanf(arq,"%c", &ch);
}
This actually corresponds to the reading of the 4 fields, the CPF, name, email and age, the first 3 being the same. But the code was repeated. Not only is it more difficult to read, but it is longer and prone to being wrong when you need to change it because you have to change it in all the right places. Whenever this happens, abstract the same logic to a function and call it. Now see how much better it gets:
void ler_string_arq(FILE* arq, char *campo_destino){
int letra = 0;
char ch;
fscanf(arq,"%c", &ch);
while( ch != ',') {
campo_destino[letra] = ch;
fscanf(arq,"%c", &ch);
letra++;
}
campo_destino[letra] = '\0';
}
int main() {
//...
for (i=0; i<num; i++) {
ler_string_arq(arq, pimpolho[i].CPF);
ler_string_arq(arq, pimpolho[i].nome);
ler_string_arq(arq, pimpolho[i].email);
fscanf(arq, "%d", &pimpolho[i].idade);
fscanf(arq,"%c", &ch);
}
In sorting, it has the same problem because it repeats two sorting logics, first sorting by age, and then sorting those who have the same age:
for (i = num - 1; i > 0; i--)
for (j = 0; j < i; j++)
if (pimpolho[j].idade > pimpolho[j+1].idade)
{
pimpolho[num] = pimpolho[j];
pimpolho[j] = pimpolho[j+1];
pimpolho[j+1] = pimpolho[num];
}
for (i = num - 1; i > 0; i--)
for (j = 0; j < i; j++)
if (pimpolho[j].idade == pimpolho[j+1].idade)
{
aux = strncmp(pimpolho[j].CPF,pimpolho[j+1].CPF,11);
if (aux>0)
{
pimpolho[num] = pimpolho[j];
pimpolho[j] = pimpolho[j+1];
pimpolho[j+1] = pimpolho[num];
}
}
This is entirely unnecessary as you can do both at once:
for (i = num - 1; i > 0; i--)
for (j = 0; j < i; j++)
if (pimpolho[j].idade > pimpolho[j+1].idade ||
(pimpolho[j].idade == pimpolho[j+1].idade && strncmp(pimpolho[j].CPF,pimpolho[j+1].CPF,11) > 0)){
pimpolho[num] = pimpolho[j];
pimpolho[j] = pimpolho[j+1];
pimpolho[j+1] = pimpolho[num];
}
I won't go into further detail on this part, but keep in mind that the exchange made in sorting is by copying. This can become quite inefficient if the amount of data is very large as it forces you to copy lots of bytes from one side to the other to make the switch. The way to solve it is to use an array of pointers instead of an array with all the objects directly, but this involves changing almost all the code, and is probably overkill for the exercise in question.
There are other details that I can improve of course, but I only focused on the ones that are stronger and that have the most impact on the code in general.
Dynamic allocation with list
The small difference of using a list instead of a vector already makes perfect sense, because actually when using a list you don't need to know how many elements it has in advance. This makes it so that you don't have to go through the file twice, the first being to find the number of people there are. You can simply read, allocate and link the pointers to each other. Now the code itself is more complicated because it involves allocations, pointer exchanges, memory freeing, etc…
First it implies changing the structure so that each person can have a pointer to the next one:
typedef struct Dado{
// ^----
char CPF[12];
char nome[41];
char email[31];
int idade;
struct Dado* proximo; //<---
} Dado;
Then you have to have a way of realizing when you've reached the end of the file. The most direct is to interpret this by reading the first field, the CPF
. For this, the simplest thing is to change the ler_string_arq
function to return 0
when it has reached the end of the file:
int ler_string_arq(FILE* arq, char *campo_destino){
//^----tipo int agora
int letra = 0;
char ch;
if (fscanf(arq,"%c", &ch) != 1){ //se não leu um char então chegou ao fim
return 0;
}
while( ch != ',') {
campo_destino[letra] = ch;
fscanf(arq,"%c", &ch);
letra++;
}
campo_destino[letra] = '\0';
return 1;
}
Then the reading while
is now also quite different:
Dado *inicio_lista = NULL, *ultima = NULL; //ponteiros para lista e ultima pessoa
while(1) {
Dado *pessoa = malloc(sizeof(Dado)); //cria nova pessoa com alocação dinamica
pessoa->proximo = NULL; //proximo da pessoa criada é nulo
if (inicio_lista == NULL){ //se ainda nao tem nenhuma esta é a primeira
inicio_lista = pessoa;
}
if (ultima != NULL) { //se já tem pessoas liga a anterior a esta
ultima->proximo = pessoa;
}
if (!ler_string_arq(arq, pessoa->CPF)){ //se apanhou EOF
free(pessoa);
ultima->proximo = NULL;
break; //sai
}
ultima = pessoa;
ler_string_arq(arq, pessoa->nome);
ler_string_arq(arq, pessoa->email);
fscanf(arq, "%d", &pessoa->idade);
fscanf(arq,"%c", &ch);
}
fclose(arq);
For writing it would be quite the same changing mostly the syntax:
arqout = fopen("write.txt","w");
Dado* pessoa = inicio_lista;
while (pessoa!= NULL){ //enquanto nao chega ao fim da lista
fprintf(arqout,"%s,%s,%s,%d\n", pessoa->CPF, pessoa->nome, pessoa->email, pessoa->idade);
pessoa = pessoa->proximo; //avança para a proxima pessoa
}
Note that I purposely omitted the people sorting part, because now with a linked list this gets a lot more complicated and I don't want to go any further as the answer is already quite big. Normally, these sorts in lists are done with Merge Sort and they end up coming in handy in the scenario where I indicated to be sorting with pointers and that's why they are quite efficient.
Furthermore, the complexity between the two sorting algorithms is quite different as the Merge Sort runs in O(nlogn)
while the bubble sort it has runs in O(n²)
.