mirror of
https://github.com/BoredDevNL/BoredOS.git
synced 2026-05-15 10:48:38 +00:00
Extend grep with -r, -v, -l, -w and -x flags
Adds the following options to grep, based on maintainer feedback:
- `-r` / `-R` — recursive search through directories
- `-v` — invert match, print non-matching lines
- `-l` — print only filenames that contain matches
- `-w` — match whole words only
- `-x` — match whole lines only

When more than one file is searched, each output line is automatically prefixed with the filename.
This commit is contained in:
parent
e313e9dfcc
commit
7a6769c2ec
1 changed files with 195 additions and 81 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright (c) 2023-2026 Chris (boreddevnl)
|
// Copyright (c) 2026 maro (whitehai11)
|
||||||
// This software is released under the GNU General Public License v3.0. See LICENSE file for details.
|
// This software is released under the GNU General Public License v3.0. See LICENSE file for details.
|
||||||
// This header needs to maintain in any file it is present in, as per the GPL license terms.
|
// This header needs to maintain in any file it is present in, as per the GPL license terms.
|
||||||
// BOREDOS_APP_DESC: Search for text inside a file.
|
// BOREDOS_APP_DESC: Search for text inside a file.
|
||||||
|
|
@ -10,6 +10,24 @@
|
||||||
|
|
||||||
#define READ_BUF_SIZE 4096
|
#define READ_BUF_SIZE 4096
|
||||||
#define LINE_BUF_SIZE 1024
|
#define LINE_BUF_SIZE 1024
|
||||||
|
#define MAX_PATH 512
|
||||||
|
#define MAX_ENTRIES 256
|
||||||
|
|
||||||
|
// Flags
|
||||||
|
static int g_show_numbers = 0;
|
||||||
|
static int g_ignore_case = 0;
|
||||||
|
static int g_count_only = 0;
|
||||||
|
static int g_invert = 0; // -v
|
||||||
|
static int g_files_only = 0; // -l
|
||||||
|
static int g_word_match = 0; // -w
|
||||||
|
static int g_line_match = 0; // -x
|
||||||
|
static int g_recursive = 0; // -r / -R
|
||||||
|
static int g_multi_file = 0; // more than one file → prefix output with filename
|
||||||
|
|
||||||
|
static const char *g_pattern = NULL;
|
||||||
|
|
||||||
|
// Total match count across all files (used for -c with -r)
|
||||||
|
static int g_total_matches = 0;
|
||||||
|
|
||||||
static int sc_strcmp(const char *a, const char *b) {
|
static int sc_strcmp(const char *a, const char *b) {
|
||||||
while (*a && *a == *b) { a++; b++; }
|
while (*a && *a == *b) { a++; b++; }
|
||||||
|
|
@ -17,14 +35,19 @@ static int sc_strcmp(const char *a, const char *b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_usage(void) {
|
static void print_usage(void) {
|
||||||
printf("Usage: grep [options] <text> <file>\n");
|
printf("Usage: grep [options] <pattern> <file> [file...]\n");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Search for text inside a file.\n");
|
printf("Search for text inside files.\n");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Options:\n");
|
printf("Options:\n");
|
||||||
printf(" -n Show line numbers\n");
|
printf(" -n Show line numbers\n");
|
||||||
printf(" -i Case-insensitive search\n");
|
printf(" -i Case-insensitive search\n");
|
||||||
printf(" -c Print match count only\n");
|
printf(" -c Print match count only\n");
|
||||||
|
printf(" -v Invert match (print non-matching lines)\n");
|
||||||
|
printf(" -l Print only filenames with matches\n");
|
||||||
|
printf(" -w Match whole words only\n");
|
||||||
|
printf(" -x Match whole lines only\n");
|
||||||
|
printf(" -r, -R Recursive search in directories\n");
|
||||||
printf(" -h Show this help\n");
|
printf(" -h Show this help\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -33,6 +56,26 @@ static char to_lower(char c) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int is_word_char(char c) {
|
||||||
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
||||||
|
(c >= '0' && c <= '9') || c == '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if needle appears in haystack at position i as a whole word */
|
||||||
|
static int match_at(const char *haystack, int i, int h_len,
|
||||||
|
const char *needle, int n_len, int ignore_case) {
|
||||||
|
for (int j = 0; j < n_len; j++) {
|
||||||
|
char h = ignore_case ? to_lower(haystack[i + j]) : haystack[i + j];
|
||||||
|
char n = ignore_case ? to_lower(needle[j]) : needle[j];
|
||||||
|
if (h != n) return 0;
|
||||||
|
}
|
||||||
|
if (g_word_match) {
|
||||||
|
if (i > 0 && is_word_char(haystack[i - 1])) return 0;
|
||||||
|
if (i + n_len < h_len && is_word_char(haystack[i + n_len])) return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
static int str_contains(const char *haystack, const char *needle, int ignore_case) {
|
static int str_contains(const char *haystack, const char *needle, int ignore_case) {
|
||||||
int h_len = (int)strlen(haystack);
|
int h_len = (int)strlen(haystack);
|
||||||
int n_len = (int)strlen(needle);
|
int n_len = (int)strlen(needle);
|
||||||
|
|
@ -41,31 +84,146 @@ static int str_contains(const char *haystack, const char *needle, int ignore_cas
|
||||||
if (n_len > h_len) return 0;
|
if (n_len > h_len) return 0;
|
||||||
|
|
||||||
for (int i = 0; i <= h_len - n_len; i++) {
|
for (int i = 0; i <= h_len - n_len; i++) {
|
||||||
int match = 1;
|
if (match_at(haystack, i, h_len, needle, n_len, ignore_case))
|
||||||
for (int j = 0; j < n_len; j++) {
|
return 1;
|
||||||
char h = ignore_case ? to_lower(haystack[i + j]) : haystack[i + j];
|
|
||||||
char n = ignore_case ? to_lower(needle[j]) : needle[j];
|
|
||||||
if (h != n) { match = 0; break; }
|
|
||||||
}
|
|
||||||
if (match) return 1;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int line_matches(const char *line) {
|
||||||
|
if (g_line_match) {
|
||||||
|
/* Whole line must equal pattern */
|
||||||
|
int h_len = (int)strlen(line);
|
||||||
|
int n_len = (int)strlen(g_pattern);
|
||||||
|
if (h_len != n_len) return 0;
|
||||||
|
return match_at(line, 0, h_len, g_pattern, n_len, g_ignore_case);
|
||||||
|
}
|
||||||
|
return str_contains(line, g_pattern, g_ignore_case);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Grep a single open file descriptor, printing results prefixed by filename if needed */
|
||||||
|
static int grep_fd(int fd, const char *filename) {
|
||||||
|
static char read_buf[READ_BUF_SIZE];
|
||||||
|
static char line[LINE_BUF_SIZE];
|
||||||
|
int line_pos = 0;
|
||||||
|
int line_num = 0;
|
||||||
|
int match_cnt = 0;
|
||||||
|
|
||||||
|
/* Helper: process one complete line */
|
||||||
|
#define PROCESS_LINE() do { \
|
||||||
|
line[line_pos] = '\0'; \
|
||||||
|
line_num++; \
|
||||||
|
int matched = line_matches(line); \
|
||||||
|
if (g_invert) matched = !matched; \
|
||||||
|
if (matched) { \
|
||||||
|
match_cnt++; \
|
||||||
|
if (!g_count_only && !g_files_only) { \
|
||||||
|
if (g_multi_file) printf("%s:", filename); \
|
||||||
|
if (g_show_numbers) printf("%d:", line_num); \
|
||||||
|
printf("%s\n", line); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
line_pos = 0; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
int bytes = sys_read(fd, read_buf, READ_BUF_SIZE);
|
||||||
|
if (bytes <= 0) break;
|
||||||
|
|
||||||
|
for (int i = 0; i < bytes; i++) {
|
||||||
|
char c = read_buf[i];
|
||||||
|
if (c == '\n' || line_pos >= LINE_BUF_SIZE - 1) {
|
||||||
|
PROCESS_LINE();
|
||||||
|
} else if (c != '\r') {
|
||||||
|
line[line_pos++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (line_pos > 0) PROCESS_LINE();
|
||||||
|
|
||||||
|
#undef PROCESS_LINE
|
||||||
|
|
||||||
|
if (g_count_only)
|
||||||
|
printf("%s%d\n", g_multi_file ? filename : "", match_cnt > 0 ? match_cnt : 0);
|
||||||
|
|
||||||
|
if (g_files_only && match_cnt > 0)
|
||||||
|
printf("%s\n", filename);
|
||||||
|
|
||||||
|
g_total_matches += match_cnt;
|
||||||
|
return match_cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int grep_file(const char *path) {
|
||||||
|
int fd = sys_open(path, "r");
|
||||||
|
if (fd < 0) {
|
||||||
|
printf("grep: cannot open '%s'\n", path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int n = grep_fd(fd, path);
|
||||||
|
sys_close(fd);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void grep_recursive(const char *path) {
|
||||||
|
FAT32_FileInfo info;
|
||||||
|
if (sys_get_file_info(path, &info) < 0) {
|
||||||
|
printf("grep: cannot access '%s'\n", path);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!info.is_directory) {
|
||||||
|
grep_file(path);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FAT32_FileInfo entries[MAX_ENTRIES];
|
||||||
|
int count = sys_list(path, entries, MAX_ENTRIES);
|
||||||
|
if (count < 0) return;
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
const char *name = entries[i].name;
|
||||||
|
if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
char full[MAX_PATH];
|
||||||
|
int plen = (int)strlen(path);
|
||||||
|
int nlen = (int)strlen(name);
|
||||||
|
if (plen + 1 + nlen + 1 > MAX_PATH) continue;
|
||||||
|
|
||||||
|
int slash = (plen == 1 && path[0] == '/') ? 0 : 1;
|
||||||
|
for (int j = 0; j < plen; j++) full[j] = path[j];
|
||||||
|
if (slash) full[plen] = '/';
|
||||||
|
for (int j = 0; j <= nlen; j++) full[plen + slash + j] = name[j];
|
||||||
|
|
||||||
|
if (entries[i].is_directory)
|
||||||
|
grep_recursive(full);
|
||||||
|
else
|
||||||
|
grep_file(full);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int show_numbers = 0;
|
|
||||||
int ignore_case = 0;
|
|
||||||
int count_only = 0;
|
|
||||||
int arg_offset = 1;
|
int arg_offset = 1;
|
||||||
|
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
if (argv[i][0] != '-') break;
|
if (argv[i][0] != '-') break;
|
||||||
if (sc_strcmp(argv[i], "-n") == 0) {
|
if (sc_strcmp(argv[i], "-n") == 0) {
|
||||||
show_numbers = 1; arg_offset++;
|
g_show_numbers = 1; arg_offset++;
|
||||||
} else if (sc_strcmp(argv[i], "-i") == 0) {
|
} else if (sc_strcmp(argv[i], "-i") == 0) {
|
||||||
ignore_case = 1; arg_offset++;
|
g_ignore_case = 1; arg_offset++;
|
||||||
} else if (sc_strcmp(argv[i], "-c") == 0) {
|
} else if (sc_strcmp(argv[i], "-c") == 0) {
|
||||||
count_only = 1; arg_offset++;
|
g_count_only = 1; arg_offset++;
|
||||||
|
} else if (sc_strcmp(argv[i], "-v") == 0) {
|
||||||
|
g_invert = 1; arg_offset++;
|
||||||
|
} else if (sc_strcmp(argv[i], "-l") == 0) {
|
||||||
|
g_files_only = 1; arg_offset++;
|
||||||
|
} else if (sc_strcmp(argv[i], "-w") == 0) {
|
||||||
|
g_word_match = 1; arg_offset++;
|
||||||
|
} else if (sc_strcmp(argv[i], "-x") == 0) {
|
||||||
|
g_line_match = 1; arg_offset++;
|
||||||
|
} else if (sc_strcmp(argv[i], "-r") == 0 ||
|
||||||
|
sc_strcmp(argv[i], "-R") == 0) {
|
||||||
|
g_recursive = 1; arg_offset++;
|
||||||
} else if (sc_strcmp(argv[i], "-h") == 0 ||
|
} else if (sc_strcmp(argv[i], "-h") == 0 ||
|
||||||
sc_strcmp(argv[i], "--help") == 0) {
|
sc_strcmp(argv[i], "--help") == 0) {
|
||||||
print_usage();
|
print_usage();
|
||||||
|
|
@ -76,73 +234,29 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (argc - arg_offset < 1) {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_pattern = argv[arg_offset];
|
||||||
|
|
||||||
|
// Need at least a path when not reading stdin
|
||||||
if (argc - arg_offset < 2) {
|
if (argc - arg_offset < 2) {
|
||||||
print_usage();
|
print_usage();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *pattern = argv[arg_offset];
|
// Multiple files → prefix output with filename
|
||||||
const char *path = argv[arg_offset + 1];
|
g_multi_file = (argc - arg_offset > 2) || g_recursive;
|
||||||
|
|
||||||
int fd = sys_open(path, "r");
|
for (int i = arg_offset + 1; i < argc; i++) {
|
||||||
if (fd < 0) {
|
if (g_recursive) {
|
||||||
printf("grep: cannot open '%s'\n", path);
|
grep_recursive(argv[i]);
|
||||||
return 1;
|
} else {
|
||||||
}
|
grep_file(argv[i]);
|
||||||
|
|
||||||
static char read_buf[READ_BUF_SIZE];
|
|
||||||
static char line[LINE_BUF_SIZE];
|
|
||||||
int line_pos = 0;
|
|
||||||
int line_num = 0;
|
|
||||||
int match_cnt = 0;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
int bytes = sys_read(fd, read_buf, READ_BUF_SIZE);
|
|
||||||
if (bytes <= 0) break;
|
|
||||||
|
|
||||||
for (int i = 0; i < bytes; i++) {
|
|
||||||
char c = read_buf[i];
|
|
||||||
|
|
||||||
if (c == '\n' || line_pos >= LINE_BUF_SIZE - 1) {
|
|
||||||
line[line_pos] = '\0';
|
|
||||||
line_num++;
|
|
||||||
|
|
||||||
if (str_contains(line, pattern, ignore_case)) {
|
|
||||||
match_cnt++;
|
|
||||||
if (!count_only) {
|
|
||||||
if (show_numbers)
|
|
||||||
printf("%d: %s\n", line_num, line);
|
|
||||||
else
|
|
||||||
printf("%s\n", line);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
line_pos = 0;
|
return g_total_matches > 0 ? 0 : 1;
|
||||||
} else if (c != '\r') {
|
|
||||||
line[line_pos++] = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle last line if file doesn't end with newline
|
|
||||||
if (line_pos > 0) {
|
|
||||||
line[line_pos] = '\0';
|
|
||||||
line_num++;
|
|
||||||
if (str_contains(line, pattern, ignore_case)) {
|
|
||||||
match_cnt++;
|
|
||||||
if (!count_only) {
|
|
||||||
if (show_numbers)
|
|
||||||
printf("%d: %s\n", line_num, line);
|
|
||||||
else
|
|
||||||
printf("%s\n", line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sys_close(fd);
|
|
||||||
|
|
||||||
if (count_only)
|
|
||||||
printf("%d\n", match_cnt);
|
|
||||||
|
|
||||||
return match_cnt > 0 ? 0 : 1;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue